Source code

Revision control

Other Tools

1
/* This Source Code Form is subject to the terms of the Mozilla Public
2
* License, v. 2.0. If a copy of the MPL was not distributed with this
3
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5
"use strict";
6
7
var EXPORTED_SYMBOLS = ["SearchTelemetry"];
8
9
const { XPCOMUtils } = ChromeUtils.import(
11
);
12
13
XPCOMUtils.defineLazyModuleGetters(this, {
15
});
16
17
// The various histograms and scalars that we report to.
// SEARCH_COUNTS is a keyed histogram; the two scalars are keyed by provider
// name.
const SEARCH_COUNTS_HISTOGRAM_KEY = "SEARCH_COUNTS";
const SEARCH_WITH_ADS_SCALAR = "browser.search.with_ads";
const SEARCH_AD_CLICKS_SCALAR = "browser.search.ad_clicks";
21
22
/**
 * Used to identify various parameters used with partner search providers.
 * This is an object keyed by provider name, where each value has the
 * following structure:
 * - {regexp} regexp
 *     The regular expression used to match the url for the search provider's
 *     main page.
 * - {string} queryParam
 *     The query parameter name that indicates a search has been made.
 * - {string} [codeParam]
 *     The query parameter name that indicates a search provider's code.
 * - {array} [codePrefixes]
 *     An array of the possible string prefixes for a codeParam, indicating a
 *     partner search.
 * - {array} [followonParams]
 *     An array of parameter names that indicate this is a follow-on search.
 * - {array} [extraAdServersRegexps]
 *     An array of regular expressions used to determine if a link on a search
 *     page might be an advert.
 * - {array} [followonCookies]
 *     An array of cookie details, each of which should look like:
 *     - {string} [extraCodeParam]
 *         The query parameter name that indicates an extra search provider's
 *         code.
 *     - {array} [extraCodePrefixes]
 *         An array of the possible string prefixes for an extraCodeParam,
 *         indicating a partner search.
 *     - {string} host
 *         Host name to which the cookie is linked.
 *     - {string} name
 *         Name of the cookie to look for that should contain the search
 *         provider's code.
 *     - {string} codeParam
 *         The cookie parameter name that indicates a search provider's code.
 *     - {array} codePrefixes
 *         An array of the possible string prefixes for a codeParam,
 *         indicating a partner search.
 */
const SEARCH_PROVIDER_INFO = {
  google: {
    regexp: /^https:\/\/www\.google\.(?:.+)\/search/,
    queryParam: "q",
    codeParam: "client",
    codePrefixes: ["firefox"],
    followonParams: ["oq", "ved", "ei"],
    extraAdServersRegexps: [
      /^https:\/\/www\.google(?:adservices)?\.com\/(?:pagead\/)?aclk/,
    ],
  },
  duckduckgo: {
    regexp: /^https:\/\/duckduckgo\.com\//,
    queryParam: "q",
    codeParam: "t",
    codePrefixes: ["ff"],
    extraAdServersRegexps: [
      // The dot in the host name is escaped so that it only matches a literal
      // "." rather than any character.
      /^https:\/\/duckduckgo\.com\/y\.js/,
      /^https:\/\/www\.amazon\.(?:[a-z.]{2,24}).*(?:tag=duckduckgo-)/,
    ],
  },
  yahoo: {
    regexp: /^https:\/\/(?:.*)search\.yahoo\.com\/search/,
    queryParam: "p",
  },
  baidu: {
    regexp: /^https:\/\/www\.baidu\.com\/(?:s|baidu)/,
    queryParam: "wd",
    codeParam: "tn",
    codePrefixes: ["34046034_", "monline_"],
    followonParams: ["oq"],
  },
  bing: {
    regexp: /^https:\/\/www\.bing\.com\/search/,
    queryParam: "q",
    codeParam: "pc",
    codePrefixes: ["MOZ", "MZ"],
    followonCookies: [
      {
        extraCodeParam: "form",
        extraCodePrefixes: ["QBRE"],
        host: "www.bing.com",
        name: "SRCHS",
        codeParam: "PC",
        codePrefixes: ["MOZ", "MZ"],
      },
    ],
    extraAdServersRegexps: [
      /^https:\/\/www\.bing\.com\/acli?c?k/,
      /^https:\/\/www\.bing\.com\/fd\/ls\/GLinkPingPost\.aspx.*acli?c?k/,
    ],
  },
};
114
115
// Common prefix of the search preferences read by this module.
const BROWSER_SEARCH_PREF = "browser.search.";

// Defines a lazily-read `loggingEnabled` value on this module's global,
// backed by the "browser.search.log" preference and defaulting to false.
// LOG() consults it before emitting anything.
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "loggingEnabled",
  BROWSER_SEARCH_PREF + "log",
  false
);
123
124
/**
 * TelemetryHandler is the main class handling search telemetry. It primarily
 * deals with tracking of what pages are loaded into tabs.
 *
 * It handles the *in-content:sap* keys of the SEARCH_COUNTS histogram.
 */
class TelemetryHandler {
  constructor() {
    // _browserInfoByURL is a map of tracked search urls to objects containing:
    // * {object} info
    //   the search provider information associated with the url.
    // * {WeakSet} browsers
    //   a weak set of browsers that have the url loaded.
    // * {integer} count
    //   a manual count of browsers logged.
    // We keep a weak set of browsers, in case we miss something on our counts
    // and cause a memory leak - worst case our map is slightly bigger than it
    // needs to be.
    // The manual count is because WeakSet doesn't give us size/length
    // information, but we want to know when we can clean up our associated
    // entry.
    this._browserInfoByURL = new Map();
    this._initialized = false;
    this.__searchProviderInfo = null;
    this._contentHandler = new ContentHandler({
      browserInfoByURL: this._browserInfoByURL,
      findBrowserItemForURL: (...args) => this._findBrowserItemForURL(...args),
      getProviderInfoForURL: (...args) => this._getProviderInfoForURL(...args),
    });
  }

  /**
   * Initializes the TelemetryHandler and its ContentHandler. It will add
   * appropriate listeners to the window so that window opening and closing
   * can be tracked. Calling it again while initialized is a no-op.
   */
  init() {
    if (this._initialized) {
      return;
    }

    this._contentHandler.init();

    for (let win of Services.wm.getEnumerator("navigator:browser")) {
      this._registerWindow(win);
    }
    Services.wm.addListener(this);

    this._initialized = true;
  }

  /**
   * Uninitializes the TelemetryHandler and its ContentHandler. Calling it
   * while not initialized is a no-op.
   */
  uninit() {
    if (!this._initialized) {
      return;
    }

    this._contentHandler.uninit();

    for (let win of Services.wm.getEnumerator("navigator:browser")) {
      this._unregisterWindow(win);
    }
    Services.wm.removeListener(this);

    this._initialized = false;
  }

  /**
   * Handles the TabClose event received from the listeners.
   *
   * @param {object} event
   */
  handleEvent(event) {
    if (event.type != "TabClose") {
      Cu.reportError(`Received unexpected event type ${event.type}`);
      return;
    }

    this.stopTrackingBrowser(event.target.linkedBrowser);
  }

  /**
   * Test-only function, used to override the provider information, so that
   * unit tests can set it to easy to test values. Passing a falsy value
   * restores SEARCH_PROVIDER_INFO.
   *
   * Note that the `regexp` members of the supplied object are replaced in
   * place with RegExp instances.
   *
   * @param {object} infoByProvider @see SEARCH_PROVIDER_INFO for type information.
   */
  overrideSearchTelemetryForTests(infoByProvider) {
    if (infoByProvider) {
      for (let info of Object.values(infoByProvider)) {
        info.regexp = new RegExp(info.regexp);
      }
      this.__searchProviderInfo = infoByProvider;
    } else {
      this.__searchProviderInfo = SEARCH_PROVIDER_INFO;
    }
    this._contentHandler.overrideSearchTelemetryForTests(
      this.__searchProviderInfo
    );
  }

  /**
   * This may start tracking a tab based on the URL. If the URL matches a search
   * partner, and it has a code, then we'll start tracking it. This will aid
   * determining if it is a page we should be tracking for adverts.
   *
   * @param {object} browser The browser associated with the page.
   * @param {string} url The url that was loaded in the browser.
   */
  updateTrackingStatus(browser, url) {
    let info = this._checkURLForSerpMatch(url);
    if (!info) {
      this.stopTrackingBrowser(browser);
      return;
    }

    this._reportSerpPage(info, url);

    // If we have a code, then we also track this for potential ad clicks.
    if (!info.code) {
      return;
    }

    let item = this._browserInfoByURL.get(url);
    if (!item) {
      this._browserInfoByURL.set(url, {
        browsers: new WeakSet([browser]),
        info,
        count: 1,
      });
      return;
    }

    // Adding an existing member to a WeakSet is a no-op, so only bump the
    // manual count for browsers we are not already tracking for this url
    // (e.g. a reload of the same page). Otherwise the count could never drop
    // back to zero and the entry would never be cleaned up.
    if (!item.browsers.has(browser)) {
      item.browsers.add(browser);
      item.count++;
    }
  }

  /**
   * Stops tracking of a tab, for example the tab has loaded a different URL.
   * Entries whose count has dropped to zero are removed from the map.
   *
   * @param {object} browser The browser associated with the tab to stop being
   *                         tracked.
   */
  stopTrackingBrowser(browser) {
    for (let [url, item] of this._browserInfoByURL) {
      if (item.browsers.has(browser)) {
        item.browsers.delete(browser);
        item.count--;
      }

      if (!item.count) {
        this._browserInfoByURL.delete(url);
      }
    }
  }

  /**
   * Parts of the URL, like search params and hashes, may be mutated by scripts
   * on a page we're tracking. Since we don't want to keep track of that
   * ourselves in order to keep the list of browser objects a weak-referenced
   * set, we do optional fuzzy matching of URLs to fetch the most relevant item
   * that contains tracking information.
   *
   * @param {string} url URL to fetch the tracking data for.
   * @returns {object|null} null if `url` cannot be parsed or nothing tracked
   *   shares at least its host name. Otherwise, the best matching tracked
   *   item, containing the following members:
   *   - {WeakSet} browsers
   *     Set of browser elements that belong to `url`.
   *   - {object} info
   *     Info dictionary as returned by `_checkURLForSerpMatch`.
   *   - {number} count
   *     The number of browser element we can most accurately tell we're
   *     tracking, since they're inside a WeakSet.
   */
  _findBrowserItemForURL(url) {
    try {
      url = new URL(url);
    } catch (ex) {
      return null;
    }

    const compareURLs = (url1, url2) => {
      // In case of an exact match, well, that's an obvious winner.
      if (url1.href == url2.href) {
        return Infinity;
      }

      // Each step we get closer to the two URLs being the same, we increase the
      // score. The consumer of this method will use these scores to see which
      // of the URLs is the best match.
      let score = 0;
      if (url1.hostname != url2.hostname) {
        return score;
      }
      ++score;
      if (url1.pathname != url2.pathname) {
        return score;
      }
      ++score;
      for (let [key1, value1] of url1.searchParams) {
        // Let's not fuss about the ordering of search params, since the
        // score effect will solve that.
        if (url2.searchParams.has(key1)) {
          ++score;
          if (url2.searchParams.get(key1) == value1) {
            ++score;
          }
        }
      }
      if (url1.hash == url2.hash) {
        ++score;
      }
      return score;
    };

    let item = null;
    let currentBestMatch = 0;
    for (let [trackingURL, candidateItem] of this._browserInfoByURL) {
      // An exact match cannot be beaten, so stop looking.
      if (currentBestMatch === Infinity) {
        break;
      }
      try {
        // Make sure to cache the parsed URL object, since there's no reason to
        // do it twice.
        trackingURL =
          candidateItem._trackingURL ||
          (candidateItem._trackingURL = new URL(trackingURL));
      } catch (ex) {
        continue;
      }
      let score = compareURLs(url, trackingURL);
      if (score > currentBestMatch) {
        item = candidateItem;
        currentBestMatch = score;
      }
    }

    return item;
  }

  // nsIWindowMediatorListener

  /**
   * This is called when a new window is opened, and handles registration of
   * that window if it is a browser window.
   *
   * @param {nsIAppWindow} appWin The xul window that was opened.
   */
  onOpenWindow(appWin) {
    let win = appWin.docShell.domWindow;
    // The window type attribute is only inspected once the window has loaded.
    win.addEventListener(
      "load",
      () => {
        if (
          win.document.documentElement.getAttribute("windowtype") !=
          "navigator:browser"
        ) {
          return;
        }

        this._registerWindow(win);
      },
      { once: true }
    );
  }

  /**
   * Listener that is called when a window is closed, and handles deregistration of
   * that window if it is a browser window.
   *
   * @param {nsIAppWindow} appWin The xul window that was closed.
   */
  onCloseWindow(appWin) {
    let win = appWin.docShell.domWindow;

    if (
      win.document.documentElement.getAttribute("windowtype") !=
      "navigator:browser"
    ) {
      return;
    }

    this._unregisterWindow(win);
  }

  /**
   * Adds event listeners for the window and registers it with the content handler.
   *
   * @param {object} win The window to register.
   */
  _registerWindow(win) {
    this._contentHandler.registerWindow(win);
    win.gBrowser.tabContainer.addEventListener("TabClose", this);
  }

  /**
   * Removes event listeners for the window and unregisters it with the content
   * handler.
   *
   * @param {object} win The window to unregister.
   */
  _unregisterWindow(win) {
    for (let tab of win.gBrowser.tabs) {
      // Tracking is keyed on the browser element (see handleEvent), not on the
      // tab itself.
      this.stopTrackingBrowser(tab.linkedBrowser);
    }

    this._contentHandler.unregisterWindow(win);
    win.gBrowser.tabContainer.removeEventListener("TabClose", this);
  }

  /**
   * Searches for provider information for a given url.
   *
   * @param {string} url The url to match for a provider.
   * @param {boolean} useOnlyExtraAdServers If true, this will use the extra
   *   ad server regexp to match instead of the main regexp.
   * @returns {array|undefined} Returns an array of provider name and the
   *   provider information, or undefined if no provider matches.
   */
  _getProviderInfoForURL(url, useOnlyExtraAdServers = false) {
    let providers = Object.entries(this._searchProviderInfo);
    if (useOnlyExtraAdServers) {
      return providers.find(
        ([_, info]) =>
          !!info.extraAdServersRegexps &&
          info.extraAdServersRegexps.some(regexp => regexp.test(url))
      );
    }

    return providers.find(([_, info]) => info.regexp.test(url));
  }

  /**
   * Checks to see if a url is a search partner location, and determines the
   * provider and codes used.
   *
   * @param {string} url The url to match.
   * @returns {null|object} Returns null if there is no match found. Otherwise,
   *   returns an object of strings for provider, code and type.
   */
  _checkURLForSerpMatch(url) {
    let info = this._getProviderInfoForURL(url);
    if (!info) {
      return null;
    }
    let [provider, searchProviderInfo] = info;
    // Only the query string is of interest: drop the hash, then the part
    // before the "?".
    let queries = new URLSearchParams(url.split("#")[0].split("?")[1]);
    if (!queries.get(searchProviderInfo.queryParam)) {
      return null;
    }
    // Default to organic to simplify things.
    // We override type in the sap cases.
    let type = "organic";
    let code;
    if (searchProviderInfo.codeParam) {
      code = queries.get(searchProviderInfo.codeParam);
      if (
        code &&
        searchProviderInfo.codePrefixes.some(p => code.startsWith(p))
      ) {
        if (
          searchProviderInfo.followonParams &&
          searchProviderInfo.followonParams.some(p => queries.has(p))
        ) {
          type = "sap-follow-on";
        } else {
          type = "sap";
        }
      } else if (searchProviderInfo.followonCookies) {
        // Especially Bing requires lots of extra work related to cookies.
        for (let followonCookie of searchProviderInfo.followonCookies) {
          if (followonCookie.extraCodeParam) {
            let eCode = queries.get(followonCookie.extraCodeParam);
            if (
              !eCode ||
              !followonCookie.extraCodePrefixes.some(p => eCode.startsWith(p))
            ) {
              continue;
            }
          }

          // If this cookie is present, it's probably an SAP follow-on.
          // This might be an organic follow-on in the same session, but there
          // is no way to tell the difference.
          for (let cookie of Services.cookies.getCookiesFromHost(
            followonCookie.host,
            {}
          )) {
            if (cookie.name != followonCookie.name) {
              continue;
            }

            let [cookieParam, cookieValue] = cookie.value
              .split("=")
              .map(p => p.trim());
            if (
              cookieParam == followonCookie.codeParam &&
              // A cookie value without "=" yields no value part at all; skip
              // it rather than throwing on `undefined.startsWith`.
              cookieValue !== undefined &&
              followonCookie.codePrefixes.some(p => cookieValue.startsWith(p))
            ) {
              type = "sap-follow-on";
              code = cookieValue;
              break;
            }
          }
        }
      }
    }
    return { provider, type, code };
  }

  /**
   * Logs telemetry for a search provider visit.
   *
   * @param {object} info
   * @param {string} info.provider The name of the provider.
   * @param {string} info.type The type of search.
   * @param {string} [info.code] The code for the provider.
   * @param {string} url The url that was matched (for debug logging only).
   */
  _reportSerpPage(info, url) {
    let code = info.code || "none";
    let payload = `${info.provider}.in-content:${info.type}:${code}`;
    let histogram = Services.telemetry.getKeyedHistogramById(
      SEARCH_COUNTS_HISTOGRAM_KEY
    );
    histogram.add(payload);
    LOG(`${payload} for ${url}`);
  }

  /**
   * Returns the current search provider information in use, falling back to
   * SEARCH_PROVIDER_INFO when no override has been set.
   * @see SEARCH_PROVIDER_INFO
   */
  get _searchProviderInfo() {
    if (!this.__searchProviderInfo) {
      this.__searchProviderInfo = SEARCH_PROVIDER_INFO;
    }
    return this.__searchProviderInfo;
  }
}
565
566
/**
 * ContentHandler deals with handling telemetry of the content within a tab -
 * when ads detected and when they are selected.
 *
 * It handles the "browser.search.with_ads" and "browser.search.ad_clicks"
 * scalars.
 */
class ContentHandler {
  /**
   * Constructor.
   *
   * @param {object} options
   * @param {Map} options.browserInfoByURL The map of urls from TelemetryHandler.
   * @param {function} options.findBrowserItemForURL A function that looks up
   *   the tracked item that best matches a url.
   * @param {function} options.getProviderInfoForURL A function that obtains
   *   the provider information for a url.
   */
  constructor(options) {
    this._browserInfoByURL = options.browserInfoByURL;
    this._findBrowserItemForURL = options.findBrowserItemForURL;
    this._getProviderInfoForURL = options.getProviderInfoForURL;
  }

  /**
   * Initializes the content handler. This will also set up the shared data that is
   * shared with the SearchTelemetryChild actor, and start observing HTTP
   * activity.
   */
  init() {
    Services.ppmm.sharedData.set(
      "SearchTelemetry:ProviderInfo",
      SEARCH_PROVIDER_INFO
    );

    Cc["@mozilla.org/network/http-activity-distributor;1"]
      .getService(Ci.nsIHttpActivityDistributor)
      .addObserver(this);
  }

  /**
   * Uninitializes the content handler by removing the HTTP activity observer.
   */
  uninit() {
    Cc["@mozilla.org/network/http-activity-distributor;1"]
      .getService(Ci.nsIHttpActivityDistributor)
      .removeObserver(this);
  }

  /**
   * Receives a message from the SearchTelemetryChild actor.
   *
   * @param {object} msg
   */
  receiveMessage(msg) {
    if (msg.name != "SearchTelemetry:PageInfo") {
      LOG("Received unexpected message: " + msg.name);
      return;
    }

    this._reportPageWithAds(msg.data);
  }

  /**
   * Test-only function to override the search provider information for use
   * with tests. Passes it to the SearchTelemetryChild actor.
   *
   * @param {object} providerInfo @see SEARCH_PROVIDER_INFO for type information.
   */
  overrideSearchTelemetryForTests(providerInfo) {
    Services.ppmm.sharedData.set("SearchTelemetry:ProviderInfo", providerInfo);
  }

  /**
   * Listener that observes network activity, so that we can determine if a link
   * from a search provider page was followed, and if then if that link was an
   * ad click or not.
   *
   * @param {nsIChannel} nativeChannel The channel that generated the activity.
   * @param {number} activityType The type of activity.
   * @param {number} activitySubtype The subtype for the activity.
   */
  observeActivity(
    nativeChannel,
    activityType,
    activitySubtype /*,
    timestamp,
    extraSizeData,
    extraStringData*/
  ) {
    // NOTE: the channel handling code here is inspired by WebRequest.jsm.
    // Only closed HTTP transactions are of interest, and only while at least
    // one search page is being tracked.
    if (
      !this._browserInfoByURL.size ||
      activityType !=
        Ci.nsIHttpActivityObserver.ACTIVITY_TYPE_HTTP_TRANSACTION ||
      activitySubtype !=
        Ci.nsIHttpActivityObserver.ACTIVITY_SUBTYPE_TRANSACTION_CLOSE
    ) {
      return;
    }

    // Sometimes we get a NullHttpChannel, which implements nsIHttpChannel but
    // not nsIChannel.
    if (!(nativeChannel instanceof Ci.nsIChannel)) {
      return;
    }
    let channel = ChannelWrapper.get(nativeChannel);
    // The wrapper is consistent across redirects, so we can use it to track state.
    if (channel._adClickRecorded) {
      LOG("Ad click already recorded");
      return;
    }

    // Make a trip through the event loop to make sure statuses have a chance to
    // be processed before we get all the info.
    Services.tm.dispatchToMainThread(() => {
      // Several activities for the same wrapper may have been queued before
      // the first of them ran, so check again to avoid double-counting.
      if (channel._adClickRecorded) {
        LOG("Ad click already recorded");
        return;
      }

      // We suspect that No Content (204) responses are used to transfer or
      // update beacons. They lead to use double-counting ad-clicks, so let's
      // ignore them.
      if (channel.statusCode == 204) {
        LOG("Ignoring activity from ambiguous responses");
        return;
      }

      // The request must originate from a page that we are tracking.
      let originURL = channel.originURI && channel.originURI.spec;
      if (!originURL || !this._findBrowserItemForURL(originURL)) {
        return;
      }

      // ...and must end up at one of the known ad servers.
      let finalURL = channel.finalURL;
      let providerInfo = this._getProviderInfoForURL(finalURL, true);
      if (!providerInfo) {
        return;
      }

      let provider = providerInfo[0];
      try {
        Services.telemetry.keyedScalarAdd(SEARCH_AD_CLICKS_SCALAR, provider, 1);
        channel._adClickRecorded = true;
        LOG(`Counting ad click in page for ${provider} ${originURL} ${finalURL}`);
      } catch (e) {
        Cu.reportError(e);
      }
    });
  }

  /**
   * Adds a message listener for the window being registered to receive messages
   * from SearchTelemetryChild.
   *
   * @param {object} win The window to register.
   */
  registerWindow(win) {
    win.messageManager.addMessageListener("SearchTelemetry:PageInfo", this);
  }

  /**
   * Removes the message listener for the window.
   *
   * @param {object} win The window to unregister.
   */
  unregisterWindow(win) {
    win.messageManager.removeMessageListener("SearchTelemetry:PageInfo", this);
  }

  /**
   * Logs telemetry for a page with adverts, if it is one of the partner search
   * provider pages that we're tracking.
   *
   * @param {object} info
   * @param {boolean} info.hasAds Whether or not the page has adverts.
   * @param {string} info.url The url of the page.
   */
  _reportPageWithAds(info) {
    let item = this._findBrowserItemForURL(info.url);
    if (!item) {
      LOG(
        `Expected to report URI for ${
          info.url
        } with ads but couldn't find the information`
      );
      return;
    }

    Services.telemetry.keyedScalarAdd(
      SEARCH_WITH_ADS_SCALAR,
      item.info.provider,
      1
    );
    LOG(`Counting ads in page for ${item.info.provider} ${info.url}`);
  }
}
755
756
/**
 * Outputs the message to the JavaScript console as well as to stdout. Does
 * nothing unless `loggingEnabled` is set.
 *
 * @param {string} msg The message to output.
 */
function LOG(msg) {
  if (!loggingEnabled) {
    return;
  }
  // Each dumped line ends with just a newline; no stray trailing quote.
  dump(`*** SearchTelemetry: ${msg}\n`);
  Services.console.logStringMessage(msg);
}
767
768
// The single TelemetryHandler instance, exposed under the name listed in
// EXPORTED_SYMBOLS.
var SearchTelemetry = new TelemetryHandler();