Revision control

1
#!/usr/bin/env python
2
3
# This Source Code Form is subject to the terms of the Mozilla Public
4
# License, v. 2.0. If a copy of the MPL was not distributed with this
5
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7
from __future__ import print_function
8
9
import json
10
import urlparse
11
12
# the list of files written out
13
files = []
14
base_dir = "../Carthage/Checkouts/shavar-prod-lists"
15
16
block_cookies_mode = False
17
18
def output_filename(category):
    """Return the path of the JSON file that holds *category*'s rules.

    The file name embeds the active action -- "block-cookies" when the
    module-level ``block_cookies_mode`` flag is set, "block" otherwise --
    followed by the lower-cased category name.
    """
    if block_cookies_mode:
        action = "block-cookies"
    else:
        action = "block"
    template = "Lists/disconnect-{0}-{1}.json"
    return template.format(action, category.lower())
21
22
def url_filter(resource):
    """Return the ``url-filter`` regex string for *resource*.

    The pattern is anchored at the start of the URL, accepts http or https,
    allows an optional subdomain prefix, and then matches *resource* with
    every dot escaped. No other character of *resource* is escaped and the
    pattern is not anchored at the end.
    """
    # Optional subdomain part: any run of non-slash chars ([^/]+) and a dot.
    scheme_and_subdomain = "^https?://([^/]+\\.)?"
    escaped_host = "\\.".join(resource.split("."))
    return scheme_and_subdomain + escaped_host
25
26
27
def unless_domain(properties):
    """Return the ``unless-domain`` values for the domains in *properties*.

    Each domain gets a leading "*", which in content-blocker rules makes the
    entry cover the domain together with its subdomains. A new list is
    returned; *properties* is left untouched.
    """
    return [
        "*" + related_domain
        for related_domain in properties
    ]
29
30
31
def create_blocklist_entry(resource, related_domains):
    """Build the rule dict that blocks third-party loads of *resource*.

    Args:
        resource: domain to block; turned into a regex by url_filter().
        related_domains: domains on which the rule must not fire. When the
            sequence is non-empty it is stored under "unless-domain" after
            passing through unless_domain().

    Returns:
        A dict with a "trigger" and an "action" key. The action type is
        "block-cookies" when the module-level ``block_cookies_mode`` flag is
        set, and "block" otherwise.
    """
    trigger = {"url-filter": url_filter(resource),
               "load-type": ["third-party"]}
    if len(related_domains) > 0:
        trigger["unless-domain"] = unless_domain(related_domains)

    if block_cookies_mode:
        rule_action = "block-cookies"
    else:
        rule_action = "block"
    return {"trigger": trigger, "action": {"type": rule_action}}
41
42
def add_entry_to_blocklist(blocklist, entities, name, property_, resources):
    """Append one blocking rule to *blocklist* per valid entry of *resources*.

    Args:
        blocklist: list that receives the rules (mutated in place).
        entities: entity-list mapping; when it contains *name*, the entity's
            "properties" value is used as the set of exempted domains.
        name: name of the entity that owns the resources.
        property_: key under the entity in the blacklist data. Only keys
            starting with "http" or "www" are processed.
        resources: iterable of resource domains to block.

    When *name* is not in *entities*, the exempted domain is derived from
    *property_* itself: its host with a leading "www" label removed.
    Resources of two characters or fewer are reported and skipped.
    """
    if not property_.startswith(("http", "www")):
        return  # 'dnt', 'session-replay', 'performance' are keys that are ignored

    if name in entities:
        related_domains = entities[name]["properties"]
    else:
        netloc = urlparse.urlparse(property_).netloc
        if not netloc:
            # Without a scheme ("www.example.com/") urlparse treats the whole
            # string as a path; prefixing "//" makes it parse as a host.
            netloc = urlparse.urlparse("//" + property_).netloc
        labels = netloc.split(".")
        if labels[0] == "www":
            labels.pop(0)
        # The result has to be bound to related_domains: that is the name the
        # loop below reads.
        related_domains = [".".join(labels)]

    for res in resources:
        if len(res) > 2:
            blocklist.append(create_blocklist_entry(res, related_domains))
        else:
            print("Found invalid resource.")
57
58
def generate_blacklists(blacklist, entitylist):
    """Write one content-blocker JSON file per category of the blacklist.

    Args:
        blacklist: path of the blacklist JSON file. Its "categories" object
            maps a category name to a list of single-entity dicts shaped like
            {entity_name: {property_url: [resource, ...]}}.
        entitylist: path of the entity-list JSON file; the parsed content is
            passed to add_entry_to_blocklist().

    Side effects: reads google_mapping.json from ``base_dir``, writes the file
    named by output_filename() for every category, appends each written path
    to the module-level ``files`` list and prints the rule count per category.
    """
    # Generating the categorical lists requires some manual tweaking to the
    # data at the moment.

    def find_entry(entry, list_):
        """Return the dict in *list_* whose only key is *entry*, or None."""
        for d in list_:
            # list(d) yields the keys on Python 2 and 3 alike, whereas the
            # Python 3 keys() view never compares equal to a list.
            if list(d) == [entry]:
                return d
        return None

    # First, massage the existing categorical data slightly
    with open(blacklist) as fp:
        categories = json.load(fp)["categories"]

    # Move the Twitter and Facebook entries into the Social category from
    # the Disconnect category
    disconnect = categories.pop("Disconnect")
    for moved_name in ("Facebook", "Twitter"):
        moved = find_entry(moved_name, disconnect)
        # Skip entries that are absent: appending None would break the
        # per-entity loop further down.
        if moved is not None:
            categories["Social"].append(moved)

    # Load the entitylist to map the whitelist entries.
    with open(entitylist) as fp:
        entities = json.load(fp)

    # Change the Google entries for the respective categories
    with open(base_dir + "/google_mapping.json") as fp:
        tweaks = json.load(fp)["categories"]

    for category in ("Advertising", "Analytics", "Social"):
        cat = categories[category]
        # NOTE(review): assumes each category of google_mapping.json is a
        # list whose first element is the {"Google": {property: [resources]}}
        # entry, i.e. the same layout as the blacklist -- confirm against the
        # data file.
        google_tweak = tweaks[category][0]
        goog = find_entry("Google", cat)
        if goog is None:
            # No data exist for this category, just append
            cat.append(google_tweak)
        else:
            # Merge into the property map stored under the "Google" key; the
            # wrapper dict itself must keep "Google" as its only key.
            google_props = goog["Google"]
            for prop, resources in google_tweak["Google"].items():
                if prop not in google_props:
                    google_props[prop] = resources
                    continue
                for resource in resources:
                    if resource not in google_props[prop]:
                        google_props[prop].append(resource)
                google_props[prop].sort()
        # Ordering a list of dicts relies on Python 2 comparison semantics.
        cat.sort()

    for category in categories:
        blocklist = []

        for entity in categories[category]:
            for name, domains in entity.items():
                for property_, resources in domains.items():
                    add_entry_to_blocklist(blocklist, entities, name,
                                           property_, resources)

        print("{cat} blacklist has {count} entries."
              .format(cat=category, count=len(blocklist)))

        out_file = output_filename(category)
        files.append(out_file)
        with open(out_file, "w") as fp:
            # indent=0 makes json emit newlines, which are then stripped so
            # the whole list ends up on a single line.
            out = json.dumps(blocklist, indent=0,
                             separators=(',', ':')).replace('\n', '')
            fp.write(out)
118
119
120
def format_one_rule_per_line(files):
    """Rewrite every file in *files* so that each rule starts on a new line.

    A newline is inserted in front of each occurrence of ``{"action"``, so a
    rule is only moved to its own line when it was serialized with "action"
    as its first key. Files are read fully and overwritten in place.

    Args:
        files: iterable of paths to the JSON files to reformat.
    """
    for name in files:
        # Context managers close the handle even if reading or writing fails.
        with open(name) as fp:
            content = fp.read()
        content = content.replace('{"action"', '\n{"action"')
        with open(name, "w") as fp:
            fp.write(content)
128
129
130
import sys
131
import os
132
133
def help():
    """Print the one-line usage hint naming the two accepted arguments."""
    usage = "Specify `block` or `block-cookies` as arg."
    print(usage)
135
136
137
if __name__ == "__main__":
    # The first argument selects the rule action: `block` or `block-cookies`.
    # sys.exit() is used instead of the bare exit() helper, which is only
    # installed by the site module and is not guaranteed to exist.
    if len(sys.argv) < 2:
        help()
        sys.exit(1)

    block_cookies_mode = sys.argv[1] == 'block-cookies'
    if not block_cookies_mode and sys.argv[1] != 'block':
        help()
        sys.exit(1)

    blacklist = '../Carthage/Checkouts/shavar-prod-lists/disconnect-blacklist.json'
    entitylist = '../Carthage/Checkouts/shavar-prod-lists/disconnect-entitylist.json'

    # output_filename() places every file under Lists/, so the directory has
    # to exist before anything is written.
    if not os.path.exists("Lists"):
        os.mkdir("Lists")

    generate_blacklists(blacklist, entitylist)

    # format as one action per-line, which is easier to read and diff
    format_one_rule_per_line(files)