import argparse
import logging
import os
import re
import subprocess
import sys

import six
from collections import OrderedDict
from six import iteritems

try:
    from ..manifest import manifest
except ValueError:
    # if we're not within the tools package, the above is an import from above
    # the top-level which raises ValueError, so reimport it with an absolute
    # reference
    #
    # note we need both because depending on caller we may/may not have the
    # paths set up correctly to handle both and MYPY has no knowledge of our
    # sys.path magic
    from manifest import manifest  # type: ignore

MYPY = False
if MYPY:
    # MYPY is set to True when run under Mypy.
    from typing import Any
    from typing import Callable
    from typing import Dict
    from typing import Iterable
    from typing import List
    from typing import Optional
    from typing import Pattern
    from typing import Sequence
    from typing import Set
    from typing import Text
    from typing import Tuple
    from typing import Union

here = os.path.dirname(__file__)
wpt_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir))

logger = logging.getLogger()


def get_git_cmd(repo_path):
    # type: (bytes) -> Callable[..., Text]
    """Create a function for invoking git commands as a subprocess."""
    def git(cmd, *args):
        # type: (Text, *Union[bytes, Text]) -> Text
        full_cmd = [u"git", cmd] + list(item.decode("utf8") if isinstance(item, bytes) else item for item in args)  # type: List[Text]
        try:
            logger.debug(" ".join(full_cmd))
            return subprocess.check_output(full_cmd, cwd=repo_path).decode("utf8").strip()
        except subprocess.CalledProcessError as e:
            logger.critical("Git command exited with status %i" % e.returncode)
            logger.critical(e.output)
            sys.exit(1)
    return git
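
# Illustrative usage sketch (hypothetical caller): the returned closure takes the
# git subcommand first and its arguments after, so callers below do e.g.
#
#     git = get_git_cmd(wpt_root)
#     head = git("rev-parse", "HEAD")  # -> the HEAD commit sha as text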


def display_branch_point():
    # type: () -> None
    print(branch_point())


def branch_point():
    # type: () -> Optional[Text]
    git = get_git_cmd(wpt_root)
    if (os.environ.get("GITHUB_PULL_REQUEST", "false") == "false" and
        os.environ.get("GITHUB_BRANCH") == "master"):
        # For builds on the master branch just return the HEAD commit
        return git("rev-parse", "HEAD")
    elif os.environ.get("GITHUB_PULL_REQUEST", "false") != "false":
        # This is a PR, so the base branch is in GITHUB_BRANCH
        base_branch = os.environ.get("GITHUB_BRANCH")
        assert base_branch, "GITHUB_BRANCH environment variable is defined"
        branch_point = git("merge-base", "HEAD", base_branch)  # type: Optional[Text]
    else:
        # Otherwise we aren't on a PR, so we try to find commits that are only in the
        # current branch c.f.

        # parse HEAD into an object ref
        head = git("rev-parse", "HEAD")

        # get everything in refs/heads and refs/remotes that doesn't include HEAD
        not_heads = [item for item in git("rev-parse", "--not", "--branches", "--remotes").split("\n")
                     if item != "^%s" % head]

        # get all commits on HEAD but not reachable from anything in not_heads
        commits = git("rev-list", "--topo-order", "--parents", "HEAD", *not_heads)
        commit_parents = OrderedDict()  # type: Dict[Text, List[Text]]
        if commits:
            for line in commits.split("\n"):
                line_commits = line.split(" ")
                commit_parents[line_commits[0]] = line_commits[1:]

        branch_point = None

        # if there are any commits, take the first parent that is not in commits
        for commit, parents in iteritems(commit_parents):
            for parent in parents:
                if parent not in commit_parents:
                    branch_point = parent
                    break

            if branch_point:
                break

        # if we had any commits, we should now have a branch point
        assert branch_point or not commit_parents

        # The above heuristic will fail in the following cases:
        #
        # - The current branch has fallen behind the remote version
        # - Changes on the current branch were rebased and therefore do not exist on any
        #   other branch. This will result in the selection of a commit that is earlier
        #   in the history than desired (as determined by calculating the later of the
        #   branch point and the merge base)
        #
        # In either case, fall back to using the merge base as the branch point.
        merge_base = git("merge-base", "HEAD", "origin/master")
        if (branch_point is None or
            (branch_point != merge_base and
             not git("log", "--oneline", "%s..%s" % (merge_base, branch_point)).strip())):
            logger.debug("Using merge-base as the branch point")
            branch_point = merge_base
        else:
            logger.debug("Using first commit on another branch as the branch point")

    logger.debug("Branch point from master: %s" % branch_point)
    return branch_point
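
# A rough sketch of the git plumbing the non-PR branch above drives, assuming a
# checkout with an "origin" remote:
#
#     git rev-parse --not --branches --remotes       # refs to exclude
#     git rev-list --topo-order --parents HEAD ...   # commits only on this branch
#     git merge-base HEAD origin/master              # fallback branch point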


def compile_ignore_rule(rule):
    # type: (str) -> Pattern[str]
    rule = rule.replace(os.path.sep, "/")
    parts = rule.split("/")
    re_parts = []
    for part in parts:
        if part.endswith("**"):
            re_parts.append(re.escape(part[:-2]) + ".*")
        elif part.endswith("*"):
            re_parts.append(re.escape(part[:-1]) + "[^/]*")
        else:
            re_parts.append(re.escape(part))
    return re.compile("^%s$" % "/".join(re_parts))
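
# For example, the default rule "resources/testharness*" compiles to the pattern
# ^resources/testharness[^/]*$ (a single "*" stops at a path separator), while a
# hypothetical rule such as "css/**" compiles to ^css/.*$ and matches across
# separators.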


def repo_files_changed(revish, include_uncommitted=False, include_new=False):
    # type: (str, bool, bool) -> Set[Text]
    git = get_git_cmd(wpt_root)
    files_list = git("diff", "--name-only", "-z", revish).split("\0")
    assert not files_list[-1]
    files = set(files_list[:-1])

    if include_uncommitted:
        entries = git("status", "-z").split("\0")
        assert not entries[-1]
        entries = entries[:-1]
        for item in entries:
            status, path = item.split()
            if status == "??" and not include_new:
                continue
            else:
                if not os.path.isdir(path):
                    files.add(path)
                else:
                    for dirpath, dirnames, filenames in os.walk(path):
                        for filename in filenames:
                            files.add(os.path.join(dirpath, filename))

    return files
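
# Note: each NUL-separated entry from `git status -z` parsed above has the form
# "XY <path>", where XY is the two-character status code ("??" marks untracked
# files, which are skipped unless include_new is set).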


def exclude_ignored(files, ignore_rules):
    # type: (Iterable[Text], Optional[Sequence[str]]) -> Tuple[List[Text], List[Text]]
    if ignore_rules is None:
        ignore_rules = []
    compiled_ignore_rules = [compile_ignore_rule(item) for item in ignore_rules]

    changed = []
    ignored = []
    for item in sorted(files):
        fullpath = os.path.join(wpt_root, item)
        rule_path = item.replace(os.path.sep, "/")
        for rule in compiled_ignore_rules:
            if rule.match(rule_path):
                ignored.append(fullpath)
                break
        else:
            changed.append(fullpath)

    return changed, ignored
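
# For instance, with the default --ignore-rules value from get_parser() below
# ("resources/testharness*"), a changed file "resources/testharness.js" lands in
# the ignored list, while a hypothetical "dom/historical.html" lands in the
# changed list; both are returned as absolute paths under wpt_root.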


def files_changed(revish,  # type: str
                  ignore_rules=None,  # type: Optional[Sequence[str]]
                  include_uncommitted=False,  # type: bool
                  include_new=False  # type: bool
                  ):
    # type: (...) -> Tuple[List[Text], List[Text]]
    """Find files changed in certain revisions.

    The function passes `revish` directly to `git diff`, so `revish` can have a
    variety of forms; see `git diff --help` for details. Files in the diff that
    are matched by `ignore_rules` are excluded.
    """
    files = repo_files_changed(revish,
                               include_uncommitted=include_uncommitted,
                               include_new=include_new)
    if not files:
        return [], []

    return exclude_ignored(files, ignore_rules)
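
# Illustrative call (revish chosen arbitrarily): list files touched by the last
# commit, minus the testharness resources:
#
#     changed, ignored = files_changed("HEAD~1..HEAD", ["resources/testharness*"])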


def _in_repo_root(full_path):
    # type: (Union[bytes, Text]) -> bool
    rel_path = os.path.relpath(full_path, wpt_root)
    path_components = rel_path.split(os.sep)
    return len(path_components) < 2
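
# For example, a file directly under the repo root such as
# os.path.join(wpt_root, "README.md") yields a single path component and returns
# True, whereas os.path.join(wpt_root, "dom", "nodes", "Node-cloneNode.html")
# returns False. (Example paths are illustrative.)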


def load_manifest(manifest_path=None, manifest_update=True):
    # type: (Optional[str], bool) -> manifest.Manifest
    if manifest_path is None:
        manifest_path = os.path.join(wpt_root, "MANIFEST.json")
    return manifest.load_and_update(wpt_root, manifest_path, "/",
                                    update=manifest_update)


def affected_testfiles(files_changed,  # type: Iterable[Text]
                       skip_dirs=None,  # type: Optional[Set[str]]
                       manifest_path=None,  # type: Optional[str]
                       manifest_update=True  # type: bool
                       ):
    # type: (...) -> Tuple[Set[Text], Set[str]]
    """Determine and return list of test files that reference changed files."""
    if skip_dirs is None:
        skip_dirs = {"conformance-checkers", "docs", "tools"}
    affected_testfiles = set()
    # Exclude files that are in the repo root, because
    # they are not part of any test.
    files_changed = [f for f in files_changed if not _in_repo_root(f)]
    nontests_changed = set(files_changed)
    wpt_manifest = load_manifest(manifest_path, manifest_update)

    test_types = ["crashtest", "testharness", "reftest", "wdspec"]
    support_files = {os.path.join(wpt_root, path)
                     for _, path, _ in wpt_manifest.itertypes("support")}
    wdspec_test_files = {os.path.join(wpt_root, path)
                         for _, path, _ in wpt_manifest.itertypes("wdspec")}
    test_files = {os.path.join(wpt_root, path)
                  for _, path, _ in wpt_manifest.itertypes(*test_types)}

    interface_dir = os.path.join(wpt_root, 'interfaces')
    interfaces_files = {os.path.join(wpt_root, 'interfaces', filename)
                        for filename in os.listdir(interface_dir)}

    interfaces_changed = interfaces_files.intersection(nontests_changed)
    nontests_changed = nontests_changed.intersection(support_files)

    tests_changed = {item for item in files_changed if item in test_files}

    nontest_changed_paths = set()
    rewrites = {"/resources/webidl2/lib/webidl2.js": "/resources/WebIDLParser.js"}  # type: Dict[Text, Text]
    for full_path in nontests_changed:
        rel_path = os.path.relpath(full_path, wpt_root)
        path_components = rel_path.split(os.sep)
        top_level_subdir = path_components[0]
        if top_level_subdir in skip_dirs:
            continue
        repo_path = "/" + os.path.relpath(full_path, wpt_root).replace(os.path.sep, "/")
        if repo_path in rewrites:
            repo_path = rewrites[repo_path]
            full_path = os.path.join(wpt_root, repo_path[1:].replace("/", os.path.sep))
        nontest_changed_paths.add((full_path, repo_path))

    interfaces_changed_names = [os.path.splitext(os.path.basename(interface))[0]
                                for interface in interfaces_changed]

    def affected_by_wdspec(test):
        # type: (str) -> bool
        affected = False
        if test in wdspec_test_files:
            for support_full_path, _ in nontest_changed_paths:
                # parent of support file or of "support" directory
                parent = os.path.dirname(support_full_path)
                if os.path.basename(parent) == "support":
                    parent = os.path.dirname(parent)
                relpath = os.path.relpath(test, parent)
                if not relpath.startswith(os.pardir):
                    # testfile is in subtree of support file
                    affected = True
                    break
        return affected

    def affected_by_interfaces(file_contents):
        # type: (Union[bytes, Text]) -> bool
        if len(interfaces_changed_names) > 0:
            if 'idlharness.js' in file_contents:
                for interface in interfaces_changed_names:
                    regex = '[\'"]' + interface + '(\\.idl)?[\'"]'
                    if re.search(regex, file_contents):
                        return True
        return False

    for root, dirs, fnames in os.walk(wpt_root):
        # Walk top_level_subdir looking for test files containing either the
        # relative filepath or absolute filepath to the changed files.
        if root == wpt_root:
            for dir_name in skip_dirs:
                dirs.remove(dir_name)
        for fname in fnames:
            test_full_path = os.path.join(root, fname)
            # Skip any file that's not a test file.
            if test_full_path not in test_files:
                continue
            if affected_by_wdspec(test_full_path):
                affected_testfiles.add(test_full_path)
                continue

            with open(test_full_path, "rb") as fh:
                raw_file_contents = fh.read()  # type: bytes
                if raw_file_contents.startswith(b"\xfe\xff"):
                    file_contents = raw_file_contents.decode("utf-16be", "replace")  # type: Text
                elif raw_file_contents.startswith(b"\xff\xfe"):
                    file_contents = raw_file_contents.decode("utf-16le", "replace")
                else:
                    file_contents = raw_file_contents.decode("utf8", "replace")
                for full_path, repo_path in nontest_changed_paths:
                    rel_path = os.path.relpath(full_path, root).replace(os.path.sep, "/")
                    if rel_path in file_contents or repo_path in file_contents or affected_by_interfaces(file_contents):
                        affected_testfiles.add(test_full_path)
                        continue

    return tests_changed, affected_testfiles
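
# Sketch of intended use (paths are illustrative): given the absolute paths
# returned by files_changed(), this yields the changed tests plus any tests whose
# source mentions a changed support file, rewritten resource, or changed IDL
# interface:
#
#     changed, _ = files_changed("%s..HEAD" % branch_point())
#     tests_changed, dependents = affected_testfiles(changed)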


def get_parser():
    # type: () -> argparse.ArgumentParser
    parser = argparse.ArgumentParser()
    parser.add_argument("revish", default=None, help="Commits to consider. Defaults to the "
                        "commits on the current branch", nargs="?")
    # TODO: Consolidate with `./wpt run --affected`:
    parser.add_argument("--ignore-rules", nargs="*", type=set,  # type: ignore
                        default={"resources/testharness*"},
                        help="Rules for paths to exclude from lists of changes. Rules are paths "
                        "relative to the test root, with * before a separator or the end matching "
                        "anything other than a path separator and ** in that position matching "
                        "anything")
    parser.add_argument("--modified", action="store_true",
                        help="Include files under version control that have been "
                        "modified or staged")
    parser.add_argument("--new", action="store_true",
                        help="Include files in the worktree that are not in version control")
    parser.add_argument("--show-type", action="store_true",
                        help="Print the test type along with each affected test")
    parser.add_argument("--null", action="store_true",
                        help="Separate items with a null byte")
    return parser


def get_parser_affected():
    # type: () -> argparse.ArgumentParser
    parser = get_parser()
    parser.add_argument("--metadata",
                        dest="metadata_root",
                        action="store",
                        default=wpt_root,
                        help="Directory that will contain MANIFEST.json")
    return parser


def get_revish(**kwargs):
    # type: (**Any) -> bytes
    revish = kwargs.get("revish")
    if revish is None:
        revish = "%s..HEAD" % branch_point()
    if isinstance(revish, six.text_type):
        revish = revish.encode("utf8")
    assert isinstance(revish, six.binary_type)
    return revish
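
# The value returned here is always bytes, e.g. b"<branch point sha>..HEAD" when
# no revish was supplied (the sha placeholder is illustrative).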


def run_changed_files(**kwargs):
    # type: (**Any) -> None
    revish = get_revish(**kwargs)
    changed, _ = files_changed(revish, kwargs["ignore_rules"],
                               include_uncommitted=kwargs["modified"],
                               include_new=kwargs["new"])

    separator = "\0" if kwargs["null"] else "\n"

    for item in sorted(changed):
        sys.stdout.write(os.path.relpath(six.ensure_str(item), wpt_root) + separator)


def run_tests_affected(**kwargs):
    # type: (**Any) -> None
    revish = get_revish(**kwargs)
    changed, _ = files_changed(revish, kwargs["ignore_rules"],
                               include_uncommitted=kwargs["modified"],
                               include_new=kwargs["new"])
    manifest_path = os.path.join(kwargs["metadata_root"], "MANIFEST.json")
    tests_changed, dependents = affected_testfiles(
        changed,
        {"conformance-checkers", "docs", "tools"},
        manifest_path=manifest_path
    )

    message = "{path}"
    if kwargs["show_type"]:
        wpt_manifest = load_manifest(manifest_path)
        message = "{path}\t{item_type}"

    message += "\0" if kwargs["null"] else "\n"

    for item in sorted(tests_changed | dependents):
        results = {
            "path": os.path.relpath(item, wpt_root)
        }
        if kwargs["show_type"]:
            item_types = {i.item_type for i in wpt_manifest.iterpath(results["path"])}
            if len(item_types) != 1:
                item_types = {" ".join(item_types)}
            results["item_type"] = item_types.pop()
        sys.stdout.write(message.format(**results))
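
# With --show-type, each emitted record therefore has the form
# "<relative test path>\t<item type>", terminated by a newline (or by NUL when
# --null is passed). run_changed_files and run_tests_affected consume keyword
# arguments matching the options defined in get_parser()/get_parser_affected().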