Source code

Revision control

Other Tools

1
# -*- coding: utf-8 -*-
2
3
# This Source Code Form is subject to the terms of the Mozilla Public
4
# License, v. 2.0. If a copy of the MPL was not distributed with this
5
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7
# We don't import all modules at the top for performance reasons. See Bug 1008943
8
9
from __future__ import absolute_import, print_function
10
11
import errno
12
import os
13
import stat
14
import sys
15
import time
16
import warnings
17
from contextlib import contextmanager
18
19
from six.moves import urllib
20
21
22
# Names exported by a star-import of this module. Helpers that are not listed
# here (for example ``depth`` and ``_call_windows_retry``) are not exported
# that way.
__all__ = ['extract_tarball',
           'extract_zip',
           'extract',
           'is_url',
           'load',
           'move',
           'remove',
           'rmtree',
           'tree',
           'which',
           'NamedTemporaryFile',
           'TemporaryDirectory']
34
35
# utilities for extracting archives
36
37
38
def extract_tarball(src, dest):
    """Unpack every member of the tar archive ``src`` into ``dest``.

    :param src: path of the tar archive to read
    :param dest: directory the members are extracted into
    :returns: list with the name of each extracted member, in archive order
    """
    # Imported lazily, like the other heavy modules in this file.
    import tarfile

    extracted = []
    with tarfile.open(src) as archive:
        for member in archive:
            archive.extract(member, path=dest)
            extracted.append(member.name)
    return extracted
51
52
53
def extract_zip(src, dest):
    """Extract a zip file.

    :param src: path to a zip archive, or an already opened
                ``zipfile.ZipFile`` instance
    :param dest: directory the members are extracted into
    :returns: the archive's member names, as reported by ``namelist()``

    The archive is always closed before returning or raising, including a
    ``ZipFile`` instance handed in by the caller.
    """
    import zipfile

    if isinstance(src, zipfile.ZipFile):
        bundle = src
    else:
        try:
            bundle = zipfile.ZipFile(src)
        except Exception:
            # Report which archive could not be opened, then propagate.
            print("src: %s" % src)
            raise

    # try/finally guarantees the handle is released even when extracting or
    # chmod'ing a member fails half way through the archive.
    try:
        namelist = bundle.namelist()

        for name in namelist:
            bundle.extract(name, dest)
            filename = os.path.realpath(os.path.join(dest, name))
            # The upper 16 bits of external_attr are read as the file mode;
            # only the nine permission bits (rwxrwxrwx) are kept.
            mode = (bundle.getinfo(name).external_attr >> 16) & 0x1FF
            # Only update permissions if attributes are set. Otherwise
            # fall back to the defaults.
            if mode:
                os.chmod(filename, mode)
    finally:
        bundle.close()
    return namelist
78
79
80
def extract(src, dest=None):
    """
    Unpack the tar or zip archive ``src`` into the directory ``dest``.

    When ``dest`` is omitted the archive is unpacked next to itself, i.e.
    into os.path.dirname(src). A ``dest`` that is not yet a directory is
    created first.

    :param src: path to an existing tar or zip archive
    :param dest: optional target directory
    :returns: list of the top level files and folders that were extracted,
              each joined onto ``dest``
    :raises Exception: if ``src`` is neither a tar nor a zip archive
    """
    import tarfile
    import zipfile

    assert os.path.exists(src), "'%s' does not exist" % src

    if dest is None:
        dest = os.path.dirname(src)
    elif not os.path.isdir(dest):
        os.makedirs(dest)
    assert not os.path.isfile(dest), "dest cannot be a file"

    if tarfile.is_tarfile(src):
        members = extract_tarball(src, dest)
    elif zipfile.is_zipfile(src):
        members = extract_zip(src, dest)
    else:
        raise Exception("mozfile.extract: no archive format found for '%s'" %
                        src)

    # Member names use forward slashes even on Windows, so '/' (not os.sep)
    # is what separates path components here.
    top_level = []
    for member in members:
        trimmed = member.rstrip('/')
        if '/' not in trimmed:
            top_level.append(os.path.join(dest, trimmed))

    # The member list may lack entries for the folders themselves, so add
    # the first path component of every nested member that is still missing.
    for member in members:
        head, slash, _ = member.partition('/')
        if slash:
            root = os.path.join(dest, head)
            if root not in top_level:
                top_level.append(root)

    return top_level
121
122
123
# utilities for removal of files and directories
124
125
def rmtree(dir):
    """Deprecated alias that removes a directory tree via remove().

    New code should call mozfile.remove() directly.

    :param dir: directory to be removed
    """
    message = "mozfile.rmtree() is deprecated in favor of mozfile.remove()"
    # stacklevel=2 attributes the warning to whoever called rmtree().
    warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
    return remove(dir)
136
137
138
def _call_windows_retry(func, args=(), retry_max=5, retry_delay=0.5):
139
"""
140
It's possible to see spurious errors on Windows due to various things
141
keeping a handle to the directory open (explorer, virus scanners, etc)
142
So we try a few times if it fails with a known error.
143
retry_delay is multiplied by the number of failed attempts to increase
144
the likelihood of success in subsequent attempts.
145
"""
146
retry_count = 0
147
while True:
148
try:
149
func(*args)
150
except OSError as e:
151
# Error codes are defined in:
153
if e.errno not in (errno.EACCES, errno.ENOTEMPTY):
154
raise
155
156
if retry_count == retry_max:
157
raise
158
159
retry_count += 1
160
161
print('%s() failed for "%s". Reason: %s (%s). Retrying...' %
162
(func.__name__, args, e.strerror, e.errno))
163
time.sleep(retry_count * retry_delay)
164
else:
165
# If no exception has been thrown it should be done
166
break
167
168
169
def remove(path):
    """Removes the specified file, link, or directory tree.

    This is a replacement for shutil.rmtree that works better under
    windows. It does the following things:

     - check path access for the current user before trying to remove
     - retry operations on some known errors due to various things keeping
       a handle on file paths - like explorer, virus scanners, etc. The
       known errors are errno.EACCES and errno.ENOTEMPTY, and it will
       retry up to 5 times with a delay of (failed_attempts * 0.5) seconds
       between each attempt.

    Note that no error will be raised if the given path does not exist.

    :param path: path to be removed
    """
    import shutil

    def _call_with_windows_retry(*args, **kwargs):
        """Run _call_windows_retry, treating an already-gone path as success."""
        try:
            _call_windows_retry(*args, **kwargs)
        except OSError as e:
            # The file or directory to be removed doesn't exist anymore
            if e.errno != errno.ENOENT:
                raise

    def _update_permissions(path):
        """Sets specified permissions depending on filetype"""
        if os.path.islink(path):
            # Path is a symlink which we don't have to modify
            # because it should already have all the needed permissions
            return

        stats = os.stat(path)

        if os.path.isfile(path):
            mode = stats.st_mode | stat.S_IWUSR
        elif os.path.isdir(path):
            # Directories also need the execute bit to be traversable.
            mode = stats.st_mode | stat.S_IWUSR | stat.S_IXUSR
        else:
            # Not supported type
            return

        _call_with_windows_retry(os.chmod, (path, mode))

    # lexists() rather than exists(): exists() follows symlinks and reports
    # False for a dangling link, which would then never be removed.
    if not os.path.lexists(path):
        return

    if os.path.isfile(path) or os.path.islink(path):
        # Verify the file or link is read/write for the current user
        _update_permissions(path)
        _call_with_windows_retry(os.remove, (path,))

    elif os.path.isdir(path):
        # Verify the directory is read/write/execute for the current user
        _update_permissions(path)

        # We're ensuring that every nested item has writable permission.
        for root, dirs, files in os.walk(path):
            for entry in dirs + files:
                _update_permissions(os.path.join(root, entry))
        _call_with_windows_retry(shutil.rmtree, (path,))
233
234
235
def move(src, dst):
    """
    Move the file or directory ``src`` to ``dst``.

    Works like shutil.move but behaves better under windows: the operation
    is retried on some known errors caused by other things keeping a handle
    on file paths.

    :param src: path to move
    :param dst: destination path
    """
    import shutil

    arguments = (src, dst)
    _call_windows_retry(shutil.move, arguments)
245
246
247
def depth(directory):
    """Return the integer depth of a directory or path relative to '/'.

    The filesystem root itself has depth 1 and every further path component
    adds one.

    :param directory: path to measure; it is made absolute first
    """
    head, tail = os.path.split(os.path.abspath(directory))
    count = 1
    # Strip one trailing component per round until only the root remains.
    while tail:
        head, tail = os.path.split(head)
        count += 1
    return count
258
259
260
def tree(directory, sort_key=lambda x: x.lower()):
    """Render the directory structure below `directory` as text.

    :param directory: root directory to walk; the first output line shows
                      it exactly as passed
    :param sort_key: key function used to order the directory names and the
                     file names within each directory
    :returns: the lines of the tree, joined with newlines
    """
    pipe = u'│'
    branch = u'├'
    corner = u'└'

    lines = []
    prefix = []
    # absolute directory path -> name of its last (sorted) sub-directory
    final_subdir = {}
    base_depth = depth(directory)

    for current, subdirs, files in os.walk(directory, topdown=True):

        here = os.path.abspath(current)
        name = os.path.basename(here)
        parent = os.path.dirname(here)
        level = depth(here) - base_depth

        # Sort both lists in place; with topdown=True the walk then also
        # descends into the sub-directories in this order.
        subdirs.sort(key=sort_key)
        files.sort(key=sort_key)

        # Grow the prefix by one column when descending, then cut it back
        # to the current level.
        if level > len(prefix):
            prefix.append(pipe)
        prefix = prefix[:level]

        if subdirs:
            # Sub-directories are still to be listed below the files, so
            # even the last file gets a regular item marker.
            last_file_mark = branch
            final_subdir[here] = subdirs[-1]
        else:
            last_file_mark = corner

        if final_subdir.get(parent) == name:
            # last directory of parent
            dir_mark = corner
            prefix[-1] = ' '
        elif not prefix:
            dir_mark = ''
        else:
            dir_mark = branch

        # The very first entry is the walk root: show it as it was passed.
        label = name if lines else directory
        lines.append('%s%s%s' % (''.join(prefix[:-1]), dir_mark, label))

        # add the files
        if files:
            last_file = files[-1]
            file_prefix = ''.join(prefix)
            for filename in files:
                mark = last_file_mark if filename == last_file else branch
                lines.append('%s%s%s' % (file_prefix, mark, filename))

    return '\n'.join(lines)
315
316
317
def which(cmd, mode=os.F_OK | os.X_OK, path=None, exts=None):
    """A wrapper around `shutil.which` to make the behavior on Windows
    consistent with other platforms.

    On non-Windows platforms, this is a direct call to `shutil.which`. On
    Windows, this:

    * Ensures that `cmd` without an extension will be found. Previously it was
      only found if it had an extension in `PATHEXT`.
    * Ensures the absolute path to the binary is returned. Previously if the
      binary was found in `cwd`, a relative path was returned.

    The arguments are the same as the ones in `shutil.which`. In addition there
    is an `exts` argument that only has an effect on Windows. This is used to
    set a custom value for PATHEXT and is formatted as a list of file
    extensions. The sequence passed as `exts` is never modified.

    `path` may also be given as a list or tuple of directories.

    :returns: path of the located command, or None if it was not found
    """
    try:
        from shutil import which as shutil_which
    except ImportError:
        # Fallback for interpreters whose shutil has no which().
        from shutil_which import which as shutil_which

    if isinstance(path, (list, tuple)):
        path = os.pathsep.join(path)

    if sys.platform != "win32":
        return shutil_which(cmd, mode=mode, path=path)

    oldexts = os.environ.get("PATHEXT", "")
    # Work on a private copy so that appending "." below never leaks into
    # the list object owned by the caller.
    exts = list(exts) if exts else oldexts.split(os.pathsep)

    # This ensures that `cmd` without any extensions will be found.
    if "." not in exts:
        exts.append(".")

    # PATHEXT is overridden through the process environment for the duration
    # of the lookup and restored afterwards, even if the lookup raises.
    os.environ["PATHEXT"] = os.pathsep.join(exts)
    try:
        path = shutil_which(cmd, mode=mode, path=path)
        # Drop the trailing "." that matching the bare-name extension adds.
        return os.path.abspath(path.rstrip('.')) if path else None

    finally:
        if oldexts:
            os.environ["PATHEXT"] = oldexts
        else:
            del os.environ["PATHEXT"]
364
365
366
# utilities for temporary resources
367
368
class NamedTemporaryFile(object):
    """
    Like tempfile.NamedTemporaryFile except it works on Windows
    in the case where you open the created file a second time.

    This behaves very similarly to tempfile.NamedTemporaryFile but may
    not behave exactly the same. For example, this function does not
    prevent fd inheritance by children.

    Attribute access that is not handled by this class is forwarded to the
    underlying file object, so ``write``, ``name``, ``flush`` etc. work as
    on a regular file.

    Example usage:

    with NamedTemporaryFile() as fh:
        fh.write(b'foobar')

        print('Filename: %s' % fh.name)
    """

    def __init__(self, mode='w+b', bufsize=-1, suffix='', prefix='tmp',
                 dir=None, delete=True):
        """Create the temporary file and open it with ``mode``.

        :param mode: mode passed to open(); a 't' in it makes mkstemp
                     create the file in text mode
        :param bufsize: accepted for signature compatibility; not used
        :param suffix: file name suffix, passed to tempfile.mkstemp
        :param prefix: file name prefix, passed to tempfile.mkstemp
        :param dir: directory to create the file in, passed to mkstemp
        :param delete: remove the file when the context manager exits or
                       the object is finalized
        """
        import tempfile
        fd, path = tempfile.mkstemp(suffix, prefix, dir, 't' in mode)
        # Only the path is needed; the file is reopened with `mode` below.
        os.close(fd)

        # Record the cleanup state before opening, so that __del__ can still
        # remove the file from disk if open() raises.
        self._path = path
        self._delete = delete
        self._unlinked = False

        self.file = open(path, mode)

    def __getattr__(self, k):
        # Only called for attributes not found normally: delegate to the
        # wrapped file. Raise AttributeError (not KeyError) while the file
        # does not exist yet, so hasattr()/getattr() defaults keep working.
        try:
            wrapped = self.__dict__['file']
        except KeyError:
            raise AttributeError(k)
        return getattr(wrapped, k)

    def __iter__(self):
        return self.__dict__['file']

    def __enter__(self):
        self.file.__enter__()
        return self

    def __exit__(self, exc, value, tb):
        self.file.__exit__(exc, value, tb)
        if self.__dict__['_delete']:
            os.unlink(self.__dict__['_path'])
            self._unlinked = True

    def __del__(self):
        state = self.__dict__
        # A missing flag means __init__ failed before creating anything, so
        # there is nothing to clean up.
        if state.get('_unlinked', True):
            return
        wrapped = state.get('file')
        if wrapped is not None:
            wrapped.__exit__(None, None, None)
        if state['_delete']:
            os.unlink(state['_path'])
            # Make a repeated finalization a no-op.
            state['_unlinked'] = True
421
422
423
@contextmanager
def TemporaryDirectory():
    """
    Context manager yielding the path of a fresh directory made with
    tempfile.mkdtemp; the directory tree is deleted when the block ends.

    Example usage:
    with TemporaryDirectory() as tmp:
        with open(os.path.join(tmp, "a_temp_file"), "w") as fh:
            fh.write("data")

    """
    import shutil
    import tempfile

    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        # Runs on normal exit and when the with-body raises.
        shutil.rmtree(scratch)
442
443
444
# utilities dealing with URLs
445
446
def is_url(thing):
    """
    Return True if thing looks like a URL.

    A string counts as a URL when urlparse finds a scheme of at least two
    characters in it; a one-character "scheme" such as the ``c`` of
    ``c:\\path`` is therefore not treated as a URL.

    :param thing: string to inspect
    """
    # The scheme is both the `.scheme` attribute and element 0 of the parse
    # result, so a single attribute lookup covers every case.
    return len(urllib.parse.urlparse(thing).scheme) >= 2
456
457
458
def load(resource):
    """
    Open a file or URL for reading.

    If the passed resource string is not a URL, or begins with 'file://',
    the result of open() on the (prefix-stripped) path is returned.
    Otherwise, return the result of urllib.request.urlopen().

    :param resource: file path or URL string
    """
    file_prefix = 'file://'

    # handle file URLs separately due to python stdlib limitations
    if resource.startswith(file_prefix):
        resource = resource[len(file_prefix):]

    if is_url(resource):
        return urllib.request.urlopen(resource)

    # if no scheme is given, it is a file path
    return open(resource)