Chromium Code Reviews
| Index: client/isolateserver.py |
| diff --git a/client/isolateserver.py b/client/isolateserver.py |
| index e784a7b50cba8ca2d4600c3a15b9aa31b2cc9f68..c2a3cd0fceeef8dcf6ede79af33d4d8932364ba4 100755 |
| --- a/client/isolateserver.py |
| +++ b/client/isolateserver.py |
| @@ -14,6 +14,7 @@ import logging |
| import optparse |
| import os |
| import re |
| +import stat |
| import signal |
| import sys |
| import tempfile |
| @@ -22,10 +23,13 @@ import time |
| import types |
| import zlib |
| +import cStringIO as StringIO |
| + |
| from third_party import colorama |
| from third_party.depot_tools import fix_encoding |
| from third_party.depot_tools import subcommand |
| +from libs import arfile |
| from utils import file_path |
| from utils import fs |
| from utils import logging_utils |
| @@ -149,6 +153,56 @@ def file_write(path, content_generator): |
| return total |
| +def fileobj_path(fileobj): |
|
M-A Ruel
2016/06/21 13:31:09
add docstring to these 3 functions
mithro
2016/06/22 12:03:11
Done.
|
| + name = getattr(fileobj, 'name', None) |
| + if name is None: |
| + return |
| + |
| + if not isinstance(name, unicode): |
|
M-A Ruel
2016/06/21 13:31:08
Please return unicode paths instead of encoded str
mithro
2016/06/22 12:03:10
This is *creating* a unicode value from a byte (str) value.
M-A Ruel
2016/06/22 17:17:49
Err, I had misread, sorry.
mithro
2016/06/23 07:17:21
Acknowledged.
|
| + name = name.decode(sys.getfilesystemencoding()) |
| + |
| + if fs.exists(name): |
|
M-A Ruel
2016/06/21 13:31:09
why check for existence?
mithro
2016/06/22 12:03:10
There are many reasons why a fileobj has a name which doesn't exist on the file system.
M-A Ruel
2016/06/22 17:17:49
Oh I agree in the general case, but this is not the general case.
|
| + return name |
| + |
| + |
| +def fileobj_copy(dstfileobj, srcfileobj, amount=-1): |
|
M-A Ruel
2016/06/21 13:31:09
s/amount/size/ ?
mithro
2016/06/22 12:03:11
Done.
|
| + written = 0 |
| + while written != amount: |
| + readsize = NET_IO_FILE_CHUNK |
| + if amount > 0: |
| + readsize = min(readsize, amount-written) |
| + data = srcfileobj.read(readsize) |
| + if not data: |
| + if amount == -1: |
| + break |
| + raise IOError('partial file, got %s, wanted %s' % (written, amount)) |
| + dstfileobj.write(data) |
| + written += len(data) |
| + |
| + |
| +def putfile(srcfileobj, dstpath, file_mode=None, limit=-1): |
| + srcpath = fileobj_path(srcfileobj) |
| + if srcpath: |
| + readonly = file_mode is None or ( |
| + file_mode & (stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)) |
| + if readonly: |
| + # We can link the file |
| + file_path.link_file(dstpath, srcpath, file_path.HARDLINK_WITH_FALLBACK) |
| + else: |
| + # We must copy the file |
| + file_path.link_file(dstpath, srcpath, file_path.COPY) |
| + else: |
| + # Need to write out the file |
| + with fs.open(dstpath, 'wb') as dstfileobj: |
| + fileobj_copy(dstfileobj, srcfileobj, limit) |
| + |
| + srcfileobj.close() |
|
M-A Ruel
2016/06/21 13:31:08
that shouldn't be done here.
nodir
2016/06/21 22:20:05
+1
mithro
2016/06/22 12:03:10
Done.
mithro
2016/06/22 12:03:11
Done.
|
| + |
| + # file_mode of 0 is actually valid, so need explicit check. |
| + if file_mode is not None: |
| + fs.chmod(dstpath, file_mode) |
| + |
| + |
| def zip_compress(content_generator, level=7): |
| """Reads chunks from |content_generator| and yields zip compressed chunks.""" |
| compressor = zlib.compressobj(level) |
| @@ -1223,7 +1277,7 @@ class LocalCache(object): |
| self._initial_number_items = 0 |
| self._initial_size = 0 |
| self._evicted = [] |
| - self._linked = [] |
| + self._used = [] |
| def __contains__(self, digest): |
| raise NotImplementedError() |
| @@ -1245,6 +1299,10 @@ class LocalCache(object): |
| return self._evicted[:] |
| @property |
| + def used(self): |
| + return self._used[:] |
| + |
| + @property |
| def initial_number_items(self): |
| return self._initial_number_items |
| @@ -1252,10 +1310,6 @@ class LocalCache(object): |
| def initial_size(self): |
| return self._initial_size |
| - @property |
| - def linked(self): |
| - return self._linked[:] |
| - |
| def cached_set(self): |
| """Returns a set of all cached digests (always a new object).""" |
| raise NotImplementedError() |
| @@ -1280,8 +1334,12 @@ class LocalCache(object): |
| """Removes item from cache if it's there.""" |
| raise NotImplementedError() |
| - def read(self, digest): |
| - """Returns contents of the cached item as a single str.""" |
| + def getfileobj(self, digest): |
| + """Returns a readable file like object. |
| + |
| + If file exists on the file system it will have a .name attribute with an |
| + absolute path to the file. |
| + """ |
| raise NotImplementedError() |
| def write(self, digest, content): |
| @@ -1291,14 +1349,6 @@ class LocalCache(object): |
| """ |
| raise NotImplementedError() |
| - def hardlink(self, digest, dest, file_mode): |
| - """Ensures file at |dest| has same content as cached |digest|. |
| - |
| - If file_mode is provided, it is used to set the executable bit if |
| - applicable. |
| - """ |
| - raise NotImplementedError() |
| - |
| class MemoryCache(LocalCache): |
| """LocalCache implementation that stores everything in memory.""" |
| @@ -1333,10 +1383,12 @@ class MemoryCache(LocalCache): |
| if v is not None: |
| self._evicted.add(v) |
| - def read(self, digest): |
| + def getfileobj(self, digest): |
| with self._lock: |
|
M-A Ruel
2016/06/21 13:31:08
Nesting is not optimal;
with self._lock:
try:
mithro
2016/06/22 12:03:10
Done.
|
| try: |
| - return self._contents[digest] |
| + d = self._contents[digest] |
| + self._used.append(len(d)) |
| + return StringIO.StringIO(d) |
| except KeyError: |
| raise CacheMiss(digest) |
| @@ -1348,15 +1400,6 @@ class MemoryCache(LocalCache): |
| self._added.append(len(data)) |
| return digest |
| - def hardlink(self, digest, dest, file_mode): |
| - """Since data is kept in memory, there is no filenode to hardlink.""" |
| - data = self.read(digest) |
| - file_write(dest, [data]) |
| - if file_mode is not None: |
| - fs.chmod(dest, file_mode & self._file_mode_mask) |
| - with self._lock: |
| - self._linked.append(len(data)) |
| - |
| class CachePolicies(object): |
| def __init__(self, max_cache_size, min_free_space, max_items): |
| @@ -1488,10 +1531,12 @@ class DiskCache(LocalCache): |
| self._lru.pop(digest) |
| self._delete_file(digest, UNKNOWN_FILE_SIZE) |
| - def read(self, digest): |
| + def getfileobj(self, digest): |
| try: |
| - with fs.open(self._path(digest), 'rb') as f: |
| - return f.read() |
| + f = fs.open(self._path(digest), 'rb') |
| + with self._lock: |
| + self._used.append(self._lru[digest]) |
| + return f |
| except IOError: |
| raise CacheMiss(digest) |
| @@ -1522,25 +1567,6 @@ class DiskCache(LocalCache): |
| self._add(digest, size) |
| return digest |
| - def hardlink(self, digest, dest, file_mode): |
| - """Hardlinks the file to |dest|. |
| - |
| - Note that the file permission bits are on the file node, not the directory |
| - entry, so changing the access bit on any of the directory entries for the |
| - file node will affect them all. |
| - """ |
| - path = self._path(digest) |
| - if not file_path.link_file(dest, path, file_path.HARDLINK_WITH_FALLBACK): |
| - # Report to the server that it failed with more details. We'll want to |
| - # squash them all. |
| - on_error.report('Failed to hardlink\n%s -> %s' % (path, dest)) |
| - |
| - if file_mode is not None: |
| - # Ignores all other bits. |
| - fs.chmod(dest, file_mode & 0500) |
| - with self._lock: |
| - self._linked.append(self._lru[digest]) |
| - |
| def _load(self): |
| """Loads state of the cache from json file.""" |
| self._lock.assert_locked() |
| @@ -1771,7 +1797,7 @@ class IsolatedBundle(object): |
| # Wait until some *.isolated file is fetched, parse it. |
| item_hash = fetch_queue.wait(pending) |
| item = pending.pop(item_hash) |
| - item.load(fetch_queue.cache.read(item_hash)) |
| + item.load(fetch_queue.cache.getfileobj(item_hash).read()) |
|
M-A Ruel
2016/06/21 13:31:09
with fetch_queue.cache.getfileobj(item_hash) as f:
mithro
2016/06/22 12:03:10
Done
|
| # Start fetching included *.isolated files. |
| for new_child in item.children: |
| @@ -1809,8 +1835,14 @@ class IsolatedBundle(object): |
| # overridden files must not be fetched. |
| if filepath not in self.files: |
| self.files[filepath] = properties |
| + |
| + # Make sure if the isolated is read only, the mode doesn't have write |
| + # bits. |
| + if 'm' in properties and self.read_only: |
| + properties['m'] &= ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH) |
| + |
| + # Preemptively request hashed files. |
| if 'h' in properties: |
| - # Preemptively request files. |
| logging.debug('fetching %s', filepath) |
| fetch_queue.add( |
| properties['h'], properties['s'], threading_utils.PRIORITY_MED) |
| @@ -1972,12 +2004,33 @@ def fetch_isolated(isolated_hash, storage, cache, outdir): |
| # Wait for any item to finish fetching to cache. |
| digest = fetch_queue.wait(remaining) |
| - # Link corresponding files to a fetched item in cache. |
| + # Create the files in the destination using item in cache as the |
| + # source. |
| for filepath, props in remaining.pop(digest): |
| - dest = os.path.join(outdir, filepath) |
| - if os.path.exists(dest): |
| - raise AlreadyExists('File %s already exists' % dest) |
| - cache.hardlink(digest, dest, props.get('m')) |
| + fullpath = os.path.join(outdir, filepath) |
| + srcfileobj = cache.getfileobj(digest) |
|
M-A Ruel
2016/06/21 13:31:08
with cache.getfileobj(digest) as f:
mithro
2016/06/22 12:03:10
Done.
|
| + |
| + isarchive = props.get('a') |
| + if isarchive: |
| + basedir = os.path.dirname(fullpath) |
| + |
| + if isarchive == 'ar': |
| + extractor = arfile.ArFileReader(srcfileobj, fullparse=False) |
| + else: |
| + raise isolated_format.IsolatedError( |
| + 'Unknown archive format %r', isarchive) |
| + |
| + for ai, ifd in extractor: |
| + fp = os.path.normpath(os.path.join(basedir, ai.name)) |
| + file_path.ensure_tree(os.path.dirname(fp)) |
| + putfile(ifd, fp, ai.mode, ai.size) |
| + |
| + else: |
| + file_mode = props.get('m') |
| + if file_mode: |
| + # Ignore all bits apart from the user |
| + file_mode &= 0700 |
| + putfile(srcfileobj, fullpath, file_mode) |
| # Report progress. |
| duration = time.time() - last_update |