Chromium Code Reviews| Index: client/isolateserver.py |
| diff --git a/client/isolateserver.py b/client/isolateserver.py |
| index 3d56b9688be80eb6b4afbbf8c99a880f74b7b4d4..7d42e1f9cca9f4491cf460a983e8ee4903951200 100755 |
| --- a/client/isolateserver.py |
| +++ b/client/isolateserver.py |
| @@ -5,16 +5,18 @@ |
| """Archives a set of files or directories to an Isolate Server.""" |
| -__version__ = '0.5.1' |
| +__version__ = '0.6.0' |
| import base64 |
| -import functools |
| import errno |
| +import functools |
| +import io |
| import logging |
| import optparse |
| import os |
| import re |
| import signal |
| +import stat |
| import sys |
| import tempfile |
| import threading |
| @@ -149,6 +151,121 @@ def file_write(path, content_generator): |
| return total |
| +def fileobj_path(fileobj): |
| + """Return file system path for file like object or None. |
| + |
| + The returned path is guaranteed to exist and can be passed to file system |
| + operations like copy. |
| + """ |
| + name = getattr(fileobj, 'name', None) |
| + if name is None: |
| + return |
| + |
| + # If the file like object was created using something like open("test.txt") |
| + # name will end up being a str (such as a function outside our control, like |
| + # the standard library). We want all our paths to be unicode objects, so we |
| + # decode it. |
| + if not isinstance(name, unicode): |
| + name = name.decode(sys.getfilesystemencoding()) |
| + |
| + if fs.exists(name): |
| + return name |
| + |
| + |
| +# TODO(tansell): Replace fileobj_copy with shutil.copyfileobj once proper file |
| +# wrappers have been created. |
| +def fileobj_copy( |
| + dstfileobj, srcfileobj, size=-1, |
| + chunk_size=isolated_format.DISK_FILE_CHUNK): |
| + """Copy data from srcfileobj to dstfileobj. |
| + |
| + Providing size means exactly that amount of data will be copied (if there |
| + isn't enough data, an IOError exception is thrown). Otherwise all data until |
| + the EOF marker will be copied. |
| + """ |
| + if size == -1 and hasattr(srcfileobj, 'tell'): |
| + if srcfileobj.tell() != 0: |
| + raise IOError('partial file but not using size') |
| + |
| + written = 0 |
| + while written != size: |
| + readsize = chunk_size |
| + if size > 0: |
| + readsize = min(readsize, size-written) |
| + data = srcfileobj.read(readsize) |
| + if not data: |
| + if size == -1: |
| + break |
| + raise IOError('partial file, got %s, wanted %s' % (written, size)) |
| + dstfileobj.write(data) |
| + written += len(data) |
| + |
| + |
| +def putfile(srcfileobj, dstpath, file_mode=None, size=-1, use_symlink=False): |
| + """Put srcfileobj at the given dstpath with given mode. |
| + |
| + The function aims to do this as efficiently as possible while still allowing |
| + any possible file like object be given. |
| + |
| + Creating a tree of hardlinks has a few drawbacks: |
| + - tmpfs cannot be used for the scratch space. The tree has to be on the same |
| + partition as the cache. |
| + - involves a write to the inode, which advances ctime, cause a metadata |
| + writeback (causing disk seeking). |
| + - cache ctime cannot be used to detect modifications / corruption. |
| + - Some file systems (NTFS) have a 64k limit on the number of hardlink per |
| + partition. This is why the function automatically fallbacks to copying the |
| + file content. |
| + - /proc/sys/fs/protected_hardlinks causes an additional check to ensure the |
| + same owner is for all hardlinks. |
| + - Anecdotal report that ext2 is known to be potentially faulty on high rate |
| + of hardlink creation. |
| + |
| + Creating a tree of symlinks has a few drawbacks: |
| + - Tasks running the equivalent of os.path.realpath() will get the naked path |
| + and may fail. |
| + - Windows: |
| + - Symlinks are reparse points: |
| + https://msdn.microsoft.com/library/windows/desktop/aa365460.aspx |
| + https://msdn.microsoft.com/library/windows/desktop/aa363940.aspx |
| + - Symbolic links are Win32 paths, not NT paths. |
| + https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html |
| + - Symbolic links are supported on Windows 7 and later only. |
| + - SeCreateSymbolicLinkPrivilege is needed, which is not present by |
| + default. |
| + - SeCreateSymbolicLinkPrivilege is *stripped off* by UAC when a restricted |
| + RID is present in the token; |
| + https://msdn.microsoft.com/en-us/library/bb530410.aspx |
| + """ |
| + if isinstance(srcfileobj, unicode): |
| + srcfileobj = fs.open(srcfileobj, 'rb') |
|
M-A Ruel
2016/07/28 15:14:38
You forget to close it.
mithro
2016/07/28 15:50:48
Removed.
|
| + |
| + srcpath = fileobj_path(srcfileobj) |
| + if srcpath and size == -1: |
| + readonly = file_mode is None or ( |
| + file_mode & (stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)) |
| + |
| + # Copy the file by default |
| + link_mode = file_path.COPY |
| + if readonly: |
| + # If the file is read only we can link the file |
| + if use_symlink: |
| + link_mode = file_path.SYMLINK_WITH_FALLBACK |
| + else: |
| + link_mode = file_path.HARDLINK_WITH_FALLBACK |
|
M-A Ruel
2016/07/28 15:14:38
else:
link_mode = file_path.COPY
and remove the default `link_mode = file_path.COPY` assignment above
mithro
2016/07/28 15:50:48
Done.
|
| + file_path.link_file(dstpath, srcpath, link_mode) |
| + else: |
| + # Need to write out the file |
| + with fs.open(dstpath, 'wb') as dstfileobj: |
| + fileobj_copy(dstfileobj, srcfileobj, size) |
| + |
| + assert fs.exists(dstpath) |
| + |
| + # file_mode of 0 is actually valid, so need explicit check. |
| + if file_mode is not None: |
| + fs.chmod(dstpath, file_mode) |
| + |
| + |
| def zip_compress(content_generator, level=7): |
| """Reads chunks from |content_generator| and yields zip compressed chunks.""" |
| compressor = zlib.compressobj(level) |
| @@ -1223,7 +1340,7 @@ class LocalCache(object): |
| self._initial_number_items = 0 |
| self._initial_size = 0 |
| self._evicted = [] |
| - self._linked = [] |
| + self._used = [] |
| def __contains__(self, digest): |
| raise NotImplementedError() |
| @@ -1245,6 +1362,10 @@ class LocalCache(object): |
| return self._evicted[:] |
| @property |
| + def used(self): |
| + return self._used[:] |
| + |
| + @property |
| def initial_number_items(self): |
| return self._initial_number_items |
| @@ -1252,10 +1373,6 @@ class LocalCache(object): |
| def initial_size(self): |
| return self._initial_size |
| - @property |
| - def linked(self): |
| - return self._linked[:] |
| - |
| def cached_set(self): |
| """Returns a set of all cached digests (always a new object).""" |
| raise NotImplementedError() |
| @@ -1280,8 +1397,12 @@ class LocalCache(object): |
| """Removes item from cache if it's there.""" |
| raise NotImplementedError() |
| - def read(self, digest): |
| - """Returns contents of the cached item as a single str.""" |
| + def getfileobj(self, digest): |
| + """Returns a readable file like object. |
| + |
| + If file exists on the file system it will have a .name attribute with an |
| + absolute path to the file. |
| + """ |
| raise NotImplementedError() |
| def write(self, digest, content): |
| @@ -1291,47 +1412,6 @@ class LocalCache(object): |
| """ |
| raise NotImplementedError() |
| - def link(self, digest, dest, file_mode, use_symlink): |
| - """Ensures file at |dest| has same content as cached |digest|. |
| - |
| - If file_mode is provided, it is used to set the executable bit if |
| - applicable. |
| - |
| - The function may copy the content, create a hardlink and if use_symlink is |
| - True, create a symlink if possible. |
| - |
| - Creating a tree of hardlinks has a few drawbacks: |
| - - tmpfs cannot be used for the scratch space. The tree has to be on the same |
| - partition as the cache. |
| - - involves a write to the inode, which advances ctime, cause a metadata |
| - writeback (causing disk seeking). |
| - - cache ctime cannot be used to detect modifications / corruption. |
| - - Some file systems (NTFS) have a 64k limit on the number of hardlink per |
| - partition. This is why the function automatically fallbacks to copying the |
| - file content. |
| - - /proc/sys/fs/protected_hardlinks causes an additional check to ensure the |
| - same owner is for all hardlinks. |
| - - Anecdotal report that ext2 is known to be potentially faulty on high rate |
| - of hardlink creation. |
| - |
| - Creating a tree of symlinks has a few drawbacks: |
| - - Tasks running the equivalent of os.path.realpath() will get the naked path |
| - and may fail. |
| - - Windows: |
| - - Symlinks are reparse points: |
| - https://msdn.microsoft.com/library/windows/desktop/aa365460.aspx |
| - https://msdn.microsoft.com/library/windows/desktop/aa363940.aspx |
| - - Symbolic links are Win32 paths, not NT paths. |
| - https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html |
| - - Symbolic links are supported on Windows 7 and later only. |
| - - SeCreateSymbolicLinkPrivilege is needed, which is not present by |
| - default. |
| - - SeCreateSymbolicLinkPrivilege is *stripped off* by UAC when a restricted |
| - RID is present in the token; |
| - https://msdn.microsoft.com/en-us/library/bb530410.aspx |
| - """ |
| - raise NotImplementedError() |
| - |
| class MemoryCache(LocalCache): |
| """LocalCache implementation that stores everything in memory.""" |
| @@ -1366,12 +1446,14 @@ class MemoryCache(LocalCache): |
| if v is not None: |
| self._evicted.add(v) |
| - def read(self, digest): |
| + def getfileobj(self, digest): |
| with self._lock: |
| try: |
| - return self._contents[digest] |
| + d = self._contents[digest] |
| except KeyError: |
| raise CacheMiss(digest) |
| + self._used.append(len(d)) |
| + return io.BytesIO(d) |
| def write(self, digest, content): |
| # Assemble whole stream before taking the lock. |
| @@ -1381,16 +1463,6 @@ class MemoryCache(LocalCache): |
| self._added.append(len(data)) |
| return digest |
| - def link(self, digest, dest, file_mode, use_symlink): |
| - """Since data is kept in memory, there is no filenode to hardlink/symlink. |
| - """ |
| - data = self.read(digest) |
| - file_write(dest, [data]) |
| - if file_mode is not None: |
| - fs.chmod(dest, file_mode & self._file_mode_mask) |
| - with self._lock: |
| - self._linked.append(len(data)) |
| - |
| class CachePolicies(object): |
| def __init__(self, max_cache_size, min_free_space, max_items): |
| @@ -1557,10 +1629,12 @@ class DiskCache(LocalCache): |
| self._lru.pop(digest) |
| self._delete_file(digest, UNKNOWN_FILE_SIZE) |
| - def read(self, digest): |
| + def getfileobj(self, digest): |
| try: |
| - with fs.open(self._path(digest), 'rb') as f: |
| - return f.read() |
| + f = fs.open(self._path(digest), 'rb') |
| + with self._lock: |
| + self._used.append(self._lru[digest]) |
| + return f |
| except IOError: |
| raise CacheMiss(digest) |
| @@ -1591,28 +1665,6 @@ class DiskCache(LocalCache): |
| self._add(digest, size) |
| return digest |
| - def link(self, digest, dest, file_mode, use_symlink): |
| - """Links the file to |dest|. |
| - |
| - Note that the file permission bits are on the file node, not the directory |
| - entry, so changing the access bit on any of the directory entries for the |
| - file node will affect them all. |
| - """ |
| - path = self._path(digest) |
| - mode = ( |
| - file_path.SYMLINK_WITH_FALLBACK if use_symlink |
| - else file_path.HARDLINK_WITH_FALLBACK) |
| - if not file_path.link_file(dest, path, mode): |
| - # Report to the server that it failed with more details. We'll want to |
| - # squash them all. |
| - on_error.report('Failed to link\n%s -> %s' % (path, dest)) |
| - |
| - if file_mode is not None: |
| - # Ignores all other bits. |
| - fs.chmod(dest, file_mode & 0500) |
| - with self._lock: |
| - self._linked.append(self._lru[digest]) |
| - |
| def _load(self): |
| """Loads state of the cache from json file. |
| @@ -1804,7 +1856,8 @@ class IsolatedBundle(object): |
| # Wait until some *.isolated file is fetched, parse it. |
| item_hash = fetch_queue.wait(pending) |
| item = pending.pop(item_hash) |
| - item.load(fetch_queue.cache.read(item_hash)) |
| + with fetch_queue.cache.getfileobj(item_hash) as f: |
| + item.load(f.read()) |
| # Start fetching included *.isolated files. |
| for new_child in item.children: |
| @@ -1842,8 +1895,14 @@ class IsolatedBundle(object): |
| # overridden files must not be fetched. |
| if filepath not in self.files: |
| self.files[filepath] = properties |
| + |
| + # Make sure if the isolated is read only, the mode doesn't have write |
| + # bits. |
| + if 'm' in properties and self.read_only: |
| + properties['m'] &= ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH) |
| + |
| + # Preemptively request hashed files. |
| if 'h' in properties: |
| - # Preemptively request files. |
| logging.debug('fetching %s', filepath) |
| fetch_queue.add( |
| properties['h'], properties['s'], threading_utils.PRIORITY_MED) |
| @@ -2007,12 +2066,19 @@ def fetch_isolated(isolated_hash, storage, cache, outdir, use_symlinks): |
| # Wait for any item to finish fetching to cache. |
| digest = fetch_queue.wait(remaining) |
| - # Link corresponding files to a fetched item in cache. |
| + # Create the files in the destination using item in cache as the |
| + # source. |
| for filepath, props in remaining.pop(digest): |
| - dest = os.path.join(outdir, filepath) |
| - if os.path.exists(dest): |
| - raise AlreadyExists('File %s already exists' % dest) |
| - cache.link(digest, dest, props.get('m'), use_symlinks) |
| + fullpath = os.path.join(outdir, filepath) |
| + |
| + with cache.getfileobj(digest) as srcfileobj: |
| + file_mode = props.get('m') |
| + if file_mode: |
| + # Ignore all bits apart from the user |
| + file_mode &= 0700 |
| + putfile( |
| + srcfileobj, fullpath, file_mode, |
| + use_symlink=use_symlinks) |
| # Report progress. |
| duration = time.time() - last_update |