Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(277)

Unified Diff: client/isolateserver.py

Issue 2186263002: luci-py: Refactor file writing code to allow file objects. (Closed) Base URL: https://github.com/luci/luci-py.git@master
Patch Set: Rebase Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « client/cipd.py ('k') | client/run_isolated.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: client/isolateserver.py
diff --git a/client/isolateserver.py b/client/isolateserver.py
index 3d56b9688be80eb6b4afbbf8c99a880f74b7b4d4..00374babb1b50e94e5b12612767091e902674c9a 100755
--- a/client/isolateserver.py
+++ b/client/isolateserver.py
@@ -5,16 +5,18 @@
"""Archives a set of files or directories to an Isolate Server."""
-__version__ = '0.5.1'
+__version__ = '0.6.0'
import base64
-import functools
import errno
+import functools
+import io
import logging
import optparse
import os
import re
import signal
+import stat
import sys
import tempfile
import threading
@@ -149,6 +151,120 @@ def file_write(path, content_generator):
return total
+def fileobj_path(fileobj):
+ """Return file system path for file like object or None.
+
+ The returned path is guaranteed to exist and can be passed to file system
+ operations like copy.
+ """
+ name = getattr(fileobj, 'name', None)
+ if name is None:
+ return
+
+ # If the file like object was created using something like open("test.txt")
+ # name will end up being a str (such as a function outside our control, like
+ # the standard library). We want all our paths to be unicode objects, so we
+ # decode it.
+ if not isinstance(name, unicode):
+ name = name.decode(sys.getfilesystemencoding())
+
+ if fs.exists(name):
+ return name
+
+
+# TODO(tansell): Replace fileobj_copy with shutil.copyfileobj once proper file
+# wrappers have been created.
+def fileobj_copy(
+ dstfileobj, srcfileobj, size=-1,
+ chunk_size=isolated_format.DISK_FILE_CHUNK):
+ """Copy data from srcfileobj to dstfileobj.
+
+ Providing size means exactly that amount of data will be copied (if there
+ isn't enough data, an IOError exception is thrown). Otherwise all data until
+ the EOF marker will be copied.
+ """
+ if size == -1 and hasattr(srcfileobj, 'tell'):
+ if srcfileobj.tell() != 0:
+ raise IOError('partial file but not using size')
+
+ written = 0
+ while written != size:
+ readsize = chunk_size
+ if size > 0:
+ readsize = min(readsize, size-written)
+ data = srcfileobj.read(readsize)
+ if not data:
+ if size == -1:
+ break
+ raise IOError('partial file, got %s, wanted %s' % (written, size))
+ dstfileobj.write(data)
+ written += len(data)
+
+
+def putfile(srcfileobj, dstpath, file_mode=None, size=-1, use_symlink=False):
+ """Put srcfileobj at the given dstpath with given mode.
+
+ The function aims to do this as efficiently as possible while still allowing
+ any possible file like object be given.
+
+ Creating a tree of hardlinks has a few drawbacks:
+ - tmpfs cannot be used for the scratch space. The tree has to be on the same
+ partition as the cache.
+ - involves a write to the inode, which advances ctime, cause a metadata
+ writeback (causing disk seeking).
+ - cache ctime cannot be used to detect modifications / corruption.
+ - Some file systems (NTFS) have a 64k limit on the number of hardlink per
+ partition. This is why the function automatically fallbacks to copying the
+ file content.
+ - /proc/sys/fs/protected_hardlinks causes an additional check to ensure the
+ same owner is for all hardlinks.
+ - Anecdotal report that ext2 is known to be potentially faulty on high rate
+ of hardlink creation.
+
+ Creating a tree of symlinks has a few drawbacks:
+ - Tasks running the equivalent of os.path.realpath() will get the naked path
+ and may fail.
+ - Windows:
+ - Symlinks are reparse points:
+ https://msdn.microsoft.com/library/windows/desktop/aa365460.aspx
+ https://msdn.microsoft.com/library/windows/desktop/aa363940.aspx
+ - Symbolic links are Win32 paths, not NT paths.
+ https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
+ - Symbolic links are supported on Windows 7 and later only.
+ - SeCreateSymbolicLinkPrivilege is needed, which is not present by
+ default.
+ - SeCreateSymbolicLinkPrivilege is *stripped off* by UAC when a restricted
+ RID is present in the token;
+ https://msdn.microsoft.com/en-us/library/bb530410.aspx
+ """
+ srcpath = fileobj_path(srcfileobj)
+ if srcpath and size == -1:
+ readonly = file_mode is None or (
+ file_mode & (stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH))
+
+ if readonly:
+ # If the file is read only we can link the file
+ if use_symlink:
+ link_mode = file_path.SYMLINK_WITH_FALLBACK
+ else:
+ link_mode = file_path.HARDLINK_WITH_FALLBACK
+ else:
+ # If not read only, we must copy the file
+ link_mode = file_path.COPY
+
+ file_path.link_file(dstpath, srcpath, link_mode)
+ else:
+ # Need to write out the file
+ with fs.open(dstpath, 'wb') as dstfileobj:
+ fileobj_copy(dstfileobj, srcfileobj, size)
+
+ assert fs.exists(dstpath)
+
+ # file_mode of 0 is actually valid, so need explicit check.
+ if file_mode is not None:
+ fs.chmod(dstpath, file_mode)
+
+
def zip_compress(content_generator, level=7):
"""Reads chunks from |content_generator| and yields zip compressed chunks."""
compressor = zlib.compressobj(level)
@@ -1223,7 +1339,7 @@ class LocalCache(object):
self._initial_number_items = 0
self._initial_size = 0
self._evicted = []
- self._linked = []
+ self._used = []
def __contains__(self, digest):
raise NotImplementedError()
@@ -1245,6 +1361,10 @@ class LocalCache(object):
return self._evicted[:]
@property
+ def used(self):
+ return self._used[:]
+
+ @property
def initial_number_items(self):
return self._initial_number_items
@@ -1252,10 +1372,6 @@ class LocalCache(object):
def initial_size(self):
return self._initial_size
- @property
- def linked(self):
- return self._linked[:]
-
def cached_set(self):
"""Returns a set of all cached digests (always a new object)."""
raise NotImplementedError()
@@ -1280,8 +1396,12 @@ class LocalCache(object):
"""Removes item from cache if it's there."""
raise NotImplementedError()
- def read(self, digest):
- """Returns contents of the cached item as a single str."""
+ def getfileobj(self, digest):
+ """Returns a readable file like object.
+
+ If file exists on the file system it will have a .name attribute with an
+ absolute path to the file.
+ """
raise NotImplementedError()
def write(self, digest, content):
@@ -1291,47 +1411,6 @@ class LocalCache(object):
"""
raise NotImplementedError()
- def link(self, digest, dest, file_mode, use_symlink):
- """Ensures file at |dest| has same content as cached |digest|.
-
- If file_mode is provided, it is used to set the executable bit if
- applicable.
-
- The function may copy the content, create a hardlink and if use_symlink is
- True, create a symlink if possible.
-
- Creating a tree of hardlinks has a few drawbacks:
- - tmpfs cannot be used for the scratch space. The tree has to be on the same
- partition as the cache.
- - involves a write to the inode, which advances ctime, cause a metadata
- writeback (causing disk seeking).
- - cache ctime cannot be used to detect modifications / corruption.
- - Some file systems (NTFS) have a 64k limit on the number of hardlink per
- partition. This is why the function automatically fallbacks to copying the
- file content.
- - /proc/sys/fs/protected_hardlinks causes an additional check to ensure the
- same owner is for all hardlinks.
- - Anecdotal report that ext2 is known to be potentially faulty on high rate
- of hardlink creation.
-
- Creating a tree of symlinks has a few drawbacks:
- - Tasks running the equivalent of os.path.realpath() will get the naked path
- and may fail.
- - Windows:
- - Symlinks are reparse points:
- https://msdn.microsoft.com/library/windows/desktop/aa365460.aspx
- https://msdn.microsoft.com/library/windows/desktop/aa363940.aspx
- - Symbolic links are Win32 paths, not NT paths.
- https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
- - Symbolic links are supported on Windows 7 and later only.
- - SeCreateSymbolicLinkPrivilege is needed, which is not present by
- default.
- - SeCreateSymbolicLinkPrivilege is *stripped off* by UAC when a restricted
- RID is present in the token;
- https://msdn.microsoft.com/en-us/library/bb530410.aspx
- """
- raise NotImplementedError()
-
class MemoryCache(LocalCache):
"""LocalCache implementation that stores everything in memory."""
@@ -1366,12 +1445,14 @@ class MemoryCache(LocalCache):
if v is not None:
self._evicted.add(v)
- def read(self, digest):
+ def getfileobj(self, digest):
with self._lock:
try:
- return self._contents[digest]
+ d = self._contents[digest]
except KeyError:
raise CacheMiss(digest)
+ self._used.append(len(d))
+ return io.BytesIO(d)
def write(self, digest, content):
# Assemble whole stream before taking the lock.
@@ -1381,16 +1462,6 @@ class MemoryCache(LocalCache):
self._added.append(len(data))
return digest
- def link(self, digest, dest, file_mode, use_symlink):
- """Since data is kept in memory, there is no filenode to hardlink/symlink.
- """
- data = self.read(digest)
- file_write(dest, [data])
- if file_mode is not None:
- fs.chmod(dest, file_mode & self._file_mode_mask)
- with self._lock:
- self._linked.append(len(data))
-
class CachePolicies(object):
def __init__(self, max_cache_size, min_free_space, max_items):
@@ -1557,10 +1628,12 @@ class DiskCache(LocalCache):
self._lru.pop(digest)
self._delete_file(digest, UNKNOWN_FILE_SIZE)
- def read(self, digest):
+ def getfileobj(self, digest):
try:
- with fs.open(self._path(digest), 'rb') as f:
- return f.read()
+ f = fs.open(self._path(digest), 'rb')
+ with self._lock:
+ self._used.append(self._lru[digest])
+ return f
except IOError:
raise CacheMiss(digest)
@@ -1591,28 +1664,6 @@ class DiskCache(LocalCache):
self._add(digest, size)
return digest
- def link(self, digest, dest, file_mode, use_symlink):
- """Links the file to |dest|.
-
- Note that the file permission bits are on the file node, not the directory
- entry, so changing the access bit on any of the directory entries for the
- file node will affect them all.
- """
- path = self._path(digest)
- mode = (
- file_path.SYMLINK_WITH_FALLBACK if use_symlink
- else file_path.HARDLINK_WITH_FALLBACK)
- if not file_path.link_file(dest, path, mode):
- # Report to the server that it failed with more details. We'll want to
- # squash them all.
- on_error.report('Failed to link\n%s -> %s' % (path, dest))
-
- if file_mode is not None:
- # Ignores all other bits.
- fs.chmod(dest, file_mode & 0500)
- with self._lock:
- self._linked.append(self._lru[digest])
-
def _load(self):
"""Loads state of the cache from json file.
@@ -1804,7 +1855,8 @@ class IsolatedBundle(object):
# Wait until some *.isolated file is fetched, parse it.
item_hash = fetch_queue.wait(pending)
item = pending.pop(item_hash)
- item.load(fetch_queue.cache.read(item_hash))
+ with fetch_queue.cache.getfileobj(item_hash) as f:
+ item.load(f.read())
# Start fetching included *.isolated files.
for new_child in item.children:
@@ -1842,8 +1894,14 @@ class IsolatedBundle(object):
# overridden files must not be fetched.
if filepath not in self.files:
self.files[filepath] = properties
+
+ # Make sure if the isolated is read only, the mode doesn't have write
+ # bits.
+ if 'm' in properties and self.read_only:
+ properties['m'] &= ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)
+
+ # Preemptively request hashed files.
if 'h' in properties:
- # Preemptively request files.
logging.debug('fetching %s', filepath)
fetch_queue.add(
properties['h'], properties['s'], threading_utils.PRIORITY_MED)
@@ -2007,12 +2065,19 @@ def fetch_isolated(isolated_hash, storage, cache, outdir, use_symlinks):
# Wait for any item to finish fetching to cache.
digest = fetch_queue.wait(remaining)
- # Link corresponding files to a fetched item in cache.
+ # Create the files in the destination using item in cache as the
+ # source.
for filepath, props in remaining.pop(digest):
- dest = os.path.join(outdir, filepath)
- if os.path.exists(dest):
- raise AlreadyExists('File %s already exists' % dest)
- cache.link(digest, dest, props.get('m'), use_symlinks)
+ fullpath = os.path.join(outdir, filepath)
+
+ with cache.getfileobj(digest) as srcfileobj:
+ file_mode = props.get('m')
+ if file_mode:
+ # Ignore all bits apart from the user
+ file_mode &= 0700
+ putfile(
+ srcfileobj, fullpath, file_mode,
+ use_symlink=use_symlinks)
# Report progress.
duration = time.time() - last_update
« no previous file with comments | « client/cipd.py ('k') | client/run_isolated.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698