Index: tools/telemetry/third_party/gsutil/gslib/tracker_file.py |
diff --git a/tools/telemetry/third_party/gsutil/gslib/tracker_file.py b/tools/telemetry/third_party/gsutil/gslib/tracker_file.py |
deleted file mode 100644 |
index 4fddc8a13d5d81336fdc73d7e4077e3b68cb0690..0000000000000000000000000000000000000000 |
--- a/tools/telemetry/third_party/gsutil/gslib/tracker_file.py |
+++ /dev/null |
@@ -1,318 +0,0 @@ |
-# -*- coding: utf-8 -*- |
-# Copyright 2015 Google Inc. All Rights Reserved. |
-# |
-# Licensed under the Apache License, Version 2.0 (the "License"); |
-# you may not use this file except in compliance with the License. |
-# You may obtain a copy of the License at |
-# |
-# http://www.apache.org/licenses/LICENSE-2.0 |
-# |
-# Unless required by applicable law or agreed to in writing, software |
-# distributed under the License is distributed on an "AS IS" BASIS, |
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
-# See the License for the specific language governing permissions and |
-# limitations under the License. |
-"""Helper functions for tracker file functionality.""" |
- |
-import errno |
-import hashlib |
-import os |
-import re |
- |
-from boto import config |
-from gslib.exception import CommandException |
-from gslib.util import CreateDirIfNeeded |
-from gslib.util import GetGsutilStateDir |
-from gslib.util import ResumableThreshold |
-from gslib.util import UTF8 |
- |
-# The maximum length of a file name can vary wildly between different |
-# operating systems, so we always ensure that tracker files are less |
-# than 100 characters in order to avoid any such issues. |
-MAX_TRACKER_FILE_NAME_LENGTH = 100 |
- |
- |
-TRACKER_FILE_UNWRITABLE_EXCEPTION_TEXT = ( |
- 'Couldn\'t write tracker file (%s): %s. This can happen if gsutil is ' |
- 'configured to save tracker files to an unwritable directory)') |
- |
- |
-class TrackerFileType(object): |
- UPLOAD = 'upload' |
- DOWNLOAD = 'download' |
- PARALLEL_UPLOAD = 'parallel_upload' |
- REWRITE = 'rewrite' |
- |
- |
-def _HashFilename(filename): |
- """Apply a hash function (SHA1) to shorten the passed file name. |
- |
- The spec for the hashed file name is as follows: |
- |
- TRACKER_<hash>_<trailing> |
- |
- where hash is a SHA1 hash on the original file name and trailing is |
- the last 16 chars from the original file name. Max file name lengths |
- vary by operating system so the goal of this function is to ensure |
- the hashed version takes fewer than 100 characters. |
- |
- Args: |
- filename: file name to be hashed. |
- |
- Returns: |
- shorter, hashed version of passed file name |
- """ |
- if isinstance(filename, unicode): |
- filename = filename.encode(UTF8) |
- else: |
- filename = unicode(filename, UTF8).encode(UTF8) |
- m = hashlib.sha1(filename) |
- return 'TRACKER_' + m.hexdigest() + '.' + filename[-16:] |
- |
- |
-def CreateTrackerDirIfNeeded(): |
- """Looks up or creates the gsutil tracker file directory. |
- |
- This is the configured directory where gsutil keeps its resumable transfer |
- tracker files. This function creates it if it doesn't already exist. |
- |
- Returns: |
- The pathname to the tracker directory. |
- """ |
- tracker_dir = config.get( |
- 'GSUtil', 'resumable_tracker_dir', |
- os.path.join(GetGsutilStateDir(), 'tracker-files')) |
- CreateDirIfNeeded(tracker_dir) |
- return tracker_dir |
- |
- |
-def GetRewriteTrackerFilePath(src_bucket_name, src_obj_name, dst_bucket_name, |
- dst_obj_name, api_selector): |
- """Gets the tracker file name described by the arguments. |
- |
- Args: |
- src_bucket_name: Source bucket (string). |
- src_obj_name: Source object (string). |
- dst_bucket_name: Destination bucket (string). |
- dst_obj_name: Destination object (string) |
- api_selector: API to use for this operation. |
- |
- Returns: |
- File path to tracker file. |
- """ |
- # Encode the src and dest bucket and object names into the tracker file |
- # name. |
- res_tracker_file_name = ( |
- re.sub('[/\\\\]', '_', 'rewrite__%s__%s__%s__%s__%s.token' % |
- (src_bucket_name, src_obj_name, dst_bucket_name, |
- dst_obj_name, api_selector))) |
- |
- return _HashAndReturnPath(res_tracker_file_name, TrackerFileType.REWRITE) |
- |
- |
-def GetTrackerFilePath(dst_url, tracker_file_type, api_selector, src_url=None): |
- """Gets the tracker file name described by the arguments. |
- |
- Args: |
- dst_url: Destination URL for tracker file. |
- tracker_file_type: TrackerFileType for this operation. |
- api_selector: API to use for this operation. |
- src_url: Source URL for the source file name for parallel uploads. |
- |
- Returns: |
- File path to tracker file. |
- """ |
- if tracker_file_type == TrackerFileType.UPLOAD: |
- # Encode the dest bucket and object name into the tracker file name. |
- res_tracker_file_name = ( |
- re.sub('[/\\\\]', '_', 'resumable_upload__%s__%s__%s.url' % |
- (dst_url.bucket_name, dst_url.object_name, api_selector))) |
- elif tracker_file_type == TrackerFileType.DOWNLOAD: |
- # Encode the fully-qualified dest file name into the tracker file name. |
- res_tracker_file_name = ( |
- re.sub('[/\\\\]', '_', 'resumable_download__%s__%s.etag' % |
- (os.path.realpath(dst_url.object_name), api_selector))) |
- elif tracker_file_type == TrackerFileType.PARALLEL_UPLOAD: |
- # Encode the dest bucket and object names as well as the source file name |
- # into the tracker file name. |
- res_tracker_file_name = ( |
- re.sub('[/\\\\]', '_', 'parallel_upload__%s__%s__%s__%s.url' % |
- (dst_url.bucket_name, dst_url.object_name, |
- src_url, api_selector))) |
- elif tracker_file_type == TrackerFileType.REWRITE: |
- # Should use GetRewriteTrackerFilePath instead. |
- raise NotImplementedError() |
- |
- return _HashAndReturnPath(res_tracker_file_name, tracker_file_type) |
- |
- |
-def _HashAndReturnPath(res_tracker_file_name, tracker_file_type): |
- resumable_tracker_dir = CreateTrackerDirIfNeeded() |
- hashed_tracker_file_name = _HashFilename(res_tracker_file_name) |
- tracker_file_name = '%s_%s' % (str(tracker_file_type).lower(), |
- hashed_tracker_file_name) |
- tracker_file_path = '%s%s%s' % (resumable_tracker_dir, os.sep, |
- tracker_file_name) |
- assert len(tracker_file_name) < MAX_TRACKER_FILE_NAME_LENGTH |
- return tracker_file_path |
- |
- |
-def DeleteTrackerFile(tracker_file_name): |
- if tracker_file_name and os.path.exists(tracker_file_name): |
- os.unlink(tracker_file_name) |
- |
- |
-def HashRewriteParameters( |
- src_obj_metadata, dst_obj_metadata, projection, src_generation=None, |
- gen_match=None, meta_gen_match=None, canned_acl=None, fields=None, |
- max_bytes_per_call=None): |
- """Creates an MD5 hex digest of the parameters for a rewrite call. |
- |
- Resuming rewrites requires that the input parameters are identical. Thus, |
- the rewrite tracker file needs to represent the input parameters. For |
- easy comparison, hash the input values. If a user does a performs a |
- same-source/same-destination rewrite via a different command (for example, |
- with a changed ACL), the hashes will not match and we will restart the |
- rewrite from the beginning. |
- |
- Args: |
- src_obj_metadata: apitools Object describing source object. Must include |
- bucket, name, and etag. |
- dst_obj_metadata: apitools Object describing destination object. Must |
- include bucket and object name |
- projection: Projection used for the API call. |
- src_generation: Optional source generation. |
- gen_match: Optional generation precondition. |
- meta_gen_match: Optional metageneration precondition. |
- canned_acl: Optional canned ACL string. |
- fields: Optional fields to include in response. |
- max_bytes_per_call: Optional maximum bytes rewritten per call. |
- |
- Returns: |
- MD5 hex digest Hash of the input parameters, or None if required parameters |
- are missing. |
- """ |
- if (not src_obj_metadata or |
- not src_obj_metadata.bucket or |
- not src_obj_metadata.name or |
- not src_obj_metadata.etag or |
- not dst_obj_metadata or |
- not dst_obj_metadata.bucket or |
- not dst_obj_metadata.name or |
- not projection): |
- return |
- md5_hash = hashlib.md5() |
- for input_param in ( |
- src_obj_metadata, dst_obj_metadata, projection, src_generation, |
- gen_match, meta_gen_match, canned_acl, fields, max_bytes_per_call): |
- md5_hash.update(str(input_param)) |
- return md5_hash.hexdigest() |
- |
- |
-def ReadRewriteTrackerFile(tracker_file_name, rewrite_params_hash): |
- """Attempts to read a rewrite tracker file. |
- |
- Args: |
- tracker_file_name: Tracker file path string. |
- rewrite_params_hash: MD5 hex digest of rewrite call parameters constructed |
- by HashRewriteParameters. |
- |
- Returns: |
- String rewrite_token for resuming rewrite requests if a matching tracker |
- file exists, None otherwise (which will result in starting a new rewrite). |
- """ |
- # Check to see if we already have a matching tracker file. |
- tracker_file = None |
- if not rewrite_params_hash: |
- return |
- try: |
- tracker_file = open(tracker_file_name, 'r') |
- existing_hash = tracker_file.readline().rstrip('\n') |
- if existing_hash == rewrite_params_hash: |
- # Next line is the rewrite token. |
- return tracker_file.readline().rstrip('\n') |
- except IOError as e: |
- # Ignore non-existent file (happens first time a rewrite is attempted. |
- if e.errno != errno.ENOENT: |
- print('Couldn\'t read Copy tracker file (%s): %s. Restarting copy ' |
- 'from scratch.' % |
- (tracker_file_name, e.strerror)) |
- finally: |
- if tracker_file: |
- tracker_file.close() |
- |
- |
-def WriteRewriteTrackerFile(tracker_file_name, rewrite_params_hash, |
- rewrite_token): |
- """Writes a rewrite tracker file. |
- |
- Args: |
- tracker_file_name: Tracker file path string. |
- rewrite_params_hash: MD5 hex digest of rewrite call parameters constructed |
- by HashRewriteParameters. |
- rewrite_token: Rewrite token string returned by the service. |
- """ |
- _WriteTrackerFile(tracker_file_name, '%s\n%s\n' % (rewrite_params_hash, |
- rewrite_token)) |
- |
- |
-def ReadOrCreateDownloadTrackerFile(src_obj_metadata, dst_url, |
- api_selector): |
- """Checks for a download tracker file and creates one if it does not exist. |
- |
- Args: |
- src_obj_metadata: Metadata for the source object. Must include |
- etag and size. |
- dst_url: Destination file StorageUrl. |
- api_selector: API mode to use (for tracker file naming). |
- |
- Returns: |
- True if the tracker file already exists (resume existing download), |
- False if we created a new tracker file (new download). |
- """ |
- if src_obj_metadata.size < ResumableThreshold(): |
- # Don't create a tracker file for a small downloads; cross-process resumes |
- # won't work, but restarting a small download is inexpensive. |
- return False |
- |
- assert src_obj_metadata.etag |
- tracker_file_name = GetTrackerFilePath( |
- dst_url, TrackerFileType.DOWNLOAD, api_selector) |
- tracker_file = None |
- |
- # Check to see if we already have a matching tracker file. |
- try: |
- tracker_file = open(tracker_file_name, 'r') |
- etag_value = tracker_file.readline().rstrip('\n') |
- if etag_value == src_obj_metadata.etag: |
- return True |
- except IOError as e: |
- # Ignore non-existent file (happens first time a download |
- # is attempted on an object), but warn user for other errors. |
- if e.errno != errno.ENOENT: |
- print('Couldn\'t read URL tracker file (%s): %s. Restarting ' |
- 'download from scratch.' % |
- (tracker_file_name, e.strerror)) |
- finally: |
- if tracker_file: |
- tracker_file.close() |
- |
- # Otherwise, create a new tracker file and start from scratch. |
- _WriteTrackerFile(tracker_file_name, '%s\n' % src_obj_metadata.etag) |
- |
- |
-def _WriteTrackerFile(tracker_file_name, data): |
- """Creates a tracker file, storing the input data.""" |
- try: |
- with os.fdopen(os.open(tracker_file_name, |
- os.O_WRONLY | os.O_CREAT, 0600), 'w') as tf: |
- tf.write(data) |
- return False |
- except (IOError, OSError) as e: |
- raise RaiseUnwritableTrackerFileException(tracker_file_name, e.strerror) |
- |
- |
-def RaiseUnwritableTrackerFileException(tracker_file_name, error_str): |
- """Raises an exception when unable to write the tracker file.""" |
- raise CommandException(TRACKER_FILE_UNWRITABLE_EXCEPTION_TEXT % |
- (tracker_file_name, error_str)) |