| Index: tools/telemetry/third_party/gsutilz/gslib/util.py
|
| diff --git a/tools/telemetry/third_party/gsutilz/gslib/util.py b/tools/telemetry/third_party/gsutilz/gslib/util.py
|
| deleted file mode 100644
|
| index ece5112c34252b958b1bb5020541869c26401ff5..0000000000000000000000000000000000000000
|
| --- a/tools/telemetry/third_party/gsutilz/gslib/util.py
|
| +++ /dev/null
|
| @@ -1,1100 +0,0 @@
|
| -# -*- coding: utf-8 -*-
|
| -# Copyright 2010 Google Inc. All Rights Reserved.
|
| -#
|
| -# Licensed under the Apache License, Version 2.0 (the "License");
|
| -# you may not use this file except in compliance with the License.
|
| -# You may obtain a copy of the License at
|
| -#
|
| -# http://www.apache.org/licenses/LICENSE-2.0
|
| -#
|
| -# Unless required by applicable law or agreed to in writing, software
|
| -# distributed under the License is distributed on an "AS IS" BASIS,
|
| -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -# See the License for the specific language governing permissions and
|
| -# limitations under the License.
|
| -"""Static data and helper functions."""
|
| -
|
| -from __future__ import absolute_import
|
| -
|
| -import errno
|
| -import logging
|
| -import math
|
| -import multiprocessing
|
| -import os
|
| -import pkgutil
|
| -import re
|
| -import struct
|
| -import sys
|
| -import tempfile
|
| -import textwrap
|
| -import threading
|
| -import traceback
|
| -import xml.etree.ElementTree as ElementTree
|
| -
|
| -import boto
|
| -from boto import config
|
| -import boto.auth
|
| -from boto.exception import NoAuthHandlerFound
|
| -from boto.gs.connection import GSConnection
|
| -from boto.provider import Provider
|
| -from boto.pyami.config import BotoConfigLocations
|
| -import httplib2
|
| -from oauth2client.client import HAS_CRYPTO
|
| -from retry_decorator import retry_decorator
|
| -
|
| -import gslib
|
| -from gslib.exception import CommandException
|
| -from gslib.storage_url import StorageUrlFromString
|
| -from gslib.translation_helper import AclTranslation
|
| -from gslib.translation_helper import GenerationFromUrlAndString
|
| -from gslib.translation_helper import S3_ACL_MARKER_GUID
|
| -from gslib.translation_helper import S3_DELETE_MARKER_GUID
|
| -from gslib.translation_helper import S3_MARKER_GUIDS
|
| -
|
| -# pylint: disable=g-import-not-at-top
|
| -try:
|
| - # This module doesn't necessarily exist on Windows.
|
| - import resource
|
| - HAS_RESOURCE_MODULE = True
|
| -except ImportError, e:
|
| - HAS_RESOURCE_MODULE = False
|
| -
|
| -ONE_KIB = 1024
|
| -ONE_MIB = 1024 * 1024
|
| -TWO_MIB = 2 * ONE_MIB
|
| -EIGHT_MIB = 8 * ONE_MIB
|
| -TEN_MIB = 10 * ONE_MIB
|
| -DEFAULT_FILE_BUFFER_SIZE = 8 * ONE_KIB
|
| -_DEFAULT_LINES = 25
|
| -
|
| -# By default, the timeout for SSL read errors is infinite. This could
|
| -# cause gsutil to hang on network disconnect, so pick a more reasonable
|
| -# timeout.
|
| -SSL_TIMEOUT = 60
|
| -
|
| -# Start with a progress callback every 64 KiB during uploads/downloads (JSON
|
| -# API). Callback implementation should back off until it hits the maximum size
|
| -# so that callbacks do not create huge amounts of log output.
|
| -START_CALLBACK_PER_BYTES = 1024*64
|
| -MAX_CALLBACK_PER_BYTES = 1024*1024*100
|
| -
|
| -# Upload/download files in 8 KiB chunks over the HTTP connection.
|
| -TRANSFER_BUFFER_SIZE = 1024*8
|
| -
|
| -# Default number of progress callbacks during transfer (XML API).
|
| -XML_PROGRESS_CALLBACKS = 10
|
| -
|
| -# For files >= this size, output a message indicating that we're running an
|
| -# operation on the file (like hashing or gzipping) so it does not appear to the
|
| -# user that the command is hanging.
|
| -MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024 # 100 MiB
|
| -
|
| -NO_MAX = sys.maxint
|
| -
|
| -UTF8 = 'utf-8'
|
| -
|
| -VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')
|
| -
|
| -RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'
|
| -
|
| -# Binary exponentiation strings.
|
| -_EXP_STRINGS = [
|
| - (0, 'B', 'bit'),
|
| - (10, 'KiB', 'Kibit', 'K'),
|
| - (20, 'MiB', 'Mibit', 'M'),
|
| - (30, 'GiB', 'Gibit', 'G'),
|
| - (40, 'TiB', 'Tibit', 'T'),
|
| - (50, 'PiB', 'Pibit', 'P'),
|
| - (60, 'EiB', 'Eibit', 'E'),
|
| -]
|
| -
|
| -
|
| -global manager # pylint: disable=global-at-module-level
|
| -certs_file_lock = threading.Lock()
|
| -configured_certs_files = []
|
| -
|
| -
|
| -def InitializeMultiprocessingVariables():
|
| - """Perform necessary initialization for multiprocessing.
|
| -
|
| - See gslib.command.InitializeMultiprocessingVariables for an explanation
|
| - of why this is necessary.
|
| - """
|
| - global manager # pylint: disable=global-variable-undefined
|
| - manager = multiprocessing.Manager()
|
| -
|
| -
|
| -def _GenerateSuffixRegex():
|
| - """Creates a suffix regex for human-readable byte counts."""
|
| - human_bytes_re = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
|
| - suffixes = []
|
| - suffix_to_si = {}
|
| - for i, si in enumerate(_EXP_STRINGS):
|
| - si_suffixes = [s.lower() for s in list(si)[1:]]
|
| - for suffix in si_suffixes:
|
| - suffix_to_si[suffix] = i
|
| - suffixes.extend(si_suffixes)
|
| - human_bytes_re %= '|'.join(suffixes)
|
| - matcher = re.compile(human_bytes_re)
|
| - return suffix_to_si, matcher
|
| -
|
| -SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex()
|
| -
|
| -SECONDS_PER_DAY = 3600 * 24
|
| -
|
| -# Detect platform types.
|
| -PLATFORM = str(sys.platform).lower()
|
| -IS_WINDOWS = 'win32' in PLATFORM
|
| -IS_CYGWIN = 'cygwin' in PLATFORM
|
| -IS_LINUX = 'linux' in PLATFORM
|
| -IS_OSX = 'darwin' in PLATFORM
|
| -
|
| -# On Unix-like systems, we will set the maximum number of open files to avoid
|
| -# hitting the limit imposed by the OS. This number was obtained experimentally.
|
| -MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000
|
| -
|
| -GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'
|
| -
|
| -Retry = retry_decorator.retry # pylint: disable=invalid-name
|
| -
|
| -# Cache the values from this check such that they're available to all callers
|
| -# without needing to run all the checks again (some of these, such as calling
|
| -# multiprocessing.Manager(), are expensive operations).
|
| -cached_multiprocessing_is_available = None
|
| -cached_multiprocessing_check_stack_trace = None
|
| -cached_multiprocessing_is_available_message = None
|
| -
|
| -
|
| -# Enum class for specifying listing style.
|
| -class ListingStyle(object):
|
| - SHORT = 'SHORT'
|
| - LONG = 'LONG'
|
| - LONG_LONG = 'LONG_LONG'
|
| -
|
| -
|
| -def UsingCrcmodExtension(crcmod):
|
| - return (getattr(crcmod, 'crcmod', None) and
|
| - getattr(crcmod.crcmod, '_usingExtension', None))
|
| -
|
| -
|
| -def CreateDirIfNeeded(dir_path, mode=0777):
|
| - """Creates a directory, suppressing already-exists errors."""
|
| - if not os.path.exists(dir_path):
|
| - try:
|
| - # Unfortunately, even though we catch and ignore EEXIST, this call will
|
| - # output a (needless) error message (no way to avoid that in Python).
|
| - os.makedirs(dir_path, mode)
|
| - # Ignore 'already exists' in case user tried to start up several
|
| - # resumable uploads concurrently from a machine where no tracker dir had
|
| - # yet been created.
|
| - except OSError as e:
|
| - if e.errno != errno.EEXIST:
|
| - raise
|
| -
|
| -
|
| -def GetGsutilStateDir():
|
| - """Returns the location of the directory for gsutil state files.
|
| -
|
| - Certain operations, such as cross-process credential sharing and
|
| - resumable transfer tracking, need a known location for state files which
|
| - are created by gsutil as-needed.
|
| -
|
| - This location should only be used for storing data that is required to be in
|
| - a static location.
|
| -
|
| - Returns:
|
| - Path to directory for gsutil static state files.
|
| - """
|
| - config_file_dir = config.get(
|
| - 'GSUtil', 'state_dir',
|
| - os.path.expanduser(os.path.join('~', '.gsutil')))
|
| - CreateDirIfNeeded(config_file_dir)
|
| - return config_file_dir
|
| -
|
| -
|
| -def GetCredentialStoreFilename():
|
| - return os.path.join(GetGsutilStateDir(), 'credstore')
|
| -
|
| -
|
| -def GetGceCredentialCacheFilename():
|
| - return os.path.join(GetGsutilStateDir(), 'gcecredcache')
|
| -
|
| -
|
| -def GetTabCompletionLogFilename():
|
| - return os.path.join(GetGsutilStateDir(), 'tab-completion-logs')
|
| -
|
| -
|
| -def GetTabCompletionCacheFilename():
|
| - tab_completion_dir = os.path.join(GetGsutilStateDir(), 'tab-completion')
|
| - # Limit read permissions on the directory to owner for privacy.
|
| - CreateDirIfNeeded(tab_completion_dir, mode=0700)
|
| - return os.path.join(tab_completion_dir, 'cache')
|
| -
|
| -
|
| -def PrintTrackerDirDeprecationWarningIfNeeded():
|
| - # TODO: Remove this along with the tracker_dir config value 1 year after
|
| - # 4.6 release date. Use state_dir instead.
|
| - if config.has_option('GSUtil', 'resumable_tracker_dir'):
|
| - sys.stderr.write('Warning: you have set resumable_tracker_dir in your '
|
| - '.boto configuration file. This configuration option is '
|
| - 'deprecated; please use the state_dir configuration '
|
| - 'option instead.\n')
|
| -
|
| -
|
| -# Name of file where we keep the timestamp for the last time we checked whether
|
| -# a new version of gsutil is available.
|
| -PrintTrackerDirDeprecationWarningIfNeeded()
|
| -CreateDirIfNeeded(GetGsutilStateDir())
|
| -LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
|
| - os.path.join(GetGsutilStateDir(), '.last_software_update_check'))
|
| -
|
| -
|
| -def HasConfiguredCredentials():
|
| - """Determines if boto credential/config file exists."""
|
| - has_goog_creds = (config.has_option('Credentials', 'gs_access_key_id') and
|
| - config.has_option('Credentials', 'gs_secret_access_key'))
|
| - has_amzn_creds = (config.has_option('Credentials', 'aws_access_key_id') and
|
| - config.has_option('Credentials', 'aws_secret_access_key'))
|
| - has_oauth_creds = (
|
| - config.has_option('Credentials', 'gs_oauth2_refresh_token'))
|
| - has_service_account_creds = (
|
| - HAS_CRYPTO and
|
| - config.has_option('Credentials', 'gs_service_client_id') and
|
| - config.has_option('Credentials', 'gs_service_key_file'))
|
| -
|
| - if (has_goog_creds or has_amzn_creds or has_oauth_creds or
|
| - has_service_account_creds):
|
| - return True
|
| -
|
| - valid_auth_handler = None
|
| - try:
|
| - valid_auth_handler = boto.auth.get_auth_handler(
|
| - GSConnection.DefaultHost, config, Provider('google'),
|
| - requested_capability=['s3'])
|
| - # Exclude the no-op auth handler as indicating credentials are configured.
|
| - # Note we can't use isinstance() here because the no-op module may not be
|
| - # imported so we can't get a reference to the class type.
|
| - if getattr(getattr(valid_auth_handler, '__class__', None),
|
| - '__name__', None) == 'NoOpAuth':
|
| - valid_auth_handler = None
|
| - except NoAuthHandlerFound:
|
| - pass
|
| -
|
| - return valid_auth_handler
|
| -
|
| -
|
| -def ConfigureNoOpAuthIfNeeded():
|
| - """Sets up no-op auth handler if no boto credentials are configured."""
|
| - if not HasConfiguredCredentials():
|
| - if (config.has_option('Credentials', 'gs_service_client_id')
|
| - and not HAS_CRYPTO):
|
| - if os.environ.get('CLOUDSDK_WRAPPER') == '1':
|
| - raise CommandException('\n'.join(textwrap.wrap(
|
| - 'Your gsutil is configured with an OAuth2 service account, but '
|
| - 'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. '
|
| - 'Service account authentication requires one of these libraries; '
|
| - 'please reactivate your service account via the gcloud auth '
|
| - 'command and ensure any gcloud packages necessary for '
|
| - 'service accounts are present.')))
|
| - else:
|
| - raise CommandException('\n'.join(textwrap.wrap(
|
| - 'Your gsutil is configured with an OAuth2 service account, but '
|
| - 'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. '
|
| - 'Service account authentication requires one of these libraries; '
|
| - 'please install either of them to proceed, or configure a '
|
| - 'different type of credentials with "gsutil config".')))
|
| - else:
|
| - # With no boto config file the user can still access publicly readable
|
| - # buckets and objects.
|
| - from gslib import no_op_auth_plugin # pylint: disable=unused-variable
|
| -
|
| -
|
| -def GetConfigFilePath():
|
| - config_path = 'no config found'
|
| - for path in BotoConfigLocations:
|
| - try:
|
| - with open(path, 'r'):
|
| - config_path = path
|
| - break
|
| - except IOError:
|
| - pass
|
| - return config_path
|
| -
|
| -
|
| -def GetBotoConfigFileList():
|
| - """Returns list of boto config files that exist."""
|
| - config_paths = boto.pyami.config.BotoConfigLocations
|
| - if 'AWS_CREDENTIAL_FILE' in os.environ:
|
| - config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
|
| - config_files = {}
|
| - for config_path in config_paths:
|
| - if os.path.exists(config_path):
|
| - config_files[config_path] = 1
|
| - cf_list = []
|
| - for config_file in config_files:
|
| - cf_list.append(config_file)
|
| - return cf_list
|
| -
|
| -
|
| -def GetCertsFile():
|
| - """Configures and returns the CA Certificates file.
|
| -
|
| - If one is already configured, use it. Otherwise, amend the configuration
|
| - (in boto.config) to use the cert roots distributed with gsutil.
|
| -
|
| - Returns:
|
| - string filename of the certs file to use.
|
| - """
|
| - certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
|
| - if not certs_file:
|
| - with certs_file_lock:
|
| - if configured_certs_files:
|
| - disk_certs_file = configured_certs_files[0]
|
| - else:
|
| - disk_certs_file = os.path.abspath(
|
| - os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
|
| - if not os.path.exists(disk_certs_file):
|
| - # If the file is not present on disk, this means the gslib module
|
| - # doesn't actually exist on disk anywhere. This can happen if it's
|
| - # being imported from a zip file. Unfortunately, we have to copy the
|
| - # certs file to a local temp file on disk because the underlying SSL
|
| - # socket requires it to be a filesystem path.
|
| - certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
|
| - if not certs_data:
|
| - raise CommandException('Certificates file not found. Please '
|
| - 'reinstall gsutil from scratch')
|
| - fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
|
| - f = os.fdopen(fd, 'w')
|
| - f.write(certs_data)
|
| - f.close()
|
| - configured_certs_files.append(fname)
|
| - disk_certs_file = fname
|
| - certs_file = disk_certs_file
|
| - return certs_file
|
| -
|
| -
|
| -def GetCleanupFiles():
|
| - """Returns a list of temp files to delete (if possible) when program exits."""
|
| - cleanup_files = []
|
| - if configured_certs_files:
|
| - cleanup_files += configured_certs_files
|
| - return cleanup_files
|
| -
|
| -
|
| -def ProxyInfoFromEnvironmentVar(proxy_env_var):
|
| - """Reads proxy info from the environment and converts to httplib2.ProxyInfo.
|
| -
|
| - Args:
|
| - proxy_env_var: Environment variable string to read, such as http_proxy or
|
| - https_proxy.
|
| -
|
| - Returns:
|
| - httplib2.ProxyInfo constructed from the environment string.
|
| - """
|
| - proxy_url = os.environ.get(proxy_env_var)
|
| - if not proxy_url or not proxy_env_var.lower().startswith('http'):
|
| - return httplib2.ProxyInfo(httplib2.socks.PROXY_TYPE_HTTP, None, 0)
|
| - proxy_protocol = proxy_env_var.lower().split('_')[0]
|
| - if not proxy_url.lower().startswith('http'):
|
| - # proxy_info_from_url requires a protocol, which is always http or https.
|
| - proxy_url = proxy_protocol + '://' + proxy_url
|
| - return httplib2.proxy_info_from_url(proxy_url, method=proxy_protocol)
|
| -
|
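| -# For example, given a hypothetical host set without a protocol, e.g.
|
| -#   os.environ['http_proxy'] = 'proxy.example.com:3128'
|
| -# ProxyInfoFromEnvironmentVar('http_proxy') behaves as if the value were
|
| -# 'http://proxy.example.com:3128', taking the protocol from the variable name.
|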
| -
|
| -def GetNewHttp(http_class=httplib2.Http, **kwargs):
|
| - """Creates and returns a new httplib2.Http instance.
|
| -
|
| - Args:
|
| - http_class: Optional custom Http class to use.
|
| - **kwargs: Arguments to pass to http_class constructor.
|
| -
|
| - Returns:
|
| - An initialized httplib2.Http instance.
|
| - """
|
| - proxy_info = httplib2.ProxyInfo(
|
| - proxy_type=3,
|
| - proxy_host=boto.config.get('Boto', 'proxy', None),
|
| - proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
|
| - proxy_user=boto.config.get('Boto', 'proxy_user', None),
|
| - proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
|
| - proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False))
|
| -
|
| - if not (proxy_info.proxy_host and proxy_info.proxy_port):
|
| - # Fall back to using the environment variable.
|
| - for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']:
|
| - if proxy_env_var in os.environ and os.environ[proxy_env_var]:
|
| - proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var)
|
| - # Assume proxy_rdns is True if a proxy environment variable exists.
|
| - proxy_info.proxy_rdns = boto.config.get('Boto', 'proxy_rdns', True)
|
| - break
|
| -
|
| - # Some installers don't package a certs file with httplib2, so use the
|
| - # one included with gsutil.
|
| - kwargs['ca_certs'] = GetCertsFile()
|
| - # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
|
| - kwargs['timeout'] = SSL_TIMEOUT
|
| - http = http_class(proxy_info=proxy_info, **kwargs)
|
| - http.disable_ssl_certificate_validation = (not config.getbool(
|
| - 'Boto', 'https_validate_certificates'))
|
| - return http
|
| -
|
| -
|
| -def GetNumRetries():
|
| - return config.getint('Boto', 'num_retries', 6)
|
| -
|
| -
|
| -def GetMaxRetryDelay():
|
| - return config.getint('Boto', 'max_retry_delay', 60)
|
| -
|
| -
|
| -# Resumable downloads and uploads make one HTTP call per chunk (and chunk
|
| -# sizes must be multiples of 256 KiB). Overridable for testing.
|
| -def GetJsonResumableChunkSize():
|
| - chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size',
|
| - 1024*1024*100L)
|
| - if chunk_size == 0:
|
| - chunk_size = 1024*256L
|
| - elif chunk_size % (1024*256L) != 0:
|
| - chunk_size += (1024*256L - (chunk_size % (1024*256L)))
|
| - return chunk_size
|
| -
|
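| -# For example, a configured json_resumable_chunk_size of 1000000 bytes is
|
| -# rounded up to the next 256 KiB multiple: 4 * 262144 = 1048576 bytes.
|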
| -
|
| -def _RoundToNearestExponent(num):
|
| - i = 0
|
| - while i+1 < len(_EXP_STRINGS) and num >= (2 ** _EXP_STRINGS[i+1][0]):
|
| - i += 1
|
| - return i, round(float(num) / 2 ** _EXP_STRINGS[i][0], 2)
|
| -
|
| -
|
| -def MakeHumanReadable(num):
|
| - """Generates human readable string for a number of bytes.
|
| -
|
| - Args:
|
| - num: The number, in bytes.
|
| -
|
| - Returns:
|
| - A string form of the number using size abbreviations (KiB, MiB, etc.).
|
| - """
|
| - i, rounded_val = _RoundToNearestExponent(num)
|
| - return '%g %s' % (rounded_val, _EXP_STRINGS[i][1])
|
| -
|
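| -# For example, MakeHumanReadable(1024) returns '1 KiB' and
|
| -# MakeHumanReadable(2 * ONE_MIB) returns '2 MiB'.
|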
| -
|
| -def MakeBitsHumanReadable(num):
|
| - """Generates human readable string for a number of bits.
|
| -
|
| - Args:
|
| - num: The number, in bits.
|
| -
|
| - Returns:
|
| - A string form of the number using bit size abbreviations (Kibit, Mibit, etc.).
|
| - """
|
| - i, rounded_val = _RoundToNearestExponent(num)
|
| - return '%g %s' % (rounded_val, _EXP_STRINGS[i][2])
|
| -
|
| -
|
| -def HumanReadableToBytes(human_string):
|
| - """Tries to convert a human-readable string to a number of bytes.
|
| -
|
| - Args:
|
| - human_string: A string supplied by user, e.g. '1M', '3 GiB'.
|
| - Returns:
|
| - An integer containing the number of bytes.
|
| - Raises:
|
| - ValueError: on an invalid string.
|
| - """
|
| - human_string = human_string.lower()
|
| - m = MATCH_HUMAN_BYTES.match(human_string)
|
| - if m:
|
| - num = float(m.group('num'))
|
| - if m.group('suffix'):
|
| - power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0]
|
| - num *= (2.0 ** power)
|
| - num = int(round(num))
|
| - return num
|
| - raise ValueError('Invalid byte string specified: %s' % human_string)
|
| -
|
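| -# Suffix matching is case-insensitive and binary; for example,
|
| -# HumanReadableToBytes('1M') returns 1048576 (2**20) and
|
| -# HumanReadableToBytes('3 GiB') returns 3221225472 (3 * 2**30).
|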
| -
|
| -def Percentile(values, percent, key=lambda x: x):
|
| - """Find the percentile of a list of values.
|
| -
|
| - Taken from: http://code.activestate.com/recipes/511478/
|
| -
|
| - Args:
|
| - values: a list of numeric values. Note that the values MUST BE already
|
| - sorted.
|
| - percent: a float value from 0.0 to 1.0.
|
| - key: optional key function to compute value from each element of the list
|
| - of values.
|
| -
|
| - Returns:
|
| - The percentile of the values.
|
| - """
|
| - if not values:
|
| - return None
|
| - k = (len(values) - 1) * percent
|
| - f = math.floor(k)
|
| - c = math.ceil(k)
|
| - if f == c:
|
| - return key(values[int(k)])
|
| - d0 = key(values[int(f)]) * (c-k)
|
| - d1 = key(values[int(c)]) * (k-f)
|
| - return d0 + d1
|
| -
|
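| -# For example, Percentile([1, 2, 3, 4], 0.5) interpolates between the two
|
| -# middle values: k = 1.5, giving 2 * 0.5 + 3 * 0.5 = 2.5.
|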
| -
|
| -def RemoveCRLFFromString(input_str):
|
| - """Returns the input string with all \\n and \\r removed."""
|
| - return re.sub(r'[\r\n]', '', input_str)
|
| -
|
| -
|
| -def UnaryDictToXml(message):
|
| - """Generates XML representation of a nested dict.
|
| -
|
| - This dict contains exactly one top-level entry and an arbitrary number of
|
| - 2nd-level entries, e.g. capturing a WebsiteConfiguration message.
|
| -
|
| - Args:
|
| - message: The dict encoding the message.
|
| -
|
| - Returns:
|
| - XML string representation of the input dict.
|
| -
|
| - Raises:
|
| - Exception: if dict contains more than one top-level entry.
|
| - """
|
| - if len(message) != 1:
|
| - raise Exception('Expected dict of size 1, got size %d' % len(message))
|
| -
|
| - name, content = message.items()[0]
|
| - element_type = ElementTree.Element(name)
|
| - for element_property, value in sorted(content.items()):
|
| - node = ElementTree.SubElement(element_type, element_property)
|
| - node.text = value
|
| - return ElementTree.tostring(element_type)
|
| -
|
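| -# For example, a WebsiteConfiguration message like
|
| -#     {'WebsiteConfiguration': {'MainPageSuffix': 'index.html'}}
|
| -# serializes to:
|
| -#     <WebsiteConfiguration><MainPageSuffix>index.html</MainPageSuffix>
|
| -#     </WebsiteConfiguration>
|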
| -
|
| -def LookUpGsutilVersion(gsutil_api, url_str):
|
| - """Looks up the gsutil version of the specified gsutil tarball URL.
|
| -
|
| - Version is specified in the metadata field set on that object.
|
| -
|
| - Args:
|
| - gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
|
| - url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').
|
| -
|
| - Returns:
|
| - Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
|
| - metadata, else None.
|
| - """
|
| - url = StorageUrlFromString(url_str)
|
| - if url.IsCloudUrl():
|
| - obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
|
| - provider=url.scheme,
|
| - fields=['metadata'])
|
| - if obj.metadata and obj.metadata.additionalProperties:
|
| - for prop in obj.metadata.additionalProperties:
|
| - if prop.key == 'gsutil_version':
|
| - return prop.value
|
| -
|
| -
|
| -def GetGsutilVersionModifiedTime():
|
| - """Returns unix timestamp of when the VERSION file was last modified."""
|
| - if not gslib.VERSION_FILE:
|
| - return 0
|
| - return int(os.path.getmtime(gslib.VERSION_FILE))
|
| -
|
| -
|
| -def IsRunningInteractively():
|
| - """Returns True if currently running interactively on a TTY."""
|
| - return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty()
|
| -
|
| -
|
| -def _HttpsValidateCertificatesEnabled():
|
| - return config.get('Boto', 'https_validate_certificates', True)
|
| -
|
| -CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertificatesEnabled()
|
| -
|
| -
|
| -def _BotoIsSecure():
|
| - return config.get('Boto', 'is_secure', True)
|
| -
|
| -BOTO_IS_SECURE = _BotoIsSecure()
|
| -
|
| -
|
| -def ResumableThreshold():
|
| - return config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB)
|
| -
|
| -
|
| -def AddAcceptEncoding(headers):
|
| - """Adds accept-encoding:gzip to the dictionary of headers."""
|
| - # If Accept-Encoding is not already set, set it to enable gzip.
|
| - if 'accept-encoding' not in headers:
|
| - headers['accept-encoding'] = 'gzip'
|
| -
|
| -
|
| -# pylint: disable=too-many-statements
|
| -def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
|
| - """Print full info for given object (like what displays for gsutil ls -L).
|
| -
|
| - Args:
|
| - bucket_listing_ref: BucketListingRef being listed.
|
| - Must have ref_type OBJECT and a populated root_object
|
| - with the desired fields.
|
| - incl_acl: True if ACL info should be output.
|
| -
|
| - Returns:
|
| - Tuple (number of objects, object_length)
|
| -
|
| - Raises:
|
| - Exception: if a bug in the calling code is encountered.
|
| - """
|
| - url_str = bucket_listing_ref.url_string
|
| - storage_url = StorageUrlFromString(url_str)
|
| - obj = bucket_listing_ref.root_object
|
| -
|
| - if (obj.metadata and S3_DELETE_MARKER_GUID in
|
| - obj.metadata.additionalProperties):
|
| - num_bytes = 0
|
| - num_objs = 0
|
| - url_str += '<DeleteMarker>'
|
| - else:
|
| - num_bytes = obj.size
|
| - num_objs = 1
|
| -
|
| - print '%s:' % url_str.encode(UTF8)
|
| - if obj.updated:
|
| - print '\tCreation time:\t\t%s' % obj.updated.strftime(
|
| - '%a, %d %b %Y %H:%M:%S GMT')
|
| - if obj.cacheControl:
|
| - print '\tCache-Control:\t\t%s' % obj.cacheControl
|
| - if obj.contentDisposition:
|
| - print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
|
| - if obj.contentEncoding:
|
| - print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
|
| - if obj.contentLanguage:
|
| - print '\tContent-Language:\t%s' % obj.contentLanguage
|
| - print '\tContent-Length:\t\t%s' % obj.size
|
| - print '\tContent-Type:\t\t%s' % obj.contentType
|
| - if obj.componentCount:
|
| - print '\tComponent-Count:\t%d' % obj.componentCount
|
| - marker_props = {}
|
| - if obj.metadata and obj.metadata.additionalProperties:
|
| - non_marker_props = []
|
| - for add_prop in obj.metadata.additionalProperties:
|
| - if add_prop.key not in S3_MARKER_GUIDS:
|
| - non_marker_props.append(add_prop)
|
| - else:
|
| - marker_props[add_prop.key] = add_prop.value
|
| - if non_marker_props:
|
| - print '\tMetadata:'
|
| - for ap in non_marker_props:
|
| - meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
|
| - print meta_string.encode(UTF8)
|
| - if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
|
| - if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
|
| - print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
|
| - if obj.generation:
|
| - generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
|
| - print '\tGeneration:\t\t%s' % generation_str
|
| - if obj.metageneration:
|
| - print '\tMetageneration:\t\t%s' % obj.metageneration
|
| - if incl_acl:
|
| - # JSON API won't return acls as part of the response unless we have
|
| - # full control scope
|
| - if obj.acl:
|
| - print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
|
| - elif S3_ACL_MARKER_GUID in marker_props:
|
| - print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
|
| - else:
|
| - print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
|
| - 'permission\n\t\t\t\ton the object to read its ACL.')
|
| -
|
| - return (num_objs, num_bytes)
|
| -
|
| -
|
| -def CompareVersions(first, second):
|
| - """Compares the first and second gsutil version strings.
|
| -
|
| - For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
|
| - Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
|
| - (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
|
| - less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).
|
| -
|
| - Args:
|
| - first: First gsutil version string.
|
| - second: Second gsutil version string.
|
| -
|
| - Returns:
|
| - (g, m):
|
| - g is True if first known to be greater than second, else False.
|
| - m is True if first known to be greater by at least 1 major version,
|
| - else False.
|
| - """
|
| - m1 = VERSION_MATCHER.match(str(first))
|
| - m2 = VERSION_MATCHER.match(str(second))
|
| -
|
| - # If passed strings we don't know how to handle, be conservative.
|
| - if not m1 or not m2:
|
| - return (False, False)
|
| -
|
| - major_ver1 = int(m1.group('maj'))
|
| - minor_ver1 = int(m1.group('min')) if m1.group('min') else 0
|
| - suffix_ver1 = m1.group('suffix')
|
| - major_ver2 = int(m2.group('maj'))
|
| - minor_ver2 = int(m2.group('min')) if m2.group('min') else 0
|
| - suffix_ver2 = m2.group('suffix')
|
| -
|
| - if major_ver1 > major_ver2:
|
| - return (True, True)
|
| - elif major_ver1 == major_ver2:
|
| - if minor_ver1 > minor_ver2:
|
| - return (True, False)
|
| - elif minor_ver1 == minor_ver2:
|
| - return (bool(suffix_ver2) and not suffix_ver1, False)
|
| - return (False, False)
|
| -
|
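| -# For example:
|
| -#   CompareVersions('4.1', '3.33')     returns (True, True)   # major bump
|
| -#   CompareVersions('3.33', '3.7')     returns (True, False)  # minor bump
|
| -#   CompareVersions('3.32', '3.32pre') returns (True, False)  # suffix < plain
|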
| -
|
| -def _IncreaseSoftLimitForResource(resource_name, fallback_value):
|
| - """Sets a new soft limit for the maximum number of open files.
|
| -
|
| - The soft limit is used for this process (and its children), but the
|
| - hard limit is set by the system and cannot be exceeded.
|
| -
|
| - We will first try to set the soft limit to the hard limit's value; if that
|
| - fails, we will try to set the soft limit to the fallback_value iff this would
|
| - increase the soft limit.
|
| -
|
| - Args:
|
| - resource_name: Name of the resource to increase the soft limit for.
|
| - fallback_value: Fallback value to be used if we couldn't set the
|
| - soft value to the hard value (e.g., if the hard value
|
| - is "unlimited").
|
| -
|
| - Returns:
|
| - Current soft limit for the resource (after any changes we were able to
|
| - make), or -1 if the resource doesn't exist.
|
| - """
|
| -
|
| - # Get the value of the resource.
|
| - try:
|
| - (soft_limit, hard_limit) = resource.getrlimit(resource_name)
|
| - except (resource.error, ValueError):
|
| - # The resource wasn't present, so we can't do anything here.
|
| - return -1
|
| -
|
| - # Try to set the value of the soft limit to the value of the hard limit.
|
| - if hard_limit > soft_limit: # Some OS's report 0 for "unlimited".
|
| - try:
|
| - resource.setrlimit(resource_name, (hard_limit, hard_limit))
|
| - return hard_limit
|
| - except (resource.error, ValueError):
|
| - # We'll ignore this and try the fallback value.
|
| - pass
|
| -
|
| - # Try to set the value of the soft limit to the fallback value.
|
| - if soft_limit < fallback_value:
|
| - try:
|
| - resource.setrlimit(resource_name, (fallback_value, hard_limit))
|
| - return fallback_value
|
| - except (resource.error, ValueError):
|
| - # We couldn't change the soft limit, so just report the current
|
| - # value of the soft limit.
|
| - return soft_limit
|
| - else:
|
| - return soft_limit
|
| -
|
| -
|
| -def GetCloudApiInstance(cls, thread_state=None):
|
| - """Gets a gsutil Cloud API instance.
|
| -
|
| - Since Cloud API implementations are not guaranteed to be thread-safe, each
|
| - thread needs its own instance. These instances are passed to each thread
|
| - via the thread pool logic in command.
|
| -
|
| - Args:
|
| - cls: Command class to be used for single-threaded case.
|
| - thread_state: Per thread state from this thread containing a gsutil
|
| - Cloud API instance.
|
| -
|
| - Returns:
|
| - gsutil Cloud API instance.
|
| - """
|
| - return thread_state or cls.gsutil_api
|
| -
|
| -
|
| -def GetFileSize(fp, position_to_eof=False):
|
| - """Returns size of file, optionally leaving fp positioned at EOF."""
|
| - if not position_to_eof:
|
| - cur_pos = fp.tell()
|
| - fp.seek(0, os.SEEK_END)
|
| - cur_file_size = fp.tell()
|
| - if not position_to_eof:
|
| - fp.seek(cur_pos)
|
| - return cur_file_size
|
| -
|
| -
|
| -def GetStreamFromFileUrl(storage_url):
|
| - if storage_url.IsStream():
|
| - return sys.stdin
|
| - else:
|
| - return open(storage_url.object_name, 'rb')
|
| -
|
| -
|
| -def UrlsAreForSingleProvider(url_args):
|
| - """Tests whether the URLs are all for a single provider.
|
| -
|
| - Args:
|
| - url_args: Strings to check.
|
| -
|
| - Returns:
|
| - True if URLs are for single provider, False otherwise.
|
| - """
|
| - provider = None
|
| - url = None
|
| - for url_str in url_args:
|
| - url = StorageUrlFromString(url_str)
|
| - if not provider:
|
| - provider = url.scheme
|
| - elif url.scheme != provider:
|
| - return False
|
| - return provider is not None
|
| -
|
| -
|
| -def HaveFileUrls(args_to_check):
|
| - """Checks whether args_to_check contain any file URLs.
|
| -
|
| - Args:
|
| - args_to_check: Command-line argument subset to check.
|
| -
|
| - Returns:
|
| - True if args_to_check contains any file URLs.
|
| - """
|
| - for url_str in args_to_check:
|
| - storage_url = StorageUrlFromString(url_str)
|
| - if storage_url.IsFileUrl():
|
| - return True
|
| - return False
|
| -
|
| -
|
| -def HaveProviderUrls(args_to_check):
|
| - """Checks whether args_to_check contains any provider URLs (like 'gs://').
|
| -
|
| - Args:
|
| - args_to_check: Command-line argument subset to check.
|
| -
|
| - Returns:
|
| - True if args_to_check contains any provider URLs.
|
| - """
|
| - for url_str in args_to_check:
|
| - storage_url = StorageUrlFromString(url_str)
|
| - if storage_url.IsCloudUrl() and storage_url.IsProvider():
|
| - return True
|
| - return False
|
| -
|
| -
|
| -def MultiprocessingIsAvailable(logger=None):
|
| - """Checks if multiprocessing is available.
|
| -
|
| - There are some environments in which there is no way to use multiprocessing
|
| - logic that's built into Python (e.g., if /dev/shm is not available, then
|
| - we can't create semaphores). This simply tries out a few things that will be
|
| - needed to make sure the environment can support the pieces of the
|
| - multiprocessing module that we need.
|
| -
|
| - Args:
|
| - logger: logging.logger to use for debug output.
|
| -
|
| - Returns:
|
| - (multiprocessing_is_available, stack_trace):
|
| - multiprocessing_is_available: True iff the multiprocessing module is
|
| - available for use.
|
| - stack_trace: The stack trace generated by the call we tried that failed.
|
| - """
|
| - # pylint: disable=global-variable-undefined
|
| - global cached_multiprocessing_is_available
|
| - global cached_multiprocessing_check_stack_trace
|
| - global cached_multiprocessing_is_available_message
|
| - if cached_multiprocessing_is_available is not None:
|
| - if logger:
|
| - logger.debug(cached_multiprocessing_check_stack_trace)
|
| - logger.warn(cached_multiprocessing_is_available_message)
|
| - return (cached_multiprocessing_is_available,
|
| - cached_multiprocessing_check_stack_trace)
|
| -
|
| - stack_trace = None
|
| - multiprocessing_is_available = True
|
| - message = """
|
| -You have requested multiple threads or processes for an operation, but the
|
| -required functionality of Python's multiprocessing module is not available.
|
| -Your operations will be performed sequentially, and any requests for
|
| -parallelism will be ignored.
|
| -"""
|
| - try:
|
| - # Fails if /dev/shm (or some equivalent thereof) is not available for use
|
| - # (e.g., there's no implementation, or we can't write to it, etc.).
|
| - try:
|
| - multiprocessing.Value('i', 0)
|
| - except:
|
| - if not IS_WINDOWS:
|
| - message += """
|
| -Please ensure that you have write access to both /dev/shm and /run/shm.
|
| -"""
|
| - raise # We'll handle this in one place below.
|
| -
|
| - # Manager objects and Windows are generally a pain to work with, so try it
|
| - # out as a sanity check. This definitely works on some versions of Windows,
|
| - # but it's certainly possible that there is some unknown configuration for
|
| - # which it won't.
|
| - multiprocessing.Manager()
|
| -
|
| - # Check that the max number of open files is reasonable. Always check this
|
| - # after we're sure that the basic multiprocessing functionality is
|
| - # available, since this won't matter unless that's true.
|
| - limit = -1
|
| - if HAS_RESOURCE_MODULE:
|
| - # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
|
| - # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
|
| - # "resource" module is not guaranteed to know about these names.
|
| - try:
|
| - limit = max(limit,
|
| - _IncreaseSoftLimitForResource(
|
| - resource.RLIMIT_NOFILE,
|
| - MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
|
| - except AttributeError:
|
| - pass
|
| - try:
|
| - limit = max(limit,
|
| - _IncreaseSoftLimitForResource(
|
| - resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
|
| - except AttributeError:
|
| - pass
|
| -
|
| - if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
|
| - message += ("""
|
| -Your max number of open files, %s, is too low to allow safe multiprocessing.
|
| -On Linux you can fix this by adding something like "ulimit -n 10000" to your
|
| -~/.bashrc or equivalent file and opening a new terminal.
|
| -
|
| -On MacOS, you may also need to run a command like this once (in addition to the
|
| -above instructions), which might require a restart of your system to take
|
| -effect:
|
| - launchctl limit maxfiles 10000
|
| -
|
| -Alternatively, edit /etc/launchd.conf with something like:
|
| - limit maxfiles 10000 10000
|
| -
|
| -""" % limit)
|
| - raise Exception('Max number of open files, %s, is too low.' % limit)
|
| - except: # pylint: disable=bare-except
|
| - stack_trace = traceback.format_exc()
|
| - multiprocessing_is_available = False
|
| - if logger is not None:
|
| - logger.debug(stack_trace)
|
| - logger.warn(message)
|
| -
|
| - # Set the cached values so that we never need to do this check again.
|
| - cached_multiprocessing_is_available = multiprocessing_is_available
|
| - cached_multiprocessing_check_stack_trace = stack_trace
|
| - cached_multiprocessing_is_available_message = message
|
| - return (multiprocessing_is_available, stack_trace)
|
| -
|
| -
|
| -def CreateLock():
|
| - """Returns either a multiprocessing lock or a threading lock.
|
| -
|
| - Uses a multiprocessing lock iff we have access to the parts of the
|
| - multiprocessing module that are necessary to enable parallelism in operations.
|
| -
|
| - Returns:
|
| - Multiprocessing or threading lock.
|
| - """
|
| - if MultiprocessingIsAvailable()[0]:
|
| - return manager.Lock()
|
| - else:
|
| - return threading.Lock()
|
| -
|
| -
|
| -def IsCloudSubdirPlaceholder(url, blr=None):
|
| - """Determines if URL is a cloud subdir placeholder.
|
| -
|
| - This function is needed because GUI tools (like the GCS cloud console) allow
|
| - users to create empty "folders" by creating a placeholder object; and parts
|
| - of gsutil need to treat those placeholder objects specially. For example,
|
| - gsutil rsync needs to avoid downloading those objects because they can cause
|
| - conflicts (see comments in rsync command for details).
|
| -
|
| - We currently detect two cases:
|
| - - Cloud objects whose name ends with '_$folder$'
|
| - - Cloud objects whose name ends with '/'
|
| -
|
| - Args:
|
| - url: The URL to be checked.
|
| - blr: BucketListingRef to check, or None if not available.
|
| - If None, size won't be checked.
|
| -
|
| - Returns:
|
| - True/False.
|
| - """
|
| - if not url.IsCloudUrl():
|
| - return False
|
| - url_str = url.url_string
|
| - if url_str.endswith('_$folder$'):
|
| - return True
|
| - if blr and blr.IsObject():
|
| - size = blr.root_object.size
|
| - else:
|
| - size = 0
|
| - return size == 0 and url_str.endswith('/')
|
| -
|
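| -# For example, both of these hypothetical URLs name placeholders:
|
| -#   gs://some-bucket/dir_$folder$  (legacy GUI-created folder object)
|
| -#   gs://some-bucket/dir/          (zero-sized object ending in '/')
|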
| -
|
| -def GetTermLines():
|
| - """Returns number of terminal lines."""
|
| - # fcntl isn't supported in Windows.
|
| - try:
|
| - import fcntl # pylint: disable=g-import-not-at-top
|
| - import termios # pylint: disable=g-import-not-at-top
|
| - except ImportError:
|
| - return _DEFAULT_LINES
|
| - def ioctl_GWINSZ(fd): # pylint: disable=invalid-name
|
| - try:
|
| - return struct.unpack(
|
| - 'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))[0]
|
| - except: # pylint: disable=bare-except
|
| - return 0 # Failure (so will retry on different file descriptor below).
|
| - # Try to find a valid number of lines from termio for stdin, stdout,
|
| - # or stderr, in that order.
|
| - ioc = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
|
| - if not ioc:
|
| - try:
|
| - fd = os.open(os.ctermid(), os.O_RDONLY)
|
| - ioc = ioctl_GWINSZ(fd)
|
| - os.close(fd)
|
| - except: # pylint: disable=bare-except
|
| - pass
|
| - if not ioc:
|
| - ioc = os.environ.get('LINES', _DEFAULT_LINES)
|
| - return int(ioc)
|
| -
|
| -
|
| -class GsutilStreamHandler(logging.StreamHandler):
|
| - """A subclass of StreamHandler for use in gsutil."""
|
| -
|
| - def flush(self):
|
| - # Note: we override the flush method here due to a python 2.6 bug. The
|
| - # python logging module will try to flush all stream handlers at exit.
|
| - # If the StreamHandler is pointing to a file that is already closed, the
|
| - # method throws an exception. Our unit tests temporarily redirect stderr,
|
| - # which causes the default StreamHandler to open its stream against a
|
| - # temporary file. By the time the process shuts down, the underlying file
|
| - # is closed, causing an exception. This was fixed in Python 2.7, but to
|
| - # remove the flake from Python 2.6, we maintain this here.
|
| - try:
|
| - logging.StreamHandler.flush(self)
|
| - except ValueError:
|
| - pass
|
| -
|
| -
|
| -def StdinIterator():
|
| - """A generator function that returns lines from stdin."""
|
| - for line in sys.stdin:
|
| - # Strip CRLF.
|
| - yield line.rstrip()
|
|
|