Index: third_party/gsutil/gslib/util.py |
diff --git a/third_party/gsutil/gslib/util.py b/third_party/gsutil/gslib/util.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..ece5112c34252b958b1bb5020541869c26401ff5 |
--- /dev/null |
+++ b/third_party/gsutil/gslib/util.py |
+@@ -0,0 +1,1148 @@ |
+# -*- coding: utf-8 -*- |
+# Copyright 2010 Google Inc. All Rights Reserved. |
+# |
+# Licensed under the Apache License, Version 2.0 (the "License"); |
+# you may not use this file except in compliance with the License. |
+# You may obtain a copy of the License at |
+# |
+# http://www.apache.org/licenses/LICENSE-2.0 |
+# |
+# Unless required by applicable law or agreed to in writing, software |
+# distributed under the License is distributed on an "AS IS" BASIS, |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
+# See the License for the specific language governing permissions and |
+# limitations under the License. |
+"""Static data and helper functions.""" |
+ |
+from __future__ import absolute_import |
+ |
+import errno |
+import logging |
+import math |
+import multiprocessing |
+import os |
+import pkgutil |
+import re |
+import struct |
+import sys |
+import tempfile |
+import textwrap |
+import threading |
+import traceback |
+import xml.etree.ElementTree as ElementTree |
+ |
+import boto |
+from boto import config |
+import boto.auth |
+from boto.exception import NoAuthHandlerFound |
+from boto.gs.connection import GSConnection |
+from boto.provider import Provider |
+from boto.pyami.config import BotoConfigLocations |
+import httplib2 |
+from oauth2client.client import HAS_CRYPTO |
+from retry_decorator import retry_decorator |
+ |
+import gslib |
+from gslib.exception import CommandException |
+from gslib.storage_url import StorageUrlFromString |
+from gslib.translation_helper import AclTranslation |
+from gslib.translation_helper import GenerationFromUrlAndString |
+from gslib.translation_helper import S3_ACL_MARKER_GUID |
+from gslib.translation_helper import S3_DELETE_MARKER_GUID |
+from gslib.translation_helper import S3_MARKER_GUIDS |
+ |
+# pylint: disable=g-import-not-at-top |
+try: |
+ # This module doesn't necessarily exist on Windows. |
+ import resource |
+ HAS_RESOURCE_MODULE = True |
+except ImportError: |
+ HAS_RESOURCE_MODULE = False |
+ |
+ONE_KIB = 1024 |
+ONE_MIB = 1024 * 1024 |
+TWO_MIB = 2 * ONE_MIB |
+EIGHT_MIB = 8 * ONE_MIB |
+TEN_MIB = 10 * ONE_MIB |
+DEFAULT_FILE_BUFFER_SIZE = 8 * ONE_KIB |
+_DEFAULT_LINES = 25 |
+ |
+# By default, the timeout for SSL read errors is infinite. This could |
+# cause gsutil to hang on network disconnect, so pick a more reasonable |
+# timeout. |
+SSL_TIMEOUT = 60 |
+ |
+# Start with a progress callback every 64 KiB during uploads/downloads (JSON |
+# API). Callback implementation should back off until it hits the maximum size |
+# so that callbacks do not create huge amounts of log output. |
+START_CALLBACK_PER_BYTES = 1024*64 |
+MAX_CALLBACK_PER_BYTES = 1024*1024*100 |
+ |
+# Upload/download files in 8 KiB chunks over the HTTP connection. |
+TRANSFER_BUFFER_SIZE = 1024*8 |
+ |
+# Default number of progress callbacks during transfer (XML API). |
+XML_PROGRESS_CALLBACKS = 10 |
+ |
+# For files >= this size, output a message indicating that we're running an |
+# operation on the file (like hashing or gzipping) so it does not appear to the |
+# user that the command is hanging. |
+MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024 # 100 MiB |
+ |
+NO_MAX = sys.maxint |
+ |
+UTF8 = 'utf-8' |
+ |
+VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?') |
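+# Example (illustrative): VERSION_MATCHER.match('3.32pre') yields maj='3', |
+# min='32', suffix='pre'. |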
+ |
+RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt' |
+ |
+# Binary exponentiation strings. |
+_EXP_STRINGS = [ |
+ (0, 'B', 'bit'), |
+ (10, 'KiB', 'Kibit', 'K'), |
+ (20, 'MiB', 'Mibit', 'M'), |
+ (30, 'GiB', 'Gibit', 'G'), |
+ (40, 'TiB', 'Tibit', 'T'), |
+ (50, 'PiB', 'Pibit', 'P'), |
+ (60, 'EiB', 'Eibit', 'E'), |
+] |
+ |
+ |
+global manager # pylint: disable=global-at-module-level |
+certs_file_lock = threading.Lock() |
+configured_certs_files = [] |
+ |
+ |
+def InitializeMultiprocessingVariables(): |
+ """Perform necessary initialization for multiprocessing. |
+ |
+ See gslib.command.InitializeMultiprocessingVariables for an explanation |
+ of why this is necessary. |
+ """ |
+ global manager # pylint: disable=global-variable-undefined |
+ manager = multiprocessing.Manager() |
+ |
+ |
+def _GenerateSuffixRegex(): |
+ """Creates a suffix regex for human-readable byte counts.""" |
+ human_bytes_re = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?' |
+ suffixes = [] |
+ suffix_to_si = {} |
+ for i, si in enumerate(_EXP_STRINGS): |
+ si_suffixes = [s.lower() for s in list(si)[1:]] |
+ for suffix in si_suffixes: |
+ suffix_to_si[suffix] = i |
+ suffixes.extend(si_suffixes) |
+ human_bytes_re %= '|'.join(suffixes) |
+ matcher = re.compile(human_bytes_re) |
+ return suffix_to_si, matcher |
+ |
+SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex() |
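+ |
+# Example (illustrative): for '3 gib', MATCH_HUMAN_BYTES captures num='3' and |
+# suffix='gib'; SUFFIX_TO_SI['gib'] == 3 and _EXP_STRINGS[3][0] == 30, so the |
+# byte multiplier is 2**30. |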
+ |
+SECONDS_PER_DAY = 3600 * 24 |
+ |
+# Detect platform types. |
+PLATFORM = str(sys.platform).lower() |
+IS_WINDOWS = 'win32' in PLATFORM |
+IS_CYGWIN = 'cygwin' in PLATFORM |
+IS_LINUX = 'linux' in PLATFORM |
+IS_OSX = 'darwin' in PLATFORM |
+ |
+# On Unix-like systems, we raise the soft limit on the number of open files so |
+# we don't hit the OS-imposed limit. This value was obtained experimentally. |
+MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000 |
+ |
+GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz' |
+ |
+Retry = retry_decorator.retry # pylint: disable=invalid-name |
+ |
+# Cache the values from this check such that they're available to all callers |
+# without needing to run all the checks again (some of these, such as calling |
+# multiprocessing.Manager(), are expensive operations). |
+cached_multiprocessing_is_available = None |
+cached_multiprocessing_check_stack_trace = None |
+cached_multiprocessing_is_available_message = None |
+ |
+ |
+# Enum class for specifying listing style. |
+class ListingStyle(object): |
+ SHORT = 'SHORT' |
+ LONG = 'LONG' |
+ LONG_LONG = 'LONG_LONG' |
+ |
+ |
+def UsingCrcmodExtension(crcmod): |
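+  """Returns whether the given crcmod module uses its C extension.""" |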
+ return (getattr(crcmod, 'crcmod', None) and |
+ getattr(crcmod.crcmod, '_usingExtension', None)) |
+ |
+ |
+def CreateDirIfNeeded(dir_path, mode=0777): |
+ """Creates a directory, suppressing already-exists errors.""" |
+ if not os.path.exists(dir_path): |
+ try: |
+ # Unfortunately, even though we catch and ignore EEXIST, this call will |
+ # output a (needless) error message (no way to avoid that in Python). |
+ os.makedirs(dir_path, mode) |
+ # Ignore 'already exists' in case user tried to start up several |
+ # resumable uploads concurrently from a machine where no tracker dir had |
+ # yet been created. |
+ except OSError as e: |
+ if e.errno != errno.EEXIST: |
+ raise |
+ |
+ |
+def GetGsutilStateDir(): |
+ """Returns the location of the directory for gsutil state files. |
+ |
+ Certain operations, such as cross-process credential sharing and |
+ resumable transfer tracking, need a known location for state files which |
+ are created by gsutil as-needed. |
+ |
+ This location should only be used for storing data that is required to be in |
+ a static location. |
+ |
+ Returns: |
+ Path to directory for gsutil static state files. |
+ """ |
+ config_file_dir = config.get( |
+ 'GSUtil', 'state_dir', |
+ os.path.expanduser(os.path.join('~', '.gsutil'))) |
+ CreateDirIfNeeded(config_file_dir) |
+ return config_file_dir |
+ |
+ |
+def GetCredentialStoreFilename(): |
+ return os.path.join(GetGsutilStateDir(), 'credstore') |
+ |
+ |
+def GetGceCredentialCacheFilename(): |
+ return os.path.join(GetGsutilStateDir(), 'gcecredcache') |
+ |
+ |
+def GetTabCompletionLogFilename(): |
+ return os.path.join(GetGsutilStateDir(), 'tab-completion-logs') |
+ |
+ |
+def GetTabCompletionCacheFilename(): |
+ tab_completion_dir = os.path.join(GetGsutilStateDir(), 'tab-completion') |
+  # Restrict the directory's permissions to the owner, for privacy. |
+ CreateDirIfNeeded(tab_completion_dir, mode=0700) |
+ return os.path.join(tab_completion_dir, 'cache') |
+ |
+ |
+def PrintTrackerDirDeprecationWarningIfNeeded(): |
+ # TODO: Remove this along with the tracker_dir config value 1 year after |
+ # 4.6 release date. Use state_dir instead. |
+ if config.has_option('GSUtil', 'resumable_tracker_dir'): |
+ sys.stderr.write('Warning: you have set resumable_tracker_dir in your ' |
+ '.boto configuration file. This configuration option is ' |
+ 'deprecated; please use the state_dir configuration ' |
+ 'option instead.\n') |
+ |
+ |
+# Name of file where we keep the timestamp for the last time we checked whether |
+# a new version of gsutil is available. |
+PrintTrackerDirDeprecationWarningIfNeeded() |
+CreateDirIfNeeded(GetGsutilStateDir()) |
+LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = ( |
+ os.path.join(GetGsutilStateDir(), '.last_software_update_check')) |
+ |
+ |
+def HasConfiguredCredentials(): |
+ """Determines if boto credential/config file exists.""" |
+ has_goog_creds = (config.has_option('Credentials', 'gs_access_key_id') and |
+ config.has_option('Credentials', 'gs_secret_access_key')) |
+ has_amzn_creds = (config.has_option('Credentials', 'aws_access_key_id') and |
+ config.has_option('Credentials', 'aws_secret_access_key')) |
+ has_oauth_creds = ( |
+ config.has_option('Credentials', 'gs_oauth2_refresh_token')) |
+ has_service_account_creds = ( |
+ HAS_CRYPTO and |
+ config.has_option('Credentials', 'gs_service_client_id') and |
+ config.has_option('Credentials', 'gs_service_key_file')) |
+ |
+ if (has_goog_creds or has_amzn_creds or has_oauth_creds or |
+ has_service_account_creds): |
+ return True |
+ |
+ valid_auth_handler = None |
+ try: |
+ valid_auth_handler = boto.auth.get_auth_handler( |
+ GSConnection.DefaultHost, config, Provider('google'), |
+ requested_capability=['s3']) |
+ # Exclude the no-op auth handler as indicating credentials are configured. |
+ # Note we can't use isinstance() here because the no-op module may not be |
+ # imported so we can't get a reference to the class type. |
+ if getattr(getattr(valid_auth_handler, '__class__', None), |
+ '__name__', None) == 'NoOpAuth': |
+ valid_auth_handler = None |
+ except NoAuthHandlerFound: |
+ pass |
+ |
+ return valid_auth_handler |
+ |
+ |
+def ConfigureNoOpAuthIfNeeded(): |
+ """Sets up no-op auth handler if no boto credentials are configured.""" |
+ if not HasConfiguredCredentials(): |
+ if (config.has_option('Credentials', 'gs_service_client_id') |
+ and not HAS_CRYPTO): |
+ if os.environ.get('CLOUDSDK_WRAPPER') == '1': |
+ raise CommandException('\n'.join(textwrap.wrap( |
+ 'Your gsutil is configured with an OAuth2 service account, but ' |
+ 'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. ' |
+ 'Service account authentication requires one of these libraries; ' |
+ 'please reactivate your service account via the gcloud auth ' |
+ 'command and ensure any gcloud packages necessary for ' |
+ 'service accounts are present.'))) |
+ else: |
+ raise CommandException('\n'.join(textwrap.wrap( |
+ 'Your gsutil is configured with an OAuth2 service account, but ' |
+ 'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. ' |
+ 'Service account authentication requires one of these libraries; ' |
+ 'please install either of them to proceed, or configure a ' |
+ 'different type of credentials with "gsutil config".'))) |
+ else: |
+ # With no boto config file the user can still access publicly readable |
+ # buckets and objects. |
+ from gslib import no_op_auth_plugin # pylint: disable=unused-variable |
+ |
+ |
+def GetConfigFilePath(): |
+ config_path = 'no config found' |
+ for path in BotoConfigLocations: |
+ try: |
+ with open(path, 'r'): |
+ config_path = path |
+ break |
+ except IOError: |
+ pass |
+ return config_path |
+ |
+ |
+def GetBotoConfigFileList(): |
+  """Returns list of boto config files that exist.""" |
+  # Copy the list so we don't mutate the shared BotoConfigLocations global. |
+  config_paths = list(BotoConfigLocations) |
+  if 'AWS_CREDENTIAL_FILE' in os.environ: |
+    config_paths.append(os.environ['AWS_CREDENTIAL_FILE']) |
+  config_files = {} |
+  for config_path in config_paths: |
+    if os.path.exists(config_path): |
+      config_files[config_path] = 1 |
+  cf_list = [] |
+  for config_file in config_files: |
+    cf_list.append(config_file) |
+  return cf_list |
+ |
+ |
+def GetCertsFile(): |
+ """Configures and returns the CA Certificates file. |
+ |
+ If one is already configured, use it. Otherwise, amend the configuration |
+ (in boto.config) to use the cert roots distributed with gsutil. |
+ |
+ Returns: |
+ string filename of the certs file to use. |
+ """ |
+ certs_file = boto.config.get('Boto', 'ca_certificates_file', None) |
+ if not certs_file: |
+ with certs_file_lock: |
+ if configured_certs_files: |
+ disk_certs_file = configured_certs_files[0] |
+ else: |
+ disk_certs_file = os.path.abspath( |
+ os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt')) |
+ if not os.path.exists(disk_certs_file): |
+ # If the file is not present on disk, this means the gslib module |
+ # doesn't actually exist on disk anywhere. This can happen if it's |
+ # being imported from a zip file. Unfortunately, we have to copy the |
+ # certs file to a local temp file on disk because the underlying SSL |
+ # socket requires it to be a filesystem path. |
+ certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt') |
+ if not certs_data: |
+ raise CommandException('Certificates file not found. Please ' |
+ 'reinstall gsutil from scratch') |
+ fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts') |
+ f = os.fdopen(fd, 'w') |
+ f.write(certs_data) |
+ f.close() |
+ configured_certs_files.append(fname) |
+ disk_certs_file = fname |
+ certs_file = disk_certs_file |
+ return certs_file |
+ |
+ |
+def GetCleanupFiles(): |
+ """Returns a list of temp files to delete (if possible) when program exits.""" |
+ cleanup_files = [] |
+ if configured_certs_files: |
+ cleanup_files += configured_certs_files |
+ return cleanup_files |
+ |
+ |
+def ProxyInfoFromEnvironmentVar(proxy_env_var): |
+ """Reads proxy info from the environment and converts to httplib2.ProxyInfo. |
+ |
+ Args: |
+ proxy_env_var: Environment variable string to read, such as http_proxy or |
+ https_proxy. |
+ |
+ Returns: |
+ httplib2.ProxyInfo constructed from the environment string. |
+ """ |
+ proxy_url = os.environ.get(proxy_env_var) |
+ if not proxy_url or not proxy_env_var.lower().startswith('http'): |
+ return httplib2.ProxyInfo(httplib2.socks.PROXY_TYPE_HTTP, None, 0) |
+ proxy_protocol = proxy_env_var.lower().split('_')[0] |
+ if not proxy_url.lower().startswith('http'): |
+ # proxy_info_from_url requires a protocol, which is always http or https. |
+ proxy_url = proxy_protocol + '://' + proxy_url |
+ return httplib2.proxy_info_from_url(proxy_url, method=proxy_protocol) |
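+ |
+# Example (illustrative): with http_proxy='myproxy:3128' set in the |
+# environment, ProxyInfoFromEnvironmentVar('http_proxy') is equivalent to |
+# httplib2.proxy_info_from_url('http://myproxy:3128', method='http'). |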
+ |
+ |
+def GetNewHttp(http_class=httplib2.Http, **kwargs): |
+ """Creates and returns a new httplib2.Http instance. |
+ |
+ Args: |
+ http_class: Optional custom Http class to use. |
+ **kwargs: Arguments to pass to http_class constructor. |
+ |
+ Returns: |
+ An initialized httplib2.Http instance. |
+ """ |
+ proxy_info = httplib2.ProxyInfo( |
+ proxy_type=3, |
+ proxy_host=boto.config.get('Boto', 'proxy', None), |
+ proxy_port=boto.config.getint('Boto', 'proxy_port', 0), |
+ proxy_user=boto.config.get('Boto', 'proxy_user', None), |
+ proxy_pass=boto.config.get('Boto', 'proxy_pass', None), |
+ proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False)) |
+ |
+ if not (proxy_info.proxy_host and proxy_info.proxy_port): |
+ # Fall back to using the environment variable. |
+ for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']: |
+ if proxy_env_var in os.environ and os.environ[proxy_env_var]: |
+ proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var) |
+      # Assume proxy_rdns is True if a proxy environment variable exists. |
+ proxy_info.proxy_rdns = boto.config.get('Boto', 'proxy_rdns', True) |
+ break |
+ |
+ # Some installers don't package a certs file with httplib2, so use the |
+ # one included with gsutil. |
+ kwargs['ca_certs'] = GetCertsFile() |
+ # Use a non-infinite SSL timeout to avoid hangs during network flakiness. |
+ kwargs['timeout'] = SSL_TIMEOUT |
+ http = http_class(proxy_info=proxy_info, **kwargs) |
+ http.disable_ssl_certificate_validation = (not config.getbool( |
+ 'Boto', 'https_validate_certificates')) |
+ return http |
+ |
+ |
+def GetNumRetries(): |
+ return config.getint('Boto', 'num_retries', 6) |
+ |
+ |
+def GetMaxRetryDelay(): |
+ return config.getint('Boto', 'max_retry_delay', 60) |
+ |
+ |
+# Resumable downloads and uploads make one HTTP call per chunk, and chunk |
+# sizes must be multiples of 256 KiB. Overridable for testing. |
+def GetJsonResumableChunkSize(): |
+  chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size', |
+                             1024*1024*100L) |
+  if chunk_size == 0: |
+    chunk_size = 1024*256L |
+  elif chunk_size % (1024*256L) != 0: |
+    # Round up to the next multiple of 256 KiB. |
+    chunk_size += (1024*256L - (chunk_size % (1024*256L))) |
+  return chunk_size |
+ |
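+# Example (illustrative): with json_resumable_chunk_size = 1000000 in the boto |
+# config, the chunk size is rounded up to 1048576, the next 256 KiB multiple. |
+ |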
+ |
+def _RoundToNearestExponent(num): |
+ i = 0 |
+ while i+1 < len(_EXP_STRINGS) and num >= (2 ** _EXP_STRINGS[i+1][0]): |
+ i += 1 |
+ return i, round(float(num) / 2 ** _EXP_STRINGS[i][0], 2) |
+ |
+ |
+def MakeHumanReadable(num): |
+ """Generates human readable string for a number of bytes. |
+ |
+ Args: |
+ num: The number, in bytes. |
+ |
+ Returns: |
+ A string form of the number using size abbreviations (KiB, MiB, etc.). |
+ """ |
+ i, rounded_val = _RoundToNearestExponent(num) |
+ return '%g %s' % (rounded_val, _EXP_STRINGS[i][1]) |
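+ |
+# Example (illustrative): MakeHumanReadable(2097152) == '2 MiB'. |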
+ |
+ |
+def MakeBitsHumanReadable(num): |
+ """Generates human readable string for a number of bits. |
+ |
+ Args: |
+ num: The number, in bits. |
+ |
+ Returns: |
+ A string form of the number using bit size abbreviations (kbit, Mbit, etc.) |
+ """ |
+ i, rounded_val = _RoundToNearestExponent(num) |
+ return '%g %s' % (rounded_val, _EXP_STRINGS[i][2]) |
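+ |
+# Example (illustrative): MakeBitsHumanReadable(2048) == '2 Kibit'. |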
+ |
+ |
+def HumanReadableToBytes(human_string): |
+ """Tries to convert a human-readable string to a number of bytes. |
+ |
+ Args: |
+ human_string: A string supplied by user, e.g. '1M', '3 GiB'. |
+ Returns: |
+ An integer containing the number of bytes. |
+ Raises: |
+ ValueError: on an invalid string. |
+ """ |
+ human_string = human_string.lower() |
+ m = MATCH_HUMAN_BYTES.match(human_string) |
+ if m: |
+ num = float(m.group('num')) |
+ if m.group('suffix'): |
+ power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0] |
+ num *= (2.0 ** power) |
+ num = int(round(num)) |
+ return num |
+ raise ValueError('Invalid byte string specified: %s' % human_string) |
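+ |
+# Example (doctest-style, illustrative): |
+#   >>> HumanReadableToBytes('1M') |
+#   1048576 |
+#   >>> HumanReadableToBytes('3 GiB') |
+#   3221225472 |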
+ |
+ |
+def Percentile(values, percent, key=lambda x: x): |
+ """Find the percentile of a list of values. |
+ |
+ Taken from: http://code.activestate.com/recipes/511478/ |
+ |
+ Args: |
+ values: a list of numeric values. Note that the values MUST BE already |
+ sorted. |
+ percent: a float value from 0.0 to 1.0. |
+ key: optional key function to compute value from each element of the list |
+ of values. |
+ |
+ Returns: |
+ The percentile of the values. |
+ """ |
+ if not values: |
+ return None |
+ k = (len(values) - 1) * percent |
+ f = math.floor(k) |
+ c = math.ceil(k) |
+ if f == c: |
+ return key(values[int(k)]) |
+ d0 = key(values[int(f)]) * (c-k) |
+ d1 = key(values[int(c)]) * (k-f) |
+ return d0 + d1 |
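+ |
+# Example (illustrative): Percentile([1, 2, 3, 4], 0.5) == 2.5, linearly |
+# interpolating between the two middle values. |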
+ |
+ |
+def RemoveCRLFFromString(input_str): |
+ """Returns the input string with all \\n and \\r removed.""" |
+ return re.sub(r'[\r\n]', '', input_str) |
+ |
+ |
+def UnaryDictToXml(message): |
+ """Generates XML representation of a nested dict. |
+ |
+ This dict contains exactly one top-level entry and an arbitrary number of |
+ 2nd-level entries, e.g. capturing a WebsiteConfiguration message. |
+ |
+ Args: |
+ message: The dict encoding the message. |
+ |
+ Returns: |
+ XML string representation of the input dict. |
+ |
+ Raises: |
+ Exception: if dict contains more than one top-level entry. |
+ """ |
+ if len(message) != 1: |
+ raise Exception('Expected dict of size 1, got size %d' % len(message)) |
+ |
+ name, content = message.items()[0] |
+ element_type = ElementTree.Element(name) |
+ for element_property, value in sorted(content.items()): |
+ node = ElementTree.SubElement(element_type, element_property) |
+ node.text = value |
+ return ElementTree.tostring(element_type) |
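+ |
+# Example (illustrative): |
+#   UnaryDictToXml({'WebsiteConfiguration': {'MainPageSuffix': 'index.html'}}) |
+#   returns '<WebsiteConfiguration><MainPageSuffix>index.html |
+#   </MainPageSuffix></WebsiteConfiguration>' (as one string). |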
+ |
+ |
+def LookUpGsutilVersion(gsutil_api, url_str): |
+ """Looks up the gsutil version of the specified gsutil tarball URL. |
+ |
+ Version is specified in the metadata field set on that object. |
+ |
+ Args: |
+ gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball. |
+ url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz'). |
+ |
+ Returns: |
+ Version string if URL is a cloud URL containing x-goog-meta-gsutil-version |
+ metadata, else None. |
+ """ |
+ url = StorageUrlFromString(url_str) |
+ if url.IsCloudUrl(): |
+ obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name, |
+ provider=url.scheme, |
+ fields=['metadata']) |
+ if obj.metadata and obj.metadata.additionalProperties: |
+ for prop in obj.metadata.additionalProperties: |
+ if prop.key == 'gsutil_version': |
+ return prop.value |
+ |
+ |
+def GetGsutilVersionModifiedTime(): |
+ """Returns unix timestamp of when the VERSION file was last modified.""" |
+ if not gslib.VERSION_FILE: |
+ return 0 |
+ return int(os.path.getmtime(gslib.VERSION_FILE)) |
+ |
+ |
+def IsRunningInteractively(): |
+ """Returns True if currently running interactively on a TTY.""" |
+ return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty() |
+ |
+ |
+def _HttpsValidateCertificatesEnabled(): |
+  # Use getbool so that string values like 'False' are parsed as booleans |
+  # (config.get would return a truthy non-empty string). |
+  return config.getbool('Boto', 'https_validate_certificates', True) |
+ |
+CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertificatesEnabled() |
+ |
+ |
+def _BotoIsSecure(): |
+  # As above, parse the config value as a boolean rather than a raw string. |
+  return config.getbool('Boto', 'is_secure', True) |
+ |
+BOTO_IS_SECURE = _BotoIsSecure() |
+ |
+ |
+def ResumableThreshold(): |
+ return config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB) |
+ |
+ |
+def AddAcceptEncoding(headers): |
+ """Adds accept-encoding:gzip to the dictionary of headers.""" |
+ # If Accept-Encoding is not already set, set it to enable gzip. |
+ if 'accept-encoding' not in headers: |
+ headers['accept-encoding'] = 'gzip' |
+ |
+ |
+# pylint: disable=too-many-statements |
+def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True): |
+ """Print full info for given object (like what displays for gsutil ls -L). |
+ |
+ Args: |
+ bucket_listing_ref: BucketListingRef being listed. |
+ Must have ref_type OBJECT and a populated root_object |
+ with the desired fields. |
+ incl_acl: True if ACL info should be output. |
+ |
+ Returns: |
+ Tuple (number of objects, object_length) |
+ |
+ Raises: |
+ Exception: if calling bug encountered. |
+ """ |
+ url_str = bucket_listing_ref.url_string |
+ storage_url = StorageUrlFromString(url_str) |
+ obj = bucket_listing_ref.root_object |
+ |
+ if (obj.metadata and S3_DELETE_MARKER_GUID in |
+ obj.metadata.additionalProperties): |
+ num_bytes = 0 |
+ num_objs = 0 |
+ url_str += '<DeleteMarker>' |
+ else: |
+ num_bytes = obj.size |
+ num_objs = 1 |
+ |
+ print '%s:' % url_str.encode(UTF8) |
+ if obj.updated: |
+ print '\tCreation time:\t\t%s' % obj.updated.strftime( |
+ '%a, %d %b %Y %H:%M:%S GMT') |
+ if obj.cacheControl: |
+ print '\tCache-Control:\t\t%s' % obj.cacheControl |
+ if obj.contentDisposition: |
+ print '\tContent-Disposition:\t\t%s' % obj.contentDisposition |
+ if obj.contentEncoding: |
+ print '\tContent-Encoding:\t\t%s' % obj.contentEncoding |
+ if obj.contentLanguage: |
+ print '\tContent-Language:\t%s' % obj.contentLanguage |
+ print '\tContent-Length:\t\t%s' % obj.size |
+ print '\tContent-Type:\t\t%s' % obj.contentType |
+ if obj.componentCount: |
+ print '\tComponent-Count:\t%d' % obj.componentCount |
+ marker_props = {} |
+ if obj.metadata and obj.metadata.additionalProperties: |
+ non_marker_props = [] |
+ for add_prop in obj.metadata.additionalProperties: |
+ if add_prop.key not in S3_MARKER_GUIDS: |
+ non_marker_props.append(add_prop) |
+ else: |
+ marker_props[add_prop.key] = add_prop.value |
+ if non_marker_props: |
+ print '\tMetadata:' |
+ for ap in non_marker_props: |
+ meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value) |
+ print meta_string.encode(UTF8) |
+ if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c |
+ if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash |
+ print '\tETag:\t\t\t%s' % obj.etag.strip('"\'') |
+ if obj.generation: |
+ generation_str = GenerationFromUrlAndString(storage_url, obj.generation) |
+ print '\tGeneration:\t\t%s' % generation_str |
+ if obj.metageneration: |
+ print '\tMetageneration:\t\t%s' % obj.metageneration |
+ if incl_acl: |
+ # JSON API won't return acls as part of the response unless we have |
+ # full control scope |
+ if obj.acl: |
+ print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl) |
+ elif S3_ACL_MARKER_GUID in marker_props: |
+ print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID] |
+ else: |
+ print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER ' |
+ 'permission\n\t\t\t\ton the object to read its ACL.') |
+ |
+ return (num_objs, num_bytes) |
+ |
+ |
+def CompareVersions(first, second): |
+ """Compares the first and second gsutil version strings. |
+ |
+ For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33. |
+ Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes |
+ (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as |
+ less than its non-suffix counterpart (e.g. 3.32 > 3.32pre). |
+ |
+ Args: |
+ first: First gsutil version string. |
+ second: Second gsutil version string. |
+ |
+ Returns: |
+ (g, m): |
+ g is True if first known to be greater than second, else False. |
+ m is True if first known to be greater by at least 1 major version, |
+ else False. |
+ """ |
+ m1 = VERSION_MATCHER.match(str(first)) |
+ m2 = VERSION_MATCHER.match(str(second)) |
+ |
+ # If passed strings we don't know how to handle, be conservative. |
+ if not m1 or not m2: |
+ return (False, False) |
+ |
+ major_ver1 = int(m1.group('maj')) |
+ minor_ver1 = int(m1.group('min')) if m1.group('min') else 0 |
+ suffix_ver1 = m1.group('suffix') |
+ major_ver2 = int(m2.group('maj')) |
+ minor_ver2 = int(m2.group('min')) if m2.group('min') else 0 |
+ suffix_ver2 = m2.group('suffix') |
+ |
+ if major_ver1 > major_ver2: |
+ return (True, True) |
+ elif major_ver1 == major_ver2: |
+ if minor_ver1 > minor_ver2: |
+ return (True, False) |
+ elif minor_ver1 == minor_ver2: |
+ return (bool(suffix_ver2) and not suffix_ver1, False) |
+ return (False, False) |
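+ |
+# Examples (illustrative): |
+#   CompareVersions('4.1', '3.33') == (True, True)    # greater major version |
+#   CompareVersions('3.33', '3.7') == (True, False)   # greater minor version |
+#   CompareVersions('3.32pre', '3.32') == (False, False)  # suffix sorts lower |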
+ |
+ |
+def _IncreaseSoftLimitForResource(resource_name, fallback_value): |
+ """Sets a new soft limit for the maximum number of open files. |
+ |
+ The soft limit is used for this process (and its children), but the |
+ hard limit is set by the system and cannot be exceeded. |
+ |
+ We will first try to set the soft limit to the hard limit's value; if that |
+ fails, we will try to set the soft limit to the fallback_value iff this would |
+ increase the soft limit. |
+ |
+ Args: |
+ resource_name: Name of the resource to increase the soft limit for. |
+ fallback_value: Fallback value to be used if we couldn't set the |
+ soft value to the hard value (e.g., if the hard value |
+ is "unlimited"). |
+ |
+ Returns: |
+ Current soft limit for the resource (after any changes we were able to |
+ make), or -1 if the resource doesn't exist. |
+ """ |
+ |
+ # Get the value of the resource. |
+ try: |
+ (soft_limit, hard_limit) = resource.getrlimit(resource_name) |
+ except (resource.error, ValueError): |
+ # The resource wasn't present, so we can't do anything here. |
+ return -1 |
+ |
+ # Try to set the value of the soft limit to the value of the hard limit. |
+ if hard_limit > soft_limit: # Some OS's report 0 for "unlimited". |
+ try: |
+ resource.setrlimit(resource_name, (hard_limit, hard_limit)) |
+ return hard_limit |
+ except (resource.error, ValueError): |
+ # We'll ignore this and try the fallback value. |
+ pass |
+ |
+ # Try to set the value of the soft limit to the fallback value. |
+ if soft_limit < fallback_value: |
+ try: |
+ resource.setrlimit(resource_name, (fallback_value, hard_limit)) |
+ return fallback_value |
+ except (resource.error, ValueError): |
+ # We couldn't change the soft limit, so just report the current |
+ # value of the soft limit. |
+ return soft_limit |
+ else: |
+ return soft_limit |
+ |
+ |
+def GetCloudApiInstance(cls, thread_state=None): |
+ """Gets a gsutil Cloud API instance. |
+ |
+ Since Cloud API implementations are not guaranteed to be thread-safe, each |
+ thread needs its own instance. These instances are passed to each thread |
+ via the thread pool logic in command. |
+ |
+ Args: |
+ cls: Command class to be used for single-threaded case. |
+ thread_state: Per thread state from this thread containing a gsutil |
+ Cloud API instance. |
+ |
+ Returns: |
+ gsutil Cloud API instance. |
+ """ |
+ return thread_state or cls.gsutil_api |
+ |
+ |
+def GetFileSize(fp, position_to_eof=False): |
+ """Returns size of file, optionally leaving fp positioned at EOF.""" |
+ if not position_to_eof: |
+ cur_pos = fp.tell() |
+ fp.seek(0, os.SEEK_END) |
+ cur_file_size = fp.tell() |
+ if not position_to_eof: |
+ fp.seek(cur_pos) |
+ return cur_file_size |
+ |
+ |
+def GetStreamFromFileUrl(storage_url): |
+ if storage_url.IsStream(): |
+ return sys.stdin |
+ else: |
+ return open(storage_url.object_name, 'rb') |
+ |
+ |
+def UrlsAreForSingleProvider(url_args): |
+ """Tests whether the URLs are all for a single provider. |
+ |
+ Args: |
+ url_args: Strings to check. |
+ |
+ Returns: |
+ True if URLs are for single provider, False otherwise. |
+ """ |
+ provider = None |
+ url = None |
+ for url_str in url_args: |
+ url = StorageUrlFromString(url_str) |
+ if not provider: |
+ provider = url.scheme |
+ elif url.scheme != provider: |
+ return False |
+ return provider is not None |
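+ |
+# Example (illustrative): UrlsAreForSingleProvider(['gs://a', 'gs://b']) is |
+# True, while mixing 'gs://a' with 's3://b' yields False. |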
+ |
+ |
+def HaveFileUrls(args_to_check): |
+ """Checks whether args_to_check contain any file URLs. |
+ |
+ Args: |
+ args_to_check: Command-line argument subset to check. |
+ |
+ Returns: |
+ True if args_to_check contains any file URLs. |
+ """ |
+ for url_str in args_to_check: |
+ storage_url = StorageUrlFromString(url_str) |
+ if storage_url.IsFileUrl(): |
+ return True |
+ return False |
+ |
+ |
+def HaveProviderUrls(args_to_check): |
+ """Checks whether args_to_check contains any provider URLs (like 'gs://'). |
+ |
+ Args: |
+ args_to_check: Command-line argument subset to check. |
+ |
+ Returns: |
+ True if args_to_check contains any provider URLs. |
+ """ |
+ for url_str in args_to_check: |
+ storage_url = StorageUrlFromString(url_str) |
+ if storage_url.IsCloudUrl() and storage_url.IsProvider(): |
+ return True |
+ return False |
+ |
+ |
+def MultiprocessingIsAvailable(logger=None): |
+ """Checks if multiprocessing is available. |
+ |
+ There are some environments in which there is no way to use multiprocessing |
+ logic that's built into Python (e.g., if /dev/shm is not available, then |
+ we can't create semaphores). This simply tries out a few things that will be |
+ needed to make sure the environment can support the pieces of the |
+ multiprocessing module that we need. |
+ |
+ Args: |
+ logger: logging.logger to use for debug output. |
+ |
+ Returns: |
+ (multiprocessing_is_available, stack_trace): |
+ multiprocessing_is_available: True iff the multiprocessing module is |
+ available for use. |
+ stack_trace: The stack trace generated by the call we tried that failed. |
+ """ |
+ # pylint: disable=global-variable-undefined |
+ global cached_multiprocessing_is_available |
+ global cached_multiprocessing_check_stack_trace |
+ global cached_multiprocessing_is_available_message |
+ if cached_multiprocessing_is_available is not None: |
+ if logger: |
+ logger.debug(cached_multiprocessing_check_stack_trace) |
+ logger.warn(cached_multiprocessing_is_available_message) |
+ return (cached_multiprocessing_is_available, |
+ cached_multiprocessing_check_stack_trace) |
+ |
+ stack_trace = None |
+ multiprocessing_is_available = True |
+ message = """ |
+You have requested multiple threads or processes for an operation, but the |
+required functionality of Python's multiprocessing module is not available. |
+Your operations will be performed sequentially, and any requests for |
+parallelism will be ignored. |
+""" |
+ try: |
+ # Fails if /dev/shm (or some equivalent thereof) is not available for use |
+ # (e.g., there's no implementation, or we can't write to it, etc.). |
+ try: |
+ multiprocessing.Value('i', 0) |
+ except: |
+ if not IS_WINDOWS: |
+ message += """ |
+Please ensure that you have write access to both /dev/shm and /run/shm. |
+""" |
+ raise # We'll handle this in one place below. |
+ |
+ # Manager objects and Windows are generally a pain to work with, so try it |
+ # out as a sanity check. This definitely works on some versions of Windows, |
+ # but it's certainly possible that there is some unknown configuration for |
+ # which it won't. |
+ multiprocessing.Manager() |
+ |
+ # Check that the max number of open files is reasonable. Always check this |
+ # after we're sure that the basic multiprocessing functionality is |
+ # available, since this won't matter unless that's true. |
+ limit = -1 |
+ if HAS_RESOURCE_MODULE: |
+ # Try to set this with both resource names - RLIMIT_NOFILE for most Unix |
+ # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the |
+ # "resource" module is not guaranteed to know about these names. |
+ try: |
+ limit = max(limit, |
+ _IncreaseSoftLimitForResource( |
+ resource.RLIMIT_NOFILE, |
+ MIN_ACCEPTABLE_OPEN_FILES_LIMIT)) |
+ except AttributeError: |
+ pass |
+ try: |
+ limit = max(limit, |
+ _IncreaseSoftLimitForResource( |
+ resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT)) |
+ except AttributeError: |
+ pass |
+ |
+ if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS: |
+ message += (""" |
+Your max number of open files, %s, is too low to allow safe multiprocessing. |
+On Linux you can fix this by adding something like "ulimit -n 10000" to your |
+~/.bashrc or equivalent file and opening a new terminal. |
+ |
+On MacOS, you may also need to run a command like this once (in addition to the |
+above instructions), which might require a restart of your system to take |
+effect: |
+ launchctl limit maxfiles 10000 |
+ |
+Alternatively, edit /etc/launchd.conf with something like: |
+ limit maxfiles 10000 10000 |
+ |
+""" % limit) |
+ raise Exception('Max number of open files, %s, is too low.' % limit) |
+ except: # pylint: disable=bare-except |
+ stack_trace = traceback.format_exc() |
+ multiprocessing_is_available = False |
+ if logger is not None: |
+ logger.debug(stack_trace) |
+ logger.warn(message) |
+ |
+ # Set the cached values so that we never need to do this check again. |
+ cached_multiprocessing_is_available = multiprocessing_is_available |
+ cached_multiprocessing_check_stack_trace = stack_trace |
+ cached_multiprocessing_is_available_message = message |
+ return (multiprocessing_is_available, stack_trace) |
+ |
+ |
+def CreateLock(): |
+ """Returns either a multiprocessing lock or a threading lock. |
+ |
+ Use Multiprocessing lock iff we have access to the parts of the |
+ multiprocessing module that are necessary to enable parallelism in operations. |
+ |
+ Returns: |
+ Multiprocessing or threading lock. |
+ """ |
+ if MultiprocessingIsAvailable()[0]: |
+ return manager.Lock() |
+ else: |
+ return threading.Lock() |
+ |
+ |
+def IsCloudSubdirPlaceholder(url, blr=None): |
+ """Determines if URL is a cloud subdir placeholder. |
+ |
+ This function is needed because GUI tools (like the GCS cloud console) allow |
+ users to create empty "folders" by creating a placeholder object; and parts |
+ of gsutil need to treat those placeholder objects specially. For example, |
+ gsutil rsync needs to avoid downloading those objects because they can cause |
+ conflicts (see comments in rsync command for details). |
+ |
+ We currently detect two cases: |
+ - Cloud objects whose name ends with '_$folder$' |
+ - Cloud objects whose name ends with '/' |
+ |
+ Args: |
+ url: The URL to be checked. |
+ blr: BucketListingRef to check, or None if not available. |
+ If None, size won't be checked. |
+ |
+ Returns: |
+ True/False. |
+ """ |
+ if not url.IsCloudUrl(): |
+ return False |
+ url_str = url.url_string |
+ if url_str.endswith('_$folder$'): |
+ return True |
+ if blr and blr.IsObject(): |
+ size = blr.root_object.size |
+ else: |
+ size = 0 |
+ return size == 0 and url_str.endswith('/') |
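+ |
+# Examples (illustrative): 'gs://bucket/foo_$folder$' and a zero-sized |
+# 'gs://bucket/foo/' are both treated as placeholders. |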
+ |
+ |
+def GetTermLines(): |
+ """Returns number of terminal lines.""" |
+ # fcntl isn't supported in Windows. |
+ try: |
+ import fcntl # pylint: disable=g-import-not-at-top |
+ import termios # pylint: disable=g-import-not-at-top |
+ except ImportError: |
+ return _DEFAULT_LINES |
+ def ioctl_GWINSZ(fd): # pylint: disable=invalid-name |
+ try: |
+ return struct.unpack( |
+ 'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))[0] |
+ except: # pylint: disable=bare-except |
+ return 0 # Failure (so will retry on different file descriptor below). |
+ # Try to find a valid number of lines from termio for stdin, stdout, |
+ # or stderr, in that order. |
+ ioc = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2) |
+ if not ioc: |
+ try: |
+ fd = os.open(os.ctermid(), os.O_RDONLY) |
+ ioc = ioctl_GWINSZ(fd) |
+ os.close(fd) |
+ except: # pylint: disable=bare-except |
+ pass |
+ if not ioc: |
+ ioc = os.environ.get('LINES', _DEFAULT_LINES) |
+ return int(ioc) |
+ |
+ |
+class GsutilStreamHandler(logging.StreamHandler): |
+ """A subclass of StreamHandler for use in gsutil.""" |
+ |
+ def flush(self): |
+ # Note: we override the flush method here due to a python 2.6 bug. The |
+ # python logging module will try to flush all stream handlers at exit. |
+ # If the StreamHandler is pointing to a file that is already closed, the |
+ # method throws an exception. Our unit tests temporarily redirect stderr, |
+ # which causes the default StreamHandler to open its stream against a |
+ # temporary file. By the time the process shuts down, the underlying file |
+ # is closed, causing an exception. This was fixed in Python 2.7, but to |
+ # remove the flake from Python 2.6, we maintain this here. |
+ try: |
+ logging.StreamHandler.flush(self) |
+ except ValueError: |
+ pass |
+ |
+ |
+def StdinIterator(): |
+ """A generator function that returns lines from stdin.""" |
+ for line in sys.stdin: |
+    # Strip trailing whitespace, including any CRLF. |
+ yield line.rstrip() |