| Index: gslib/util.py
|
| ===================================================================
|
| --- gslib/util.py (revision 33376)
|
| +++ gslib/util.py (working copy)
|
| @@ -1,3 +1,4 @@
|
| +# -*- coding: utf-8 -*-
|
| # Copyright 2010 Google Inc. All Rights Reserved.
|
| #
|
| # Licensed under the Apache License, Version 2.0 (the "License");
|
| @@ -11,33 +12,45 @@
|
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| # See the License for the specific language governing permissions and
|
| # limitations under the License.
|
| -
|
| """Static data and helper functions."""
|
|
|
| -import binascii
|
| -import boto
|
| -import boto.auth
|
| +from __future__ import absolute_import
|
| +
|
| import errno
|
| -import gslib
|
| import math
|
| import multiprocessing
|
| import os
|
| +import pkgutil
|
| import re
|
| +import struct
|
| import sys
|
| +import tempfile
|
| import textwrap
|
| import threading
|
| import traceback
|
| import xml.etree.ElementTree as ElementTree
|
|
|
| +import boto
|
| from boto import config
|
| +import boto.auth
|
| from boto.exception import NoAuthHandlerFound
|
| from boto.gs.connection import GSConnection
|
| from boto.provider import Provider
|
| from boto.pyami.config import BotoConfigLocations
|
| -from gslib.exception import CommandException
|
| +import httplib2
|
| +from oauth2client.client import HAS_CRYPTO
|
| from retry_decorator import retry_decorator
|
| -from oauth2client.client import HAS_CRYPTO
|
|
|
| +import gslib
|
| +from gslib.exception import CommandException
|
| +from gslib.storage_url import StorageUrlFromString
|
| +from gslib.translation_helper import AclTranslation
|
| +from gslib.translation_helper import GenerationFromUrlAndString
|
| +from gslib.translation_helper import S3_ACL_MARKER_GUID
|
| +from gslib.translation_helper import S3_DELETE_MARKER_GUID
|
| +from gslib.translation_helper import S3_MARKER_GUIDS
|
| +
|
| +# pylint: disable=g-import-not-at-top
|
| try:
|
| # This module doesn't necessarily exist on Windows.
|
| import resource
|
| @@ -45,37 +58,70 @@
|
| except ImportError, e:
|
| HAS_RESOURCE_MODULE = False
|
|
|
| +ONE_KB = 1024
|
| TWO_MB = 2 * 1024 * 1024
|
| +TEN_MB = 10 * 1024 * 1024
|
| +DEFAULT_FILE_BUFFER_SIZE = 8192
|
| +_DEFAULT_LINES = 25
|
|
|
| +# By default, the timeout for SSL read errors is infinite. This could
|
| +# cause gsutil to hang on network disconnect, so pick a more reasonable
|
| +# timeout.
|
| +SSL_TIMEOUT = 60
|
| +
|
| +# Start with a progress callback every 64KB during uploads/downloads (JSON API).
|
| +# Callback implementation should back off until it hits the maximum size
|
| +# so that callbacks do not create huge amounts of log output.
|
| +START_CALLBACK_PER_BYTES = 1024*64
|
| +MAX_CALLBACK_PER_BYTES = 1024*1024*100
|
| +
|
| +# Upload/download files in 8KB chunks over the HTTP connection.
|
| +TRANSFER_BUFFER_SIZE = 1024*8
|
| +
|
| +# Default number of progress callbacks during transfer (XML API).
|
| +XML_PROGRESS_CALLBACKS = 10
|
| +
|
| +# For files >= this size, output a message indicating that we're running an
|
| +# operation on the file (like hashing or gzipping) so it does not appear to the
|
| +# user that the command is hanging.
|
| +MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024 # 100 MB
|
| +
|
| NO_MAX = sys.maxint
|
|
|
| +UTF8 = 'utf-8'
|
| +
|
| VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')
|
|
|
| RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'
|
|
|
| # Binary exponentiation strings.
|
| _EXP_STRINGS = [
|
| - (0, 'B', 'bit'),
|
| - (10, 'KB', 'Kbit', 'K'),
|
| - (20, 'MB', 'Mbit', 'M'),
|
| - (30, 'GB', 'Gbit', 'G'),
|
| - (40, 'TB', 'Tbit', 'T'),
|
| - (50, 'PB', 'Pbit', 'P'),
|
| - (60, 'EB', 'Ebit', 'E'),
|
| + (0, 'B', 'bit'),
|
| + (10, 'KB', 'Kbit', 'K'),
|
| + (20, 'MB', 'Mbit', 'M'),
|
| + (30, 'GB', 'Gbit', 'G'),
|
| + (40, 'TB', 'Tbit', 'T'),
|
| + (50, 'PB', 'Pbit', 'P'),
|
| + (60, 'EB', 'Ebit', 'E'),
|
| ]
|
|
|
| -global manager
|
| +configured_certs_file = None
|
|
|
| +global manager # pylint: disable=global-at-module-level
|
| +
|
| +
|
| def InitializeMultiprocessingVariables():
|
| + """Perform necessary initialization for multiprocessing.
|
| +
|
| + See gslib.command.InitializeMultiprocessingVariables for an explanation
|
| + of why this is necessary.
|
| """
|
| - Perform necessary initialization - see
|
| - gslib.command.InitializeMultiprocessingVariables for an explanation of why
|
| - this is necessary.
|
| - """
|
| - global manager
|
| + global manager # pylint: disable=global-variable-undefined
|
| manager = multiprocessing.Manager()
|
|
|
| +
|
| def _GenerateSuffixRegex():
|
| + """Creates a suffix regex for human-readable byte counts."""
|
| human_bytes_re = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
|
| suffixes = []
|
| suffix_to_si = {}
|
| @@ -84,7 +130,7 @@
|
| for suffix in si_suffixes:
|
| suffix_to_si[suffix] = i
|
| suffixes.extend(si_suffixes)
|
| - human_bytes_re = human_bytes_re % '|'.join(suffixes)
|
| + human_bytes_re %= '|'.join(suffixes)
|
| matcher = re.compile(human_bytes_re)
|
| return suffix_to_si, matcher
|
|
|
| @@ -105,7 +151,7 @@
|
|
|
| GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'
|
|
|
| -Retry = retry_decorator.retry
|
| +Retry = retry_decorator.retry # pylint: disable=invalid-name
|
|
|
| # Cache the values from this check such that they're available to all callers
|
| # without needing to run all the checks again (some of these, such as calling
|
| @@ -114,6 +160,7 @@
|
| cached_multiprocessing_is_available_stack_trace = None
|
| cached_multiprocessing_is_available_message = None
|
|
|
| +
|
| # Enum class for specifying listing style.
|
| class ListingStyle(object):
|
| SHORT = 'SHORT'
|
| @@ -126,48 +173,91 @@
|
| getattr(crcmod.crcmod, '_usingExtension', None))
|
|
|
|
|
| -def CreateTrackerDirIfNeeded():
|
| - """Looks up the configured directory where gsutil keeps its resumable
|
| - transfer tracker files, and creates it if it doesn't already exist.
|
| -
|
| - Returns:
|
| - The pathname to the tracker directory.
|
| - """
|
| - tracker_dir = config.get(
|
| - 'GSUtil', 'resumable_tracker_dir',
|
| - os.path.expanduser('~' + os.sep + '.gsutil'))
|
| - if not os.path.exists(tracker_dir):
|
| +def CreateDirIfNeeded(dir_path):
|
| + """Creates a directory, suppressing already-exists errors."""
|
| + if not os.path.exists(dir_path):
|
| try:
|
| # Unfortunately, even though we catch and ignore EEXIST, this call will
|
| - # will output a (needless) error message (no way to avoid that in Python).
|
| - os.makedirs(tracker_dir)
|
| + # output a (needless) error message (no way to avoid that in Python).
|
| + os.makedirs(dir_path)
|
| # Ignore 'already exists' in case user tried to start up several
|
| # resumable uploads concurrently from a machine where no tracker dir had
|
| # yet been created.
|
| except OSError as e:
|
| if e.errno != errno.EEXIST:
|
| raise
|
| +
|
| +
|
| +def GetGsutilStateDir():
|
| + """Returns the location of the directory for gsutil state files.
|
| +
|
| + Certain operations, such as cross-process credential sharing and
|
| + resumable transfer tracking, need a known location for state files which
|
| + are created by gsutil as-needed.
|
| +
|
| + This location should only be used for storing data that is required to be in
|
| + a static location.
|
| +
|
| + Returns:
|
| + Path to directory for gsutil static state files.
|
| + """
|
| + config_file_dir = config.get(
|
| + 'GSUtil', 'state_dir',
|
| + os.path.expanduser(os.path.join('~', '.gsutil')))
|
| + CreateDirIfNeeded(config_file_dir)
|
| + return config_file_dir
|
| +
|
| +
|
| +def GetCredentialStoreFilename():
|
| + return os.path.join(GetGsutilStateDir(), 'credcache')
|
| +
|
| +
|
| +def CreateTrackerDirIfNeeded():
|
| + """Looks up or creates the gsutil tracker file directory.
|
| +
|
| + This is the configured directory where gsutil keeps its resumable transfer
|
| + tracker files. This function creates it if it doesn't already exist.
|
| +
|
| + Returns:
|
| + The pathname to the tracker directory.
|
| + """
|
| + tracker_dir = config.get(
|
| + 'GSUtil', 'resumable_tracker_dir',
|
| + os.path.join(GetGsutilStateDir(), 'tracker-files'))
|
| + CreateDirIfNeeded(tracker_dir)
|
| return tracker_dir
|
|
|
|
|
| +def PrintTrackerDirDeprecationWarningIfNeeded():
|
| + # TODO: Remove this along with the tracker_dir config value 1 year after
|
| + # 4.6 release date. Use state_dir instead.
|
| + if config.has_option('GSUtil', 'resumable_tracker_dir'):
|
| + sys.stderr.write('Warning: you have set resumable_tracker_dir in your '
|
| + '.boto configuration file. This configuration option is '
|
| + 'deprecated; please use the state_dir configuration '
|
| + 'option instead.\n')
|
| +
|
| +
|
| # Name of file where we keep the timestamp for the last time we checked whether
|
| # a new version of gsutil is available.
|
| +PrintTrackerDirDeprecationWarningIfNeeded()
|
| +CreateDirIfNeeded(GetGsutilStateDir())
|
| LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
|
| - os.path.join(CreateTrackerDirIfNeeded(), '.last_software_update_check'))
|
| + os.path.join(GetGsutilStateDir(), '.last_software_update_check'))
|
|
|
|
|
| def HasConfiguredCredentials():
|
| """Determines if boto credential/config file exists."""
|
| - config = boto.config
|
| has_goog_creds = (config.has_option('Credentials', 'gs_access_key_id') and
|
| config.has_option('Credentials', 'gs_secret_access_key'))
|
| has_amzn_creds = (config.has_option('Credentials', 'aws_access_key_id') and
|
| config.has_option('Credentials', 'aws_secret_access_key'))
|
| has_oauth_creds = (
|
| config.has_option('Credentials', 'gs_oauth2_refresh_token'))
|
| - has_service_account_creds = (HAS_CRYPTO and
|
| - config.has_option('Credentials', 'gs_service_client_id')
|
| - and config.has_option('Credentials', 'gs_service_key_file'))
|
| + has_service_account_creds = (
|
| + HAS_CRYPTO and
|
| + config.has_option('Credentials', 'gs_service_client_id') and
|
| + config.has_option('Credentials', 'gs_service_key_file'))
|
|
|
| valid_auth_handler = None
|
| try:
|
| @@ -188,10 +278,7 @@
|
|
|
|
|
| def ConfigureNoOpAuthIfNeeded():
|
| - """
|
| - Sets up no-op auth handler if no boto credentials are configured.
|
| - """
|
| - config = boto.config
|
| + """Sets up no-op auth handler if no boto credentials are configured."""
|
| if not HasConfiguredCredentials():
|
| if (config.has_option('Credentials', 'gs_service_client_id')
|
| and not HAS_CRYPTO):
|
| @@ -199,12 +286,12 @@
|
| 'Your gsutil is configured with an OAuth2 service account, but you '
|
| 'do not have PyOpenSSL or PyCrypto 2.6 or later installed. Service '
|
| 'account authentication requires one of these libraries; please '
|
| - 'install either of them to proceed, or configure a different type '
|
| + 'install either of them to proceed, or configure a different type '
|
| 'of credentials with "gsutil config".')))
|
| else:
|
| # With no boto config file the user can still access publicly readable
|
| # buckets and objects.
|
| - from gslib import no_op_auth_plugin
|
| + from gslib import no_op_auth_plugin # pylint: disable=unused-variable
|
|
|
|
|
| def GetConfigFilePath():
|
| @@ -234,6 +321,85 @@
|
| return cf_list
|
|
|
|
|
| +def GetCertsFile():
|
| + """Configures and returns the CA Certificates file.
|
| +
|
| + If one is already configured, use it. Otherwise, amend the configuration
|
| + (in boto.config) to use the cert roots distributed with gsutil.
|
| +
|
| + Returns:
|
| + string filename of the certs file to use.
|
| + """
|
| + certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
|
| + if not certs_file:
|
| + if configured_certs_file:
|
| + disk_certs_file = configured_certs_file
|
| + else:
|
| + disk_certs_file = os.path.abspath(
|
| + os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
|
| + if not os.path.exists(disk_certs_file):
|
| + # If the file is not present on disk, this means the gslib module
|
| + # doesn't actually exist on disk anywhere. This can happen if it's
|
| + # being imported from a zip file. Unfortunately, we have to copy the
|
| + # certs file to a local temp file on disk because the underlying SSL
|
| + # socket requires it to be a filesystem path.
|
| + certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
|
| + if not certs_data:
|
| + raise CommandException('Certificates file not found. Please '
|
| + 'reinstall gsutil from scratch')
|
| + fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
|
| + f = os.fdopen(fd, 'w')
|
| + f.write(certs_data)
|
| + f.close()
|
| + disk_certs_file = fname
|
| + certs_file = disk_certs_file
|
| + return certs_file
|
| +
|
| +
|
| +def GetCleanupFiles():
|
| + """Returns a list of temp files to delete (if possible) when program exits."""
|
| + cleanup_files = []
|
| + if configured_certs_file:
|
| + cleanup_files.append(configured_certs_file)
|
| + return cleanup_files
|
| +
|
| +
|
| +def GetNewHttp(http_class=httplib2.Http, **kwargs):
|
| + """Creates and returns a new httplib2.Http instance.
|
| +
|
| + Args:
|
| + http_class: Optional custom Http class to use.
|
| + **kwargs: Arguments to pass to http_class constructor.
|
| +
|
| + Returns:
|
| + An initialized httplib2.Http instance.
|
| + """
|
| + proxy_info = httplib2.ProxyInfo(
|
| + proxy_type=3,
|
| + proxy_host=boto.config.get('Boto', 'proxy', None),
|
| + proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
|
| + proxy_user=boto.config.get('Boto', 'proxy_user', None),
|
| + proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
|
| + proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False))
|
| + # Some installers don't package a certs file with httplib2, so use the
|
| + # one included with gsutil.
|
| + kwargs['ca_certs'] = GetCertsFile()
|
| + # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
|
| + kwargs['timeout'] = SSL_TIMEOUT
|
| + http = http_class(proxy_info=proxy_info, **kwargs)
|
| + http.disable_ssl_certificate_validation = (not config.getbool(
|
| + 'Boto', 'https_validate_certificates'))
|
| + return http
|
| +
|
| +
|
| +def GetNumRetries():
|
| + return config.getint('Boto', 'num_retries', 6)
|
| +
|
| +
|
| +def GetMaxRetryDelay():
|
| + return config.getint('Boto', 'max_retry_delay', 60)
|
| +
|
| +
|
| def _RoundToNearestExponent(num):
|
| i = 0
|
| while i+1 < len(_EXP_STRINGS) and num >= (2 ** _EXP_STRINGS[i+1][0]):
|
| @@ -274,6 +440,8 @@
|
| human_string: A string supplied by user, e.g. '1M', '3 GB'.
|
| Returns:
|
| An integer containing the number of bytes.
|
| + Raises:
|
| + ValueError: on an invalid string.
|
| """
|
| human_string = human_string.lower()
|
| m = MATCH_HUMAN_BYTES.match(human_string)
|
| @@ -314,82 +482,59 @@
|
| return d0 + d1
|
|
|
|
|
| -def ParseErrorDetail(e):
|
| - """Parse <Message> and/or <Details> text from XML content.
|
| +def RemoveCRLFFromString(input_str):
|
| + """Returns the input string with all \\n and \\r removed."""
|
| + return re.sub(r'[\r\n]', '', input_str)
|
|
|
| - Args:
|
| - e: The GSResponseError that includes XML to be parsed.
|
|
|
| - Returns:
|
| - (exception_name, m, d), where m is <Message> text or None,
|
| - and d is <Details> text or None.
|
| - """
|
| - exc_name_parts = re.split("[\.']", str(type(e)))
|
| - if len(exc_name_parts) < 2:
|
| - # Shouldn't happen, but have fallback in case.
|
| - exc_name = str(type(e))
|
| - else:
|
| - exc_name = exc_name_parts[-2]
|
| - if not hasattr(e, 'body') or e.body is None:
|
| - return (exc_name, None)
|
| +def UnaryDictToXml(message):
|
| + """Generates XML representation of a nested dict.
|
|
|
| - match = re.search(r'<Message>(?P<message>.*)</Message>', e.body)
|
| - m = match.group('message') if match else None
|
| - match = re.search(r'<Details>(?P<details>.*)</Details>', e.body)
|
| - d = match.group('details') if match else None
|
| - return (exc_name, m, d)
|
| + This dict contains exactly one top-level entry and an arbitrary number of
|
| + 2nd-level entries, e.g. capturing a WebsiteConfiguration message.
|
|
|
| -def FormatErrorMessage(exc_name, status, code, reason, message, detail):
|
| - """Formats an error message from components parsed by ParseErrorDetail."""
|
| - if message and detail:
|
| - return('%s: status=%d, code=%s, reason="%s", message="%s", detail="%s"' %
|
| - (exc_name, status, code, reason, message, detail))
|
| - if message:
|
| - return('%s: status=%d, code=%s, reason="%s", message="%s"' %
|
| - (exc_name, status, code, reason, message))
|
| - if detail:
|
| - return('%s: status=%d, code=%s, reason="%s", detail="%s"' %
|
| - (exc_name, status, code, reason, detail))
|
| - return('%s: status=%d, code=%s, reason="%s"' %
|
| - (exc_name, status, code, reason))
|
| -
|
| -def UnaryDictToXml(message):
|
| - """Generates XML representation of a nested dict with exactly one
|
| - top-level entry and an arbitrary number of 2nd-level entries, e.g.
|
| - capturing a WebsiteConfiguration message.
|
| -
|
| Args:
|
| message: The dict encoding the message.
|
|
|
| Returns:
|
| XML string representation of the input dict.
|
| +
|
| + Raises:
|
| + Exception: if dict contains more than one top-level entry.
|
| """
|
| if len(message) != 1:
|
| - raise Exception("Expected dict of size 1, got size %d" % len(message))
|
| + raise Exception('Expected dict of size 1, got size %d' % len(message))
|
|
|
| name, content = message.items()[0]
|
| - T = ElementTree.Element(name)
|
| - for property, value in sorted(content.items()):
|
| - node = ElementTree.SubElement(T, property)
|
| + element_type = ElementTree.Element(name)
|
| + for element_property, value in sorted(content.items()):
|
| + node = ElementTree.SubElement(element_type, element_property)
|
| node.text = value
|
| - return ElementTree.tostring(T)
|
| + return ElementTree.tostring(element_type)
|
|
|
|
|
| -def LookUpGsutilVersion(uri):
|
| - """Looks up the gsutil version of the specified gsutil tarball URI, from the
|
| - metadata field set on that object.
|
| +def LookUpGsutilVersion(gsutil_api, url_str):
|
| + """Looks up the gsutil version of the specified gsutil tarball URL.
|
|
|
| + Version is specified in the metadata field set on that object.
|
| +
|
| Args:
|
| - URI: gsutil URI tarball (such as gs://pub/gsutil.tar.gz).
|
| + gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
|
| + url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').
|
|
|
| Returns:
|
| - Version string if URI is a cloud URI containing x-goog-meta-gsutil-version
|
| + Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
|
| metadata, else None.
|
| """
|
| - if uri.is_cloud_uri():
|
| - obj = uri.get_key(False)
|
| - if obj.metadata and 'gsutil_version' in obj.metadata:
|
| - return obj.metadata['gsutil_version']
|
| + url = StorageUrlFromString(url_str)
|
| + if url.IsCloudUrl():
|
| + obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
|
| + provider=url.scheme,
|
| + fields=['metadata'])
|
| + if obj.metadata and obj.metadata.additionalProperties:
|
| + for prop in obj.metadata.additionalProperties:
|
| + if prop.key == 'gsutil_version':
|
| + return prop.value
|
|
|
|
|
| def GetGsutilVersionModifiedTime():
|
| @@ -404,16 +549,22 @@
|
| return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty()
|
|
|
|
|
| +def _HttpsValidateCertificatesEnabled():
|
| + return config.get('Boto', 'https_validate_certificates', True)
|
| +
|
| +CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertificatesEnabled()
|
| +
|
| +
|
| def _BotoIsSecure():
|
| - for cfg_var in ('is_secure', 'https_validate_certificates'):
|
| - if (config.has_option('Boto', cfg_var)
|
| - and not config.getboolean('Boto', cfg_var)):
|
| - return False, cfg_var
|
| - return True, ''
|
| + return config.get('Boto', 'is_secure', True)
|
|
|
| BOTO_IS_SECURE = _BotoIsSecure()
|
|
|
|
|
| +def ResumableThreshold():
|
| + return config.getint('GSUtil', 'resumable_threshold', TWO_MB)
|
| +
|
| +
|
| def AddAcceptEncoding(headers):
|
| """Adds accept-encoding:gzip to the dictionary of headers."""
|
| # If Accept-Encoding is not already set, set it to enable gzip.
|
| @@ -421,79 +572,86 @@
|
| headers['accept-encoding'] = 'gzip'
|
|
|
|
|
| -def PrintFullInfoAboutUri(uri, incl_acl, headers):
|
| - """Print full info for given URI (like what displays for gsutil ls -L).
|
| +# pylint: disable=too-many-statements
|
| +def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
|
| + """Print full info for given object (like what displays for gsutil ls -L).
|
|
|
| Args:
|
| - uri: StorageUri being listed.
|
| + bucket_listing_ref: BucketListingRef being listed.
|
| + Must have ref_type OBJECT and a populated root_object
|
| + with the desired fields.
|
| incl_acl: True if ACL info should be output.
|
| - headers: The headers to pass to boto, if any.
|
|
|
| Returns:
|
| - Tuple (number of objects,
|
| - object length, if listing_style is one of the long listing formats)
|
| + Tuple (number of objects, object_length)
|
|
|
| Raises:
|
| Exception: if calling bug encountered.
|
| """
|
| - # Run in a try/except clause so we can continue listings past
|
| - # access-denied errors (which can happen because user may have READ
|
| - # permission on object and thus see the bucket listing data, but lack
|
| - # FULL_CONTROL over individual objects and thus not be able to read
|
| - # their ACLs).
|
| - # TODO: Switch this code to use string formatting instead of tabs.
|
| - try:
|
| - print '%s:' % uri.uri.encode('utf-8')
|
| - headers = headers.copy()
|
| - # Add accept encoding so that the HEAD request matches what would be
|
| - # sent for a GET request.
|
| - AddAcceptEncoding(headers)
|
| - got_key = False
|
| - obj = uri.get_key(False, headers=headers)
|
| - got_key = True
|
| - print '\tCreation time:\t\t%s' % obj.last_modified
|
| - if obj.cache_control:
|
| - print '\tCache-Control:\t\t%s' % obj.cache_control
|
| - if obj.content_disposition:
|
| - print '\tContent-Disposition:\t\t%s' % obj.content_disposition
|
| - if obj.content_encoding:
|
| - print '\tContent-Encoding:\t%s' % obj.content_encoding
|
| - if obj.content_language:
|
| - print '\tContent-Language:\t%s' % obj.content_language
|
| - print '\tContent-Length:\t\t%s' % obj.size
|
| - print '\tContent-Type:\t\t%s' % obj.content_type
|
| - if hasattr(obj, 'component_count') and obj.component_count:
|
| - print '\tComponent-Count:\t%d' % obj.component_count
|
| - if obj.metadata:
|
| - prefix = uri.get_provider().metadata_prefix
|
| - for name in obj.metadata:
|
| - meta_string = '\t%s%s:\t%s' % (prefix, name, obj.metadata[name])
|
| - print meta_string.encode('utf-8')
|
| - if hasattr(obj, 'cloud_hashes'):
|
| - for alg in obj.cloud_hashes:
|
| - print '\tHash (%s):\t\t%s' % (
|
| - alg, binascii.b2a_hex(obj.cloud_hashes[alg]))
|
| - print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
|
| - if hasattr(obj, 'generation'):
|
| - print '\tGeneration:\t\t%s' % obj.generation
|
| - if hasattr(obj, 'metageneration'):
|
| - print '\tMetageneration:\t\t%s' % obj.metageneration
|
| - if incl_acl:
|
| - print '\tACL:\t\t%s' % (uri.get_acl(False, headers))
|
| - return (1, obj.size)
|
| - except boto.exception.GSResponseError as e:
|
| - if e.status == 403:
|
| - if got_key:
|
| - print ('\tACL:\t\t\tACCESS DENIED. Note: you need FULL_CONTROL '
|
| - 'permission\n\t\t\ton the object to read its ACL.')
|
| - return (1, obj.size)
|
| + url_str = bucket_listing_ref.url_string
|
| + storage_url = StorageUrlFromString(url_str)
|
| + obj = bucket_listing_ref.root_object
|
| +
|
| + if (obj.metadata and S3_DELETE_MARKER_GUID in
|
| + obj.metadata.additionalProperties):
|
| + num_bytes = 0
|
| + num_objs = 0
|
| + url_str += '<DeleteMarker>'
|
| + else:
|
| + num_bytes = obj.size
|
| + num_objs = 1
|
| +
|
| + print '%s:' % url_str.encode(UTF8)
|
| + if obj.updated:
|
| + print '\tCreation time:\t\t%s' % obj.updated.strftime(
|
| + '%a, %d %b %Y %H:%M:%S GMT')
|
| + if obj.cacheControl:
|
| + print '\tCache-Control:\t\t%s' % obj.cacheControl
|
| + if obj.contentDisposition:
|
| + print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
|
| + if obj.contentEncoding:
|
| + print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
|
| + if obj.contentLanguage:
|
| + print '\tContent-Language:\t%s' % obj.contentLanguage
|
| + print '\tContent-Length:\t\t%s' % obj.size
|
| + print '\tContent-Type:\t\t%s' % obj.contentType
|
| + if obj.componentCount:
|
| + print '\tComponent-Count:\t%d' % obj.componentCount
|
| + marker_props = {}
|
| + if obj.metadata and obj.metadata.additionalProperties:
|
| + non_marker_props = []
|
| + for add_prop in obj.metadata.additionalProperties:
|
| + if add_prop.key not in S3_MARKER_GUIDS:
|
| + non_marker_props.append(add_prop)
|
| else:
|
| - print "You aren't authorized to read %s - skipping" % uri
|
| - return (1, 0)
|
| + marker_props[add_prop.key] = add_prop.value
|
| + if non_marker_props:
|
| + print '\tMetadata:'
|
| + for ap in non_marker_props:
|
| + meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
|
| + print meta_string.encode(UTF8)
|
| + if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
|
| + if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
|
| + print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
|
| + if obj.generation:
|
| + generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
|
| + print '\tGeneration:\t\t%s' % generation_str
|
| + if obj.metageneration:
|
| + print '\tMetageneration:\t\t%s' % obj.metageneration
|
| + if incl_acl:
|
| + # JSON API won't return acls as part of the response unless we have
|
| + # full control scope
|
| + if obj.acl:
|
| + print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
|
| + elif S3_ACL_MARKER_GUID in marker_props:
|
| + print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
|
| else:
|
| - raise e
|
| - return (numobjs, numbytes)
|
| + print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
|
| + 'permission\n\t\t\t\ton the object to read its ACL.')
|
|
|
| + return (num_objs, num_bytes)
|
| +
|
| +
|
| def CompareVersions(first, second):
|
| """Compares the first and second gsutil version strings.
|
|
|
| @@ -502,6 +660,10 @@
|
| (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
|
| less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).
|
|
|
| + Args:
|
| + first: First gsutil version string.
|
| + second: Second gsutil version string.
|
| +
|
| Returns:
|
| (g, m):
|
| g is True if first known to be greater than second, else False.
|
| @@ -531,50 +693,182 @@
|
| return (bool(suffix_ver2) and not suffix_ver1, False)
|
| return (False, False)
|
|
|
| -def _IncreaseSoftLimitForResource(resource_name):
|
| - """Sets a new soft limit for the maximum number of open files.
|
| - The soft limit is used for this process (and its children), but the
|
| - hard limit is set by the system and cannot be exceeded.
|
| +
|
| +def _IncreaseSoftLimitForResource(resource_name, fallback_value):
|
| + """Sets a new soft limit for the maximum number of open files.
|
| +
|
| + The soft limit is used for this process (and its children), but the
|
| + hard limit is set by the system and cannot be exceeded.
|
| +
|
| + We will first try to set the soft limit to the hard limit's value; if that
|
| + fails, we will try to set the soft limit to the fallback_value iff this would
|
| + increase the soft limit.
|
| +
|
| + Args:
|
| + resource_name: Name of the resource to increase the soft limit for.
|
| + fallback_value: Fallback value to be used if we couldn't set the
|
| + soft value to the hard value (e.g., if the hard value
|
| + is "unlimited").
|
| +
|
| + Returns:
|
| + Current soft limit for the resource (after any changes we were able to
|
| + make), or -1 if the resource doesn't exist.
|
| """
|
| +
|
| + # Get the value of the resource.
|
| try:
|
| (soft_limit, hard_limit) = resource.getrlimit(resource_name)
|
| - resource.setrlimit(resource_name, (hard_limit, hard_limit))
|
| - return hard_limit
|
| - except (resource.error, ValueError), e:
|
| - return 0
|
| + except (resource.error, ValueError):
|
| + # The resource wasn't present, so we can't do anything here.
|
| + return -1
|
|
|
| + # Try to set the value of the soft limit to the value of the hard limit.
|
| + if hard_limit > soft_limit: # Some OS's report 0 for "unlimited".
|
| + try:
|
| + resource.setrlimit(resource_name, (hard_limit, hard_limit))
|
| + return hard_limit
|
| + except (resource.error, ValueError):
|
| + # We'll ignore this and try the fallback value.
|
| + pass
|
| +
|
| + # Try to set the value of the soft limit to the fallback value.
|
| + if soft_limit < fallback_value:
|
| + try:
|
| + resource.setrlimit(resource_name, (fallback_value, hard_limit))
|
| + return fallback_value
|
| + except (resource.error, ValueError):
|
| + # We couldn't change the soft limit, so just report the current
|
| + # value of the soft limit.
|
| + return soft_limit
|
| + else:
|
| + return soft_limit
|
| +
|
| +
|
| +def GetCloudApiInstance(cls, thread_state=None):
|
| + """Gets a gsutil Cloud API instance.
|
| +
|
| + Since Cloud API implementations are not guaranteed to be thread-safe, each
|
| + thread needs its own instance. These instances are passed to each thread
|
| + via the thread pool logic in command.
|
| +
|
| + Args:
|
| + cls: Command class to be used for single-threaded case.
|
| + thread_state: Per thread state from this thread containing a gsutil
|
| + Cloud API instance.
|
| +
|
| + Returns:
|
| + gsutil Cloud API instance.
|
| + """
|
| + return thread_state or cls.gsutil_api
|
| +
|
| +
|
| +def GetFileSize(fp, position_to_eof=False):
|
| + """Returns size of file, optionally leaving fp positioned at EOF."""
|
| + if not position_to_eof:
|
| + cur_pos = fp.tell()
|
| + fp.seek(0, os.SEEK_END)
|
| + cur_file_size = fp.tell()
|
| + if not position_to_eof:
|
| + fp.seek(cur_pos)
|
| + return cur_file_size
|
| +
|
| +
|
| +def GetStreamFromFileUrl(storage_url):
|
| + if storage_url.IsStream():
|
| + return sys.stdin
|
| + else:
|
| + return open(storage_url.object_name, 'rb')
|
| +
|
| +
|
| +def UrlsAreForSingleProvider(url_args):
|
| + """Tests whether the URLs are all for a single provider.
|
| +
|
| + Args:
|
| + url_args: Strings to check.
|
| +
|
| + Returns:
|
| + True if URLs are for single provider, False otherwise.
|
| + """
|
| + provider = None
|
| + url = None
|
| + for url_str in url_args:
|
| + url = StorageUrlFromString(url_str)
|
| + if not provider:
|
| + provider = url.scheme
|
| + elif url.scheme != provider:
|
| + return False
|
| + return provider is not None
|
| +
|
| +
|
| +def HaveFileUrls(args_to_check):
|
| + """Checks whether args_to_check contain any file URLs.
|
| +
|
| + Args:
|
| + args_to_check: Command-line argument subset to check.
|
| +
|
| + Returns:
|
| + True if args_to_check contains any file URLs.
|
| + """
|
| + for url_str in args_to_check:
|
| + storage_url = StorageUrlFromString(url_str)
|
| + if storage_url.IsFileUrl():
|
| + return True
|
| + return False
|
| +
|
| +
|
| +def HaveProviderUrls(args_to_check):
|
| + """Checks whether args_to_check contains any provider URLs (like 'gs://').
|
| +
|
| + Args:
|
| + args_to_check: Command-line argument subset to check.
|
| +
|
| + Returns:
|
| + True if args_to_check contains any provider URLs.
|
| + """
|
| + for url_str in args_to_check:
|
| + storage_url = StorageUrlFromString(url_str)
|
| + if storage_url.IsCloudUrl() and storage_url.IsProvider():
|
| + return True
|
| + return False
|
| +
|
| +
|
| def MultiprocessingIsAvailable(logger=None):
|
| - """
|
| + """Checks if multiprocessing is available.
|
| +
|
| There are some environments in which there is no way to use multiprocessing
|
| logic that's built into Python (e.g., if /dev/shm is not available, then
|
| we can't create semaphores). This simply tries out a few things that will be
|
| needed to make sure the environment can support the pieces of the
|
| multiprocessing module that we need.
|
| -
|
| +
|
| + Args:
|
| + logger: logging.logger to use for debug output.
|
| +
|
| Returns:
|
| (multiprocessing_is_available, stack_trace):
|
| multiprocessing_is_available: True iff the multiprocessing module is
|
| available for use.
|
| stack_trace: The stack trace generated by the call we tried that failed.
|
| """
|
| + # pylint: disable=global-variable-undefined
|
| global cached_multiprocessing_is_available
|
| global cached_multiprocessing_check_stack_trace
|
| global cached_multiprocessing_is_available_message
|
| if cached_multiprocessing_is_available is not None:
|
| if logger:
|
| logger.debug(cached_multiprocessing_check_stack_trace)
|
| - logger.warn('\n'.join(textwrap.wrap(
|
| - cached_multiprocessing_is_available_message + '\n')))
|
| + logger.warn(cached_multiprocessing_is_available_message)
|
| return (cached_multiprocessing_is_available,
|
| cached_multiprocessing_check_stack_trace)
|
|
|
| stack_trace = None
|
| multiprocessing_is_available = True
|
| - message = (
|
| - 'You have requested multiple threads or processes for an operation,'
|
| - ' but the required functionality of Python\'s multiprocessing '
|
| - 'module is not available. Your operations will be performed '
|
| - 'sequentially, and any requests for parallelism will be ignored.')
|
| + message = """
|
| +You have requested multiple threads or processes for an operation, but the
|
| +required functionality of Python\'s multiprocessing module is not available.
|
| +Your operations will be performed sequentially, and any requests for
|
| +parallelism will be ignored.
|
| +"""
|
| try:
|
| # Fails if /dev/shm (or some equivalent thereof) is not available for use
|
| # (e.g., there's no implementation, or we can't write to it, etc.).
|
| @@ -582,48 +876,61 @@
|
| multiprocessing.Value('i', 0)
|
| except:
|
| if not IS_WINDOWS:
|
| - message += ('\nPlease ensure that you have write access to both '
|
| - '/dev/shm and /run/shm.')
|
| + message += """
|
| +Please ensure that you have write access to both /dev/shm and /run/shm.
|
| +"""
|
| raise # We'll handle this in one place below.
|
| -
|
| +
|
| # Manager objects and Windows are generally a pain to work with, so try it
|
| # out as a sanity check. This definitely works on some versions of Windows,
|
| # but it's certainly possible that there is some unknown configuration for
|
| # which it won't.
|
| multiprocessing.Manager()
|
| -
|
| +
|
| # Check that the max number of open files is reasonable. Always check this
|
| # after we're sure that the basic multiprocessing functionality is
|
| # available, since this won't matter unless that's true.
|
| - limit = 0
|
| + limit = -1
|
| if HAS_RESOURCE_MODULE:
|
| # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
|
| # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
|
| # "resource" module is not guaranteed to know about these names.
|
| try:
|
| limit = max(limit,
|
| - _IncreaseSoftLimitForResource(resource.RLIMIT_NOFILE))
|
| - except AttributeError, e:
|
| + _IncreaseSoftLimitForResource(
|
| + resource.RLIMIT_NOFILE,
|
| + MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
|
| + except AttributeError:
|
| pass
|
| try:
|
| limit = max(limit,
|
| - _IncreaseSoftLimitForResource(resource.RLIMIT_OFILE))
|
| - except AttributeError, e:
|
| + _IncreaseSoftLimitForResource(
|
| + resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
|
| + except AttributeError:
|
| pass
|
| +
|
| if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
|
| - message += (
|
| - '\nYour max number of open files, %s, is too low to allow safe '
|
| - 'multiprocessing calls. If you are on a Unix-like OS, then you can '
|
| - 'fix this by adding something like "ulimit -n 10000" to your '
|
| - '~/.bashrc (Linux), ~/.bash_profile (OS X), or equivalent file, '
|
| - 'and opening a new terminal.' % limit)
|
| + message += ("""
|
| +Your max number of open files, %s, is too low to allow safe multiprocessing.
|
| +On Linux you can fix this by adding something like "ulimit -n 10000" to your
|
| +~/.bashrc or equivalent file and opening a new terminal.
|
| +
|
| +On MacOS, you may also need to run a command like this once (in addition to the
|
| +above instructions), which might require a restart of your system to take
|
| +effect:
|
| + launchctl limit maxfiles 10000
|
| +
|
| +Alternatively, edit /etc/launchd.conf with something like:
|
| + limit maxfiles 10000 10000
|
| +
|
| +""" % limit)
|
| raise Exception('Max number of open files, %s, is too low.' % limit)
|
| - except:
|
| + except: # pylint: disable=bare-except
|
| stack_trace = traceback.format_exc()
|
| multiprocessing_is_available = False
|
| if logger is not None:
|
| logger.debug(stack_trace)
|
| - logger.warn('\n'.join(textwrap.wrap(message + '\n')))
|
| + logger.warn(message)
|
|
|
| # Set the cached values so that we never need to do this check again.
|
| cached_multiprocessing_is_available = multiprocessing_is_available
|
| @@ -631,13 +938,79 @@
|
| cached_multiprocessing_is_available_message = message
|
| return (multiprocessing_is_available, stack_trace)
|
|
|
| +
|
def CreateLock():
  """Returns either a multiprocessing lock or a threading lock.

  A multiprocessing lock is used iff we have access to the parts of the
  multiprocessing module that are necessary to enable parallelism in
  operations; otherwise we fall back to a threading lock.

  Returns:
    Multiprocessing or threading lock.
  """
  parallelism_ok = MultiprocessingIsAvailable()[0]
  return manager.Lock() if parallelism_ok else threading.Lock()
|
| +
|
| +
|
def IsCloudSubdirPlaceholder(url, blr=None):
  """Determines if URL is a cloud subdir placeholder.

  This function is needed because GUI tools (like the GCS cloud console) allow
  users to create empty "folders" by creating a placeholder object; and parts
  of gsutil need to treat those placeholder objects specially. For example,
  gsutil rsync needs to avoid downloading those objects because they can cause
  conflicts (see comments in rsync command for details).

  We currently detect two cases:
    - Cloud objects whose name ends with '_$folder$'
    - Cloud objects whose name ends with '/'

  Args:
    url: The URL to be checked.
    blr: BucketListingRef to check, or None if not available.
         If None, size won't be checked.

  Returns:
    True/False.
  """
  if not url.IsCloudUrl():
    # Only cloud objects can be placeholders; local paths never qualify.
    return False
  name = url.url_string
  if name.endswith('_$folder$'):
    return True
  # Without a listing ref we cannot check the size, so treat it as empty.
  obj_size = blr.root_object.size if blr and blr.IsObject() else 0
  return name.endswith('/') and obj_size == 0
|
| +
|
| +
|
def GetTermLines():
  """Returns number of terminal lines."""
  # fcntl/termios aren't supported in Windows, so fall back to the default.
  try:
    import fcntl  # pylint: disable=g-import-not-at-top
    import termios  # pylint: disable=g-import-not-at-top
  except ImportError:
    return _DEFAULT_LINES

  def _WindowRows(fd):  # pylint: disable=invalid-name
    """Asks the terminal driver on fd for its row count; 0 on failure."""
    try:
      rows, _ = struct.unpack(
          'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
      return rows
    except:  # pylint: disable=bare-except
      return 0  # Failure (so will retry on different file descriptor below).

  # Probe stdin, stdout, and stderr, in that order, for a usable row count.
  rows = _WindowRows(0) or _WindowRows(1) or _WindowRows(2)
  if not rows:
    # None of the standard descriptors worked; try the controlling terminal.
    try:
      fd = os.open(os.ctermid(), os.O_RDONLY)
      rows = _WindowRows(fd)
      os.close(fd)
    except:  # pylint: disable=bare-except
      pass
  if not rows:
    # Last resort: the LINES environment variable, else the default.
    rows = os.environ.get('LINES', _DEFAULT_LINES)
  return int(rows)
|
|
|