Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(258)

Unified Diff: gslib/util.py

Issue 698893003: Update checked-in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « gslib/translation_helper.py ('k') | gslib/wildcard_iterator.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: gslib/util.py
===================================================================
--- gslib/util.py (revision 33376)
+++ gslib/util.py (working copy)
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
# Copyright 2010 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,33 +12,45 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""Static data and helper functions."""
-import binascii
-import boto
-import boto.auth
+from __future__ import absolute_import
+
import errno
-import gslib
import math
import multiprocessing
import os
+import pkgutil
import re
+import struct
import sys
+import tempfile
import textwrap
import threading
import traceback
import xml.etree.ElementTree as ElementTree
+import boto
from boto import config
+import boto.auth
from boto.exception import NoAuthHandlerFound
from boto.gs.connection import GSConnection
from boto.provider import Provider
from boto.pyami.config import BotoConfigLocations
-from gslib.exception import CommandException
+import httplib2
+from oauth2client.client import HAS_CRYPTO
from retry_decorator import retry_decorator
-from oauth2client.client import HAS_CRYPTO
+import gslib
+from gslib.exception import CommandException
+from gslib.storage_url import StorageUrlFromString
+from gslib.translation_helper import AclTranslation
+from gslib.translation_helper import GenerationFromUrlAndString
+from gslib.translation_helper import S3_ACL_MARKER_GUID
+from gslib.translation_helper import S3_DELETE_MARKER_GUID
+from gslib.translation_helper import S3_MARKER_GUIDS
+
+# pylint: disable=g-import-not-at-top
try:
# This module doesn't necessarily exist on Windows.
import resource
@@ -45,37 +58,70 @@
except ImportError, e:
HAS_RESOURCE_MODULE = False
+ONE_KB = 1024
TWO_MB = 2 * 1024 * 1024
+TEN_MB = 10 * 1024 * 1024
+DEFAULT_FILE_BUFFER_SIZE = 8192
+_DEFAULT_LINES = 25
+# By default, the timeout for SSL read errors is infinite. This could
+# cause gsutil to hang on network disconnect, so pick a more reasonable
+# timeout.
+SSL_TIMEOUT = 60
+
+# Start with a progress callback every 64KB during uploads/downloads (JSON API).
+# Callback implementation should back off until it hits the maximum size
+# so that callbacks do not create huge amounts of log output.
+START_CALLBACK_PER_BYTES = 1024*64
+MAX_CALLBACK_PER_BYTES = 1024*1024*100
+
+# Upload/download files in 8KB chunks over the HTTP connection.
+TRANSFER_BUFFER_SIZE = 1024*8
+
+# Default number of progress callbacks during transfer (XML API).
+XML_PROGRESS_CALLBACKS = 10
+
+# For files >= this size, output a message indicating that we're running an
+# operation on the file (like hashing or gzipping) so it does not appear to the
+# user that the command is hanging.
+MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024 # 100 MB
+
NO_MAX = sys.maxint
+UTF8 = 'utf-8'
+
VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')
RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'
# Binary exponentiation strings.
_EXP_STRINGS = [
- (0, 'B', 'bit'),
- (10, 'KB', 'Kbit', 'K'),
- (20, 'MB', 'Mbit', 'M'),
- (30, 'GB', 'Gbit', 'G'),
- (40, 'TB', 'Tbit', 'T'),
- (50, 'PB', 'Pbit', 'P'),
- (60, 'EB', 'Ebit', 'E'),
+ (0, 'B', 'bit'),
+ (10, 'KB', 'Kbit', 'K'),
+ (20, 'MB', 'Mbit', 'M'),
+ (30, 'GB', 'Gbit', 'G'),
+ (40, 'TB', 'Tbit', 'T'),
+ (50, 'PB', 'Pbit', 'P'),
+ (60, 'EB', 'Ebit', 'E'),
]
-global manager
+configured_certs_file = None
+global manager # pylint: disable=global-at-module-level
+
+
def InitializeMultiprocessingVariables():
+ """Perform necessary initialization for multiprocessing.
+
+ See gslib.command.InitializeMultiprocessingVariables for an explanation
+ of why this is necessary.
"""
- Perform necessary initialization - see
- gslib.command.InitializeMultiprocessingVariables for an explanation of why
- this is necessary.
- """
- global manager
+ global manager # pylint: disable=global-variable-undefined
manager = multiprocessing.Manager()
+
def _GenerateSuffixRegex():
+ """Creates a suffix regex for human-readable byte counts."""
human_bytes_re = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
suffixes = []
suffix_to_si = {}
@@ -84,7 +130,7 @@
for suffix in si_suffixes:
suffix_to_si[suffix] = i
suffixes.extend(si_suffixes)
- human_bytes_re = human_bytes_re % '|'.join(suffixes)
+ human_bytes_re %= '|'.join(suffixes)
matcher = re.compile(human_bytes_re)
return suffix_to_si, matcher
@@ -105,7 +151,7 @@
GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'
-Retry = retry_decorator.retry
+Retry = retry_decorator.retry # pylint: disable=invalid-name
# Cache the values from this check such that they're available to all callers
# without needing to run all the checks again (some of these, such as calling
@@ -114,6 +160,7 @@
cached_multiprocessing_is_available_stack_trace = None
cached_multiprocessing_is_available_message = None
+
# Enum class for specifying listing style.
class ListingStyle(object):
SHORT = 'SHORT'
@@ -126,48 +173,91 @@
getattr(crcmod.crcmod, '_usingExtension', None))
-def CreateTrackerDirIfNeeded():
- """Looks up the configured directory where gsutil keeps its resumable
- transfer tracker files, and creates it if it doesn't already exist.
-
- Returns:
- The pathname to the tracker directory.
- """
- tracker_dir = config.get(
- 'GSUtil', 'resumable_tracker_dir',
- os.path.expanduser('~' + os.sep + '.gsutil'))
- if not os.path.exists(tracker_dir):
+def CreateDirIfNeeded(dir_path):
+ """Creates a directory, suppressing already-exists errors."""
+ if not os.path.exists(dir_path):
try:
# Unfortunately, even though we catch and ignore EEXIST, this call will
- # will output a (needless) error message (no way to avoid that in Python).
- os.makedirs(tracker_dir)
+ # output a (needless) error message (no way to avoid that in Python).
+ os.makedirs(dir_path)
# Ignore 'already exists' in case user tried to start up several
# resumable uploads concurrently from a machine where no tracker dir had
# yet been created.
except OSError as e:
if e.errno != errno.EEXIST:
raise
+
+
+def GetGsutilStateDir():
+ """Returns the location of the directory for gsutil state files.
+
+ Certain operations, such as cross-process credential sharing and
+ resumable transfer tracking, need a known location for state files which
+ are created by gsutil as-needed.
+
+ This location should only be used for storing data that is required to be in
+ a static location.
+
+ Returns:
+ Path to directory for gsutil static state files.
+ """
+ config_file_dir = config.get(
+ 'GSUtil', 'state_dir',
+ os.path.expanduser(os.path.join('~', '.gsutil')))
+ CreateDirIfNeeded(config_file_dir)
+ return config_file_dir
+
+
+def GetCredentialStoreFilename():
+ return os.path.join(GetGsutilStateDir(), 'credcache')
+
+
+def CreateTrackerDirIfNeeded():
+ """Looks up or creates the gsutil tracker file directory.
+
+ This is the configured directory where gsutil keeps its resumable transfer
+ tracker files. This function creates it if it doesn't already exist.
+
+ Returns:
+ The pathname to the tracker directory.
+ """
+ tracker_dir = config.get(
+ 'GSUtil', 'resumable_tracker_dir',
+ os.path.join(GetGsutilStateDir(), 'tracker-files'))
+ CreateDirIfNeeded(tracker_dir)
return tracker_dir
+def PrintTrackerDirDeprecationWarningIfNeeded():
+ # TODO: Remove this along with the tracker_dir config value 1 year after
+ # 4.6 release date. Use state_dir instead.
+ if config.has_option('GSUtil', 'resumable_tracker_dir'):
+ sys.stderr.write('Warning: you have set resumable_tracker_dir in your '
+ '.boto configuration file. This configuration option is '
+ 'deprecated; please use the state_dir configuration '
+ 'option instead.\n')
+
+
# Name of file where we keep the timestamp for the last time we checked whether
# a new version of gsutil is available.
+PrintTrackerDirDeprecationWarningIfNeeded()
+CreateDirIfNeeded(GetGsutilStateDir())
LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
- os.path.join(CreateTrackerDirIfNeeded(), '.last_software_update_check'))
+ os.path.join(GetGsutilStateDir(), '.last_software_update_check'))
def HasConfiguredCredentials():
"""Determines if boto credential/config file exists."""
- config = boto.config
has_goog_creds = (config.has_option('Credentials', 'gs_access_key_id') and
config.has_option('Credentials', 'gs_secret_access_key'))
has_amzn_creds = (config.has_option('Credentials', 'aws_access_key_id') and
config.has_option('Credentials', 'aws_secret_access_key'))
has_oauth_creds = (
config.has_option('Credentials', 'gs_oauth2_refresh_token'))
- has_service_account_creds = (HAS_CRYPTO and
- config.has_option('Credentials', 'gs_service_client_id')
- and config.has_option('Credentials', 'gs_service_key_file'))
+ has_service_account_creds = (
+ HAS_CRYPTO and
+ config.has_option('Credentials', 'gs_service_client_id') and
+ config.has_option('Credentials', 'gs_service_key_file'))
valid_auth_handler = None
try:
@@ -188,10 +278,7 @@
def ConfigureNoOpAuthIfNeeded():
- """
- Sets up no-op auth handler if no boto credentials are configured.
- """
- config = boto.config
+ """Sets up no-op auth handler if no boto credentials are configured."""
if not HasConfiguredCredentials():
if (config.has_option('Credentials', 'gs_service_client_id')
and not HAS_CRYPTO):
@@ -199,12 +286,12 @@
'Your gsutil is configured with an OAuth2 service account, but you '
'do not have PyOpenSSL or PyCrypto 2.6 or later installed. Service '
'account authentication requires one of these libraries; please '
- 'install either of them to proceed, or configure a different type '
+ 'install either of them to proceed, or configure a different type '
'of credentials with "gsutil config".')))
else:
# With no boto config file the user can still access publicly readable
# buckets and objects.
- from gslib import no_op_auth_plugin
+ from gslib import no_op_auth_plugin # pylint: disable=unused-variable
def GetConfigFilePath():
@@ -234,6 +321,85 @@
return cf_list
+def GetCertsFile():
+ """Configures and returns the CA Certificates file.
+
+ If one is already configured, use it. Otherwise, amend the configuration
+ (in boto.config) to use the cert roots distributed with gsutil.
+
+ Returns:
+ string filename of the certs file to use.
+ """
+ certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
+ if not certs_file:
+ if configured_certs_file:
+ disk_certs_file = configured_certs_file
+ else:
+ disk_certs_file = os.path.abspath(
+ os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
+ if not os.path.exists(disk_certs_file):
+ # If the file is not present on disk, this means the gslib module
+ # doesn't actually exist on disk anywhere. This can happen if it's
+ # being imported from a zip file. Unfortunately, we have to copy the
+ # certs file to a local temp file on disk because the underlying SSL
+ # socket requires it to be a filesystem path.
+ certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
+ if not certs_data:
+ raise CommandException('Certificates file not found. Please '
+ 'reinstall gsutil from scratch')
+ fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
+ f = os.fdopen(fd, 'w')
+ f.write(certs_data)
+ f.close()
+ disk_certs_file = fname
+ certs_file = disk_certs_file
+ return certs_file
+
+
+def GetCleanupFiles():
+ """Returns a list of temp files to delete (if possible) when program exits."""
+ cleanup_files = []
+ if configured_certs_file:
+ cleanup_files.append(configured_certs_file)
+ return cleanup_files
+
+
+def GetNewHttp(http_class=httplib2.Http, **kwargs):
+ """Creates and returns a new httplib2.Http instance.
+
+ Args:
+ http_class: Optional custom Http class to use.
+ **kwargs: Arguments to pass to http_class constructor.
+
+ Returns:
+ An initialized httplib2.Http instance.
+ """
+ proxy_info = httplib2.ProxyInfo(
+ proxy_type=3,
+ proxy_host=boto.config.get('Boto', 'proxy', None),
+ proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
+ proxy_user=boto.config.get('Boto', 'proxy_user', None),
+ proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
+ proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False))
+ # Some installers don't package a certs file with httplib2, so use the
+ # one included with gsutil.
+ kwargs['ca_certs'] = GetCertsFile()
+ # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
+ kwargs['timeout'] = SSL_TIMEOUT
+ http = http_class(proxy_info=proxy_info, **kwargs)
+ http.disable_ssl_certificate_validation = (not config.getbool(
+ 'Boto', 'https_validate_certificates'))
+ return http
+
+
+def GetNumRetries():
+ return config.getint('Boto', 'num_retries', 6)
+
+
+def GetMaxRetryDelay():
+ return config.getint('Boto', 'max_retry_delay', 60)
+
+
def _RoundToNearestExponent(num):
i = 0
while i+1 < len(_EXP_STRINGS) and num >= (2 ** _EXP_STRINGS[i+1][0]):
@@ -274,6 +440,8 @@
human_string: A string supplied by user, e.g. '1M', '3 GB'.
Returns:
An integer containing the number of bytes.
+ Raises:
+ ValueError: on an invalid string.
"""
human_string = human_string.lower()
m = MATCH_HUMAN_BYTES.match(human_string)
@@ -314,82 +482,59 @@
return d0 + d1
-def ParseErrorDetail(e):
- """Parse <Message> and/or <Details> text from XML content.
+def RemoveCRLFFromString(input_str):
+ """Returns the input string with all \\n and \\r removed."""
+ return re.sub(r'[\r\n]', '', input_str)
- Args:
- e: The GSResponseError that includes XML to be parsed.
- Returns:
- (exception_name, m, d), where m is <Message> text or None,
- and d is <Details> text or None.
- """
- exc_name_parts = re.split("[\.']", str(type(e)))
- if len(exc_name_parts) < 2:
- # Shouldn't happen, but have fallback in case.
- exc_name = str(type(e))
- else:
- exc_name = exc_name_parts[-2]
- if not hasattr(e, 'body') or e.body is None:
- return (exc_name, None)
+def UnaryDictToXml(message):
+ """Generates XML representation of a nested dict.
- match = re.search(r'<Message>(?P<message>.*)</Message>', e.body)
- m = match.group('message') if match else None
- match = re.search(r'<Details>(?P<details>.*)</Details>', e.body)
- d = match.group('details') if match else None
- return (exc_name, m, d)
+ This dict contains exactly one top-level entry and an arbitrary number of
+ 2nd-level entries, e.g. capturing a WebsiteConfiguration message.
-def FormatErrorMessage(exc_name, status, code, reason, message, detail):
- """Formats an error message from components parsed by ParseErrorDetail."""
- if message and detail:
- return('%s: status=%d, code=%s, reason="%s", message="%s", detail="%s"' %
- (exc_name, status, code, reason, message, detail))
- if message:
- return('%s: status=%d, code=%s, reason="%s", message="%s"' %
- (exc_name, status, code, reason, message))
- if detail:
- return('%s: status=%d, code=%s, reason="%s", detail="%s"' %
- (exc_name, status, code, reason, detail))
- return('%s: status=%d, code=%s, reason="%s"' %
- (exc_name, status, code, reason))
-
-def UnaryDictToXml(message):
- """Generates XML representation of a nested dict with exactly one
- top-level entry and an arbitrary number of 2nd-level entries, e.g.
- capturing a WebsiteConfiguration message.
-
Args:
message: The dict encoding the message.
Returns:
XML string representation of the input dict.
+
+ Raises:
+ Exception: if dict contains more than one top-level entry.
"""
if len(message) != 1:
- raise Exception("Expected dict of size 1, got size %d" % len(message))
+ raise Exception('Expected dict of size 1, got size %d' % len(message))
name, content = message.items()[0]
- T = ElementTree.Element(name)
- for property, value in sorted(content.items()):
- node = ElementTree.SubElement(T, property)
+ element_type = ElementTree.Element(name)
+ for element_property, value in sorted(content.items()):
+ node = ElementTree.SubElement(element_type, element_property)
node.text = value
- return ElementTree.tostring(T)
+ return ElementTree.tostring(element_type)
-def LookUpGsutilVersion(uri):
- """Looks up the gsutil version of the specified gsutil tarball URI, from the
- metadata field set on that object.
+def LookUpGsutilVersion(gsutil_api, url_str):
+ """Looks up the gsutil version of the specified gsutil tarball URL.
+ Version is specified in the metadata field set on that object.
+
Args:
- URI: gsutil URI tarball (such as gs://pub/gsutil.tar.gz).
+ gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
+ url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').
Returns:
- Version string if URI is a cloud URI containing x-goog-meta-gsutil-version
+ Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
metadata, else None.
"""
- if uri.is_cloud_uri():
- obj = uri.get_key(False)
- if obj.metadata and 'gsutil_version' in obj.metadata:
- return obj.metadata['gsutil_version']
+ url = StorageUrlFromString(url_str)
+ if url.IsCloudUrl():
+ obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
+ provider=url.scheme,
+ fields=['metadata'])
+ if obj.metadata and obj.metadata.additionalProperties:
+ for prop in obj.metadata.additionalProperties:
+ if prop.key == 'gsutil_version':
+ return prop.value
def GetGsutilVersionModifiedTime():
@@ -404,16 +549,22 @@
return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty()
+def _HttpsValidateCertifcatesEnabled():
+ return config.get('Boto', 'https_validate_certificates', True)
+
+CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertifcatesEnabled()
+
+
def _BotoIsSecure():
- for cfg_var in ('is_secure', 'https_validate_certificates'):
- if (config.has_option('Boto', cfg_var)
- and not config.getboolean('Boto', cfg_var)):
- return False, cfg_var
- return True, ''
+ return config.get('Boto', 'is_secure', True)
BOTO_IS_SECURE = _BotoIsSecure()
+def ResumableThreshold():
+ return config.getint('GSUtil', 'resumable_threshold', TWO_MB)
+
+
def AddAcceptEncoding(headers):
"""Adds accept-encoding:gzip to the dictionary of headers."""
# If Accept-Encoding is not already set, set it to enable gzip.
@@ -421,79 +572,86 @@
headers['accept-encoding'] = 'gzip'
-def PrintFullInfoAboutUri(uri, incl_acl, headers):
- """Print full info for given URI (like what displays for gsutil ls -L).
+# pylint: disable=too-many-statements
+def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
+ """Print full info for given object (like what displays for gsutil ls -L).
Args:
- uri: StorageUri being listed.
+ bucket_listing_ref: BucketListingRef being listed.
+ Must have ref_type OBJECT and a populated root_object
+ with the desired fields.
incl_acl: True if ACL info should be output.
- headers: The headers to pass to boto, if any.
Returns:
- Tuple (number of objects,
- object length, if listing_style is one of the long listing formats)
+ Tuple (number of objects, object_length)
Raises:
Exception: if calling bug encountered.
"""
- # Run in a try/except clause so we can continue listings past
- # access-denied errors (which can happen because user may have READ
- # permission on object and thus see the bucket listing data, but lack
- # FULL_CONTROL over individual objects and thus not be able to read
- # their ACLs).
- # TODO: Switch this code to use string formatting instead of tabs.
- try:
- print '%s:' % uri.uri.encode('utf-8')
- headers = headers.copy()
- # Add accept encoding so that the HEAD request matches what would be
- # sent for a GET request.
- AddAcceptEncoding(headers)
- got_key = False
- obj = uri.get_key(False, headers=headers)
- got_key = True
- print '\tCreation time:\t\t%s' % obj.last_modified
- if obj.cache_control:
- print '\tCache-Control:\t\t%s' % obj.cache_control
- if obj.content_disposition:
- print '\tContent-Disposition:\t\t%s' % obj.content_disposition
- if obj.content_encoding:
- print '\tContent-Encoding:\t%s' % obj.content_encoding
- if obj.content_language:
- print '\tContent-Language:\t%s' % obj.content_language
- print '\tContent-Length:\t\t%s' % obj.size
- print '\tContent-Type:\t\t%s' % obj.content_type
- if hasattr(obj, 'component_count') and obj.component_count:
- print '\tComponent-Count:\t%d' % obj.component_count
- if obj.metadata:
- prefix = uri.get_provider().metadata_prefix
- for name in obj.metadata:
- meta_string = '\t%s%s:\t%s' % (prefix, name, obj.metadata[name])
- print meta_string.encode('utf-8')
- if hasattr(obj, 'cloud_hashes'):
- for alg in obj.cloud_hashes:
- print '\tHash (%s):\t\t%s' % (
- alg, binascii.b2a_hex(obj.cloud_hashes[alg]))
- print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
- if hasattr(obj, 'generation'):
- print '\tGeneration:\t\t%s' % obj.generation
- if hasattr(obj, 'metageneration'):
- print '\tMetageneration:\t\t%s' % obj.metageneration
- if incl_acl:
- print '\tACL:\t\t%s' % (uri.get_acl(False, headers))
- return (1, obj.size)
- except boto.exception.GSResponseError as e:
- if e.status == 403:
- if got_key:
- print ('\tACL:\t\t\tACCESS DENIED. Note: you need FULL_CONTROL '
- 'permission\n\t\t\ton the object to read its ACL.')
- return (1, obj.size)
+ url_str = bucket_listing_ref.url_string
+ storage_url = StorageUrlFromString(url_str)
+ obj = bucket_listing_ref.root_object
+
+ if (obj.metadata and S3_DELETE_MARKER_GUID in
+ obj.metadata.additionalProperties):
+ num_bytes = 0
+ num_objs = 0
+ url_str += '<DeleteMarker>'
+ else:
+ num_bytes = obj.size
+ num_objs = 1
+
+ print '%s:' % url_str.encode(UTF8)
+ if obj.updated:
+ print '\tCreation time:\t\t%s' % obj.updated.strftime(
+ '%a, %d %b %Y %H:%M:%S GMT')
+ if obj.cacheControl:
+ print '\tCache-Control:\t\t%s' % obj.cacheControl
+ if obj.contentDisposition:
+ print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
+ if obj.contentEncoding:
+ print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
+ if obj.contentLanguage:
+ print '\tContent-Language:\t%s' % obj.contentLanguage
+ print '\tContent-Length:\t\t%s' % obj.size
+ print '\tContent-Type:\t\t%s' % obj.contentType
+ if obj.componentCount:
+ print '\tComponent-Count:\t%d' % obj.componentCount
+ marker_props = {}
+ if obj.metadata and obj.metadata.additionalProperties:
+ non_marker_props = []
+ for add_prop in obj.metadata.additionalProperties:
+ if add_prop.key not in S3_MARKER_GUIDS:
+ non_marker_props.append(add_prop)
else:
- print "You aren't authorized to read %s - skipping" % uri
- return (1, 0)
+ marker_props[add_prop.key] = add_prop.value
+ if non_marker_props:
+ print '\tMetadata:'
+ for ap in non_marker_props:
+ meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
+ print meta_string.encode(UTF8)
+ if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
+ if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
+ print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
+ if obj.generation:
+ generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
+ print '\tGeneration:\t\t%s' % generation_str
+ if obj.metageneration:
+ print '\tMetageneration:\t\t%s' % obj.metageneration
+ if incl_acl:
+ # JSON API won't return acls as part of the response unless we have
+ # full control scope
+ if obj.acl:
+ print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
+ elif S3_ACL_MARKER_GUID in marker_props:
+ print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
else:
- raise e
- return (numobjs, numbytes)
+ print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
+ 'permission\n\t\t\t\ton the object to read its ACL.')
+ return (num_objs, num_bytes)
+
+
def CompareVersions(first, second):
"""Compares the first and second gsutil version strings.
@@ -502,6 +660,10 @@
(e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).
+ Args:
+ first: First gsutil version string.
+ second: Second gsutil version string.
+
Returns:
(g, m):
g is True if first known to be greater than second, else False.
@@ -531,50 +693,182 @@
return (bool(suffix_ver2) and not suffix_ver1, False)
return (False, False)
-def _IncreaseSoftLimitForResource(resource_name):
- """Sets a new soft limit for the maximum number of open files.
- The soft limit is used for this process (and its children), but the
- hard limit is set by the system and cannot be exceeded.
+
+def _IncreaseSoftLimitForResource(resource_name, fallback_value):
+ """Sets a new soft limit for the maximum number of open files.
+
+ The soft limit is used for this process (and its children), but the
+ hard limit is set by the system and cannot be exceeded.
+
+ We will first try to set the soft limit to the hard limit's value; if that
+ fails, we will try to set the soft limit to the fallback_value iff this would
+ increase the soft limit.
+
+ Args:
+ resource_name: Name of the resource to increase the soft limit for.
+ fallback_value: Fallback value to be used if we couldn't set the
+ soft value to the hard value (e.g., if the hard value
+ is "unlimited").
+
+ Returns:
+ Current soft limit for the resource (after any changes we were able to
+ make), or -1 if the resource doesn't exist.
"""
+
+ # Get the value of the resource.
try:
(soft_limit, hard_limit) = resource.getrlimit(resource_name)
- resource.setrlimit(resource_name, (hard_limit, hard_limit))
- return hard_limit
- except (resource.error, ValueError), e:
- return 0
+ except (resource.error, ValueError):
+ # The resource wasn't present, so we can't do anything here.
+ return -1
+ # Try to set the value of the soft limit to the value of the hard limit.
+ if hard_limit > soft_limit: # Some OS's report 0 for "unlimited".
+ try:
+ resource.setrlimit(resource_name, (hard_limit, hard_limit))
+ return hard_limit
+ except (resource.error, ValueError):
+ # We'll ignore this and try the fallback value.
+ pass
+
+ # Try to set the value of the soft limit to the fallback value.
+ if soft_limit < fallback_value:
+ try:
+ resource.setrlimit(resource_name, (fallback_value, hard_limit))
+ return fallback_value
+ except (resource.error, ValueError):
+ # We couldn't change the soft limit, so just report the current
+ # value of the soft limit.
+ return soft_limit
+ else:
+ return soft_limit
+
+
+def GetCloudApiInstance(cls, thread_state=None):
+ """Gets a gsutil Cloud API instance.
+
+ Since Cloud API implementations are not guaranteed to be thread-safe, each
+ thread needs its own instance. These instances are passed to each thread
+ via the thread pool logic in command.
+
+ Args:
+ cls: Command class to be used for single-threaded case.
+ thread_state: Per thread state from this thread containing a gsutil
+ Cloud API instance.
+
+ Returns:
+ gsutil Cloud API instance.
+ """
+ return thread_state or cls.gsutil_api
+
+
+def GetFileSize(fp, position_to_eof=False):
+ """Returns size of file, optionally leaving fp positioned at EOF."""
+ if not position_to_eof:
+ cur_pos = fp.tell()
+ fp.seek(0, os.SEEK_END)
+ cur_file_size = fp.tell()
+ if not position_to_eof:
+ fp.seek(cur_pos)
+ return cur_file_size
+
+
+def GetStreamFromFileUrl(storage_url):
+ if storage_url.IsStream():
+ return sys.stdin
+ else:
+ return open(storage_url.object_name, 'rb')
+
+
+def UrlsAreForSingleProvider(url_args):
+ """Tests whether the URLs are all for a single provider.
+
+ Args:
+ url_args: Strings to check.
+
+ Returns:
+ True if URLs are for single provider, False otherwise.
+ """
+ provider = None
+ url = None
+ for url_str in url_args:
+ url = StorageUrlFromString(url_str)
+ if not provider:
+ provider = url.scheme
+ elif url.scheme != provider:
+ return False
+ return provider is not None
+
+
+def HaveFileUrls(args_to_check):
+ """Checks whether args_to_check contain any file URLs.
+
+ Args:
+ args_to_check: Command-line argument subset to check.
+
+ Returns:
+ True if args_to_check contains any file URLs.
+ """
+ for url_str in args_to_check:
+ storage_url = StorageUrlFromString(url_str)
+ if storage_url.IsFileUrl():
+ return True
+ return False
+
+
+def HaveProviderUrls(args_to_check):
+ """Checks whether args_to_check contains any provider URLs (like 'gs://').
+
+ Args:
+ args_to_check: Command-line argument subset to check.
+
+ Returns:
+ True if args_to_check contains any provider URLs.
+ """
+ for url_str in args_to_check:
+ storage_url = StorageUrlFromString(url_str)
+ if storage_url.IsCloudUrl() and storage_url.IsProvider():
+ return True
+ return False
+
+
def MultiprocessingIsAvailable(logger=None):
- """
+ """Checks if multiprocessing is available.
+
There are some environments in which there is no way to use multiprocessing
logic that's built into Python (e.g., if /dev/shm is not available, then
we can't create semaphores). This simply tries out a few things that will be
needed to make sure the environment can support the pieces of the
multiprocessing module that we need.
-
+
+ Args:
+ logger: logging.logger to use for debug output.
+
Returns:
(multiprocessing_is_available, stack_trace):
multiprocessing_is_available: True iff the multiprocessing module is
available for use.
stack_trace: The stack trace generated by the call we tried that failed.
"""
+ # pylint: disable=global-variable-undefined
global cached_multiprocessing_is_available
global cached_multiprocessing_check_stack_trace
global cached_multiprocessing_is_available_message
if cached_multiprocessing_is_available is not None:
if logger:
logger.debug(cached_multiprocessing_check_stack_trace)
- logger.warn('\n'.join(textwrap.wrap(
- cached_multiprocessing_is_available_message + '\n')))
+ logger.warn(cached_multiprocessing_is_available_message)
return (cached_multiprocessing_is_available,
cached_multiprocessing_check_stack_trace)
stack_trace = None
multiprocessing_is_available = True
- message = (
- 'You have requested multiple threads or processes for an operation,'
- ' but the required functionality of Python\'s multiprocessing '
- 'module is not available. Your operations will be performed '
- 'sequentially, and any requests for parallelism will be ignored.')
+ message = """
+You have requested multiple threads or processes for an operation, but the
+required functionality of Python\'s multiprocessing module is not available.
+Your operations will be performed sequentially, and any requests for
+parallelism will be ignored.
+"""
try:
# Fails if /dev/shm (or some equivalent thereof) is not available for use
# (e.g., there's no implementation, or we can't write to it, etc.).
@@ -582,48 +876,61 @@
multiprocessing.Value('i', 0)
except:
if not IS_WINDOWS:
- message += ('\nPlease ensure that you have write access to both '
- '/dev/shm and /run/shm.')
+ message += """
+Please ensure that you have write access to both /dev/shm and /run/shm.
+"""
raise # We'll handle this in one place below.
-
+
# Manager objects and Windows are generally a pain to work with, so try it
# out as a sanity check. This definitely works on some versions of Windows,
# but it's certainly possible that there is some unknown configuration for
# which it won't.
multiprocessing.Manager()
-
+
# Check that the max number of open files is reasonable. Always check this
# after we're sure that the basic multiprocessing functionality is
# available, since this won't matter unless that's true.
- limit = 0
+ limit = -1
if HAS_RESOURCE_MODULE:
# Try to set this with both resource names - RLIMIT_NOFILE for most Unix
# platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
# "resource" module is not guaranteed to know about these names.
try:
limit = max(limit,
- _IncreaseSoftLimitForResource(resource.RLIMIT_NOFILE))
- except AttributeError, e:
+ _IncreaseSoftLimitForResource(
+ resource.RLIMIT_NOFILE,
+ MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
+ except AttributeError:
pass
try:
limit = max(limit,
- _IncreaseSoftLimitForResource(resource.RLIMIT_OFILE))
- except AttributeError, e:
+ _IncreaseSoftLimitForResource(
+ resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
+ except AttributeError:
pass
+
if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
- message += (
- '\nYour max number of open files, %s, is too low to allow safe '
- 'multiprocessing calls. If you are on a Unix-like OS, then you can '
- 'fix this by adding something like "ulimit -n 10000" to your '
- '~/.bashrc (Linux), ~/.bash_profile (OS X), or equivalent file, '
- 'and opening a new terminal.' % limit)
+ message += ("""
+Your max number of open files, %s, is too low to allow safe multiprocessing.
+On Linux you can fix this by adding something like "ulimit -n 10000" to your
+~/.bashrc or equivalent file and opening a new terminal.
+
+On MacOS, you may also need to run a command like this once (in addition to the
+above instructions), which might require a restart of your system to take
+effect:
+ launchctl limit maxfiles 10000
+
+Alternatively, edit /etc/launchd.conf with something like:
+ limit maxfiles 10000 10000
+
+""" % limit)
raise Exception('Max number of open files, %s, is too low.' % limit)
- except:
+ except: # pylint: disable=bare-except
stack_trace = traceback.format_exc()
multiprocessing_is_available = False
if logger is not None:
logger.debug(stack_trace)
- logger.warn('\n'.join(textwrap.wrap(message + '\n')))
+ logger.warn(message)
# Set the cached values so that we never need to do this check again.
cached_multiprocessing_is_available = multiprocessing_is_available
@@ -631,13 +938,79 @@
cached_multiprocessing_is_available_message = message
return (multiprocessing_is_available, stack_trace)
+
def CreateLock():
  """Returns either a multiprocessing lock or a threading lock.

  Use Multiprocessing lock iff we have access to the parts of the
  multiprocessing module that are necessary to enable parallelism in operations.

  Returns:
    Multiprocessing or threading lock.
  """
  parallelism_ok = MultiprocessingIsAvailable()[0]
  # manager is the module-level multiprocessing.Manager used for shared state.
  return manager.Lock() if parallelism_ok else threading.Lock()
+
+
def IsCloudSubdirPlaceholder(url, blr=None):
  """Determines if URL names a cloud "empty folder" placeholder object.

  This function is needed because GUI tools (like the GCS cloud console) allow
  users to create empty "folders" by creating a placeholder object; and parts
  of gsutil need to treat those placeholder objects specially. For example,
  gsutil rsync needs to avoid downloading those objects because they can cause
  conflicts (see comments in rsync command for details).

  Two placeholder conventions are recognized:
    - cloud objects whose name ends with '_$folder$'
    - zero-sized cloud objects whose name ends with '/'

  Args:
    url: The URL to be checked.
    blr: BucketListingRef to check, or None if not available.
         If None, size won't be checked.

  Returns:
    True/False.
  """
  if not url.IsCloudUrl():
    return False
  object_name = url.url_string
  if object_name.endswith('_$folder$'):
    return True
  # Only consult the listing ref's size when we actually have an object ref;
  # otherwise assume zero so a bare trailing-slash name counts as a folder.
  object_size = blr.root_object.size if blr and blr.IsObject() else 0
  return object_name.endswith('/') and object_size == 0
+
+
def GetTermLines():
  """Returns the number of terminal lines, or _DEFAULT_LINES if unknown.

  Tries, in order: a TIOCGWINSZ ioctl against stdin, stdout, and stderr; the
  same ioctl against the controlling terminal; and finally the LINES
  environment variable.

  Returns:
    int number of terminal lines.
  """
  # fcntl isn't supported in Windows.
  try:
    import fcntl  # pylint: disable=g-import-not-at-top
    import termios  # pylint: disable=g-import-not-at-top
  except ImportError:
    return _DEFAULT_LINES

  def ioctl_GWINSZ(fd):  # pylint: disable=invalid-name
    """Returns rows reported by the terminal on fd, or 0 on any failure."""
    try:
      return struct.unpack(
          'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))[0]
    except:  # pylint: disable=bare-except
      return 0  # Failure (so will retry on different file descriptor below).

  # Try to find a valid number of lines from termio for stdin, stdout,
  # or stderr, in that order.
  ioc = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
  if not ioc:
    # Fall back to the controlling terminal, if any is attached.
    try:
      fd = os.open(os.ctermid(), os.O_RDONLY)
      ioc = ioctl_GWINSZ(fd)
      os.close(fd)
    except:  # pylint: disable=bare-except
      pass
  if not ioc:
    # Last resort: the LINES environment variable. Guard against a set-but-
    # non-numeric (or empty) value, which would otherwise raise ValueError
    # from int() and crash the caller over a purely cosmetic setting.
    try:
      ioc = int(os.environ.get('LINES', _DEFAULT_LINES))
    except ValueError:
      ioc = _DEFAULT_LINES
  return int(ioc)
« no previous file with comments | « gslib/translation_helper.py ('k') | gslib/wildcard_iterator.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698