| OLD | NEW |
| 1 # -*- coding: utf-8 -*- |
| 1 # Copyright 2010 Google Inc. All Rights Reserved. | 2 # Copyright 2010 Google Inc. All Rights Reserved. |
| 2 # | 3 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
| 6 # | 7 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # | 9 # |
| 9 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, | 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and | 13 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. | 14 # limitations under the License. |
| 14 | |
| 15 """Static data and helper functions.""" | 15 """Static data and helper functions.""" |
| 16 | 16 |
| 17 import binascii | 17 from __future__ import absolute_import |
| 18 import boto | 18 |
| 19 import boto.auth | |
| 20 import errno | 19 import errno |
| 21 import gslib | |
| 22 import math | 20 import math |
| 23 import multiprocessing | 21 import multiprocessing |
| 24 import os | 22 import os |
| 23 import pkgutil |
| 25 import re | 24 import re |
| 25 import struct |
| 26 import sys | 26 import sys |
| 27 import tempfile |
| 27 import textwrap | 28 import textwrap |
| 28 import threading | 29 import threading |
| 29 import traceback | 30 import traceback |
| 30 import xml.etree.ElementTree as ElementTree | 31 import xml.etree.ElementTree as ElementTree |
| 31 | 32 |
| 33 import boto |
| 32 from boto import config | 34 from boto import config |
| 35 import boto.auth |
| 33 from boto.exception import NoAuthHandlerFound | 36 from boto.exception import NoAuthHandlerFound |
| 34 from boto.gs.connection import GSConnection | 37 from boto.gs.connection import GSConnection |
| 35 from boto.provider import Provider | 38 from boto.provider import Provider |
| 36 from boto.pyami.config import BotoConfigLocations | 39 from boto.pyami.config import BotoConfigLocations |
| 40 import httplib2 |
| 41 from oauth2client.client import HAS_CRYPTO |
| 42 from retry_decorator import retry_decorator |
| 43 |
| 44 import gslib |
| 37 from gslib.exception import CommandException | 45 from gslib.exception import CommandException |
| 38 from retry_decorator import retry_decorator | 46 from gslib.storage_url import StorageUrlFromString |
| 39 from oauth2client.client import HAS_CRYPTO | 47 from gslib.translation_helper import AclTranslation |
| 48 from gslib.translation_helper import GenerationFromUrlAndString |
| 49 from gslib.translation_helper import S3_ACL_MARKER_GUID |
| 50 from gslib.translation_helper import S3_DELETE_MARKER_GUID |
| 51 from gslib.translation_helper import S3_MARKER_GUIDS |
| 40 | 52 |
| 53 # pylint: disable=g-import-not-at-top |
| 41 try: | 54 try: |
| 42 # This module doesn't necessarily exist on Windows. | 55 # This module doesn't necessarily exist on Windows. |
| 43 import resource | 56 import resource |
| 44 HAS_RESOURCE_MODULE = True | 57 HAS_RESOURCE_MODULE = True |
| 45 except ImportError, e: | 58 except ImportError, e: |
| 46 HAS_RESOURCE_MODULE = False | 59 HAS_RESOURCE_MODULE = False |
| 47 | 60 |
# Commonly used byte-size constants.
ONE_KB = 1024
TWO_MB = 2 * 1024 * 1024
TEN_MB = 10 * 1024 * 1024
DEFAULT_FILE_BUFFER_SIZE = 8192
_DEFAULT_LINES = 25

# By default, the timeout for SSL read errors is infinite. This could
# cause gsutil to hang on network disconnect, so pick a more reasonable
# timeout.
SSL_TIMEOUT = 60

# Start with a progress callback every 64KB during uploads/downloads (JSON API).
# Callback implementation should back off until it hits the maximum size
# so that callbacks do not create huge amounts of log output.
START_CALLBACK_PER_BYTES = 1024*64
MAX_CALLBACK_PER_BYTES = 1024*1024*100

# Upload/download files in 8KB chunks over the HTTP connection.
TRANSFER_BUFFER_SIZE = 1024*8

# Default number of progress callbacks during transfer (XML API).
XML_PROGRESS_CALLBACKS = 10

# For files >= this size, output a message indicating that we're running an
# operation on the file (like hashing or gzipping) so it does not appear to the
# user that the command is hanging.
MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024  # 100 MB

NO_MAX = sys.maxint  # NOTE: Python 2 only; sys.maxsize is the Py3 equivalent.

UTF8 = 'utf-8'

# Matches a version string like "4.6" or "4.6beta", capturing major/minor
# components and any trailing suffix.
VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')

RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'

# Binary exponentiation strings: (exponent of 2, byte suffix, bit suffix,
# optional short suffix).
_EXP_STRINGS = [
    (0, 'B', 'bit'),
    (10, 'KB', 'Kbit', 'K'),
    (20, 'MB', 'Mbit', 'M'),
    (30, 'GB', 'Gbit', 'G'),
    (40, 'TB', 'Tbit', 'T'),
    (50, 'PB', 'Pbit', 'P'),
    (60, 'EB', 'Ebit', 'E'),
]

# Path of a certs file extracted to a temp location at runtime (if any);
# set elsewhere, consumed by GetCertsFile/GetCleanupFiles.
configured_certs_file = None

global manager  # pylint: disable=global-at-module-level
| 68 | 112 |
def InitializeMultiprocessingVariables():
  """Creates the module-global multiprocessing manager.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this initialization is necessary.
  """
  global manager  # pylint: disable=global-variable-undefined
  manager = multiprocessing.Manager()
| 77 | 121 |
| 122 |
def _GenerateSuffixRegex():
  """Builds a regex and suffix->index map for human-readable byte counts.

  Returns:
    Tuple of (dict mapping lowercase suffix -> index into _EXP_STRINGS,
    compiled regex matching a number plus optional size suffix).
  """
  suffix_to_si = {}
  suffixes = []
  for index, exp_tuple in enumerate(_EXP_STRINGS):
    # The first element is the exponent; the rest are suffix spellings.
    for suffix in [s.lower() for s in exp_tuple[1:]]:
      suffix_to_si[suffix] = index
      suffixes.append(suffix)
  pattern = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?' % '|'.join(suffixes)
  return suffix_to_si, re.compile(pattern)
| 90 | 136 |
SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex()

SECONDS_PER_DAY = 3600 * 24

# Detect platform types.
PLATFORM = str(sys.platform).lower()
IS_WINDOWS = 'win32' in PLATFORM
IS_CYGWIN = 'cygwin' in PLATFORM
IS_LINUX = 'linux' in PLATFORM
IS_OSX = 'darwin' in PLATFORM

# On Unix-like systems, we will set the maximum number of open files to avoid
# hitting the limit imposed by the OS. This number was obtained experimentally.
MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000

GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'

Retry = retry_decorator.retry  # pylint: disable=invalid-name

# Cache the values from this check such that they're available to all callers
# without needing to run all the checks again (some of these, such as calling
# multiprocessing.Manager(), are expensive operations).
cached_multiprocessing_is_available = None
cached_multiprocessing_is_available_stack_trace = None
cached_multiprocessing_is_available_message = None
| 116 | 162 |
| 163 |
class ListingStyle(object):
  """Enum-like class specifying how much detail a listing should include."""
  SHORT = 'SHORT'
  LONG = 'LONG'
  LONG_LONG = 'LONG_LONG'
| 122 | 169 |
| 123 | 170 |
def UsingCrcmodExtension(crcmod):
  """Returns truthy iff the crcmod package is using its C extension."""
  inner_module = getattr(crcmod, 'crcmod', None)
  return inner_module and getattr(inner_module, '_usingExtension', None)
| 127 | 174 |
| 128 | 175 |
def CreateDirIfNeeded(dir_path):
  """Creates dir_path (and parents), suppressing already-exists errors."""
  if os.path.exists(dir_path):
    return
  try:
    # Unfortunately, even though we catch and ignore EEXIST, this call can
    # still output a (needless) error message (no way to avoid that in Python).
    os.makedirs(dir_path)
  except OSError as e:
    # Ignore 'already exists' in case another process created the directory
    # concurrently (e.g. several resumable uploads starting at once on a
    # machine where no tracker dir had yet been created).
    if e.errno != errno.EEXIST:
      raise
| 189 |
| 190 |
def GetGsutilStateDir():
  """Returns the location of the directory for gsutil state files.

  Certain operations, such as cross-process credential sharing and
  resumable transfer tracking, need a known location for state files which
  are created by gsutil as-needed. The directory is created if missing.

  This location should only be used for storing data that is required to be
  in a static location.

  Returns:
    Path to directory for gsutil static state files.
  """
  state_dir = config.get(
      'GSUtil', 'state_dir',
      os.path.expanduser(os.path.join('~', '.gsutil')))
  CreateDirIfNeeded(state_dir)
  return state_dir
| 209 |
| 210 |
def GetCredentialStoreFilename():
  """Returns the path of the file used for cross-process credential caching."""
  return os.path.join(GetGsutilStateDir(), 'credcache')
| 213 |
| 214 |
def CreateTrackerDirIfNeeded():
  """Looks up or creates the gsutil tracker file directory.

  This is the configured directory where gsutil keeps its resumable transfer
  tracker files. It defaults to a 'tracker-files' subdirectory of the gsutil
  state dir and is created if it doesn't already exist.

  Returns:
    The pathname to the tracker directory.
  """
  tracker_dir = config.get(
      'GSUtil', 'resumable_tracker_dir',
      os.path.join(GetGsutilStateDir(), 'tracker-files'))
  CreateDirIfNeeded(tracker_dir)
  return tracker_dir
| 151 | 229 |
| 152 | 230 |
def PrintTrackerDirDeprecationWarningIfNeeded():
  """Warns on stderr if the deprecated resumable_tracker_dir option is set."""
  # TODO: Remove this along with the tracker_dir config value 1 year after
  # 4.6 release date. Use state_dir instead.
  if config.has_option('GSUtil', 'resumable_tracker_dir'):
    sys.stderr.write('Warning: you have set resumable_tracker_dir in your '
                     '.boto configuration file. This configuration option is '
                     'deprecated; please use the state_dir configuration '
                     'option instead.\n')
| 239 |
| 240 |
# Emit the deprecation warning (if applicable) and make sure the state
# directory exists before building paths inside it.
PrintTrackerDirDeprecationWarningIfNeeded()
CreateDirIfNeeded(GetGsutilStateDir())

# Name of file where we keep the timestamp for the last time we checked whether
# a new version of gsutil is available.
LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
    os.path.join(GetGsutilStateDir(), '.last_software_update_check'))
| 157 | 247 |
| 158 | 248 |
def HasConfiguredCredentials():
  """Determines if boto credential/config file exists."""
  def _HasBoth(option_a, option_b):
    # True iff both Credentials options are present in the boto config.
    return (config.has_option('Credentials', option_a) and
            config.has_option('Credentials', option_b))

  has_goog_creds = _HasBoth('gs_access_key_id', 'gs_secret_access_key')
  has_amzn_creds = _HasBoth('aws_access_key_id', 'aws_secret_access_key')
  has_oauth_creds = config.has_option('Credentials', 'gs_oauth2_refresh_token')
  # Service account creds only count if a crypto library is available to
  # actually use them.
  has_service_account_creds = (
      HAS_CRYPTO and _HasBoth('gs_service_client_id', 'gs_service_key_file'))

  auth_handler = None
  try:
    auth_handler = boto.auth.get_auth_handler(
        GSConnection.DefaultHost, config, Provider('google'),
        requested_capability=['s3'])
    # Exclude the no-op auth handler as indicating credentials are configured.
    # Note we can't use isinstance() here because the no-op module may not be
    # imported so we can't get a reference to the class type.
    handler_cls = getattr(auth_handler, '__class__', None)
    if getattr(handler_cls, '__name__', None) == 'NoOpAuth':
      auth_handler = None
  except NoAuthHandlerFound:
    pass

  return (has_goog_creds or has_amzn_creds or has_oauth_creds
          or has_service_account_creds or auth_handler)
| 188 | 278 |
| 189 | 279 |
def ConfigureNoOpAuthIfNeeded():
  """Sets up no-op auth handler if no boto credentials are configured."""
  if HasConfiguredCredentials():
    return
  if (config.has_option('Credentials', 'gs_service_client_id')
      and not HAS_CRYPTO):
    # A service account is configured but can't be used without a crypto
    # library; tell the user how to proceed rather than failing obscurely.
    raise CommandException('\n'.join(textwrap.wrap(
        'Your gsutil is configured with an OAuth2 service account, but you '
        'do not have PyOpenSSL or PyCrypto 2.6 or later installed. Service '
        'account authentication requires one of these libraries; please '
        'install either of them to proceed, or configure a different type '
        'of credentials with "gsutil config".')))
  # With no boto config file the user can still access publicly readable
  # buckets and objects.
  from gslib import no_op_auth_plugin  # pylint: disable=unused-variable
| 208 | 295 |
| 209 | 296 |
def GetConfigFilePath():
  """Returns the first openable boto config path, or 'no config found'."""
  for candidate in BotoConfigLocations:
    try:
      with open(candidate, 'r'):
        return candidate
    except IOError:
      pass
  return 'no config found'
| 220 | 307 |
| 221 | 308 |
def GetBotoConfigFileList():
  """Returns list of boto config files that exist.

  Returns:
    De-duplicated list of existing boto config file paths.
  """
  # Copy the module-level list: the original code appended directly to
  # boto.pyami.config.BotoConfigLocations, permanently mutating boto's
  # shared state (and growing it on every call when AWS_CREDENTIAL_FILE
  # is set in the environment).
  config_paths = list(boto.pyami.config.BotoConfigLocations)
  if 'AWS_CREDENTIAL_FILE' in os.environ:
    config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
  # Use a dict keyed by path to de-duplicate while keeping existence checks.
  existing_files = {}
  for config_path in config_paths:
    if os.path.exists(config_path):
      existing_files[config_path] = 1
  return list(existing_files)
| 235 | 322 |
| 236 | 323 |
def GetCertsFile():
  """Configures and returns the CA Certificates file.

  If one is already configured, use it. Otherwise, amend the configuration
  (in boto.config) to use the cert roots distributed with gsutil.

  Returns:
    string filename of the certs file to use.
  """
  certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
  if not certs_file:
    if configured_certs_file:
      # A certs file was already extracted during this run; reuse it.
      disk_certs_file = configured_certs_file
    else:
      disk_certs_file = os.path.abspath(
          os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
      if not os.path.exists(disk_certs_file):
        # If the file is not present on disk, this means the gslib module
        # doesn't actually exist on disk anywhere. This can happen if it's
        # being imported from a zip file. Unfortunately, we have to copy the
        # certs file to a local temp file on disk because the underlying SSL
        # socket requires it to be a filesystem path.
        certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
        if not certs_data:
          raise CommandException('Certificates file not found. Please '
                                 'reinstall gsutil from scratch')
        # NOTE(review): pkgutil.get_data returns bytes; writing with mode 'w'
        # is fine on Python 2 but would need 'wb' on Python 3 — confirm.
        fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
        f = os.fdopen(fd, 'w')
        f.write(certs_data)
        f.close()
        disk_certs_file = fname
    certs_file = disk_certs_file
  return certs_file
| 357 |
| 358 |
def GetCleanupFiles():
  """Returns a list of temp files to delete (if possible) when program exits."""
  # Only the dynamically-extracted certs file (if any) needs cleanup.
  return [configured_certs_file] if configured_certs_file else []
| 365 |
| 366 |
def GetNewHttp(http_class=httplib2.Http, **kwargs):
  """Creates and returns a new httplib2.Http instance.

  Args:
    http_class: Optional custom Http class to use.
    **kwargs: Arguments to pass to http_class constructor.

  Returns:
    An initialized httplib2.Http instance.
  """
  # proxy_type=3 is the HTTP proxy type (socks.PROXY_TYPE_HTTP).
  proxy_info = httplib2.ProxyInfo(
      proxy_type=3,
      proxy_host=boto.config.get('Boto', 'proxy', None),
      proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
      proxy_user=boto.config.get('Boto', 'proxy_user', None),
      proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
      proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False))
  # Some installers don't package a certs file with httplib2, so use the
  # one included with gsutil.
  kwargs['ca_certs'] = GetCertsFile()
  # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
  kwargs['timeout'] = SSL_TIMEOUT
  new_http = http_class(proxy_info=proxy_info, **kwargs)
  new_http.disable_ssl_certificate_validation = (not config.getbool(
      'Boto', 'https_validate_certificates'))
  return new_http
| 393 |
| 394 |
def GetNumRetries():
  """Returns the configured number of retries (Boto num_retries, default 6)."""
  return config.getint('Boto', 'num_retries', 6)
| 397 |
| 398 |
def GetMaxRetryDelay():
  """Returns the max retry delay in seconds (Boto max_retry_delay, default 60)."""
  return config.getint('Boto', 'max_retry_delay', 60)
| 401 |
| 402 |
def _RoundToNearestExponent(num):
  """Returns (index into _EXP_STRINGS, num scaled to that exponent, 2 d.p.)."""
  i = 0
  # Advance to the largest exponent that does not exceed num.
  while i + 1 < len(_EXP_STRINGS) and num >= (2 ** _EXP_STRINGS[i + 1][0]):
    i += 1
  return i, round(float(num) / 2 ** _EXP_STRINGS[i][0], 2)
| 242 | 408 |
| 243 | 409 |
| 244 def MakeHumanReadable(num): | 410 def MakeHumanReadable(num): |
| 245 """Generates human readable string for a number of bytes. | 411 """Generates human readable string for a number of bytes. |
| 246 | 412 |
| (...skipping 20 matching lines...) Expand all Loading... |
| 267 return '%g %s' % (rounded_val, _EXP_STRINGS[i][2]) | 433 return '%g %s' % (rounded_val, _EXP_STRINGS[i][2]) |
| 268 | 434 |
| 269 | 435 |
| 270 def HumanReadableToBytes(human_string): | 436 def HumanReadableToBytes(human_string): |
| 271 """Tries to convert a human-readable string to a number of bytes. | 437 """Tries to convert a human-readable string to a number of bytes. |
| 272 | 438 |
| 273 Args: | 439 Args: |
| 274 human_string: A string supplied by user, e.g. '1M', '3 GB'. | 440 human_string: A string supplied by user, e.g. '1M', '3 GB'. |
| 275 Returns: | 441 Returns: |
| 276 An integer containing the number of bytes. | 442 An integer containing the number of bytes. |
| 443 Raises: |
| 444 ValueError: on an invalid string. |
| 277 """ | 445 """ |
| 278 human_string = human_string.lower() | 446 human_string = human_string.lower() |
| 279 m = MATCH_HUMAN_BYTES.match(human_string) | 447 m = MATCH_HUMAN_BYTES.match(human_string) |
| 280 if m: | 448 if m: |
| 281 num = float(m.group('num')) | 449 num = float(m.group('num')) |
| 282 if m.group('suffix'): | 450 if m.group('suffix'): |
| 283 power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0] | 451 power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0] |
| 284 num *= (2.0 ** power) | 452 num *= (2.0 ** power) |
| 285 num = int(round(num)) | 453 num = int(round(num)) |
| 286 return num | 454 return num |
| (...skipping 20 matching lines...) Expand all Loading... |
| 307 k = (len(values) - 1) * percent | 475 k = (len(values) - 1) * percent |
| 308 f = math.floor(k) | 476 f = math.floor(k) |
| 309 c = math.ceil(k) | 477 c = math.ceil(k) |
| 310 if f == c: | 478 if f == c: |
| 311 return key(values[int(k)]) | 479 return key(values[int(k)]) |
| 312 d0 = key(values[int(f)]) * (c-k) | 480 d0 = key(values[int(f)]) * (c-k) |
| 313 d1 = key(values[int(c)]) * (k-f) | 481 d1 = key(values[int(c)]) * (k-f) |
| 314 return d0 + d1 | 482 return d0 + d1 |
| 315 | 483 |
| 316 | 484 |
def RemoveCRLFFromString(input_str):
  """Returns input_str with every carriage return and newline removed."""
  return input_str.replace('\r', '').replace('\n', '')
| 319 | 488 |
| 320 Args: | |
| 321 e: The GSResponseError that includes XML to be parsed. | |
| 322 | |
| 323 Returns: | |
| 324 (exception_name, m, d), where m is <Message> text or None, | |
| 325 and d is <Details> text or None. | |
| 326 """ | |
| 327 exc_name_parts = re.split("[\.']", str(type(e))) | |
| 328 if len(exc_name_parts) < 2: | |
| 329 # Shouldn't happen, but have fallback in case. | |
| 330 exc_name = str(type(e)) | |
| 331 else: | |
| 332 exc_name = exc_name_parts[-2] | |
| 333 if not hasattr(e, 'body') or e.body is None: | |
| 334 return (exc_name, None) | |
| 335 | |
| 336 match = re.search(r'<Message>(?P<message>.*)</Message>', e.body) | |
| 337 m = match.group('message') if match else None | |
| 338 match = re.search(r'<Details>(?P<details>.*)</Details>', e.body) | |
| 339 d = match.group('details') if match else None | |
| 340 return (exc_name, m, d) | |
| 341 | |
| 342 def FormatErrorMessage(exc_name, status, code, reason, message, detail): | |
| 343 """Formats an error message from components parsed by ParseErrorDetail.""" | |
| 344 if message and detail: | |
| 345 return('%s: status=%d, code=%s, reason="%s", message="%s", detail="%s"' % | |
| 346 (exc_name, status, code, reason, message, detail)) | |
| 347 if message: | |
| 348 return('%s: status=%d, code=%s, reason="%s", message="%s"' % | |
| 349 (exc_name, status, code, reason, message)) | |
| 350 if detail: | |
| 351 return('%s: status=%d, code=%s, reason="%s", detail="%s"' % | |
| 352 (exc_name, status, code, reason, detail)) | |
| 353 return('%s: status=%d, code=%s, reason="%s"' % | |
| 354 (exc_name, status, code, reason)) | |
| 355 | 489 |
def UnaryDictToXml(message):
  """Generates XML representation of a nested dict.

  This dict contains exactly one top-level entry and an arbitrary number of
  2nd-level entries, e.g. capturing a WebsiteConfiguration message.

  Args:
    message: The dict encoding the message.

  Returns:
    XML string representation of the input dict.

  Raises:
    Exception: if dict contains more than one top-level entry.
  """
  if len(message) != 1:
    raise Exception('Expected dict of size 1, got size %d' % len(message))

  # next(iter(...)) works on both Python 2 and 3; dict.items()[0] is
  # Python-2-only because items() returns a non-indexable view in Python 3.
  name, content = next(iter(message.items()))
  element_type = ElementTree.Element(name)
  for element_property, value in sorted(content.items()):
    node = ElementTree.SubElement(element_type, element_property)
    node.text = value
  return ElementTree.tostring(element_type)
| 376 | 514 |
| 377 | 515 |
def LookUpGsutilVersion(gsutil_api, url_str):
  """Looks up the gsutil version of the specified gsutil tarball URL.

  The version is read from the gsutil_version metadata entry set on that
  object.

  Args:
    gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
    url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').

  Returns:
    Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
    metadata, else None.
  """
  url = StorageUrlFromString(url_str)
  if not url.IsCloudUrl():
    return None
  obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
                                     provider=url.scheme,
                                     fields=['metadata'])
  if obj.metadata and obj.metadata.additionalProperties:
    for prop in obj.metadata.additionalProperties:
      if prop.key == 'gsutil_version':
        return prop.value
| 393 | 538 |
| 394 | 539 |
def GetGsutilVersionModifiedTime():
  """Returns the unix mtime of the gsutil VERSION file, or 0 if unset."""
  version_file = gslib.VERSION_FILE
  if version_file:
    return int(os.path.getmtime(version_file))
  return 0
| 400 | 545 |
| 401 | 546 |
| 402 def IsRunningInteractively(): | 547 def IsRunningInteractively(): |
| 403 """Returns True if currently running interactively on a TTY.""" | 548 """Returns True if currently running interactively on a TTY.""" |
| 404 return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty() | 549 return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty() |
| 405 | 550 |
| 406 | 551 |
def _HttpsValidateCertificatesEnabled():
  """Returns the Boto https_validate_certificates setting (default True)."""
  # NOTE: renamed from the misspelled _HttpsValidateCertifcatesEnabled;
  # the helper is module-private and its only use is the constant below.
  return config.get('Boto', 'https_validate_certificates', True)

# Evaluated once at import time.
CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertificatesEnabled()
| 556 |
| 557 |
def _BotoIsSecure():
  """Returns the Boto 'is_secure' config value (defaults to True)."""
  return config.get('Boto', 'is_secure', True)

# Cached once at import time.
BOTO_IS_SECURE = _BotoIsSecure()
| 415 | 562 |
| 416 | 563 |
def ResumableThreshold():
  """Returns the [GSUtil] resumable_threshold config value (default TWO_MB)."""
  return config.getint('GSUtil', 'resumable_threshold', TWO_MB)
| 566 |
| 567 |
def AddAcceptEncoding(headers):
  """Adds accept-encoding:gzip to the dictionary of headers, if absent.

  Args:
    headers: Mutable dict of HTTP headers; modified in place. Any
        caller-supplied 'accept-encoding' value is left untouched.
  """
  # setdefault is the idiomatic "set only if not already present".
  headers.setdefault('accept-encoding', 'gzip')
| 422 | 573 |
| 423 | 574 |
# pylint: disable=too-many-statements
def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
  """Print full info for given object (like what displays for gsutil ls -L).

  Args:
    bucket_listing_ref: BucketListingRef being listed.
                        Must have ref_type OBJECT and a populated root_object
                        with the desired fields.
    incl_acl: True if ACL info should be output.

  Returns:
    Tuple (number of objects, object_length)

  Raises:
    Exception: if calling bug encountered.
  """
  url_str = bucket_listing_ref.url_string
  storage_url = StorageUrlFromString(url_str)
  obj = bucket_listing_ref.root_object

  # S3 delete markers carry no content; count them as 0 objects / 0 bytes
  # and tag the printed URL so the listing makes this visible.
  if (obj.metadata and S3_DELETE_MARKER_GUID in
      obj.metadata.additionalProperties):
    num_bytes = 0
    num_objs = 0
    url_str += '<DeleteMarker>'
  else:
    num_bytes = obj.size
    num_objs = 1

  print '%s:' % url_str.encode(UTF8)
  if obj.updated:
    print '\tCreation time:\t\t%s' % obj.updated.strftime(
        '%a, %d %b %Y %H:%M:%S GMT')
  if obj.cacheControl:
    print '\tCache-Control:\t\t%s' % obj.cacheControl
  if obj.contentDisposition:
    print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
  if obj.contentEncoding:
    print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
  if obj.contentLanguage:
    print '\tContent-Language:\t%s' % obj.contentLanguage
  print '\tContent-Length:\t\t%s' % obj.size
  print '\tContent-Type:\t\t%s' % obj.contentType
  if obj.componentCount:
    print '\tComponent-Count:\t%d' % obj.componentCount
  # Split metadata into ordinary user metadata (printed below) and the
  # special S3 marker entries (consumed by the ACL logic further down).
  marker_props = {}
  if obj.metadata and obj.metadata.additionalProperties:
    non_marker_props = []
    for add_prop in obj.metadata.additionalProperties:
      if add_prop.key not in S3_MARKER_GUIDS:
        non_marker_props.append(add_prop)
      else:
        marker_props[add_prop.key] = add_prop.value
    if non_marker_props:
      print '\tMetadata:'
      for ap in non_marker_props:
        meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
        print meta_string.encode(UTF8)
  if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
  if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
  print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
  if obj.generation:
    generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
    print '\tGeneration:\t\t%s' % generation_str
  if obj.metageneration:
    print '\tMetageneration:\t\t%s' % obj.metageneration
  if incl_acl:
    # JSON API won't return acls as part of the response unless we have
    # full control scope
    if obj.acl:
      print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
    elif S3_ACL_MARKER_GUID in marker_props:
      # S3 ACLs arrive via the marker metadata entry rather than obj.acl.
      print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
    else:
      print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
             'permission\n\t\t\t\ton the object to read its ACL.')

  return (num_objs, num_bytes)
| 653 |
| 496 | 654 |
def CompareVersions(first, second):
  """Compares the first and second gsutil version strings.

  For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
  Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
  (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
  less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).

  Args:
    first: First gsutil version string.
    second: Second gsutil version string.

  Returns:
    (g, m):
      g is True if first known to be greater than second, else False.
      m is True if first known to be greater by at least 1 major version,
      else False.
  """
  match1 = VERSION_MATCHER.match(str(first))
  match2 = VERSION_MATCHER.match(str(second))

  # Strings we can't parse compare as "unknown": report not-greater.
  if not (match1 and match2):
    return (False, False)

  def _Components(match):
    # (major, minor, suffix); a missing minor component counts as 0.
    minor = int(match.group('min')) if match.group('min') else 0
    return (int(match.group('maj')), minor, match.group('suffix'))

  maj1, min1, suffix1 = _Components(match1)
  maj2, min2, suffix2 = _Components(match2)

  if maj1 != maj2:
    greater = maj1 > maj2
    return (greater, greater)
  if min1 != min2:
    return (min1 > min2, False)
  # Same major.minor: a suffixed version is older than the plain one.
  return (bool(suffix2) and not suffix1, False)
| 533 | 695 |
| 534 def _IncreaseSoftLimitForResource(resource_name): | 696 |
| 535 """Sets a new soft limit for the maximum number of open files. | 697 def _IncreaseSoftLimitForResource(resource_name, fallback_value): |
| 536 The soft limit is used for this process (and its children), but the | 698 """Sets a new soft limit for the maximum number of open files. |
| 537 hard limit is set by the system and cannot be exceeded. | 699 |
| 700 The soft limit is used for this process (and its children), but the |
| 701 hard limit is set by the system and cannot be exceeded. |
| 702 |
| 703 We will first try to set the soft limit to the hard limit's value; if that |
| 704 fails, we will try to set the soft limit to the fallback_value iff this would |
| 705 increase the soft limit. |
| 706 |
| 707 Args: |
| 708 resource_name: Name of the resource to increase the soft limit for. |
| 709 fallback_value: Fallback value to be used if we couldn't set the |
| 710 soft value to the hard value (e.g., if the hard value |
| 711 is "unlimited"). |
| 712 |
| 713 Returns: |
| 714 Current soft limit for the resource (after any changes we were able to |
| 715 make), or -1 if the resource doesn't exist. |
| 538 """ | 716 """ |
| 717 |
| 718 # Get the value of the resource. |
| 539 try: | 719 try: |
| 540 (soft_limit, hard_limit) = resource.getrlimit(resource_name) | 720 (soft_limit, hard_limit) = resource.getrlimit(resource_name) |
| 541 resource.setrlimit(resource_name, (hard_limit, hard_limit)) | 721 except (resource.error, ValueError): |
| 542 return hard_limit | 722 # The resource wasn't present, so we can't do anything here. |
| 543 except (resource.error, ValueError), e: | 723 return -1 |
| 544 return 0 | 724 |
| 725 # Try to set the value of the soft limit to the value of the hard limit. |
| 726 if hard_limit > soft_limit: # Some OS's report 0 for "unlimited". |
| 727 try: |
| 728 resource.setrlimit(resource_name, (hard_limit, hard_limit)) |
| 729 return hard_limit |
| 730 except (resource.error, ValueError): |
| 731 # We'll ignore this and try the fallback value. |
| 732 pass |
| 733 |
| 734 # Try to set the value of the soft limit to the fallback value. |
| 735 if soft_limit < fallback_value: |
| 736 try: |
| 737 resource.setrlimit(resource_name, (fallback_value, hard_limit)) |
| 738 return fallback_value |
| 739 except (resource.error, ValueError): |
| 740 # We couldn't change the soft limit, so just report the current |
| 741 # value of the soft limit. |
| 742 return soft_limit |
| 743 else: |
| 744 return soft_limit |
| 745 |
| 746 |
def GetCloudApiInstance(cls, thread_state=None):
  """Gets a gsutil Cloud API instance.

  Since Cloud API implementations are not guaranteed to be thread-safe, each
  thread needs its own instance. These instances are passed to each thread
  via the thread pool logic in command.

  Args:
    cls: Command class to be used for single-threaded case.
    thread_state: Per thread state from this thread containing a gsutil
                  Cloud API instance.

  Returns:
    gsutil Cloud API instance.
  """
  if thread_state:
    return thread_state
  return cls.gsutil_api
| 763 |
| 764 |
def GetFileSize(fp, position_to_eof=False):
  """Returns the size of the file underlying fp.

  Args:
    fp: Seekable file-like object.
    position_to_eof: If True, leave fp positioned at end-of-file;
        otherwise restore fp's original position before returning.

  Returns:
    Size of the file in bytes.
  """
  saved_pos = None if position_to_eof else fp.tell()
  fp.seek(0, os.SEEK_END)
  size = fp.tell()
  if saved_pos is not None:
    fp.seek(saved_pos)
  return size
| 774 |
| 775 |
def GetStreamFromFileUrl(storage_url):
  """Opens a read stream for the given file URL.

  Args:
    storage_url: StorageUrl naming a local file or a stream.

  Returns:
    sys.stdin when the URL denotes a stream; otherwise a newly opened
    binary file object for storage_url.object_name (caller closes it).
  """
  if not storage_url.IsStream():
    return open(storage_url.object_name, 'rb')
  return sys.stdin
| 781 |
| 782 |
def UrlsAreForSingleProvider(url_args):
  """Tests whether the URLs are all for a single provider.

  Args:
    url_args: Strings to check.

  Returns:
    True if URLs are for a single provider; False if they span multiple
    providers or url_args is empty.
  """
  # Removed the dead pre-loop local ('url = None'), whose value was never
  # read after the loop.
  provider = None
  for url_str in url_args:
    scheme = StorageUrlFromString(url_str).scheme
    if not provider:
      # First URL establishes the provider the rest must match.
      provider = scheme
    elif scheme != provider:
      return False
  return provider is not None
| 801 |
| 802 |
def HaveFileUrls(args_to_check):
  """Checks whether args_to_check contain any file URLs.

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains at least one file URL.
  """
  # any() short-circuits on the first file URL, like the original loop.
  return any(
      StorageUrlFromString(url_str).IsFileUrl() for url_str in args_to_check)
| 817 |
| 818 |
def HaveProviderUrls(args_to_check):
  """Checks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains at least one provider URL.
  """
  # any() short-circuits on the first provider URL, like the original loop.
  return any(
      url.IsCloudUrl() and url.IsProvider()
      for url in (StorageUrlFromString(s) for s in args_to_check))
| 833 |
| 545 | 834 |
def MultiprocessingIsAvailable(logger=None):
  """Checks if multiprocessing is available.

  There are some environments in which there is no way to use multiprocessing
  logic that's built into Python (e.g., if /dev/shm is not available, then
  we can't create semaphores). This simply tries out a few things that will be
  needed to make sure the environment can support the pieces of the
  multiprocessing module that we need.

  Args:
    logger: logging.logger to use for debug output.

  Returns:
    (multiprocessing_is_available, stack_trace):
      multiprocessing_is_available: True iff the multiprocessing module is
                                    available for use.
      stack_trace: The stack trace generated by the call we tried that failed.
  """
  # pylint: disable=global-variable-undefined
  global cached_multiprocessing_is_available
  global cached_multiprocessing_check_stack_trace
  global cached_multiprocessing_is_available_message
  # The probe below is expensive (creates a Manager process), so its result
  # is cached module-wide after the first call.
  if cached_multiprocessing_is_available is not None:
    if logger:
      logger.debug(cached_multiprocessing_check_stack_trace)
      logger.warn(cached_multiprocessing_is_available_message)
    return (cached_multiprocessing_is_available,
            cached_multiprocessing_check_stack_trace)

  stack_trace = None
  multiprocessing_is_available = True
  message = """
You have requested multiple threads or processes for an operation, but the
required functionality of Python\'s multiprocessing module is not available.
Your operations will be performed sequentially, and any requests for
parallelism will be ignored.
"""
  try:
    # Fails if /dev/shm (or some equivalent thereof) is not available for use
    # (e.g., there's no implementation, or we can't write to it, etc.).
    try:
      multiprocessing.Value('i', 0)
    except:
      if not IS_WINDOWS:
        message += """
Please ensure that you have write access to both /dev/shm and /run/shm.
"""
      raise  # We'll handle this in one place below.

    # Manager objects and Windows are generally a pain to work with, so try it
    # out as a sanity check. This definitely works on some versions of Windows,
    # but it's certainly possible that there is some unknown configuration for
    # which it won't.
    multiprocessing.Manager()

    # Check that the max number of open files is reasonable. Always check this
    # after we're sure that the basic multiprocessing functionality is
    # available, since this won't matter unless that's true.
    limit = -1
    if HAS_RESOURCE_MODULE:
      # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
      # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
      # "resource" module is not guaranteed to know about these names.
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_NOFILE,
                        MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass

    if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
      message += ("""
Your max number of open files, %s, is too low to allow safe multiprocessing.
On Linux you can fix this by adding something like "ulimit -n 10000" to your
~/.bashrc or equivalent file and opening a new terminal.

On MacOS, you may also need to run a command like this once (in addition to the
above instructions), which might require a restart of your system to take
effect:
  launchctl limit maxfiles 10000

Alternatively, edit /etc/launchd.conf with something like:
  limit maxfiles 10000 10000

""" % limit)
      raise Exception('Max number of open files, %s, is too low.' % limit)
  except:  # pylint: disable=bare-except
    # Any failure above means we must run sequentially; record why.
    stack_trace = traceback.format_exc()
    multiprocessing_is_available = False
    if logger is not None:
      logger.debug(stack_trace)
      logger.warn(message)

  # Set the cached values so that we never need to do this check again.
  cached_multiprocessing_is_available = multiprocessing_is_available
  cached_multiprocessing_check_stack_trace = stack_trace
  cached_multiprocessing_is_available_message = message
  return (multiprocessing_is_available, stack_trace)
| 633 | 940 |
| 941 |
def CreateLock():
  """Returns either a multiprocessing lock or a threading lock.

  A multiprocessing lock is used iff the parts of the multiprocessing
  module necessary for operation-level parallelism are available;
  otherwise a threading lock suffices.

  Returns:
    Multiprocessing or threading lock.
  """
  use_multiprocessing = MultiprocessingIsAvailable()[0]
  return manager.Lock() if use_multiprocessing else threading.Lock()
| 955 |
| 956 |
def IsCloudSubdirPlaceholder(url, blr=None):
  """Determines if URL names a cloud subdir placeholder object.

  GUI tools (like the GCS cloud console) allow users to create empty
  "folders" by creating a placeholder object, and parts of gsutil need to
  treat those placeholder objects specially. For example, gsutil rsync
  needs to avoid downloading them because they can cause conflicts (see
  comments in the rsync command for details).

  Two placeholder forms are detected:
    - cloud objects whose name ends with '_$folder$'
    - zero-size cloud objects whose name ends with '/'

  Args:
    url: The URL to be checked.
    blr: BucketListingRef to check, or None if not available.
         If None, size won't be checked.

  Returns:
    True/False.
  """
  if not url.IsCloudUrl():
    return False
  name = url.url_string
  if name.endswith('_$folder$'):
    return True
  # Without a listing ref we can't know the size; treat it as 0.
  size = blr.root_object.size if blr and blr.IsObject() else 0
  return name.endswith('/') and size == 0
| 988 |
| 989 |
def GetTermLines():
  """Returns number of terminal lines."""
  # fcntl/termios aren't supported on Windows.
  try:
    import fcntl  # pylint: disable=g-import-not-at-top
    import termios  # pylint: disable=g-import-not-at-top
  except ImportError:
    return _DEFAULT_LINES

  def _LinesViaIoctl(fd):  # pylint: disable=invalid-name
    # Returns the row count reported by TIOCGWINSZ for fd, or 0 on failure
    # (so the caller falls through to the next file descriptor).
    try:
      winsize = fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')
      return struct.unpack('hh', winsize)[0]
    except:  # pylint: disable=bare-except
      return 0

  # Probe stdin, stdout and stderr, in that order.
  lines = _LinesViaIoctl(0) or _LinesViaIoctl(1) or _LinesViaIoctl(2)
  if not lines:
    # Fall back to the controlling terminal.
    try:
      fd = os.open(os.ctermid(), os.O_RDONLY)
      try:
        lines = _LinesViaIoctl(fd)
      finally:
        os.close(fd)
    except:  # pylint: disable=bare-except
      pass
  if not lines:
    lines = os.environ.get('LINES', _DEFAULT_LINES)
  return int(lines)
| OLD | NEW |