| OLD | NEW |
| 1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2010 Google Inc. All Rights Reserved. | 2 # Copyright 2010 Google Inc. All Rights Reserved. |
| 3 # | 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
| 7 # | 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # | 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, | 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 # See the License for the specific language governing permissions and | 13 # See the License for the specific language governing permissions and |
| 14 # limitations under the License. | 14 # limitations under the License. |
| 15 """Static data and helper functions.""" | 15 """Static data and helper functions.""" |
| 16 | 16 |
| 17 from __future__ import absolute_import | 17 from __future__ import absolute_import |
| 18 | 18 |
| 19 import collections |
| 19 import errno | 20 import errno |
| 20 import logging | 21 import logging |
| 21 import math | 22 import math |
| 22 import multiprocessing | 23 import multiprocessing |
| 23 import os | 24 import os |
| 24 import pkgutil | 25 import pkgutil |
| 25 import re | 26 import re |
| 26 import struct | 27 import struct |
| 27 import sys | 28 import sys |
| 28 import tempfile | 29 import tempfile |
| (...skipping 15 matching lines...) Expand all Loading... |
| 44 | 45 |
| 45 import gslib | 46 import gslib |
| 46 from gslib.exception import CommandException | 47 from gslib.exception import CommandException |
| 47 from gslib.storage_url import StorageUrlFromString | 48 from gslib.storage_url import StorageUrlFromString |
| 48 from gslib.translation_helper import AclTranslation | 49 from gslib.translation_helper import AclTranslation |
| 49 from gslib.translation_helper import GenerationFromUrlAndString | 50 from gslib.translation_helper import GenerationFromUrlAndString |
| 50 from gslib.translation_helper import S3_ACL_MARKER_GUID | 51 from gslib.translation_helper import S3_ACL_MARKER_GUID |
| 51 from gslib.translation_helper import S3_DELETE_MARKER_GUID | 52 from gslib.translation_helper import S3_DELETE_MARKER_GUID |
| 52 from gslib.translation_helper import S3_MARKER_GUIDS | 53 from gslib.translation_helper import S3_MARKER_GUIDS |
| 53 | 54 |
| 55 # Detect platform types. |
| 56 PLATFORM = str(sys.platform).lower() |
| 57 IS_WINDOWS = 'win32' in PLATFORM |
| 58 IS_CYGWIN = 'cygwin' in PLATFORM |
| 59 IS_LINUX = 'linux' in PLATFORM |
| 60 IS_OSX = 'darwin' in PLATFORM |
| 61 |
| 62 # pylint: disable=g-import-not-at-top |
| 63 if IS_WINDOWS: |
| 64 from ctypes import c_int |
| 65 from ctypes import c_uint64 |
| 66 from ctypes import c_char_p |
| 67 from ctypes import c_wchar_p |
| 68 from ctypes import windll |
| 69 from ctypes import POINTER |
| 70 from ctypes import WINFUNCTYPE |
| 71 from ctypes import WinError |
| 72 |
| 54 # pylint: disable=g-import-not-at-top | 73 # pylint: disable=g-import-not-at-top |
| 55 try: | 74 try: |
| 56 # This module doesn't necessarily exist on Windows. | 75 # This module doesn't necessarily exist on Windows. |
| 57 import resource | 76 import resource |
| 58 HAS_RESOURCE_MODULE = True | 77 HAS_RESOURCE_MODULE = True |
| 59 except ImportError, e: | 78 except ImportError, e: |
| 60 HAS_RESOURCE_MODULE = False | 79 HAS_RESOURCE_MODULE = False |
| 61 | 80 |
| 62 ONE_KIB = 1024 | 81 ONE_KIB = 1024 |
| 63 ONE_MIB = 1024 * 1024 | 82 ONE_MIB = 1024 * 1024 |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 107 (50, 'PiB', 'Pibit', 'P'), | 126 (50, 'PiB', 'Pibit', 'P'), |
| 108 (60, 'EiB', 'Eibit', 'E'), | 127 (60, 'EiB', 'Eibit', 'E'), |
| 109 ] | 128 ] |
| 110 | 129 |
| 111 | 130 |
| 112 global manager # pylint: disable=global-at-module-level | 131 global manager # pylint: disable=global-at-module-level |
| 113 certs_file_lock = threading.Lock() | 132 certs_file_lock = threading.Lock() |
| 114 configured_certs_files = [] | 133 configured_certs_files = [] |
| 115 | 134 |
| 116 | 135 |
| 117 def InitializeMultiprocessingVariables(): | |
| 118 """Perform necessary initialization for multiprocessing. | |
| 119 | |
| 120 See gslib.command.InitializeMultiprocessingVariables for an explanation | |
| 121 of why this is necessary. | |
| 122 """ | |
| 123 global manager # pylint: disable=global-variable-undefined | |
| 124 manager = multiprocessing.Manager() | |
| 125 | |
| 126 | |
def _GenerateSuffixRegex():
  """Creates a suffix regex for human-readable byte counts.

  Returns:
    Tuple of (suffix_to_si, matcher) where suffix_to_si maps each
    lower-cased suffix (e.g. 'kib', 'k') to its index in _EXP_STRINGS,
    and matcher is a compiled regex with 'num' and 'suffix' groups.
  """
  all_suffixes = []
  suffix_to_si = {}
  for exp_index, si_entry in enumerate(_EXP_STRINGS):
    # Every entry after the exponent itself is an acceptable suffix spelling.
    lowered = [name.lower() for name in list(si_entry)[1:]]
    for name in lowered:
      suffix_to_si[name] = exp_index
    all_suffixes.extend(lowered)
  pattern = (r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
             % '|'.join(all_suffixes))
  return suffix_to_si, re.compile(pattern)
| 140 | 149 |
| 141 SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex() | 150 SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex() |
| 142 | 151 |
| 143 SECONDS_PER_DAY = 3600 * 24 | 152 SECONDS_PER_DAY = 3600 * 24 |
| 144 | 153 |
| 145 # Detect platform types. | |
| 146 PLATFORM = str(sys.platform).lower() | |
| 147 IS_WINDOWS = 'win32' in PLATFORM | |
| 148 IS_CYGWIN = 'cygwin' in PLATFORM | |
| 149 IS_LINUX = 'linux' in PLATFORM | |
| 150 IS_OSX = 'darwin' in PLATFORM | |
| 151 | |
| 152 # On Unix-like systems, we will set the maximum number of open files to avoid | 154 # On Unix-like systems, we will set the maximum number of open files to avoid |
| 153 # hitting the limit imposed by the OS. This number was obtained experimentally. | 155 # hitting the limit imposed by the OS. This number was obtained experimentally. |
| 154 MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000 | 156 MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000 |
| 155 | 157 |
| 156 GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz' | 158 GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz' |
| 157 | 159 |
| 158 Retry = retry_decorator.retry # pylint: disable=invalid-name | 160 Retry = retry_decorator.retry # pylint: disable=invalid-name |
| 159 | 161 |
| 160 # Cache the values from this check such that they're available to all callers | 162 # Cache the values from this check such that they're available to all callers |
| 161 # without needing to run all the checks again (some of these, such as calling | 163 # without needing to run all the checks again (some of these, such as calling |
| 162 # multiprocessing.Manager(), are expensive operations). | 164 # multiprocessing.Manager(), are expensive operations). |
| 163 cached_multiprocessing_is_available = None | 165 cached_multiprocessing_is_available = None |
| 164 cached_multiprocessing_is_available_stack_trace = None | 166 cached_multiprocessing_is_available_stack_trace = None |
| 165 cached_multiprocessing_is_available_message = None | 167 cached_multiprocessing_is_available_message = None |
| 166 | 168 |
| 167 | 169 |
| 168 # Enum class for specifying listing style. | 170 # Enum class for specifying listing style. |
class ListingStyle(object):
  """Enum-like holder for the supported listing output styles."""
  SHORT = 'SHORT'
  LONG = 'LONG'
  LONG_LONG = 'LONG_LONG'
| 173 | 175 |
| 174 | 176 |
def UsingCrcmodExtension(crcmod):
  """Returns a truthy value iff the fast, compiled crcmod extension is in use.

  Tests can force the fast-path answer via the boto config option
  'GSUtil:test_assume_fast_crcmod'.
  """
  forced = boto.config.get('GSUtil', 'test_assume_fast_crcmod', None)
  if forced:
    return forced
  inner_module = getattr(crcmod, 'crcmod', None)
  return inner_module and getattr(inner_module, '_usingExtension', None)
| 181 |
| 182 |
def CheckFreeSpace(path):
  """Return path/drive free space (in bytes).

  Args:
    path: Path whose filesystem is examined. NOTE(review): on Windows this
        argument is ignored and the SystemDrive is checked instead -- confirm
        that is intended for callers passing paths on other drives.

  Returns:
    Number of free bytes available to the calling user.
  """
  if IS_WINDOWS:
    try:
      # Prefer the wide-character (Unicode) entry point; fall back to the
      # ANSI variant in the except branch if this build lacks it.
      # pylint: disable=invalid-name
      get_disk_free_space_ex = WINFUNCTYPE(c_int, c_wchar_p,
                                           POINTER(c_uint64),
                                           POINTER(c_uint64),
                                           POINTER(c_uint64))
      get_disk_free_space_ex = get_disk_free_space_ex(
          ('GetDiskFreeSpaceExW', windll.kernel32), (
              (1, 'lpszPathName'),
              (2, 'lpFreeUserSpace'),
              (2, 'lpTotalSpace'),
              (2, 'lpFreeSpace'),))
    except AttributeError:
      get_disk_free_space_ex = WINFUNCTYPE(c_int, c_char_p,
                                           POINTER(c_uint64),
                                           POINTER(c_uint64),
                                           POINTER(c_uint64))
      get_disk_free_space_ex = get_disk_free_space_ex(
          ('GetDiskFreeSpaceExA', windll.kernel32), (
              (1, 'lpszPathName'),
              (2, 'lpFreeUserSpace'),
              (2, 'lpTotalSpace'),
              (2, 'lpFreeSpace'),))

    def GetDiskFreeSpaceExErrCheck(result, unused_func, args):
      # GetDiskFreeSpaceEx returns zero on failure.
      if not result:
        raise WinError()
      # args[1] is the first output parameter (lpFreeUserSpace per the
      # paramflags above): bytes available to the calling user.
      return args[1].value
    get_disk_free_space_ex.errcheck = GetDiskFreeSpaceExErrCheck

    return get_disk_free_space_ex(os.getenv('SystemDrive'))
  else:
    # statvfs: f_frsize is the fundamental block size; f_bavail is the
    # number of free blocks available to unprivileged users.
    (_, f_frsize, _, _, f_bavail, _, _, _, _, _) = os.statvfs(path)
    return f_frsize * f_bavail
| 178 | 220 |
| 179 | 221 |
def CreateDirIfNeeded(dir_path, mode=0o777):
  """Creates a directory, suppressing already-exists errors.

  Args:
    dir_path: Path of the directory to create (intermediate directories are
        created as needed).
    mode: Unix permission bits (subject to the process umask) applied if the
        directory is created. Written as 0o777 (valid on Python 2.6+ and 3.x)
        instead of the legacy 0777 spelling, which is a syntax error on
        Python 3.

  Raises:
    OSError: If creation fails for any reason other than the directory
        already existing.
  """
  if not os.path.exists(dir_path):
    try:
      # Unfortunately, even though we catch and ignore EEXIST, this call will
      # output a (needless) error message (no way to avoid that in Python).
      os.makedirs(dir_path, mode)
    except OSError as e:
      # Ignore 'already exists' in case user tried to start up several
      # resumable uploads concurrently from a machine where no tracker dir had
      # yet been created.
      if e.errno != errno.EEXIST:
        raise
| 193 | 235 |
| 194 | 236 |
def DivideAndCeil(dividend, divisor):
  """Returns ceil(dividend / divisor).

  Uses integer arithmetic only, so it avoids the floating point precision
  pitfalls that could otherwise yield the wrong result for large numbers.

  Args:
    dividend: Dividend for the operation.
    divisor: Divisor for the operation.

  Returns:
    Quotient, rounded up to the nearest integer.
  """
  # Negating twice turns Python's floor division into ceiling division.
  return -(-dividend // divisor)
| 254 |
| 255 |
| 195 def GetGsutilStateDir(): | 256 def GetGsutilStateDir(): |
| 196 """Returns the location of the directory for gsutil state files. | 257 """Returns the location of the directory for gsutil state files. |
| 197 | 258 |
| 198 Certain operations, such as cross-process credential sharing and | 259 Certain operations, such as cross-process credential sharing and |
| 199 resumable transfer tracking, need a known location for state files which | 260 resumable transfer tracking, need a known location for state files which |
| 200 are created by gsutil as-needed. | 261 are created by gsutil as-needed. |
| 201 | 262 |
| 202 This location should only be used for storing data that is required to be in | 263 This location should only be used for storing data that is required to be in |
| 203 a static location. | 264 a static location. |
| 204 | 265 |
| (...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 432 # one included with gsutil. | 493 # one included with gsutil. |
| 433 kwargs['ca_certs'] = GetCertsFile() | 494 kwargs['ca_certs'] = GetCertsFile() |
| 434 # Use a non-infinite SSL timeout to avoid hangs during network flakiness. | 495 # Use a non-infinite SSL timeout to avoid hangs during network flakiness. |
| 435 kwargs['timeout'] = SSL_TIMEOUT | 496 kwargs['timeout'] = SSL_TIMEOUT |
| 436 http = http_class(proxy_info=proxy_info, **kwargs) | 497 http = http_class(proxy_info=proxy_info, **kwargs) |
| 437 http.disable_ssl_certificate_validation = (not config.getbool( | 498 http.disable_ssl_certificate_validation = (not config.getbool( |
| 438 'Boto', 'https_validate_certificates')) | 499 'Boto', 'https_validate_certificates')) |
| 439 return http | 500 return http |
| 440 | 501 |
| 441 | 502 |
| 503 # Retry for 10 minutes with exponential backoff, which corresponds to |
| 504 # the maximum Downtime Period specified in the GCS SLA |
| 505 # (https://cloud.google.com/storage/sla) |
def GetNumRetries():
  """Returns how many times gsutil should retry a failed request.

  The default of 23 retries, combined with exponential backoff, spans
  roughly 10 minutes -- the maximum Downtime Period in the GCS SLA.
  """
  configured_retries = config.getint('Boto', 'num_retries', 23)
  return configured_retries
| 444 | 508 |
| 445 | 509 |
def GetMaxRetryDelay():
  """Returns the cap, in seconds, on exponential-backoff retry delays."""
  delay_cap = config.getint('Boto', 'max_retry_delay', 32)
  return delay_cap
| 448 | 512 |
| 449 | 513 |
| 450 # Resumable downloads and uploads make one HTTP call per chunk (and must be | 514 # Resumable downloads and uploads make one HTTP call per chunk (and must be |
| 451 # in multiples of 256KiB). Overridable for testing. | 515 # in multiples of 256KiB). Overridable for testing. |
| 452 def GetJsonResumableChunkSize(): | 516 def GetJsonResumableChunkSize(): |
| 453 chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size', | 517 chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size', |
| 454 1024*1024*100L) | 518 1024*1024*100L) |
| 455 if chunk_size == 0: | 519 if chunk_size == 0: |
| 456 chunk_size = 1024*256L | 520 chunk_size = 1024*256L |
| 457 elif chunk_size % 1024*256L != 0: | 521 elif chunk_size % 1024*256L != 0: |
| (...skipping 425 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 883 | 947 |
| 884 Returns: | 948 Returns: |
| 885 True if args_to_check contains any provider URLs. | 949 True if args_to_check contains any provider URLs. |
| 886 """ | 950 """ |
| 887 for url_str in args_to_check: | 951 for url_str in args_to_check: |
| 888 storage_url = StorageUrlFromString(url_str) | 952 storage_url = StorageUrlFromString(url_str) |
| 889 if storage_url.IsCloudUrl() and storage_url.IsProvider(): | 953 if storage_url.IsCloudUrl() and storage_url.IsProvider(): |
| 890 return True | 954 return True |
| 891 return False | 955 return False |
| 892 | 956 |
# This must be defined at the module level for pickling across processes.
# is_available: True iff multiprocessing can be used in this environment.
# stack_trace: traceback string from the failed availability check, or None.
MultiprocessingIsAvailableResult = collections.namedtuple(
    'MultiprocessingIsAvailableResult', ['is_available', 'stack_trace'])
| 893 | 960 |
def CheckMultiprocessingAvailableAndInit(logger=None):
  """Checks if multiprocessing is available.

  There are some environments in which there is no way to use multiprocessing
  logic that's built into Python (e.g., if /dev/shm is not available, then
  we can't create semaphores). This simply tries out a few things that will be
  needed to make sure the environment can support the pieces of the
  multiprocessing module that we need.

  If multiprocessing is available, this performs necessary initialization for
  multiprocessing. See gslib.command.InitializeMultiprocessingVariables for
  an explanation of why this is necessary.

  Args:
    logger: logging.logger to use for debug output.

  Returns:
    (multiprocessing_is_available, stack_trace):
      multiprocessing_is_available: True iff the multiprocessing module is
        available for use.
      stack_trace: The stack trace generated by the call we tried that failed.
  """
  # pylint: disable=global-variable-undefined
  global cached_multiprocessing_is_available
  global cached_multiprocessing_check_stack_trace
  global cached_multiprocessing_is_available_message
  if cached_multiprocessing_is_available is not None:
    # A previous call already ran the expensive checks; reuse its result.
    # NOTE(review): the cached message is warned even when multiprocessing
    # turned out to be available -- confirm this logging is intended.
    if logger:
      logger.debug(cached_multiprocessing_check_stack_trace)
      logger.warn(cached_multiprocessing_is_available_message)
    return MultiprocessingIsAvailableResult(
        is_available=cached_multiprocessing_is_available,
        stack_trace=cached_multiprocessing_check_stack_trace)

  if IS_WINDOWS:
    # Windows never uses multiple processes here; fall back to threads.
    # This early return intentionally bypasses the cached_* globals.
    message = """
Multiple processes are not supported on Windows. Operations requesting
parallelism will be executed with multiple threads in a single process only.
"""
    if logger:
      logger.warn(message)
    return MultiprocessingIsAvailableResult(is_available=False,
                                            stack_trace=None)

  stack_trace = None
  multiprocessing_is_available = True
  message = """
You have requested multiple processes for an operation, but the
required functionality of Python\'s multiprocessing module is not available.
Operations requesting parallelism will be executed with multiple threads in a
single process only.
"""
  try:
    # Fails if /dev/shm (or some equivalent thereof) is not available for use
    # (e.g., there's no implementation, or we can't write to it, etc.).
    try:
      multiprocessing.Value('i', 0)
    except:
      message += """
Please ensure that you have write access to both /dev/shm and /run/shm.
"""
      raise  # We'll handle this in one place below.

    # Manager objects and Windows are generally a pain to work with, so try it
    # out as a sanity check. This definitely works on some versions of Windows,
    # but it's certainly possible that there is some unknown configuration for
    # which it won't.
    global manager  # pylint: disable=global-variable-undefined

    manager = multiprocessing.Manager()

    # Check that the max number of open files is reasonable. Always check this
    # after we're sure that the basic multiprocessing functionality is
    # available, since this won't matter unless that's true.
    limit = -1
    if HAS_RESOURCE_MODULE:
      # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
      # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
      # "resource" module is not guaranteed to know about these names.
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_NOFILE,
                        MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass

    if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT:
      message += ("""
Your max number of open files, %s, is too low to allow safe multiprocessing.
On Linux you can fix this by adding something like "ulimit -n 10000" to your
~/.bashrc or equivalent file and opening a new terminal.

On MacOS, you may also need to run a command like this once (in addition to the
above instructions), which might require a restart of your system to take
effect:
  launchctl limit maxfiles 10000

Alternatively, edit /etc/launchd.conf with something like:
  limit maxfiles 10000 10000

""" % limit)
      raise Exception('Max number of open files, %s, is too low.' % limit)
  except:  # pylint: disable=bare-except
    # Any failed check above lands here: record the traceback for debugging
    # and fall back to threads-only parallelism.
    stack_trace = traceback.format_exc()
    multiprocessing_is_available = False
    if logger is not None:
      logger.debug(stack_trace)
      logger.warn(message)

  # Set the cached values so that we never need to do this check again.
  cached_multiprocessing_is_available = multiprocessing_is_available
  cached_multiprocessing_check_stack_trace = stack_trace
  cached_multiprocessing_is_available_message = message
  return MultiprocessingIsAvailableResult(
      is_available=cached_multiprocessing_is_available,
      stack_trace=cached_multiprocessing_check_stack_trace)
| 999 | 1085 |
| 1000 | 1086 |
def CreateLock():
  """Returns either a multiprocessing lock or a threading lock.

  A multiprocessing lock is used iff the environment supports the pieces of
  the multiprocessing module that gsutil needs for process-level parallelism;
  otherwise a threading lock suffices.

  Returns:
    Multiprocessing or threading lock.
  """
  mp_available = CheckMultiprocessingAvailableAndInit().is_available
  return manager.Lock() if mp_available else threading.Lock()
| 1014 | 1100 |
| 1015 | 1101 |
| 1016 def IsCloudSubdirPlaceholder(url, blr=None): | 1102 def IsCloudSubdirPlaceholder(url, blr=None): |
| 1017 """Determines if URL is a cloud subdir placeholder. | 1103 """Determines if URL is a cloud subdir placeholder. |
| 1018 | 1104 |
| 1019 This function is needed because GUI tools (like the GCS cloud console) allow | 1105 This function is needed because GUI tools (like the GCS cloud console) allow |
| 1020 users to create empty "folders" by creating a placeholder object; and parts | 1106 users to create empty "folders" by creating a placeholder object; and parts |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1091 logging.StreamHandler.flush(self) | 1177 logging.StreamHandler.flush(self) |
| 1092 except ValueError: | 1178 except ValueError: |
| 1093 pass | 1179 pass |
| 1094 | 1180 |
| 1095 | 1181 |
def StdinIterator():
  """Returns an iterator over stdin lines with trailing whitespace removed.

  rstrip() with no argument strips the CR/LF line terminator (and any other
  trailing whitespace) from each line.
  """
  return (raw_line.rstrip() for raw_line in sys.stdin)
| OLD | NEW |