OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 # Copyright 2010 Google Inc. All Rights Reserved. | 2 # Copyright 2010 Google Inc. All Rights Reserved. |
3 # | 3 # |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
5 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
6 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
7 # | 7 # |
8 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
9 # | 9 # |
10 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
11 # distributed under the License is distributed on an "AS IS" BASIS, | 11 # distributed under the License is distributed on an "AS IS" BASIS, |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 # See the License for the specific language governing permissions and | 13 # See the License for the specific language governing permissions and |
14 # limitations under the License. | 14 # limitations under the License. |
15 """Static data and helper functions.""" | 15 """Static data and helper functions.""" |
16 | 16 |
17 from __future__ import absolute_import | 17 from __future__ import absolute_import |
18 | 18 |
| 19 import collections |
19 import errno | 20 import errno |
20 import logging | 21 import logging |
21 import math | 22 import math |
22 import multiprocessing | 23 import multiprocessing |
23 import os | 24 import os |
24 import pkgutil | 25 import pkgutil |
25 import re | 26 import re |
26 import struct | 27 import struct |
27 import sys | 28 import sys |
28 import tempfile | 29 import tempfile |
(...skipping 15 matching lines...) Expand all Loading... |
44 | 45 |
45 import gslib | 46 import gslib |
46 from gslib.exception import CommandException | 47 from gslib.exception import CommandException |
47 from gslib.storage_url import StorageUrlFromString | 48 from gslib.storage_url import StorageUrlFromString |
48 from gslib.translation_helper import AclTranslation | 49 from gslib.translation_helper import AclTranslation |
49 from gslib.translation_helper import GenerationFromUrlAndString | 50 from gslib.translation_helper import GenerationFromUrlAndString |
50 from gslib.translation_helper import S3_ACL_MARKER_GUID | 51 from gslib.translation_helper import S3_ACL_MARKER_GUID |
51 from gslib.translation_helper import S3_DELETE_MARKER_GUID | 52 from gslib.translation_helper import S3_DELETE_MARKER_GUID |
52 from gslib.translation_helper import S3_MARKER_GUIDS | 53 from gslib.translation_helper import S3_MARKER_GUIDS |
53 | 54 |
| 55 # Detect platform types. |
| 56 PLATFORM = str(sys.platform).lower() |
| 57 IS_WINDOWS = 'win32' in PLATFORM |
| 58 IS_CYGWIN = 'cygwin' in PLATFORM |
| 59 IS_LINUX = 'linux' in PLATFORM |
| 60 IS_OSX = 'darwin' in PLATFORM |
| 61 |
| 62 # pylint: disable=g-import-not-at-top |
| 63 if IS_WINDOWS: |
| 64 from ctypes import c_int |
| 65 from ctypes import c_uint64 |
| 66 from ctypes import c_char_p |
| 67 from ctypes import c_wchar_p |
| 68 from ctypes import windll |
| 69 from ctypes import POINTER |
| 70 from ctypes import WINFUNCTYPE |
| 71 from ctypes import WinError |
| 72 |
54 # pylint: disable=g-import-not-at-top | 73 # pylint: disable=g-import-not-at-top |
55 try: | 74 try: |
56 # This module doesn't necessarily exist on Windows. | 75 # This module doesn't necessarily exist on Windows. |
57 import resource | 76 import resource |
58 HAS_RESOURCE_MODULE = True | 77 HAS_RESOURCE_MODULE = True |
59 except ImportError, e: | 78 except ImportError, e: |
60 HAS_RESOURCE_MODULE = False | 79 HAS_RESOURCE_MODULE = False |
61 | 80 |
62 ONE_KIB = 1024 | 81 ONE_KIB = 1024 |
63 ONE_MIB = 1024 * 1024 | 82 ONE_MIB = 1024 * 1024 |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
107 (50, 'PiB', 'Pibit', 'P'), | 126 (50, 'PiB', 'Pibit', 'P'), |
108 (60, 'EiB', 'Eibit', 'E'), | 127 (60, 'EiB', 'Eibit', 'E'), |
109 ] | 128 ] |
110 | 129 |
111 | 130 |
112 global manager # pylint: disable=global-at-module-level | 131 global manager # pylint: disable=global-at-module-level |
113 certs_file_lock = threading.Lock() | 132 certs_file_lock = threading.Lock() |
114 configured_certs_files = [] | 133 configured_certs_files = [] |
115 | 134 |
116 | 135 |
117 def InitializeMultiprocessingVariables(): | |
118 """Perform necessary initialization for multiprocessing. | |
119 | |
120 See gslib.command.InitializeMultiprocessingVariables for an explanation | |
121 of why this is necessary. | |
122 """ | |
123 global manager # pylint: disable=global-variable-undefined | |
124 manager = multiprocessing.Manager() | |
125 | |
126 | |
def _GenerateSuffixRegex():
  """Builds the suffix lookup table and matcher for human-readable byte counts.

  Returns:
    Tuple (suffix_to_si, matcher): suffix_to_si maps each lowercased unit
    suffix to its index in _EXP_STRINGS; matcher is a compiled regex that
    recognizes a number optionally followed by one of those suffixes.
  """
  suffix_to_si = {}
  all_suffixes = []
  for exp_index, si_entry in enumerate(_EXP_STRINGS):
    # Entry layout: (exponent, suffix, suffix, ...); skip the exponent.
    lowered = [unit.lower() for unit in list(si_entry)[1:]]
    all_suffixes.extend(lowered)
    for unit_suffix in lowered:
      suffix_to_si[unit_suffix] = exp_index
  pattern = (r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
             % '|'.join(all_suffixes))
  return suffix_to_si, re.compile(pattern)
140 | 149 |
141 SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex() | 150 SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex() |
142 | 151 |
143 SECONDS_PER_DAY = 3600 * 24 | 152 SECONDS_PER_DAY = 3600 * 24 |
144 | 153 |
145 # Detect platform types. | |
146 PLATFORM = str(sys.platform).lower() | |
147 IS_WINDOWS = 'win32' in PLATFORM | |
148 IS_CYGWIN = 'cygwin' in PLATFORM | |
149 IS_LINUX = 'linux' in PLATFORM | |
150 IS_OSX = 'darwin' in PLATFORM | |
151 | |
152 # On Unix-like systems, we will set the maximum number of open files to avoid | 154 # On Unix-like systems, we will set the maximum number of open files to avoid |
153 # hitting the limit imposed by the OS. This number was obtained experimentally. | 155 # hitting the limit imposed by the OS. This number was obtained experimentally. |
154 MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000 | 156 MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000 |
155 | 157 |
156 GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz' | 158 GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz' |
157 | 159 |
158 Retry = retry_decorator.retry # pylint: disable=invalid-name | 160 Retry = retry_decorator.retry # pylint: disable=invalid-name |
159 | 161 |
# Cache the values from this check such that they're available to all callers
# without needing to run all the checks again (some of these, such as calling
# multiprocessing.Manager(), are expensive operations).
cached_multiprocessing_is_available = None
# NOTE: CheckMultiprocessingAvailableAndInit declares and assigns the global
# name cached_multiprocessing_check_stack_trace; the previously defined
# cached_multiprocessing_is_available_stack_trace was never referenced, so the
# name here is corrected to match the one actually used.
cached_multiprocessing_check_stack_trace = None
cached_multiprocessing_is_available_message = None
166 | 168 |
167 | 169 |
# Enum class for specifying listing style.
class ListingStyle(object):
  """Enum-like holder of the string constants naming the listing styles."""
  SHORT = 'SHORT'
  LONG = 'LONG'
  LONG_LONG = 'LONG_LONG'
174 | 176 |
def UsingCrcmodExtension(crcmod):
  """Reports whether the compiled (C extension) crcmod is in use.

  Args:
    crcmod: The imported crcmod module (or an object with the same layout).

  Returns:
    Truthy value when the boto test override 'test_assume_fast_crcmod' is set,
    or when crcmod.crcmod reports that its C extension is loaded.
  """
  # Test hook: lets tests pretend the fast (compiled) crcmod is present.
  assume_fast = boto.config.get('GSUtil', 'test_assume_fast_crcmod', None)
  if assume_fast:
    return assume_fast
  inner_module = getattr(crcmod, 'crcmod', None)
  return inner_module and getattr(inner_module, '_usingExtension', None)
| 181 |
| 182 |
def CheckFreeSpace(path):
  """Return path/drive free space (in bytes).

  Args:
    path: Path whose filesystem is queried. NOTE: on Windows this argument is
        ignored and the free space of the SystemDrive is returned instead
        (the call below passes os.getenv('SystemDrive'), not path).

  Returns:
    Number of free bytes available to the calling user.
  """
  if IS_WINDOWS:
    try:
      # Prefer the wide-character (Unicode) API; fall back to the ANSI
      # variant below if WINFUNCTYPE lookup of the W entry point fails.
      # pylint: disable=invalid-name
      get_disk_free_space_ex = WINFUNCTYPE(c_int, c_wchar_p,
                                           POINTER(c_uint64),
                                           POINTER(c_uint64),
                                           POINTER(c_uint64))
      get_disk_free_space_ex = get_disk_free_space_ex(
          ('GetDiskFreeSpaceExW', windll.kernel32), (
              (1, 'lpszPathName'),
              (2, 'lpFreeUserSpace'),
              (2, 'lpTotalSpace'),
              (2, 'lpFreeSpace'),))
    except AttributeError:
      get_disk_free_space_ex = WINFUNCTYPE(c_int, c_char_p,
                                           POINTER(c_uint64),
                                           POINTER(c_uint64),
                                           POINTER(c_uint64))
      get_disk_free_space_ex = get_disk_free_space_ex(
          ('GetDiskFreeSpaceExA', windll.kernel32), (
              (1, 'lpszPathName'),
              (2, 'lpFreeUserSpace'),
              (2, 'lpTotalSpace'),
              (2, 'lpFreeSpace'),))

    def GetDiskFreeSpaceExErrCheck(result, unused_func, args):
      # A zero result means the WinAPI call failed; surface it as an OSError.
      if not result:
        raise WinError()
      # args[1] is the lpFreeUserSpace output parameter (bytes available
      # to the caller), which becomes the function's return value.
      return args[1].value
    get_disk_free_space_ex.errcheck = GetDiskFreeSpaceExErrCheck

    return get_disk_free_space_ex(os.getenv('SystemDrive'))
  else:
    # statvfs: f_frsize is the fundamental block size, f_bavail the number of
    # free blocks available to unprivileged users.
    (_, f_frsize, _, _, f_bavail, _, _, _, _, _) = os.statvfs(path)
    return f_frsize * f_bavail
178 | 220 |
179 | 221 |
def CreateDirIfNeeded(dir_path, mode=0o777):
  """Creates a directory (and missing parents), suppressing already-exists errors.

  The default mode is written as 0o777 (identical value to the legacy 0777
  literal) so the module also parses under Python 3.

  Args:
    dir_path: Path of the directory to create.
    mode: Permission bits to apply to newly created directories.

  Raises:
    OSError: If creation fails for any reason other than the directory
        already existing.
  """
  if os.path.exists(dir_path):
    return
  try:
    os.makedirs(dir_path, mode)
  except OSError as e:
    # Ignore 'already exists' in case user tried to start up several
    # resumable uploads concurrently from a machine where no tracker dir had
    # yet been created.
    if e.errno != errno.EEXIST:
      raise
193 | 235 |
194 | 236 |
def DivideAndCeil(dividend, divisor):
  """Returns ceil(dividend / divisor) using exact integer arithmetic.

  Floating point division can silently lose precision for very large
  operands, so only integer operations are used here.

  Args:
    dividend: Dividend for the operation.
    divisor: Divisor for the operation.

  Returns:
    Quotient, rounded up to the next whole number when there is a remainder.
  """
  whole, remainder = divmod(dividend, divisor)
  return whole + 1 if remainder else whole
| 254 |
| 255 |
195 def GetGsutilStateDir(): | 256 def GetGsutilStateDir(): |
196 """Returns the location of the directory for gsutil state files. | 257 """Returns the location of the directory for gsutil state files. |
197 | 258 |
198 Certain operations, such as cross-process credential sharing and | 259 Certain operations, such as cross-process credential sharing and |
199 resumable transfer tracking, need a known location for state files which | 260 resumable transfer tracking, need a known location for state files which |
200 are created by gsutil as-needed. | 261 are created by gsutil as-needed. |
201 | 262 |
202 This location should only be used for storing data that is required to be in | 263 This location should only be used for storing data that is required to be in |
203 a static location. | 264 a static location. |
204 | 265 |
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
432 # one included with gsutil. | 493 # one included with gsutil. |
433 kwargs['ca_certs'] = GetCertsFile() | 494 kwargs['ca_certs'] = GetCertsFile() |
434 # Use a non-infinite SSL timeout to avoid hangs during network flakiness. | 495 # Use a non-infinite SSL timeout to avoid hangs during network flakiness. |
435 kwargs['timeout'] = SSL_TIMEOUT | 496 kwargs['timeout'] = SSL_TIMEOUT |
436 http = http_class(proxy_info=proxy_info, **kwargs) | 497 http = http_class(proxy_info=proxy_info, **kwargs) |
437 http.disable_ssl_certificate_validation = (not config.getbool( | 498 http.disable_ssl_certificate_validation = (not config.getbool( |
438 'Boto', 'https_validate_certificates')) | 499 'Boto', 'https_validate_certificates')) |
439 return http | 500 return http |
440 | 501 |
441 | 502 |
# Retry for 10 minutes with exponential backoff, which corresponds to
# the maximum Downtime Period specified in the GCS SLA
# (https://cloud.google.com/storage/sla)
def GetNumRetries():
  """Returns the retry count from the Boto config ('num_retries', default 23)."""
  configured_retries = config.getint('Boto', 'num_retries', 23)
  return configured_retries
444 | 508 |
445 | 509 |
def GetMaxRetryDelay():
  """Returns the backoff delay cap from the Boto config ('max_retry_delay', default 32)."""
  configured_delay = config.getint('Boto', 'max_retry_delay', 32)
  return configured_delay
448 | 512 |
449 | 513 |
450 # Resumable downloads and uploads make one HTTP call per chunk (and must be | 514 # Resumable downloads and uploads make one HTTP call per chunk (and must be |
451 # in multiples of 256KiB). Overridable for testing. | 515 # in multiples of 256KiB). Overridable for testing. |
452 def GetJsonResumableChunkSize(): | 516 def GetJsonResumableChunkSize(): |
453 chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size', | 517 chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size', |
454 1024*1024*100L) | 518 1024*1024*100L) |
455 if chunk_size == 0: | 519 if chunk_size == 0: |
456 chunk_size = 1024*256L | 520 chunk_size = 1024*256L |
457 elif chunk_size % 1024*256L != 0: | 521 elif chunk_size % 1024*256L != 0: |
(...skipping 425 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
883 | 947 |
884 Returns: | 948 Returns: |
885 True if args_to_check contains any provider URLs. | 949 True if args_to_check contains any provider URLs. |
886 """ | 950 """ |
887 for url_str in args_to_check: | 951 for url_str in args_to_check: |
888 storage_url = StorageUrlFromString(url_str) | 952 storage_url = StorageUrlFromString(url_str) |
889 if storage_url.IsCloudUrl() and storage_url.IsProvider(): | 953 if storage_url.IsCloudUrl() and storage_url.IsProvider(): |
890 return True | 954 return True |
891 return False | 955 return False |
892 | 956 |
# This must be defined at the module level for pickling across processes.
# Fields:
#   is_available: True iff multiprocessing primitives can be used.
#   stack_trace: traceback string from the failed availability probe, or None.
MultiprocessingIsAvailableResult = collections.namedtuple(
    'MultiprocessingIsAvailableResult', ['is_available', 'stack_trace'])
893 | 960 |
def CheckMultiprocessingAvailableAndInit(logger=None):
  """Checks if multiprocessing is available, initializing it when it is.

  There are some environments in which there is no way to use multiprocessing
  logic that's built into Python (e.g., if /dev/shm is not available, then
  we can't create semaphores). This simply tries out a few things that will be
  needed to make sure the environment can support the pieces of the
  multiprocessing module that we need.

  If multiprocessing is available, this performs necessary initialization for
  multiprocessing.  See gslib.command.InitializeMultiprocessingVariables for
  an explanation of why this is necessary.

  Args:
    logger: logging.logger to use for debug output.

  Returns:
    MultiprocessingIsAvailableResult namedtuple with fields:
      is_available: True iff the multiprocessing module is available for use.
      stack_trace: The stack trace generated by the call we tried that failed.
  """
  # pylint: disable=global-variable-undefined
  global cached_multiprocessing_is_available
  global cached_multiprocessing_check_stack_trace
  global cached_multiprocessing_is_available_message
  # The probe below is expensive (it spawns a Manager process), so reuse the
  # cached verdict on every call after the first one.
  if cached_multiprocessing_is_available is not None:
    if logger:
      logger.debug(cached_multiprocessing_check_stack_trace)
      logger.warn(cached_multiprocessing_is_available_message)
    return MultiprocessingIsAvailableResult(
        is_available=cached_multiprocessing_is_available,
        stack_trace=cached_multiprocessing_check_stack_trace)

  # Multiprocessing is unconditionally disabled on Windows; thread-based
  # parallelism is used instead. NOTE(review): this early return does not
  # populate the cached_* globals, so the Windows branch re-runs each call
  # (cheap, but logs the warning every time a logger is passed).
  if IS_WINDOWS:
    message = """
Multiple processes are not supported on Windows. Operations requesting
parallelism will be executed with multiple threads in a single process only.
"""
    if logger:
      logger.warn(message)
    return MultiprocessingIsAvailableResult(is_available=False,
                                            stack_trace=None)

  stack_trace = None
  multiprocessing_is_available = True
  message = """
You have requested multiple processes for an operation, but the
required functionality of Python\'s multiprocessing module is not available.
Operations requesting parallelism will be executed with multiple threads in a
single process only.
"""
  try:
    # Fails if /dev/shm (or some equivalent thereof) is not available for use
    # (e.g., there's no implementation, or we can't write to it, etc.).
    try:
      multiprocessing.Value('i', 0)
    except:
      message += """
Please ensure that you have write access to both /dev/shm and /run/shm.
"""
      raise  # We'll handle this in one place below.

    # Manager objects and Windows are generally a pain to work with, so try it
    # out as a sanity check. This definitely works on some versions of Windows,
    # but it's certainly possible that there is some unknown configuration for
    # which it won't.
    global manager  # pylint: disable=global-variable-undefined

    manager = multiprocessing.Manager()

    # Check that the max number of open files is reasonable. Always check this
    # after we're sure that the basic multiprocessing functionality is
    # available, since this won't matter unless that's true.
    limit = -1
    if HAS_RESOURCE_MODULE:
      # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
      # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
      # "resource" module is not guaranteed to know about these names.
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_NOFILE,
                        MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass

    # If the resource module was missing, limit is still -1 and this raises,
    # falling back to the thread-only mode reported below.
    if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT:
      message += ("""
Your max number of open files, %s, is too low to allow safe multiprocessing.
On Linux you can fix this by adding something like "ulimit -n 10000" to your
~/.bashrc or equivalent file and opening a new terminal.

On MacOS, you may also need to run a command like this once (in addition to the
above instructions), which might require a restart of your system to take
effect:
  launchctl limit maxfiles 10000

Alternatively, edit /etc/launchd.conf with something like:
  limit maxfiles 10000 10000

""" % limit)
      raise Exception('Max number of open files, %s, is too low.' % limit)
  except:  # pylint: disable=bare-except
    stack_trace = traceback.format_exc()
    multiprocessing_is_available = False
    if logger is not None:
      logger.debug(stack_trace)
      logger.warn(message)

  # Set the cached values so that we never need to do this check again.
  cached_multiprocessing_is_available = multiprocessing_is_available
  cached_multiprocessing_check_stack_trace = stack_trace
  cached_multiprocessing_is_available_message = message
  return MultiprocessingIsAvailableResult(
      is_available=cached_multiprocessing_is_available,
      stack_trace=cached_multiprocessing_check_stack_trace)
999 | 1085 |
1000 | 1086 |
def CreateLock():
  """Returns either a multiprocessing lock or a threading lock.

  A manager-backed multiprocessing lock is handed out only when the
  environment supports the parts of the multiprocessing module we need;
  otherwise a plain threading lock suffices.

  Returns:
    Multiprocessing or threading lock.
  """
  if not CheckMultiprocessingAvailableAndInit().is_available:
    return threading.Lock()
  return manager.Lock()
1014 | 1100 |
1015 | 1101 |
1016 def IsCloudSubdirPlaceholder(url, blr=None): | 1102 def IsCloudSubdirPlaceholder(url, blr=None): |
1017 """Determines if URL is a cloud subdir placeholder. | 1103 """Determines if URL is a cloud subdir placeholder. |
1018 | 1104 |
1019 This function is needed because GUI tools (like the GCS cloud console) allow | 1105 This function is needed because GUI tools (like the GCS cloud console) allow |
1020 users to create empty "folders" by creating a placeholder object; and parts | 1106 users to create empty "folders" by creating a placeholder object; and parts |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1091 logging.StreamHandler.flush(self) | 1177 logging.StreamHandler.flush(self) |
1092 except ValueError: | 1178 except ValueError: |
1093 pass | 1179 pass |
1094 | 1180 |
1095 | 1181 |
def StdinIterator():
  """Yields each line read from stdin with trailing whitespace removed.

  Note: rstrip() with no arguments removes any trailing whitespace
  (including CR/LF line endings), not only the newline itself.
  """
  for raw_line in sys.stdin:
    yield raw_line.rstrip()
OLD | NEW |