Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Side by Side Diff: third_party/gsutil/gslib/hashing_helper.py

Issue 1380943003: Roll version of gsutil to 4.15. (Closed) Base URL: https://github.com/catapult-project/catapult.git@master
Patch Set: rebase Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/gsutil/gslib/gcs_json_media.py ('k') | third_party/gsutil/gslib/name_expansion.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 # Copyright 2014 Google Inc. All Rights Reserved. 2 # Copyright 2014 Google Inc. All Rights Reserved.
3 # 3 #
4 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License. 5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at 6 # You may obtain a copy of the License at
7 # 7 #
8 # http://www.apache.org/licenses/LICENSE-2.0 8 # http://www.apache.org/licenses/LICENSE-2.0
9 # 9 #
10 # Unless required by applicable law or agreed to in writing, software 10 # Unless required by applicable law or agreed to in writing, software
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
72 file. 72 file.
73 """ 73 """
74 74
75 75
76 # Configuration values for hashing. 76 # Configuration values for hashing.
77 CHECK_HASH_IF_FAST_ELSE_FAIL = 'if_fast_else_fail' 77 CHECK_HASH_IF_FAST_ELSE_FAIL = 'if_fast_else_fail'
78 CHECK_HASH_IF_FAST_ELSE_SKIP = 'if_fast_else_skip' 78 CHECK_HASH_IF_FAST_ELSE_SKIP = 'if_fast_else_skip'
79 CHECK_HASH_ALWAYS = 'always' 79 CHECK_HASH_ALWAYS = 'always'
80 CHECK_HASH_NEVER = 'never' 80 CHECK_HASH_NEVER = 'never'
81 81
82 # Table of x^(2^k) mod CASTAGNOLI_POLY for all k < 31, where both are treated as
83 # polynomials and stored as integers whose bit i is the coefficient of x^i
84 # (so entry 0 is x = 2). 31 entries suffice since x^(2^31) mod CASTAGNOLI_POLY = x.
85 X_POW_2K_TABLE = [2, 4, 16, 256, 65536, 517762881, 984302966,
86 408362264, 1503875210, 2862076957, 3884826397, 1324787473,
87 621200174, 1758783527, 1416537776, 1180494764, 648569364,
88 2521473789, 994858823, 1728245375, 3498467999, 4059169852,
89 3345064394, 2828422810, 2429203150, 3336788029, 860151998,
90 2102628683, 1033187991, 4243778976, 1123580069]
91 # Castagnoli polynomial (0x11EDC6F41, x^32 term included) and its degree.
92 CASTAGNOLI_POLY = 4812730177
93 DEGREE = 32
94
95
def ConcatCrc32c(crc_a, crc_b, num_bytes_in_b):
  """Combines crc(A) and crc(B) into the CRC32C of concat(A, B).

  Only the two checksums and the length of B are needed, not the data itself:
  crc(A) is extended by 8 * num_bytes_in_b zero bits and XORed with crc(B).
  An explanation of the algorithm can be found at
  crcutil.googlecode.com/files/crc-doc.1.0.pdf.

  Args:
    crc_a: A 32-bit integer representing crc(A) with least-significant
           coefficient first.
    crc_b: Same as crc_a, but for B.
    num_bytes_in_b: Length of B in bytes.

  Returns:
    CRC32C for concat(A, B). If num_bytes_in_b is zero, crc_a is returned
    unchanged.
  """
  if num_bytes_in_b:
    num_bits_in_b = 8 * num_bytes_in_b
    return _ExtendByZeros(crc_a, num_bits_in_b) ^ crc_b
  # B is empty, so concat(A, B) is just A.
  return crc_a
115
116
def _CrcMultiply(p, q):
  """Multiplies two polynomials together modulo CASTAGNOLI_POLY.

  Shift-and-add multiplication over the low DEGREE bits of p: for every set
  bit of p the current multiple of q is XORed into the product, and q is
  then multiplied by x and reduced whenever it reaches degree DEGREE.

  Args:
    p: The first polynomial.
    q: The second polynomial.

  Returns:
    Result of the multiplication.
  """
  product = 0
  overflow_bit = 1 << DEGREE
  for bit_index in range(DEGREE):
    if (p >> bit_index) & 1:
      product ^= q
    # Advance q to q * x, folding the overflow back in with the polynomial.
    q <<= 1
    if q & overflow_bit:
      q ^= CASTAGNOLI_POLY
  return product
138
139
140 def _ExtendByZeros(crc, num_bits):
141 """Given crc representing polynomial P(x), compute P(x)*x^num_bits.
142
143 Args:
144 crc: crc respresenting polynomial P(x).
145 num_bits: number of bits in crc.
146
147 Returns:
148 P(x)*x^num_bits
149 """
150 def _ReverseBits32(crc):
151 return int('{0:032b}'.format(crc, width=32)[::-1], 2)
152 crc = _ReverseBits32(crc)
153 i = 0
154
155 while num_bits != 0:
156 if num_bits & 1:
157 crc = _CrcMultiply(crc, X_POW_2K_TABLE[i % len(X_POW_2K_TABLE)])
158 i += 1
159 num_bits >>= 1
160 crc = _ReverseBits32(crc)
161 return crc
162
82 163
83 def _CalculateHashFromContents(fp, hash_alg): 164 def _CalculateHashFromContents(fp, hash_alg):
84 """Calculates a base64 digest of the contents of a seekable stream. 165 """Calculates a base64 digest of the contents of a seekable stream.
85 166
86 This function resets the file pointer to position 0. 167 This function resets the file pointer to position 0.
87 168
88 Args: 169 Args:
89 fp: An already-open file object. 170 fp: An already-open file object.
90 hash_alg: Instance of hashing class initialized to start state. 171 hash_alg: Instance of hashing class initialized to start state.
91 172
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
205 Returns: 286 Returns:
206 dict of (algorithm_name: hash_algorithm) 287 dict of (algorithm_name: hash_algorithm)
207 """ 288 """
208 check_hashes_config = config.get( 289 check_hashes_config = config.get(
209 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL) 290 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL)
210 if check_hashes_config == 'never': 291 if check_hashes_config == 'never':
211 return {} 292 return {}
212 return {'md5': md5} 293 return {'md5': md5}
213 294
214 295
215 def GetDownloadHashAlgs(logger, src_has_md5=False, src_has_crc32c=False): 296 def GetDownloadHashAlgs(logger, consider_md5=False, consider_crc32c=False):
216 """Returns a dict of hash algorithms for validating an object. 297 """Returns a dict of hash algorithms for validating an object.
217 298
218 Args: 299 Args:
219 logger: logging.Logger for outputting log messages. 300 logger: logging.Logger for outputting log messages.
220 src_has_md5: If True, source object has an md5 hash. 301 consider_md5: If True, consider using a md5 hash.
221 src_has_crc32c: If True, source object has a crc32c hash. 302 consider_crc32c: If True, consider using a crc32c hash.
222 303
223 Returns: 304 Returns:
224 Dict of (string, hash algorithm). 305 Dict of (string, hash algorithm).
225 306
226 Raises: 307 Raises:
227 CommandException if hash algorithms satisfying the boto config file 308 CommandException if hash algorithms satisfying the boto config file
228 cannot be returned. 309 cannot be returned.
229 """ 310 """
230 check_hashes_config = config.get( 311 check_hashes_config = config.get(
231 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL) 312 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL)
232 if check_hashes_config == CHECK_HASH_NEVER: 313 if check_hashes_config == CHECK_HASH_NEVER:
233 return {} 314 return {}
234 315
235 hash_algs = {} 316 hash_algs = {}
236 if src_has_md5: 317 if consider_md5:
237 hash_algs['md5'] = md5 318 hash_algs['md5'] = md5
238 elif src_has_crc32c: 319 elif consider_crc32c:
239 # If the cloud provider supplies a CRC, we'll compute a checksum to 320 # If the cloud provider supplies a CRC, we'll compute a checksum to
240 # validate if we're using a native crcmod installation and MD5 isn't 321 # validate if we're using a native crcmod installation and MD5 isn't
241 # offered as an alternative. 322 # offered as an alternative.
242 if UsingCrcmodExtension(crcmod): 323 if UsingCrcmodExtension(crcmod):
243 hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c') 324 hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c')
244 elif not hash_algs: 325 elif not hash_algs:
245 if check_hashes_config == CHECK_HASH_IF_FAST_ELSE_FAIL: 326 if check_hashes_config == CHECK_HASH_IF_FAST_ELSE_FAIL:
246 raise CommandException(_SLOW_CRC_EXCEPTION_TEXT) 327 raise CommandException(_SLOW_CRC_EXCEPTION_TEXT)
247 elif check_hashes_config == CHECK_HASH_IF_FAST_ELSE_SKIP: 328 elif check_hashes_config == CHECK_HASH_IF_FAST_ELSE_SKIP:
248 logger.warn(_NO_HASH_CHECK_WARNING) 329 logger.warn(_NO_HASH_CHECK_WARNING)
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after
409 self._digesters_previous_mark = self._digesters_current_mark 490 self._digesters_previous_mark = self._digesters_current_mark
410 bytes_remaining = bytes_to_read 491 bytes_remaining = bytes_to_read
411 bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE) 492 bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE)
412 while bytes_this_round: 493 while bytes_this_round:
413 data = self._orig_fp.read(bytes_this_round) 494 data = self._orig_fp.read(bytes_this_round)
414 bytes_remaining -= bytes_this_round 495 bytes_remaining -= bytes_this_round
415 for alg in self._digesters: 496 for alg in self._digesters:
416 self._digesters[alg].update(data) 497 self._digesters[alg].update(data)
417 bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE) 498 bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE)
418 self._digesters_current_mark += bytes_to_read 499 self._digesters_current_mark += bytes_to_read
OLDNEW
« no previous file with comments | « third_party/gsutil/gslib/gcs_json_media.py ('k') | third_party/gsutil/gslib/name_expansion.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698