OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 # Copyright 2014 Google Inc. All Rights Reserved. | 2 # Copyright 2014 Google Inc. All Rights Reserved. |
3 # | 3 # |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
5 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
6 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
7 # | 7 # |
8 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
9 # | 9 # |
10 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
72 file. | 72 file. |
73 """ | 73 """ |
74 | 74 |
75 | 75 |
# Configuration values for hashing.
#
# These are the values compared against the 'check_hashes' setting that is read
# from the 'GSUtil' section of the boto config; CHECK_HASH_IF_FAST_ELSE_FAIL is
# the default passed to config.get() when the setting is absent.

# In GetDownloadHashAlgs: raise CommandException when only a crc32c check is
# being considered and the crcmod extension is not in use.
CHECK_HASH_IF_FAST_ELSE_FAIL = 'if_fast_else_fail'
# In GetDownloadHashAlgs: log a warning instead of raising in that same
# situation.
CHECK_HASH_IF_FAST_ELSE_SKIP = 'if_fast_else_skip'
# NOTE(review): presumably forces hash validation even when only a slow
# implementation is available -- confirm against the code that handles it.
CHECK_HASH_ALWAYS = 'always'
# In GetDownloadHashAlgs: no hash algorithms are returned at all.
CHECK_HASH_NEVER = 'never'
81 | 81 |
# Table storing polynomial values of x^(2^k) mod CASTAGNOLI_POLY for all k < 31,
# where x^(2^k) and CASTAGNOLI_POLY are both considered polynomials. This is
# sufficient since x^(2^31) mod CASTAGNOLI_POLY = x.
#
# Each entry is an integer whose bit i holds the coefficient of x^i, so the
# first entries are simply x^1 = 2, x^2 = 4, x^4 = 16, x^8 = 256 and
# x^16 = 65536; from x^32 onwards the values have been reduced modulo
# CASTAGNOLI_POLY. Because x^(2^31) reduces back to x, an index k >= 31 can be
# mapped to k % 31, which is how _ExtendByZeros looks entries up.
X_POW_2K_TABLE = [2, 4, 16, 256, 65536, 517762881, 984302966,
                  408362264, 1503875210, 2862076957, 3884826397, 1324787473,
                  621200174, 1758783527, 1416537776, 1180494764, 648569364,
                  2521473789, 994858823, 1728245375, 3498467999, 4059169852,
                  3345064394, 2828422810, 2429203150, 3336788029, 860151998,
                  2102628683, 1033187991, 4243778976, 1123580069]
# Castagnoli polynomial and its degree.
# The value is 0x11EDC6F41: it includes the leading x^32 term (bit 32), so
# XOR-ing it into a value that has bit 32 set clears that bit, which is the
# reduction step used by _CrcMultiply.
CASTAGNOLI_POLY = 4812730177
DEGREE = 32
| 94 |
| 95 |
def ConcatCrc32c(crc_a, crc_b, num_bytes_in_b):
  """Combines two CRC32C checksums into the checksum of the joined data.

  Given crc(A), crc(B) and len(B), returns crc(concat(A, B)) without needing
  the underlying bytes. An explanation of the algorithm can be found at
  crcutil.googlecode.com/files/crc-doc.1.0.pdf.

  Args:
    crc_a: A 32-bit integer representing crc(A) with least-significant
           coefficient first.
    crc_b: A 32-bit integer representing crc(B), in the same form as crc_a.
    num_bytes_in_b: Length of B in bytes.

  Returns:
    CRC32C for concat(A, B).
  """
  if num_bytes_in_b:
    # Multiply crc(A) by x^(number of bits in B), then fold in crc(B).
    bits_in_b = 8 * num_bytes_in_b
    shifted_crc_a = _ExtendByZeros(crc_a, bits_in_b)
    return shifted_crc_a ^ crc_b

  # B is empty, so concat(A, B) is just A.
  return crc_a
| 115 |
| 116 |
def _CrcMultiply(p, q):
  """Returns the product of two polynomials modulo CASTAGNOLI_POLY.

  Both polynomials are integers whose bit i is the coefficient of x^i, and
  addition of polynomials is XOR. Only the low DEGREE bits of p take part in
  the product.

  Args:
    p: The first polynomial.
    q: The second polynomial.

  Returns:
    Result of the multiplication.
  """
  product = 0
  overflow_bit = 1 << DEGREE
  for bit_index in range(DEGREE):
    # If p has an x^bit_index term, add q * x^bit_index (the current q).
    if (p >> bit_index) & 1:
      product ^= q
    # Advance q to q * x, reducing as soon as it reaches degree DEGREE.
    q <<= 1
    if q & overflow_bit:
      q ^= CASTAGNOLI_POLY
  return product
| 138 |
| 139 |
def _ExtendByZeros(crc, num_bits):
  """Given crc representing polynomial P(x), compute P(x)*x^num_bits.

  The multiplication is done modulo CASTAGNOLI_POLY by square-and-multiply
  over the binary digits of num_bits, using the precomputed powers in
  X_POW_2K_TABLE.

  Args:
    crc: 32-bit integer crc representing polynomial P(x), with
         least-significant coefficient first.
    num_bits: Non-negative number of zero bits to extend by, i.e. the
              exponent of x that P(x) is multiplied by.

  Returns:
    P(x)*x^num_bits, in the same bit order as the crc argument.

  Raises:
    ValueError: If num_bits is negative.
  """
  if num_bits < 0:
    # A negative value never reaches 0 under >>= 1 (it settles at -1), so the
    # loop below would spin forever.
    raise ValueError('num_bits must be non-negative, got %r' % (num_bits,))

  def _ReverseBits32(value):
    return int('{0:032b}'.format(value)[::-1], 2)

  # The crc arrives least-significant coefficient first; flip it so that bit i
  # is the coefficient of x^i, which is the form _CrcMultiply works with.
  crc = _ReverseBits32(crc)
  i = 0

  while num_bits != 0:
    if num_bits & 1:
      # Bit i of num_bits contributes a factor of x^(2^i). The table wraps
      # around because x^(2^31) mod CASTAGNOLI_POLY = x.
      crc = _CrcMultiply(crc, X_POW_2K_TABLE[i % len(X_POW_2K_TABLE)])
    i += 1
    num_bits >>= 1

  # Flip back to the caller's bit order.
  return _ReverseBits32(crc)
| 162 |
82 | 163 |
83 def _CalculateHashFromContents(fp, hash_alg): | 164 def _CalculateHashFromContents(fp, hash_alg): |
84 """Calculates a base64 digest of the contents of a seekable stream. | 165 """Calculates a base64 digest of the contents of a seekable stream. |
85 | 166 |
86 This function resets the file pointer to position 0. | 167 This function resets the file pointer to position 0. |
87 | 168 |
88 Args: | 169 Args: |
89 fp: An already-open file object. | 170 fp: An already-open file object. |
90 hash_alg: Instance of hashing class initialized to start state. | 171 hash_alg: Instance of hashing class initialized to start state. |
91 | 172 |
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
205 Returns: | 286 Returns: |
206 dict of (algorithm_name: hash_algorithm) | 287 dict of (algorithm_name: hash_algorithm) |
207 """ | 288 """ |
208 check_hashes_config = config.get( | 289 check_hashes_config = config.get( |
209 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL) | 290 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL) |
210 if check_hashes_config == 'never': | 291 if check_hashes_config == 'never': |
211 return {} | 292 return {} |
212 return {'md5': md5} | 293 return {'md5': md5} |
213 | 294 |
214 | 295 |
215 def GetDownloadHashAlgs(logger, src_has_md5=False, src_has_crc32c=False): | 296 def GetDownloadHashAlgs(logger, consider_md5=False, consider_crc32c=False): |
216 """Returns a dict of hash algorithms for validating an object. | 297 """Returns a dict of hash algorithms for validating an object. |
217 | 298 |
218 Args: | 299 Args: |
219 logger: logging.Logger for outputting log messages. | 300 logger: logging.Logger for outputting log messages. |
220 src_has_md5: If True, source object has an md5 hash. | 301 consider_md5: If True, consider using a md5 hash. |
221 src_has_crc32c: If True, source object has a crc32c hash. | 302 consider_crc32c: If True, consider using a crc32c hash. |
222 | 303 |
223 Returns: | 304 Returns: |
224 Dict of (string, hash algorithm). | 305 Dict of (string, hash algorithm). |
225 | 306 |
226 Raises: | 307 Raises: |
227 CommandException if hash algorithms satisfying the boto config file | 308 CommandException if hash algorithms satisfying the boto config file |
228 cannot be returned. | 309 cannot be returned. |
229 """ | 310 """ |
230 check_hashes_config = config.get( | 311 check_hashes_config = config.get( |
231 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL) | 312 'GSUtil', 'check_hashes', CHECK_HASH_IF_FAST_ELSE_FAIL) |
232 if check_hashes_config == CHECK_HASH_NEVER: | 313 if check_hashes_config == CHECK_HASH_NEVER: |
233 return {} | 314 return {} |
234 | 315 |
235 hash_algs = {} | 316 hash_algs = {} |
236 if src_has_md5: | 317 if consider_md5: |
237 hash_algs['md5'] = md5 | 318 hash_algs['md5'] = md5 |
238 elif src_has_crc32c: | 319 elif consider_crc32c: |
239 # If the cloud provider supplies a CRC, we'll compute a checksum to | 320 # If the cloud provider supplies a CRC, we'll compute a checksum to |
240 # validate if we're using a native crcmod installation and MD5 isn't | 321 # validate if we're using a native crcmod installation and MD5 isn't |
241 # offered as an alternative. | 322 # offered as an alternative. |
242 if UsingCrcmodExtension(crcmod): | 323 if UsingCrcmodExtension(crcmod): |
243 hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c') | 324 hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c') |
244 elif not hash_algs: | 325 elif not hash_algs: |
245 if check_hashes_config == CHECK_HASH_IF_FAST_ELSE_FAIL: | 326 if check_hashes_config == CHECK_HASH_IF_FAST_ELSE_FAIL: |
246 raise CommandException(_SLOW_CRC_EXCEPTION_TEXT) | 327 raise CommandException(_SLOW_CRC_EXCEPTION_TEXT) |
247 elif check_hashes_config == CHECK_HASH_IF_FAST_ELSE_SKIP: | 328 elif check_hashes_config == CHECK_HASH_IF_FAST_ELSE_SKIP: |
248 logger.warn(_NO_HASH_CHECK_WARNING) | 329 logger.warn(_NO_HASH_CHECK_WARNING) |
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
409 self._digesters_previous_mark = self._digesters_current_mark | 490 self._digesters_previous_mark = self._digesters_current_mark |
410 bytes_remaining = bytes_to_read | 491 bytes_remaining = bytes_to_read |
411 bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE) | 492 bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE) |
412 while bytes_this_round: | 493 while bytes_this_round: |
413 data = self._orig_fp.read(bytes_this_round) | 494 data = self._orig_fp.read(bytes_this_round) |
414 bytes_remaining -= bytes_this_round | 495 bytes_remaining -= bytes_this_round |
415 for alg in self._digesters: | 496 for alg in self._digesters: |
416 self._digesters[alg].update(data) | 497 self._digesters[alg].update(data) |
417 bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE) | 498 bytes_this_round = min(bytes_remaining, TRANSFER_BUFFER_SIZE) |
418 self._digesters_current_mark += bytes_to_read | 499 self._digesters_current_mark += bytes_to_read |
OLD | NEW |