OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ | 2 * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ |
3 * instruction. | 3 * instruction. |
4 * | 4 * |
5 * A white paper describing this algorithm can be found at: | 5 * A white paper describing this algorithm can be found at: |
6 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | 6 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf |
7 * | 7 * |
8 * Copyright (C) 2013 Intel Corporation. All rights reserved. | 8 * Copyright (C) 2013 Intel Corporation. All rights reserved. |
9 * Authors: | 9 * Authors: |
10 * Wajdi Feghali <wajdi.k.feghali@intel.com> | 10 * Wajdi Feghali <wajdi.k.feghali@intel.com> |
(...skipping 265 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
276 __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; | 276 __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; |
277 | 277 |
278 CRC_LOAD(s) | 278 CRC_LOAD(s) |
279 | 279 |
280 if (len < 16) { | 280 if (len < 16) { |
281 if (len == 0) | 281 if (len == 0) |
282 return; | 282 return; |
283 goto partial; | 283 goto partial; |
284 } | 284 } |
285 | 285 |
286 algn_diff = 0 - (unsigned long)src & 0xF; | 286 // Double cast to avoid truncation errors in 64-bit builds by making the |
287 // truncation explicit. | |
288 algn_diff = 0 - (unsigned long)(size_t)src & 0xF; | |
Peter Kasting
2015/10/05 03:56:04
I'm sorry, I don't understand what this is doing.
brucedawson
2015/10/05 16:03:23
The "known-negative in an unsigned" is definitely
Peter Kasting
2015/10/05 18:33:03
I think changing the source is OK, but maybe one o
| |
287 if (algn_diff) { | 289 if (algn_diff) { |
288 xmm_crc_part = _mm_loadu_si128((__m128i *)src); | 290 xmm_crc_part = _mm_loadu_si128((__m128i *)src); |
289 _mm_storeu_si128((__m128i *)dst, xmm_crc_part); | 291 _mm_storeu_si128((__m128i *)dst, xmm_crc_part); |
290 | 292 |
291 dst += algn_diff; | 293 dst += algn_diff; |
292 src += algn_diff; | 294 src += algn_diff; |
293 len -= algn_diff; | 295 len -= algn_diff; |
294 | 296 |
295 partial_fold(s, algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, | 297 partial_fold(s, algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, |
296 &xmm_crc_part); | 298 &xmm_crc_part); |
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
484 | 486 |
485 xmm_crc2 = xmm_crc3; | 487 xmm_crc2 = xmm_crc3; |
486 xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); | 488 xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); |
487 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); | 489 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); |
488 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); | 490 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); |
489 | 491 |
490 crc = _mm_extract_epi32(xmm_crc3, 2); | 492 crc = _mm_extract_epi32(xmm_crc3, 2); |
491 return ~crc; | 493 return ~crc; |
492 CRC_SAVE(s) | 494 CRC_SAVE(s) |
493 } | 495 } |
OLD | NEW |