OLD | NEW |
1 /* | 1 /* |
2 * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ | 2 * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ |
3 * instruction. | 3 * instruction. |
4 * | 4 * |
5 * A white paper describing this algorithm can be found at: | 5 * A white paper describing this algorithm can be found at: |
6 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | 6 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf |
7 * | 7 * |
8 * Copyright (C) 2013 Intel Corporation. All rights reserved. | 8 * Copyright (C) 2013 Intel Corporation. All rights reserved. |
9 * Authors: | 9 * Authors: |
10 * Wajdi Feghali <wajdi.k.feghali@intel.com> | 10 * Wajdi Feghali <wajdi.k.feghali@intel.com> |
(...skipping 265 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
276 __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; | 276 __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; |
277 | 277 |
278 CRC_LOAD(s) | 278 CRC_LOAD(s) |
279 | 279 |
280 if (len < 16) { | 280 if (len < 16) { |
281 if (len == 0) | 281 if (len == 0) |
282 return; | 282 return; |
283 goto partial; | 283 goto partial; |
284 } | 284 } |
285 | 285 |
286 algn_diff = 0 - (unsigned long)src & 0xF; | 286 algn_diff = 0 - (uintptr_t)src & 0xF; |
287 if (algn_diff) { | 287 if (algn_diff) { |
288 xmm_crc_part = _mm_loadu_si128((__m128i *)src); | 288 xmm_crc_part = _mm_loadu_si128((__m128i *)src); |
289 _mm_storeu_si128((__m128i *)dst, xmm_crc_part); | 289 _mm_storeu_si128((__m128i *)dst, xmm_crc_part); |
290 | 290 |
291 dst += algn_diff; | 291 dst += algn_diff; |
292 src += algn_diff; | 292 src += algn_diff; |
293 len -= algn_diff; | 293 len -= algn_diff; |
294 | 294 |
295 partial_fold(s, algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, | 295 partial_fold(s, algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, |
296 &xmm_crc_part); | 296 &xmm_crc_part); |
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
484 | 484 |
485 xmm_crc2 = xmm_crc3; | 485 xmm_crc2 = xmm_crc3; |
486 xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); | 486 xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); |
487 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); | 487 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); |
488 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); | 488 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); |
489 | 489 |
490 crc = _mm_extract_epi32(xmm_crc3, 2); | 490 crc = _mm_extract_epi32(xmm_crc3, 2); |
491 return ~crc; | 491 return ~crc; |
492 CRC_SAVE(s) | 492 CRC_SAVE(s) |
493 } | 493 } |
OLD | NEW |