| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ | 2 * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ |
| 3 * instruction. | 3 * instruction. |
| 4 * | 4 * |
| 5 * A white paper describing this algorithm can be found at: | 5 * A white paper describing this algorithm can be found at: |
| 6 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | 6 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf |
| 7 * | 7 * |
| 8 * Copyright (C) 2013 Intel Corporation. All rights reserved. | 8 * Copyright (C) 2013 Intel Corporation. All rights reserved. |
| 9 * Authors: | 9 * Authors: |
| 10 * Wajdi Feghali <wajdi.k.feghali@intel.com> | 10 * Wajdi Feghali <wajdi.k.feghali@intel.com> |
| (...skipping 265 matching lines...) |
| 276 __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; | 276 __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; |
| 277 | 277 |
| 278 CRC_LOAD(s) | 278 CRC_LOAD(s) |
| 279 | 279 |
| 280 if (len < 16) { | 280 if (len < 16) { |
| 281 if (len == 0) | 281 if (len == 0) |
| 282 return; | 282 return; |
| 283 goto partial; | 283 goto partial; |
| 284 } | 284 } |
| 285 | 285 |
| 286 algn_diff = 0 - (unsigned long)src & 0xF; | 286 algn_diff = 0 - (uintptr_t)src & 0xF; |
| 287 if (algn_diff) { | 287 if (algn_diff) { |
| 288 xmm_crc_part = _mm_loadu_si128((__m128i *)src); | 288 xmm_crc_part = _mm_loadu_si128((__m128i *)src); |
| 289 _mm_storeu_si128((__m128i *)dst, xmm_crc_part); | 289 _mm_storeu_si128((__m128i *)dst, xmm_crc_part); |
| 290 | 290 |
| 291 dst += algn_diff; | 291 dst += algn_diff; |
| 292 src += algn_diff; | 292 src += algn_diff; |
| 293 len -= algn_diff; | 293 len -= algn_diff; |
| 294 | 294 |
| 295 partial_fold(s, algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, | 295 partial_fold(s, algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, |
| 296 &xmm_crc_part); | 296 &xmm_crc_part); |
| (...skipping 187 matching lines...) |
| 484 | 484 |
| 485 xmm_crc2 = xmm_crc3; | 485 xmm_crc2 = xmm_crc3; |
| 486 xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); | 486 xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); |
| 487 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); | 487 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); |
| 488 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); | 488 xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); |
| 489 | 489 |
| 490 crc = _mm_extract_epi32(xmm_crc3, 2); | 490 crc = _mm_extract_epi32(xmm_crc3, 2); |
| 491 return ~crc; | 491 return ~crc; |
| 492 CRC_SAVE(s) | 492 CRC_SAVE(s) |
| 493 } | 493 } |
| OLD | NEW |