Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2009 The Android Open Source Project | 2 * Copyright 2009 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include <emmintrin.h> | 8 #include <emmintrin.h> |
| 9 #include "SkUtils_opts_SSE2.h" | 9 #include "SkUtils_opts_SSE2.h" |
| 10 | 10 |
| (...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 60 d += 4; | 60 d += 4; |
| 61 count -= 16; | 61 count -= 16; |
| 62 } | 62 } |
| 63 dst = reinterpret_cast<uint32_t*>(d); | 63 dst = reinterpret_cast<uint32_t*>(d); |
| 64 } | 64 } |
| 65 while (count > 0) { | 65 while (count > 0) { |
| 66 *dst++ = value; | 66 *dst++ = value; |
| 67 --count; | 67 --count; |
| 68 } | 68 } |
| 69 } | 69 } |
| 70 | |
| 71 void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count) | |
| 72 { | |
| 73 if (count >= 16) { | |
| 74 while (((size_t)dst) & 0x0F) { | |
| 75 *dst++ = *src++; | |
| 76 --count; | |
| 77 } | |
| 78 __m128i *d = reinterpret_cast<__m128i*>(dst); | |
| 79 const __m128i *s = reinterpret_cast<const __m128i*>(src); | |
| 80 while (count >= 16) { | |
| 81 __m128i src_pixel1 = _mm_loadu_si128(s++); | |
|
mtklein
2014/05/15 15:38:30
src_pixelN seems like a misleading name, as it's r
qiankun
2014/05/20 09:35:30
Done.
| |
| 82 __m128i src_pixel2 = _mm_loadu_si128(s++); | |
| 83 __m128i src_pixel3 = _mm_loadu_si128(s++); | |
| 84 __m128i src_pixel4 = _mm_loadu_si128(s++); | |
| 85 | |
| 86 _mm_store_si128(d , src_pixel1); | |
|
mtklein
2014/05/15 15:38:30
Any chance you compared _mm_store and _mm_stream h
mtklein
2014/05/15 15:38:30
For symmetry with s, maybe _mm_store_si128(d++, ..
qiankun
2014/05/20 09:35:30
Done.
qiankun
2014/05/20 09:35:30
At my side, _mm_store is better than _mm_stream to
| |
| 87 _mm_store_si128(d + 1, src_pixel2); | |
| 88 _mm_store_si128(d + 2, src_pixel3); | |
| 89 _mm_store_si128(d + 3, src_pixel4); | |
| 90 d += 4; | |
| 91 count -= 16; | |
| 92 } | |
| 93 dst = reinterpret_cast<uint32_t*>(d); | |
| 94 src = reinterpret_cast<const uint32_t*>(s); | |
| 95 } | |
| 96 while (count > 0) { | |
| 97 *dst++ = *src++; | |
| 98 --count; | |
| 99 } | |
| 100 } | |
| OLD | NEW |