Chromium Code Reviews| Index: src/opts/SkUtils_opts_SSE2.cpp |
| diff --git a/src/opts/SkUtils_opts_SSE2.cpp b/src/opts/SkUtils_opts_SSE2.cpp |
| index a3c5aa5dfd7f1cf417abda23f10d662d80bf8045..205cd6f3a2eea48253edd9b75086b7c784df057a 100644 |
| --- a/src/opts/SkUtils_opts_SSE2.cpp |
| +++ b/src/opts/SkUtils_opts_SSE2.cpp |
| @@ -67,3 +67,34 @@ void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count) |
| --count; |
| } |
| } |
| + |
| +void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count) |
| +{ |
| + if (count >= 16) { |
| + while (((size_t)dst) & 0x0F) { |
| + *dst++ = *src++; |
| + --count; |
| + } |
| + __m128i *d = reinterpret_cast<__m128i*>(dst); |
| + const __m128i *s = reinterpret_cast<const __m128i*>(src); |
| + while (count >= 16) { |
| + __m128i src_pixel1 = _mm_loadu_si128(s++); |
|
mtklein
2014/05/15 15:38:30
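src_pixelN seems like a misleading name, as it's r… [comment truncated in this view; note that each __m128i loaded here holds four 32-bit values rather than a single pixel]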
qiankun
2014/05/20 09:35:30
Done.
|
| + __m128i src_pixel2 = _mm_loadu_si128(s++); |
| + __m128i src_pixel3 = _mm_loadu_si128(s++); |
| + __m128i src_pixel4 = _mm_loadu_si128(s++); |
| + |
| + _mm_store_si128(d , src_pixel1); |
|
mtklein
2014/05/15 15:38:30
Any chance you compared _mm_store and _mm_stream h… [comment truncated in this view]
mtklein
2014/05/15 15:38:30
For symmetry with s, maybe _mm_store_si128(d++, ...)… [comment truncated in this view]
qiankun
2014/05/20 09:35:30
Done.
qiankun
2014/05/20 09:35:30
On my side, _mm_store is better than _mm_stream to… [comment truncated in this view]
|
| + _mm_store_si128(d + 1, src_pixel2); |
| + _mm_store_si128(d + 2, src_pixel3); |
| + _mm_store_si128(d + 3, src_pixel4); |
| + d += 4; |
| + count -= 16; |
| + } |
| + dst = reinterpret_cast<uint32_t*>(d); |
| + src = reinterpret_cast<const uint32_t*>(s); |
| + } |
| + while (count > 0) { |
| + *dst++ = *src++; |
| + --count; |
| + } |
| +} |