Index: src/opts/SkUtils_opts_SSE2.cpp |
diff --git a/src/opts/SkUtils_opts_SSE2.cpp b/src/opts/SkUtils_opts_SSE2.cpp |
index a3c5aa5dfd7f1cf417abda23f10d662d80bf8045..205cd6f3a2eea48253edd9b75086b7c784df057a 100644 |
--- a/src/opts/SkUtils_opts_SSE2.cpp |
+++ b/src/opts/SkUtils_opts_SSE2.cpp |
@@ -67,3 +67,34 @@ void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count) |
--count; |
} |
} |
+ |
/**
 *  Copies `count` 32-bit words from `src` to `dst` using SSE2 loads and stores.
 *
 *  @param dst    Destination buffer; must have room for `count` words.
 *  @param src    Source buffer; must hold at least `count` words. It may have any
 *                alignment relative to 16 bytes (it is read with unaligned loads).
 *  @param count  Number of 32-bit words to copy. Values <= 0 copy nothing.
 *
 *  The copy proceeds front to back, so the buffers are expected not to overlap in a
 *  way that a forward copy would corrupt.
 */
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count)
{
    // The head loop below advances dst by one word (4 bytes) per step, so it can only
    // ever land on a 16-byte boundary when dst starts out 4-byte aligned. For any other
    // dst the loop would never terminate, so such pointers skip the SIMD path entirely
    // and are handled by the scalar loop at the bottom.
    const bool dstIsWordAligned = (reinterpret_cast<size_t>(dst) & 0x03) == 0;

    if (count >= 16 && dstIsWordAligned) {
        // Head: copy single words until dst sits on a 16-byte boundary. This takes at
        // most three iterations, so count is still >= 13 afterwards.
        while (reinterpret_cast<size_t>(dst) & 0x0F) {
            *dst++ = *src++;
            --count;
        }

        __m128i *d = reinterpret_cast<__m128i*>(dst);
        const __m128i *s = reinterpret_cast<const __m128i*>(src);

        // Body: move 16 words (64 bytes) per iteration. Only dst was aligned above, so
        // the loads must be the unaligned variant while the stores can be aligned.
        // NOTE(review): plain aligned stores were reported to perform better than
        // non-temporal _mm_stream_si128 in the author's measurements; re-benchmark
        // before switching.
        while (count >= 16) {
            // Each register carries four 32-bit words.
            __m128i chunk0 = _mm_loadu_si128(s++);
            __m128i chunk1 = _mm_loadu_si128(s++);
            __m128i chunk2 = _mm_loadu_si128(s++);
            __m128i chunk3 = _mm_loadu_si128(s++);

            _mm_store_si128(d++, chunk0);
            _mm_store_si128(d++, chunk1);
            _mm_store_si128(d++, chunk2);
            _mm_store_si128(d++, chunk3);

            count -= 16;
        }

        // Hand the advanced positions back to the scalar pointers for the tail.
        dst = reinterpret_cast<uint32_t*>(d);
        src = reinterpret_cast<const uint32_t*>(s);
    }

    // Tail (or the whole copy when the SIMD path was not taken): one word at a time.
    while (count > 0) {
        *dst++ = *src++;
        --count;
    }
}