Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(424)

Unified Diff: src/opts/SkUtils_opts_SSE2.cpp

Issue 285313002: SSE2 implementation of memcpy32 (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/opts/SkUtils_opts_SSE2.cpp
diff --git a/src/opts/SkUtils_opts_SSE2.cpp b/src/opts/SkUtils_opts_SSE2.cpp
index a3c5aa5dfd7f1cf417abda23f10d662d80bf8045..205cd6f3a2eea48253edd9b75086b7c784df057a 100644
--- a/src/opts/SkUtils_opts_SSE2.cpp
+++ b/src/opts/SkUtils_opts_SSE2.cpp
@@ -67,3 +67,34 @@ void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count)
--count;
}
}
+
+void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count)  // Copies `count` 32-bit values from src to dst; uses 128-bit SSE2 moves for the bulk when count >= 16.
+{
+ if (count >= 16) {  // Vector path is taken only when at least one full 16-element batch was requested.
+ while (((size_t)dst) & 0x0F) {  // Scalar copies until dst is 16-byte aligned. NOTE(review): this only terminates if dst is 4-byte aligned, since each step advances dst by 4 bytes -- confirm callers guarantee that.
+ *dst++ = *src++;
+ --count;  // No bounds check needed here for a 4-byte-aligned dst: at most 3 iterations run and count started at >= 16.
+ }
+ __m128i *d = reinterpret_cast<__m128i*>(dst);  // dst is 16-byte aligned at this point.
+ const __m128i *s = reinterpret_cast<const __m128i*>(src);  // Nothing above aligns src, hence the unaligned loads below.
+ while (count >= 16) {  // Each iteration moves four 128-bit vectors, i.e. 16 uint32_t values.
+ __m128i src_pixel1 = _mm_loadu_si128(s++);  // Unaligned load; s advances by one vector per load.
mtklein 2014/05/15 15:38:30 src_pixelN seems like a misleading name, as it's r
qiankun 2014/05/20 09:35:30 Done.
+ __m128i src_pixel2 = _mm_loadu_si128(s++);
+ __m128i src_pixel3 = _mm_loadu_si128(s++);
+ __m128i src_pixel4 = _mm_loadu_si128(s++);
+
+ _mm_store_si128(d , src_pixel1);  // Aligned store; relies on the alignment loop above having aligned dst.
mtklein 2014/05/15 15:38:30 Any chance you compared _mm_store and _mm_stream h
mtklein 2014/05/15 15:38:30 For symmetry with s, maybe _mm_store_si128(d++, ..
qiankun 2014/05/20 09:35:30 Done.
qiankun 2014/05/20 09:35:30 At my side, _mm_store is better than _mm_stream to
+ _mm_store_si128(d + 1, src_pixel2);
+ _mm_store_si128(d + 2, src_pixel3);
+ _mm_store_si128(d + 3, src_pixel4);
+ d += 4;  // Step past the four vectors just written (s was already advanced by the loads).
+ count -= 16;
+ }
+ dst = reinterpret_cast<uint32_t*>(d);  // Hand the advanced positions back to the scalar tail loop below.
+ src = reinterpret_cast<const uint32_t*>(s);
+ }
+ while (count > 0) {  // Scalar tail: the fewer-than-16 leftovers, or the entire copy when count < 16; does nothing for count <= 0.
+ *dst++ = *src++;
+ --count;
+ }
+}

Powered by Google App Engine
This is Rietveld 408576698