Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/opts/SkUtils_opts_SSE2.cpp

Issue 285313002: SSE2 implementation of memcpy32 (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright 2009 The Android Open Source Project 2 * Copyright 2009 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include <emmintrin.h> 8 #include <emmintrin.h>
9 #include "SkUtils_opts_SSE2.h" 9 #include "SkUtils_opts_SSE2.h"
10 10
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
60 d += 4; 60 d += 4;
61 count -= 16; 61 count -= 16;
62 } 62 }
63 dst = reinterpret_cast<uint32_t*>(d); 63 dst = reinterpret_cast<uint32_t*>(d);
64 } 64 }
65 while (count > 0) { 65 while (count > 0) {
66 *dst++ = value; 66 *dst++ = value;
67 --count; 67 --count;
68 } 68 }
69 } 69 }
70
71 void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count)
72 {
73 if (count >= 16) {
74 while (((size_t)dst) & 0x0F) {
75 *dst++ = *src++;
76 --count;
77 }
78 __m128i *d = reinterpret_cast<__m128i*>(dst);
79 const __m128i *s = reinterpret_cast<const __m128i*>(src);
80 while (count >= 16) {
81 __m128i src_pixel1 = _mm_loadu_si128(s++);
mtklein 2014/05/15 15:38:30 src_pixelN seems like a misleading name, as it's r
qiankun 2014/05/20 09:35:30 Done.
82 __m128i src_pixel2 = _mm_loadu_si128(s++);
83 __m128i src_pixel3 = _mm_loadu_si128(s++);
84 __m128i src_pixel4 = _mm_loadu_si128(s++);
85
86 _mm_store_si128(d , src_pixel1);
mtklein 2014/05/15 15:38:30 Any chance you compared _mm_store and _mm_stream h
mtklein 2014/05/15 15:38:30 For symmetry with s, maybe _mm_store_si128(d++, ..
qiankun 2014/05/20 09:35:30 Done.
qiankun 2014/05/20 09:35:30 At my side, _mm_store is better than _mm_stream to
87 _mm_store_si128(d + 1, src_pixel2);
88 _mm_store_si128(d + 2, src_pixel3);
89 _mm_store_si128(d + 3, src_pixel4);
90 d += 4;
91 count -= 16;
92 }
93 dst = reinterpret_cast<uint32_t*>(d);
94 src = reinterpret_cast<const uint32_t*>(s);
95 }
96 while (count > 0) {
97 *dst++ = *src++;
98 --count;
99 }
100 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698