Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1066)

Side by Side Diff: bench/MemcpyBench.cpp

Issue 512503002: Clean up some benches that answer questions we're no longer asking. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | bench/MemoryBench.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 Google Inc. 2 * Copyright 2014 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "Benchmark.h" 8 #include "Benchmark.h"
9 #include "SkRandom.h" 9 #include "SkRandom.h"
10 #include "SkTemplates.h" 10 #include "SkTemplates.h"
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
60 // Let the libc developers do what they think is best. 60 // Let the libc developers do what they think is best.
61 static void memcpy32_memcpy(uint32_t* dst, const uint32_t* src, int count) { 61 static void memcpy32_memcpy(uint32_t* dst, const uint32_t* src, int count) {
62 memcpy(dst, src, sizeof(uint32_t) * count); 62 memcpy(dst, src, sizeof(uint32_t) * count);
63 } 63 }
64 BENCH(memcpy32_memcpy, 10) 64 BENCH(memcpy32_memcpy, 10)
65 BENCH(memcpy32_memcpy, 100) 65 BENCH(memcpy32_memcpy, 100)
66 BENCH(memcpy32_memcpy, 1000) 66 BENCH(memcpy32_memcpy, 1000)
67 BENCH(memcpy32_memcpy, 10000) 67 BENCH(memcpy32_memcpy, 10000)
68 BENCH(memcpy32_memcpy, 100000) 68 BENCH(memcpy32_memcpy, 100000)
69 69
70 // Let the compiler's autovectorizer do what it thinks is best.
71 static void memcpy32_autovectorize(uint32_t* dst, const uint32_t* src, int count ) {
72 while (count --> 0) {
73 *dst++ = *src++;
74 }
75 }
76 BENCH(memcpy32_autovectorize, 10)
77 BENCH(memcpy32_autovectorize, 100)
78 BENCH(memcpy32_autovectorize, 1000)
79 BENCH(memcpy32_autovectorize, 10000)
80 BENCH(memcpy32_autovectorize, 100000)
81
82 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
83
84 // Align dst to 16 bytes, then use aligned stores. src isn't aligned, so use unaligned loads.
85 static void memcpy32_sse2_align(uint32_t* dst, const uint32_t* src, int count) {
86 if (count >= 16) {
87 while (uintptr_t(dst) & 0xF) {
88 *dst++ = *src++;
89 count--;
90 }
91
92 __m128i* dst128 = reinterpret_cast<__m128i*>(dst);
93 const __m128i* src128 = reinterpret_cast<const __m128i*>(src);
94 dst += 16 * (count / 16);
95 src += 16 * (count / 16);
96 while (count >= 16) {
97 __m128i a = _mm_loadu_si128(src128++);
98 __m128i b = _mm_loadu_si128(src128++);
99 __m128i c = _mm_loadu_si128(src128++);
100 __m128i d = _mm_loadu_si128(src128++);
101
102 _mm_store_si128(dst128++, a);
103 _mm_store_si128(dst128++, b);
104 _mm_store_si128(dst128++, c);
105 _mm_store_si128(dst128++, d);
106
107 count -= 16;
108 }
109 }
110
111 while (count --> 0) {
112 *dst++ = *src++;
113 }
114 }
115 BENCH(memcpy32_sse2_align, 10)
116 BENCH(memcpy32_sse2_align, 100)
117 BENCH(memcpy32_sse2_align, 1000)
118 BENCH(memcpy32_sse2_align, 10000)
119 BENCH(memcpy32_sse2_align, 100000)
120
121 // Leave both dst and src unaligned, and so use unaligned stores for dst and unaligned loads for src.
122 static void memcpy32_sse2_unalign(uint32_t* dst, const uint32_t* src, int count) {
123 __m128i* dst128 = reinterpret_cast<__m128i*>(dst);
124 const __m128i* src128 = reinterpret_cast<const __m128i*>(src);
125 dst += 16 * (count / 16);
126 src += 16 * (count / 16);
127 while (count >= 16) {
128 __m128i a = _mm_loadu_si128(src128++);
129 __m128i b = _mm_loadu_si128(src128++);
130 __m128i c = _mm_loadu_si128(src128++);
131 __m128i d = _mm_loadu_si128(src128++);
132
133 _mm_storeu_si128(dst128++, a);
134 _mm_storeu_si128(dst128++, b);
135 _mm_storeu_si128(dst128++, c);
136 _mm_storeu_si128(dst128++, d);
137
138 count -= 16;
139 }
140
141 while (count --> 0) {
142 *dst++ = *src++;
143 }
144 }
145 BENCH(memcpy32_sse2_unalign, 10)
146 BENCH(memcpy32_sse2_unalign, 100)
147 BENCH(memcpy32_sse2_unalign, 1000)
148 BENCH(memcpy32_sse2_unalign, 10000)
149 BENCH(memcpy32_sse2_unalign, 100000)
150
151 // Test our chosen best, from SkUtils.h 70 // Test our chosen best, from SkUtils.h
152 BENCH(sk_memcpy32, 10) 71 BENCH(sk_memcpy32, 10)
153 BENCH(sk_memcpy32, 100) 72 BENCH(sk_memcpy32, 100)
154 BENCH(sk_memcpy32, 1000) 73 BENCH(sk_memcpy32, 1000)
155 BENCH(sk_memcpy32, 10000) 74 BENCH(sk_memcpy32, 10000)
156 BENCH(sk_memcpy32, 100000) 75 BENCH(sk_memcpy32, 100000)
157 76
158 #endif // SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
159
160 #undef BENCH 77 #undef BENCH
OLDNEW
« no previous file with comments | « no previous file | bench/MemoryBench.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698