Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: source/cpu_id.cc

Issue 1505673003: Optimize yuv alpha blend AVX2 code to do 32 pixels at time. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: gcc port of avx2 that does 32 pixels Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | source/planar_functions.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
99 #endif 99 #endif
100 100
101 // TODO(fbarchard): Enable xgetbv when validator supports it. 101 // TODO(fbarchard): Enable xgetbv when validator supports it.
102 #if (defined(_M_IX86) || defined(_M_X64) || \ 102 #if (defined(_M_IX86) || defined(_M_X64) || \
103 defined(__i386__) || defined(__x86_64__)) && \ 103 defined(__i386__) || defined(__x86_64__)) && \
104 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) 104 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
105 #define HAS_XGETBV 105 #define HAS_XGETBV
106 // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. 106 // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
107 int GetXCR0() { 107 int GetXCR0() {
108 uint32 xcr0 = 0u; 108 uint32 xcr0 = 0u;
109 #if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) 109 #if defined(_MSC_VER)
110 //0 && (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219 )
harryjin 2015/12/08 07:48:58 Is this commented-out condition left over? Remove?
fbarchard 2015/12/08 19:24:04 Done.
110 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. 111 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
112 printf("xgetbv xcr0 %d\n", xcr0);
harryjin 2015/12/08 07:48:58 Remove the printf?
fbarchard 2015/12/08 19:24:04 Done.
111 #elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) 113 #elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__)
112 __asm { 114 __asm {
113 xor ecx, ecx // xcr 0 115 xor ecx, ecx // xcr 0
114 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier. 116 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
115 mov xcr0, eax 117 mov xcr0, eax
116 } 118 }
119 printf("asm xcr0 %d\n", xcr0);
harryjin 2015/12/08 07:48:58 Same here: remove the printf?
fbarchard 2015/12/08 19:24:04 Done.
117 #elif defined(__i386__) || defined(__x86_64__) 120 #elif defined(__i386__) || defined(__x86_64__)
118 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); 121 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
119 #endif // defined(__i386__) || defined(__x86_64__) 122 #endif // defined(__i386__) || defined(__x86_64__)
120 return xcr0; 123 return xcr0;
121 } 124 }
122 #endif // defined(_M_IX86) || defined(_M_X64) .. 125 #endif // defined(_M_IX86) || defined(_M_X64) ..
123 126
124 // based on libvpx arm_cpudetect.c 127 // based on libvpx arm_cpudetect.c
125 // For Arm, but public to allow testing on any CPU 128 // For Arm, but public to allow testing on any CPU
126 LIBYUV_API SAFEBUFFERS 129 LIBYUV_API SAFEBUFFERS
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
190 cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | 193 cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
191 ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | 194 ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
192 ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | 195 ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
193 ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | 196 ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
194 ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | 197 ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
195 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | 198 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
196 kCpuHasX86; 199 kCpuHasX86;
197 200
198 #ifdef HAS_XGETBV 201 #ifdef HAS_XGETBV
199 // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv 202 // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv
200 if ((cpu_info1[2] & 0x1c000000) == 0x1c000000 && // AVX and OSXSave 203 if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
201 (GetXCR0() & 6) == 6) { // Test OD saves YMM registers 204 ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
202 cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX; 205 cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX;
203 206
204 // Detect AVX512bw 207 // Detect AVX512bw
205 if ((GetXCR0() & 0xe0) == 0xe0) { 208 if ((GetXCR0() & 0xe0) == 0xe0) {
206 cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0; 209 cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
207 } 210 }
208 } 211 }
209 #endif 212 #endif
210 213
211 // Environment variable overrides for testing. 214 // Environment variable overrides for testing.
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
285 // Note that use of this function is not thread safe. 288 // Note that use of this function is not thread safe.
286 LIBYUV_API 289 LIBYUV_API
287 void MaskCpuFlags(int enable_flags) { 290 void MaskCpuFlags(int enable_flags) {
288 cpu_info_ = InitCpuFlags() & enable_flags; 291 cpu_info_ = InitCpuFlags() & enable_flags;
289 } 292 }
290 293
291 #ifdef __cplusplus 294 #ifdef __cplusplus
292 } // extern "C" 295 } // extern "C"
293 } // namespace libyuv 296 } // namespace libyuv
294 #endif 297 #endif
OLDNEW
« no previous file with comments | « no previous file | source/planar_functions.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698