Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(164)

Side by Side Diff: Source/platform/audio/VectorMath.cpp

Issue 307963005: Fix incorrectly disabled SSE2 WebAudio optimizations for Windows. (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Fix checks. Rebaseline. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Source/platform/audio/SincResampler.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2010, Google Inc. All rights reserved. 2 * Copyright (C) 2010, Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions 5 * modification, are permitted provided that the following conditions
6 * are met: 6 * are met:
7 * 1. Redistributions of source code must retain the above copyright 7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer. 8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright 9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the 10 * notice, this list of conditions and the following disclaimer in the
(...skipping 17 matching lines...) Expand all
28 28
29 #include "platform/audio/VectorMath.h" 29 #include "platform/audio/VectorMath.h"
30 #include "wtf/Assertions.h" 30 #include "wtf/Assertions.h"
31 #include "wtf/CPU.h" 31 #include "wtf/CPU.h"
32 #include <stdint.h> 32 #include <stdint.h>
33 33
34 #if OS(MACOSX) 34 #if OS(MACOSX)
35 #include <Accelerate/Accelerate.h> 35 #include <Accelerate/Accelerate.h>
36 #endif 36 #endif
37 37
38 #ifdef __SSE2__ 38 #if CPU(X86) || CPU(X86_64)
39 #include <emmintrin.h> 39 #include <emmintrin.h>
40 #endif 40 #endif
41 41
42 #if HAVE(ARM_NEON_INTRINSICS) 42 #if HAVE(ARM_NEON_INTRINSICS)
43 #include <arm_neon.h> 43 #include <arm_neon.h>
44 #endif 44 #endif
45 45
46 #include <math.h> 46 #include <math.h>
47 #include <algorithm> 47 #include <algorithm>
48 48
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
128 #pragma float_control(except, off, push) 128 #pragma float_control(except, off, push)
129 #pragma float_control(precise, off, push) 129 #pragma float_control(precise, off, push)
130 #pragma fp_contract(on) 130 #pragma fp_contract(on)
131 #pragma fenv_access(off) 131 #pragma fenv_access(off)
132 #endif 132 #endif
133 133
134 void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) 134 void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
135 { 135 {
136 int n = framesToProcess; 136 int n = framesToProcess;
137 137
138 #ifdef __SSE2__ 138 #if CPU(X86) || CPU(X86_64)
139 if ((sourceStride == 1) && (destStride == 1)) { 139 if ((sourceStride == 1) && (destStride == 1)) {
140 float k = *scale; 140 float k = *scale;
141 141
142 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. 142 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
143 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { 143 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
144 *destP += k * *sourceP; 144 *destP += k * *sourceP;
145 sourceP++; 145 sourceP++;
146 destP++; 146 destP++;
147 n--; 147 n--;
148 } 148 }
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
201 sourceP += sourceStride; 201 sourceP += sourceStride;
202 destP += destStride; 202 destP += destStride;
203 n--; 203 n--;
204 } 204 }
205 } 205 }
206 206
207 void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) 207 void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
208 { 208 {
209 int n = framesToProcess; 209 int n = framesToProcess;
210 210
211 #ifdef __SSE2__ 211 #if CPU(X86) || CPU(X86_64)
212 if ((sourceStride == 1) && (destStride == 1)) { 212 if ((sourceStride == 1) && (destStride == 1)) {
213 float k = *scale; 213 float k = *scale;
214 214
215 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. 215 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
216 while ((reinterpret_cast<size_t>(sourceP) & 0x0F) && n) { 216 while ((reinterpret_cast<size_t>(sourceP) & 0x0F) && n) {
217 *destP = k * *sourceP; 217 *destP = k * *sourceP;
218 sourceP++; 218 sourceP++;
219 destP++; 219 destP++;
220 n--; 220 n--;
221 } 221 }
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
272 } 272 }
273 n = tailFrames; 273 n = tailFrames;
274 } 274 }
275 #endif 275 #endif
276 float k = *scale; 276 float k = *scale;
277 while (n--) { 277 while (n--) {
278 *destP = k * *sourceP; 278 *destP = k * *sourceP;
279 sourceP += sourceStride; 279 sourceP += sourceStride;
280 destP += destStride; 280 destP += destStride;
281 } 281 }
282 #ifdef __SSE2__ 282 #if CPU(X86) || CPU(X86_64)
283 } 283 }
284 #endif 284 #endif
285 } 285 }
286 286
287 void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) 287 void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
288 { 288 {
289 int n = framesToProcess; 289 int n = framesToProcess;
290 290
291 #ifdef __SSE2__ 291 #if CPU(X86) || CPU(X86_64)
292 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) { 292 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {
293 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. 293 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
294 while ((reinterpret_cast<size_t>(source1P) & 0x0F) && n) { 294 while ((reinterpret_cast<size_t>(source1P) & 0x0F) && n) {
295 *destP = *source1P + *source2P; 295 *destP = *source1P + *source2P;
296 source1P++; 296 source1P++;
297 source2P++; 297 source2P++;
298 destP++; 298 destP++;
299 n--; 299 n--;
300 } 300 }
301 301
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
384 } 384 }
385 n = tailFrames; 385 n = tailFrames;
386 } 386 }
387 #endif 387 #endif
388 while (n--) { 388 while (n--) {
389 *destP = *source1P + *source2P; 389 *destP = *source1P + *source2P;
390 source1P += sourceStride1; 390 source1P += sourceStride1;
391 source2P += sourceStride2; 391 source2P += sourceStride2;
392 destP += destStride; 392 destP += destStride;
393 } 393 }
394 #ifdef __SSE2__ 394 #if CPU(X86) || CPU(X86_64)
395 } 395 }
396 #endif 396 #endif
397 } 397 }
398 398
399 void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) 399 void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
400 { 400 {
401 401
402 int n = framesToProcess; 402 int n = framesToProcess;
403 403
404 #ifdef __SSE2__ 404 #if CPU(X86) || CPU(X86_64)
405 if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) { 405 if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
406 // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately. 406 // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
407 while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) { 407 while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) {
408 *destP = *source1P * *source2P; 408 *destP = *source1P * *source2P;
409 source1P++; 409 source1P++;
410 source2P++; 410 source2P++;
411 destP++; 411 destP++;
412 n--; 412 n--;
413 } 413 }
414 414
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
467 source1P += sourceStride1; 467 source1P += sourceStride1;
468 source2P += sourceStride2; 468 source2P += sourceStride2;
469 destP += destStride; 469 destP += destStride;
470 n--; 470 n--;
471 } 471 }
472 } 472 }
473 473
474 void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess) 474 void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
475 { 475 {
476 unsigned i = 0; 476 unsigned i = 0;
477 #ifdef __SSE2__ 477 #if CPU(X86) || CPU(X86_64)
478 // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned. 478 // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned.
479 // Otherwise, fall through to the scalar code below. 479 // Otherwise, fall through to the scalar code below.
480 if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F) 480 if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F)
481 && !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F) 481 && !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F)
482 && !(reinterpret_cast<uintptr_t>(real2P) & 0x0F) 482 && !(reinterpret_cast<uintptr_t>(real2P) & 0x0F)
483 && !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F) 483 && !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F)
484 && !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F) 484 && !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F)
485 && !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) { 485 && !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) {
486 486
487 unsigned endSize = framesToProcess - framesToProcess % 4; 487 unsigned endSize = framesToProcess - framesToProcess % 4;
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
525 realDestP[i] = realResult; 525 realDestP[i] = realResult;
526 imagDestP[i] = imagResult; 526 imagDestP[i] = imagResult;
527 } 527 }
528 } 528 }
529 529
530 void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess) 530 void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess)
531 { 531 {
532 int n = framesToProcess; 532 int n = framesToProcess;
533 float sum = 0; 533 float sum = 0;
534 534
535 #ifdef __SSE2__ 535 #if CPU(X86) || CPU(X86_64)
536 if (sourceStride == 1) { 536 if (sourceStride == 1) {
537 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. 537 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
538 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { 538 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
539 float sample = *sourceP; 539 float sample = *sourceP;
540 sum += sample * sample; 540 sum += sample * sample;
541 sourceP++; 541 sourceP++;
542 n--; 542 n--;
543 } 543 }
544 544
545 // Now the sourceP is aligned, use SSE. 545 // Now the sourceP is aligned, use SSE.
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
590 590
591 ASSERT(sumP); 591 ASSERT(sumP);
592 *sumP = sum; 592 *sumP = sum;
593 } 593 }
594 594
595 void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess) 595 void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess)
596 { 596 {
597 int n = framesToProcess; 597 int n = framesToProcess;
598 float max = 0; 598 float max = 0;
599 599
600 #ifdef __SSE2__ 600 #if CPU(X86) || CPU(X86_64)
601 if (sourceStride == 1) { 601 if (sourceStride == 1) {
602 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. 602 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
603 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { 603 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
604 max = std::max(max, fabsf(*sourceP)); 604 max = std::max(max, fabsf(*sourceP));
605 sourceP++; 605 sourceP++;
606 n--; 606 n--;
607 } 607 }
608 608
609 // Now the sourceP is aligned, use SSE. 609 // Now the sourceP is aligned, use SSE.
610 int tailFrames = n % 4; 610 int tailFrames = n % 4;
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
691 } 691 }
692 } 692 }
693 693
694 #endif // OS(MACOSX) 694 #endif // OS(MACOSX)
695 695
696 } // namespace VectorMath 696 } // namespace VectorMath
697 697
698 } // namespace WebCore 698 } // namespace WebCore
699 699
700 #endif // ENABLE(WEB_AUDIO) 700 #endif // ENABLE(WEB_AUDIO)
OLDNEW
« no previous file with comments | « Source/platform/audio/SincResampler.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698