| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2010, Google Inc. All rights reserved. | 2 * Copyright (C) 2010, Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
| 6 * are met: | 6 * are met: |
| 7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
| 8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
| 10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
| (...skipping 17 matching lines...) |
| 28 | 28 |
| 29 #include "platform/audio/VectorMath.h" | 29 #include "platform/audio/VectorMath.h" |
| 30 #include "wtf/Assertions.h" | 30 #include "wtf/Assertions.h" |
| 31 #include "wtf/CPU.h" | 31 #include "wtf/CPU.h" |
| 32 #include <stdint.h> | 32 #include <stdint.h> |
| 33 | 33 |
| 34 #if OS(MACOSX) | 34 #if OS(MACOSX) |
| 35 #include <Accelerate/Accelerate.h> | 35 #include <Accelerate/Accelerate.h> |
| 36 #endif | 36 #endif |
| 37 | 37 |
| 38 #ifdef __SSE2__ | 38 #if CPU(X86) || CPU(X86_64) |
| 39 #include <emmintrin.h> | 39 #include <emmintrin.h> |
| 40 #endif | 40 #endif |
| 41 | 41 |
| 42 #if HAVE(ARM_NEON_INTRINSICS) | 42 #if HAVE(ARM_NEON_INTRINSICS) |
| 43 #include <arm_neon.h> | 43 #include <arm_neon.h> |
| 44 #endif | 44 #endif |
| 45 | 45 |
| 46 #include <math.h> | 46 #include <math.h> |
| 47 #include <algorithm> | 47 #include <algorithm> |
| 48 | 48 |
| (...skipping 79 matching lines...) |
| 128 #pragma float_control(except, off, push) | 128 #pragma float_control(except, off, push) |
| 129 #pragma float_control(precise, off, push) | 129 #pragma float_control(precise, off, push) |
| 130 #pragma fp_contract(on) | 130 #pragma fp_contract(on) |
| 131 #pragma fenv_access(off) | 131 #pragma fenv_access(off) |
| 132 #endif | 132 #endif |
| 133 | 133 |
| 134 void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) | 134 void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) |
| 135 { | 135 { |
| 136 int n = framesToProcess; | 136 int n = framesToProcess; |
| 137 | 137 |
| 138 #ifdef __SSE2__ | 138 #if CPU(X86) || CPU(X86_64) |
| 139 if ((sourceStride == 1) && (destStride == 1)) { | 139 if ((sourceStride == 1) && (destStride == 1)) { |
| 140 float k = *scale; | 140 float k = *scale; |
| 141 | 141 |
| 142 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 142 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 143 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { | 143 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { |
| 144 *destP += k * *sourceP; | 144 *destP += k * *sourceP; |
| 145 sourceP++; | 145 sourceP++; |
| 146 destP++; | 146 destP++; |
| 147 n--; | 147 n--; |
| 148 } | 148 } |
| (...skipping 52 matching lines...) |
| 201 sourceP += sourceStride; | 201 sourceP += sourceStride; |
| 202 destP += destStride; | 202 destP += destStride; |
| 203 n--; | 203 n--; |
| 204 } | 204 } |
| 205 } | 205 } |
| 206 | 206 |
| 207 void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) | 207 void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) |
| 208 { | 208 { |
| 209 int n = framesToProcess; | 209 int n = framesToProcess; |
| 210 | 210 |
| 211 #ifdef __SSE2__ | 211 #if CPU(X86) || CPU(X86_64) |
| 212 if ((sourceStride == 1) && (destStride == 1)) { | 212 if ((sourceStride == 1) && (destStride == 1)) { |
| 213 float k = *scale; | 213 float k = *scale; |
| 214 | 214 |
| 215 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 215 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 216 while ((reinterpret_cast<size_t>(sourceP) & 0x0F) && n) { | 216 while ((reinterpret_cast<size_t>(sourceP) & 0x0F) && n) { |
| 217 *destP = k * *sourceP; | 217 *destP = k * *sourceP; |
| 218 sourceP++; | 218 sourceP++; |
| 219 destP++; | 219 destP++; |
| 220 n--; | 220 n--; |
| 221 } | 221 } |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 272 } | 272 } |
| 273 n = tailFrames; | 273 n = tailFrames; |
| 274 } | 274 } |
| 275 #endif | 275 #endif |
| 276 float k = *scale; | 276 float k = *scale; |
| 277 while (n--) { | 277 while (n--) { |
| 278 *destP = k * *sourceP; | 278 *destP = k * *sourceP; |
| 279 sourceP += sourceStride; | 279 sourceP += sourceStride; |
| 280 destP += destStride; | 280 destP += destStride; |
| 281 } | 281 } |
| 282 #ifdef __SSE2__ | 282 #if CPU(X86) || CPU(X86_64) |
| 283 } | 283 } |
| 284 #endif | 284 #endif |
| 285 } | 285 } |
| 286 | 286 |
| 287 void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) | 287 void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) |
| 288 { | 288 { |
| 289 int n = framesToProcess; | 289 int n = framesToProcess; |
| 290 | 290 |
| 291 #ifdef __SSE2__ | 291 #if CPU(X86) || CPU(X86_64) |
| 292 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) { | 292 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) { |
| 293 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 293 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 294 while ((reinterpret_cast<size_t>(source1P) & 0x0F) && n) { | 294 while ((reinterpret_cast<size_t>(source1P) & 0x0F) && n) { |
| 295 *destP = *source1P + *source2P; | 295 *destP = *source1P + *source2P; |
| 296 source1P++; | 296 source1P++; |
| 297 source2P++; | 297 source2P++; |
| 298 destP++; | 298 destP++; |
| 299 n--; | 299 n--; |
| 300 } | 300 } |
| 301 | 301 |
| (...skipping 82 matching lines...) |
| 384 } | 384 } |
| 385 n = tailFrames; | 385 n = tailFrames; |
| 386 } | 386 } |
| 387 #endif | 387 #endif |
| 388 while (n--) { | 388 while (n--) { |
| 389 *destP = *source1P + *source2P; | 389 *destP = *source1P + *source2P; |
| 390 source1P += sourceStride1; | 390 source1P += sourceStride1; |
| 391 source2P += sourceStride2; | 391 source2P += sourceStride2; |
| 392 destP += destStride; | 392 destP += destStride; |
| 393 } | 393 } |
| 394 #ifdef __SSE2__ | 394 #if CPU(X86) || CPU(X86_64) |
| 395 } | 395 } |
| 396 #endif | 396 #endif |
| 397 } | 397 } |
| 398 | 398 |
| 399 void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) | 399 void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) |
| 400 { | 400 { |
| 401 | 401 |
| 402 int n = framesToProcess; | 402 int n = framesToProcess; |
| 403 | 403 |
| 404 #ifdef __SSE2__ | 404 #if CPU(X86) || CPU(X86_64) |
| 405 if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) { | 405 if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) { |
| 406 // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 406 // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 407 while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) { | 407 while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) { |
| 408 *destP = *source1P * *source2P; | 408 *destP = *source1P * *source2P; |
| 409 source1P++; | 409 source1P++; |
| 410 source2P++; | 410 source2P++; |
| 411 destP++; | 411 destP++; |
| 412 n--; | 412 n--; |
| 413 } | 413 } |
| 414 | 414 |
| (...skipping 52 matching lines...) |
| 467 source1P += sourceStride1; | 467 source1P += sourceStride1; |
| 468 source2P += sourceStride2; | 468 source2P += sourceStride2; |
| 469 destP += destStride; | 469 destP += destStride; |
| 470 n--; | 470 n--; |
| 471 } | 471 } |
| 472 } | 472 } |
| 473 | 473 |
| 474 void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess) | 474 void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess) |
| 475 { | 475 { |
| 476 unsigned i = 0; | 476 unsigned i = 0; |
| 477 #ifdef __SSE2__ | 477 #if CPU(X86) || CPU(X86_64) |
| 478 // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned. | 478 // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned. |
| 479 // Otherwise, fall through to the scalar code below. | 479 // Otherwise, fall through to the scalar code below. |
| 480 if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F) | 480 if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F) |
| 481 && !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F) | 481 && !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F) |
| 482 && !(reinterpret_cast<uintptr_t>(real2P) & 0x0F) | 482 && !(reinterpret_cast<uintptr_t>(real2P) & 0x0F) |
| 483 && !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F) | 483 && !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F) |
| 484 && !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F) | 484 && !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F) |
| 485 && !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) { | 485 && !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) { |
| 486 | 486 |
| 487 unsigned endSize = framesToProcess - framesToProcess % 4; | 487 unsigned endSize = framesToProcess - framesToProcess % 4; |
| (...skipping 37 matching lines...) |
| 525 realDestP[i] = realResult; | 525 realDestP[i] = realResult; |
| 526 imagDestP[i] = imagResult; | 526 imagDestP[i] = imagResult; |
| 527 } | 527 } |
| 528 } | 528 } |
| 529 | 529 |
| 530 void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess) | 530 void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess) |
| 531 { | 531 { |
| 532 int n = framesToProcess; | 532 int n = framesToProcess; |
| 533 float sum = 0; | 533 float sum = 0; |
| 534 | 534 |
| 535 #ifdef __SSE2__ | 535 #if CPU(X86) || CPU(X86_64) |
| 536 if (sourceStride == 1) { | 536 if (sourceStride == 1) { |
| 537 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 537 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 538 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { | 538 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { |
| 539 float sample = *sourceP; | 539 float sample = *sourceP; |
| 540 sum += sample * sample; | 540 sum += sample * sample; |
| 541 sourceP++; | 541 sourceP++; |
| 542 n--; | 542 n--; |
| 543 } | 543 } |
| 544 | 544 |
| 545 // Now the sourceP is aligned, use SSE. | 545 // Now the sourceP is aligned, use SSE. |
| (...skipping 44 matching lines...) |
| 590 | 590 |
| 591 ASSERT(sumP); | 591 ASSERT(sumP); |
| 592 *sumP = sum; | 592 *sumP = sum; |
| 593 } | 593 } |
| 594 | 594 |
| 595 void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess) | 595 void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess) |
| 596 { | 596 { |
| 597 int n = framesToProcess; | 597 int n = framesToProcess; |
| 598 float max = 0; | 598 float max = 0; |
| 599 | 599 |
| 600 #ifdef __SSE2__ | 600 #if CPU(X86) || CPU(X86_64) |
| 601 if (sourceStride == 1) { | 601 if (sourceStride == 1) { |
| 602 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 602 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 603 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { | 603 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { |
| 604 max = std::max(max, fabsf(*sourceP)); | 604 max = std::max(max, fabsf(*sourceP)); |
| 605 sourceP++; | 605 sourceP++; |
| 606 n--; | 606 n--; |
| 607 } | 607 } |
| 608 | 608 |
| 609 // Now the sourceP is aligned, use SSE. | 609 // Now the sourceP is aligned, use SSE. |
| 610 int tailFrames = n % 4; | 610 int tailFrames = n % 4; |
| (...skipping 80 matching lines...) |
| 691 } | 691 } |
| 692 } | 692 } |
| 693 | 693 |
| 694 #endif // OS(MACOSX) | 694 #endif // OS(MACOSX) |
| 695 | 695 |
| 696 } // namespace VectorMath | 696 } // namespace VectorMath |
| 697 | 697 |
| 698 } // namespace WebCore | 698 } // namespace WebCore |
| 699 | 699 |
| 700 #endif // ENABLE(WEB_AUDIO) | 700 #endif // ENABLE(WEB_AUDIO) |
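
Note: the SSE bodies elided above all follow the shape the comments describe: process up to three leading frames in scalar code until sourceP is 16-byte aligned, run a main loop four floats at a time, then finish the remaining 0-3 frames in scalar code. Below is a minimal self-contained sketch of that pattern for the contiguous (stride 1) vsmul case; the helper name vsmulSketch is hypothetical, and the unconditional _mm_storeu_ps is an assumption (the elided body may branch on destP alignment and use an aligned store instead).

#include <emmintrin.h>
#include <cstddef>
#include <cstdint>

// Hypothetical sketch: destP[i] = k * sourceP[i] for contiguous (stride 1) buffers.
static void vsmulSketch(const float* sourceP, float k, float* destP, size_t framesToProcess)
{
    size_t n = framesToProcess;

    // Scalar prologue: at most three frames until sourceP is 16-byte aligned.
    while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
        *destP++ = k * *sourceP++;
        n--;
    }

    // SSE main loop, four floats per iteration from the now-aligned source.
    const __m128 mScale = _mm_set1_ps(k);
    size_t tailFrames = n % 4;
    const float* endP = sourceP + n - tailFrames;
    while (sourceP < endP) {
        __m128 source = _mm_load_ps(sourceP);              // aligned load
        _mm_storeu_ps(destP, _mm_mul_ps(source, mScale));  // destP may be unaligned
        sourceP += 4;
        destP += 4;
    }

    // Scalar tail for the remaining 0-3 frames.
    n = tailFrames;
    while (n--)
        *destP++ = k * *sourceP++;
}

The same prologue / vector loop / tail structure carries over to vsma, vadd and vmul, with the multiply-by-scale replaced by the corresponding per-element operation.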
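zvmul multiplies two complex vectors stored as split real/imag arrays; the scalar remainder visible above computes realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i] and imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i]. A hypothetical sketch of the aligned SSE path that the alignment checks above guard (structure and helper name assumed, not the file's elided code):

#include <emmintrin.h>
#include <cstddef>

// Hypothetical sketch of zvmul's aligned SSE path: element-wise complex multiply
// of split real/imag arrays, four complex values per iteration.
static void zvmulSketch(const float* real1P, const float* imag1P,
                        const float* real2P, const float* imag2P,
                        float* realDestP, float* imagDestP, size_t framesToProcess)
{
    size_t endSize = framesToProcess - framesToProcess % 4;
    size_t i = 0;
    for (; i < endSize; i += 4) {
        __m128 real1 = _mm_load_ps(real1P + i);
        __m128 imag1 = _mm_load_ps(imag1P + i);
        __m128 real2 = _mm_load_ps(real2P + i);
        __m128 imag2 = _mm_load_ps(imag2P + i);
        // (a + bi)(c + di) = (ac - bd) + (ad + bc)i
        _mm_store_ps(realDestP + i, _mm_sub_ps(_mm_mul_ps(real1, real2), _mm_mul_ps(imag1, imag2)));
        _mm_store_ps(imagDestP + i, _mm_add_ps(_mm_mul_ps(real1, imag2), _mm_mul_ps(imag1, real2)));
    }
    // Scalar remainder, matching the formula visible in the diff above.
    for (; i < framesToProcess; ++i) {
        float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];
        float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];
        realDestP[i] = realResult;
        imagDestP[i] = imagResult;
    }
}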
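vsvesq accumulates the sum of squared samples into *sumP. A natural SSE formulation, assumed in the sketch below rather than taken from the elided code, keeps four partial sums in one register and reduces them once at the end:

#include <emmintrin.h>
#include <cstddef>
#include <cstdint>

// Hypothetical sketch of vsvesq's contiguous (stride 1) path: sum of squared samples.
static float sumOfSquaresSketch(const float* sourceP, size_t framesToProcess)
{
    size_t n = framesToProcess;
    float sum = 0;

    // Scalar prologue until sourceP is 16-byte aligned.
    while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
        float sample = *sourceP++;
        sum += sample * sample;
        n--;
    }

    // Keep four partial sums in one register.
    __m128 mSum = _mm_setzero_ps();
    size_t tailFrames = n % 4;
    const float* endP = sourceP + n - tailFrames;
    while (sourceP < endP) {
        __m128 source = _mm_load_ps(sourceP);
        mSum = _mm_add_ps(mSum, _mm_mul_ps(source, source));
        sourceP += 4;
    }

    // Horizontal reduction of the four partial sums.
    float groupSum[4];
    _mm_storeu_ps(groupSum, mSum);
    sum += groupSum[0] + groupSum[1] + groupSum[2] + groupSum[3];

    // Scalar tail for the remaining 0-3 frames.
    n = tailFrames;
    while (n--) {
        float sample = *sourceP++;
        sum += sample * sample;
    }
    return sum;
}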
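vmaxmgv writes the largest absolute sample value to *maxP. The sketch below assumes the usual SSE approach of clearing the sign bit with a bitmask instead of calling fabsf per element, keeping a running four-lane maximum, and reducing at the end (again a sketch under those assumptions, not the elided body):

#include <emmintrin.h>
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>

// Hypothetical sketch of vmaxmgv's contiguous (stride 1) path: maximum absolute sample value.
static float maxMagnitudeSketch(const float* sourceP, size_t framesToProcess)
{
    size_t n = framesToProcess;
    float max = 0;

    // Scalar prologue until sourceP is 16-byte aligned.
    while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
        max = std::max(max, std::fabs(*sourceP++));
        n--;
    }

    // |x| by clearing the sign bit, then a running four-lane maximum.
    const __m128 mMask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
    __m128 mMax = _mm_setzero_ps();
    size_t tailFrames = n % 4;
    const float* endP = sourceP + n - tailFrames;
    while (sourceP < endP) {
        mMax = _mm_max_ps(mMax, _mm_and_ps(_mm_load_ps(sourceP), mMask));
        sourceP += 4;
    }

    // Horizontal reduction of the four lane maxima.
    float groupMax[4];
    _mm_storeu_ps(groupMax, mMax);
    for (int i = 0; i < 4; ++i)
        max = std::max(max, groupMax[i]);

    // Scalar tail for the remaining 0-3 frames.
    n = tailFrames;
    while (n--)
        max = std::max(max, std::fabs(*sourceP++));
    return max;
}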