| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2010, Google Inc. All rights reserved. | 2 * Copyright (C) 2010, Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
| 6 * are met: | 6 * are met: |
| 7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
| 8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
| 10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
| (...skipping 17 matching lines...) |
| 28 | 28 |
| 29 #include "platform/audio/VectorMath.h" | 29 #include "platform/audio/VectorMath.h" |
| 30 #include "wtf/Assertions.h" | 30 #include "wtf/Assertions.h" |
| 31 #include "wtf/CPU.h" | 31 #include "wtf/CPU.h" |
| 32 #include <stdint.h> | 32 #include <stdint.h> |
| 33 | 33 |
| 34 #if OS(MACOSX) | 34 #if OS(MACOSX) |
| 35 #include <Accelerate/Accelerate.h> | 35 #include <Accelerate/Accelerate.h> |
| 36 #endif | 36 #endif |
| 37 | 37 |
| 38 #ifdef __SSE2__ | 38 #if CPU(X86) || CPU(X86_64) |
| 39 #include <emmintrin.h> | 39 #include <emmintrin.h> |
| 40 #endif | 40 #endif |
| 41 | 41 |
| 42 #if HAVE(ARM_NEON_INTRINSICS) | 42 #if HAVE(ARM_NEON_INTRINSICS) |
| 43 #include <arm_neon.h> | 43 #include <arm_neon.h> |
| 44 #endif | 44 #endif |
| 45 | 45 |
| 46 #include <math.h> | 46 #include <math.h> |
| 47 #include <algorithm> | 47 #include <algorithm> |
| 48 | 48 |
| (...skipping 79 matching lines...) |
| 128 #pragma float_control(except, off, push) | 128 #pragma float_control(except, off, push) |
| 129 #pragma float_control(precise, off, push) | 129 #pragma float_control(precise, off, push) |
| 130 #pragma fp_contract(on) | 130 #pragma fp_contract(on) |
| 131 #pragma fenv_access(off) | 131 #pragma fenv_access(off) |
| 132 #endif | 132 #endif |
| 133 | 133 |
| 134 void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) | 134 void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) |
| 135 { | 135 { |
| 136 int n = framesToProcess; | 136 int n = framesToProcess; |
| 137 | 137 |
| 138 #ifdef __SSE2__ | 138 #if CPU(X86) || CPU(X86_64) |
| 139 if ((sourceStride == 1) && (destStride == 1)) { | 139 if ((sourceStride == 1) && (destStride == 1)) { |
| 140 float k = *scale; | 140 float k = *scale; |
| 141 | 141 |
| 142 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 142 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 143 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { | 143 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { |
| 144 *destP += k * *sourceP; | 144 *destP += k * *sourceP; |
| 145 sourceP++; | 145 sourceP++; |
| 146 destP++; | 146 destP++; |
| 147 n--; | 147 n--; |
| 148 } | 148 } |
| (...skipping 52 matching lines...) |
| 201 sourceP += sourceStride; | 201 sourceP += sourceStride; |
| 202 destP += destStride; | 202 destP += destStride; |
| 203 n--; | 203 n--; |
| 204 } | 204 } |
| 205 } | 205 } |
| 206 | 206 |
| 207 void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) | 207 void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess) |
| 208 { | 208 { |
| 209 int n = framesToProcess; | 209 int n = framesToProcess; |
| 210 | 210 |
| 211 #ifdef __SSE2__ | 211 #if CPU(X86) || CPU(X86_64) |
| 212 if ((sourceStride == 1) && (destStride == 1)) { | 212 if ((sourceStride == 1) && (destStride == 1)) { |
| 213 float k = *scale; | 213 float k = *scale; |
| 214 | 214 |
| 215 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 215 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 216 while ((reinterpret_cast<size_t>(sourceP) & 0x0F) && n) { | 216 while ((reinterpret_cast<size_t>(sourceP) & 0x0F) && n) { |
| 217 *destP = k * *sourceP; | 217 *destP = k * *sourceP; |
| 218 sourceP++; | 218 sourceP++; |
| 219 destP++; | 219 destP++; |
| 220 n--; | 220 n--; |
| 221 } | 221 } |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 272 } | 272 } |
| 273 n = tailFrames; | 273 n = tailFrames; |
| 274 } | 274 } |
| 275 #endif | 275 #endif |
| 276 float k = *scale; | 276 float k = *scale; |
| 277 while (n--) { | 277 while (n--) { |
| 278 *destP = k * *sourceP; | 278 *destP = k * *sourceP; |
| 279 sourceP += sourceStride; | 279 sourceP += sourceStride; |
| 280 destP += destStride; | 280 destP += destStride; |
| 281 } | 281 } |
| 282 #ifdef __SSE2__ | 282 #if CPU(X86) || CPU(X86_64) |
| 283 } | 283 } |
| 284 #endif | 284 #endif |
| 285 } | 285 } |
| 286 | 286 |
| 287 void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) | 287 void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) |
| 288 { | 288 { |
| 289 int n = framesToProcess; | 289 int n = framesToProcess; |
| 290 | 290 |
| 291 #ifdef __SSE2__ | 291 #if CPU(X86) || CPU(X86_64) |
| 292 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) { | 292 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) { |
| 293 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 293 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 294 while ((reinterpret_cast<size_t>(source1P) & 0x0F) && n) { | 294 while ((reinterpret_cast<size_t>(source1P) & 0x0F) && n) { |
| 295 *destP = *source1P + *source2P; | 295 *destP = *source1P + *source2P; |
| 296 source1P++; | 296 source1P++; |
| 297 source2P++; | 297 source2P++; |
| 298 destP++; | 298 destP++; |
| 299 n--; | 299 n--; |
| 300 } | 300 } |
| 301 | 301 |
| (...skipping 82 matching lines...) |
| 384 } | 384 } |
| 385 n = tailFrames; | 385 n = tailFrames; |
| 386 } | 386 } |
| 387 #endif | 387 #endif |
| 388 while (n--) { | 388 while (n--) { |
| 389 *destP = *source1P + *source2P; | 389 *destP = *source1P + *source2P; |
| 390 source1P += sourceStride1; | 390 source1P += sourceStride1; |
| 391 source2P += sourceStride2; | 391 source2P += sourceStride2; |
| 392 destP += destStride; | 392 destP += destStride; |
| 393 } | 393 } |
| 394 #ifdef __SSE2__ | 394 #if CPU(X86) || CPU(X86_64) |
| 395 } | 395 } |
| 396 #endif | 396 #endif |
| 397 } | 397 } |
| 398 | 398 |
| 399 void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) | 399 void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess) |
| 400 { | 400 { |
| 401 | 401 |
| 402 int n = framesToProcess; | 402 int n = framesToProcess; |
| 403 | 403 |
| 404 #ifdef __SSE2__ | 404 #if CPU(X86) || CPU(X86_64) |
| 405 if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) { | 405 if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) { |
| 406 // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 406 // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 407 while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) { | 407 while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) { |
| 408 *destP = *source1P * *source2P; | 408 *destP = *source1P * *source2P; |
| 409 source1P++; | 409 source1P++; |
| 410 source2P++; | 410 source2P++; |
| 411 destP++; | 411 destP++; |
| 412 n--; | 412 n--; |
| 413 } | 413 } |
| 414 | 414 |
| (...skipping 52 matching lines...) |
| 467 source1P += sourceStride1; | 467 source1P += sourceStride1; |
| 468 source2P += sourceStride2; | 468 source2P += sourceStride2; |
| 469 destP += destStride; | 469 destP += destStride; |
| 470 n--; | 470 n--; |
| 471 } | 471 } |
| 472 } | 472 } |
| 473 | 473 |
| 474 void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess) | 474 void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess) |
| 475 { | 475 { |
| 476 unsigned i = 0; | 476 unsigned i = 0; |
| 477 #ifdef __SSE2__ | 477 #if CPU(X86) || CPU(X86_64) |
| 478 // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned. | 478 // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned. |
| 479 // Otherwise, fall through to the scalar code below. | 479 // Otherwise, fall through to the scalar code below. |
| 480 if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F) | 480 if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F) |
| 481 && !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F) | 481 && !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F) |
| 482 && !(reinterpret_cast<uintptr_t>(real2P) & 0x0F) | 482 && !(reinterpret_cast<uintptr_t>(real2P) & 0x0F) |
| 483 && !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F) | 483 && !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F) |
| 484 && !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F) | 484 && !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F) |
| 485 && !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) { | 485 && !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) { |
| 486 | 486 |
| 487 unsigned endSize = framesToProcess - framesToProcess % 4; | 487 unsigned endSize = framesToProcess - framesToProcess % 4; |
| (...skipping 37 matching lines...) |
| 525 realDestP[i] = realResult; | 525 realDestP[i] = realResult; |
| 526 imagDestP[i] = imagResult; | 526 imagDestP[i] = imagResult; |
| 527 } | 527 } |
| 528 } | 528 } |
| 529 | 529 |
| 530 void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess) | 530 void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess) |
| 531 { | 531 { |
| 532 int n = framesToProcess; | 532 int n = framesToProcess; |
| 533 float sum = 0; | 533 float sum = 0; |
| 534 | 534 |
| 535 #ifdef __SSE2__ | 535 #if CPU(X86) || CPU(X86_64) |
| 536 if (sourceStride == 1) { | 536 if (sourceStride == 1) { |
| 537 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 537 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 538 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { | 538 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { |
| 539 float sample = *sourceP; | 539 float sample = *sourceP; |
| 540 sum += sample * sample; | 540 sum += sample * sample; |
| 541 sourceP++; | 541 sourceP++; |
| 542 n--; | 542 n--; |
| 543 } | 543 } |
| 544 | 544 |
| 545 // Now the sourceP is aligned, use SSE. | 545 // Now the sourceP is aligned, use SSE. |
| (...skipping 44 matching lines...) |
| 590 | 590 |
| 591 ASSERT(sumP); | 591 ASSERT(sumP); |
| 592 *sumP = sum; | 592 *sumP = sum; |
| 593 } | 593 } |
| 594 | 594 |
| 595 void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess) | 595 void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess) |
| 596 { | 596 { |
| 597 int n = framesToProcess; | 597 int n = framesToProcess; |
| 598 float max = 0; | 598 float max = 0; |
| 599 | 599 |
| 600 #ifdef __SSE2__ | 600 #if CPU(X86) || CPU(X86_64) |
| 601 if (sourceStride == 1) { | 601 if (sourceStride == 1) { |
| 602 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. | 602 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately. |
| 603 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { | 603 while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) { |
| 604 max = std::max(max, fabsf(*sourceP)); | 604 max = std::max(max, fabsf(*sourceP)); |
| 605 sourceP++; | 605 sourceP++; |
| 606 n--; | 606 n--; |
| 607 } | 607 } |
| 608 | 608 |
| 609 // Now the sourceP is aligned, use SSE. | 609 // Now the sourceP is aligned, use SSE. |
| 610 int tailFrames = n % 4; | 610 int tailFrames = n % 4; |
| (...skipping 80 matching lines...) |
| 691 } | 691 } |
| 692 } | 692 } |
| 693 | 693 |
| 694 #endif // OS(MACOSX) | 694 #endif // OS(MACOSX) |
| 695 | 695 |
| 696 } // namespace VectorMath | 696 } // namespace VectorMath |
| 697 | 697 |
| 698 } // namespace WebCore | 698 } // namespace WebCore |
| 699 | 699 |
| 700 #endif // ENABLE(WEB_AUDIO) | 700 #endif // ENABLE(WEB_AUDIO) |
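
Note: the SSE bodies elided above all follow the shape the comments describe: process up to three leading frames in scalar code until sourceP is 16-byte aligned, run a main loop four floats at a time, then finish the remaining 0-3 frames in scalar code. Below is a minimal self-contained sketch of that pattern for the contiguous (stride 1) vsmul case; the helper name vsmulSketch is hypothetical, and the unconditional _mm_storeu_ps is an assumption (the elided body may branch on destP alignment and use an aligned store instead).

#include <emmintrin.h>
#include <cstddef>
#include <cstdint>

// Hypothetical sketch: destP[i] = k * sourceP[i] for contiguous (stride 1) buffers.
static void vsmulSketch(const float* sourceP, float k, float* destP, size_t framesToProcess)
{
    size_t n = framesToProcess;

    // Scalar prologue: at most three frames until sourceP is 16-byte aligned.
    while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
        *destP++ = k * *sourceP++;
        n--;
    }

    // SSE main loop, four floats per iteration from the now-aligned source.
    const __m128 mScale = _mm_set1_ps(k);
    size_t tailFrames = n % 4;
    const float* endP = sourceP + n - tailFrames;
    while (sourceP < endP) {
        __m128 source = _mm_load_ps(sourceP);              // aligned load
        _mm_storeu_ps(destP, _mm_mul_ps(source, mScale));  // destP may be unaligned
        sourceP += 4;
        destP += 4;
    }

    // Scalar tail for the remaining 0-3 frames.
    n = tailFrames;
    while (n--)
        *destP++ = k * *sourceP++;
}

The same prologue / vector loop / tail structure carries over to vsma, vadd and vmul, with the multiply-by-scale replaced by the corresponding per-element operation.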
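zvmul multiplies two complex vectors stored as split real/imag arrays; the scalar remainder visible above computes realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i] and imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i]. A hypothetical sketch of the aligned SSE path that the alignment checks above guard (structure and helper name assumed, not the file's elided code):

#include <emmintrin.h>
#include <cstddef>

// Hypothetical sketch of zvmul's aligned SSE path: element-wise complex multiply
// of split real/imag arrays, four complex values per iteration.
static void zvmulSketch(const float* real1P, const float* imag1P,
                        const float* real2P, const float* imag2P,
                        float* realDestP, float* imagDestP, size_t framesToProcess)
{
    size_t endSize = framesToProcess - framesToProcess % 4;
    size_t i = 0;
    for (; i < endSize; i += 4) {
        __m128 real1 = _mm_load_ps(real1P + i);
        __m128 imag1 = _mm_load_ps(imag1P + i);
        __m128 real2 = _mm_load_ps(real2P + i);
        __m128 imag2 = _mm_load_ps(imag2P + i);
        // (a + bi)(c + di) = (ac - bd) + (ad + bc)i
        _mm_store_ps(realDestP + i, _mm_sub_ps(_mm_mul_ps(real1, real2), _mm_mul_ps(imag1, imag2)));
        _mm_store_ps(imagDestP + i, _mm_add_ps(_mm_mul_ps(real1, imag2), _mm_mul_ps(imag1, real2)));
    }
    // Scalar remainder, matching the formula visible in the diff above.
    for (; i < framesToProcess; ++i) {
        float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];
        float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];
        realDestP[i] = realResult;
        imagDestP[i] = imagResult;
    }
}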
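vsvesq accumulates the sum of squared samples into *sumP. A natural SSE formulation, assumed in the sketch below rather than taken from the elided code, keeps four partial sums in one register and reduces them once at the end:

#include <emmintrin.h>
#include <cstddef>
#include <cstdint>

// Hypothetical sketch of vsvesq's contiguous (stride 1) path: sum of squared samples.
static float sumOfSquaresSketch(const float* sourceP, size_t framesToProcess)
{
    size_t n = framesToProcess;
    float sum = 0;

    // Scalar prologue until sourceP is 16-byte aligned.
    while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
        float sample = *sourceP++;
        sum += sample * sample;
        n--;
    }

    // Keep four partial sums in one register.
    __m128 mSum = _mm_setzero_ps();
    size_t tailFrames = n % 4;
    const float* endP = sourceP + n - tailFrames;
    while (sourceP < endP) {
        __m128 source = _mm_load_ps(sourceP);
        mSum = _mm_add_ps(mSum, _mm_mul_ps(source, source));
        sourceP += 4;
    }

    // Horizontal reduction of the four partial sums.
    float groupSum[4];
    _mm_storeu_ps(groupSum, mSum);
    sum += groupSum[0] + groupSum[1] + groupSum[2] + groupSum[3];

    // Scalar tail for the remaining 0-3 frames.
    n = tailFrames;
    while (n--) {
        float sample = *sourceP++;
        sum += sample * sample;
    }
    return sum;
}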
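vmaxmgv writes the largest absolute sample value to *maxP. The sketch below assumes the usual SSE approach of clearing the sign bit with a bitmask instead of calling fabsf per element, keeping a running four-lane maximum, and reducing at the end (again a sketch under those assumptions, not the elided body):

#include <emmintrin.h>
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>

// Hypothetical sketch of vmaxmgv's contiguous (stride 1) path: maximum absolute sample value.
static float maxMagnitudeSketch(const float* sourceP, size_t framesToProcess)
{
    size_t n = framesToProcess;
    float max = 0;

    // Scalar prologue until sourceP is 16-byte aligned.
    while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
        max = std::max(max, std::fabs(*sourceP++));
        n--;
    }

    // |x| by clearing the sign bit, then a running four-lane maximum.
    const __m128 mMask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
    __m128 mMax = _mm_setzero_ps();
    size_t tailFrames = n % 4;
    const float* endP = sourceP + n - tailFrames;
    while (sourceP < endP) {
        mMax = _mm_max_ps(mMax, _mm_and_ps(_mm_load_ps(sourceP), mMask));
        sourceP += 4;
    }

    // Horizontal reduction of the four lane maxima.
    float groupMax[4];
    _mm_storeu_ps(groupMax, mMax);
    for (int i = 0; i < 4; ++i)
        max = std::max(max, groupMax[i]);

    // Scalar tail for the remaining 0-3 frames.
    n = tailFrames;
    while (n--)
        max = std::max(max, std::fabs(*sourceP++));
    return max;
}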