third_party/WebKit/Source/platform/audio/DirectConvolver.cpp - Issue 2384073002: reflow comments in platform/audio

Side by Side Diff: third_party/WebKit/Source/platform/audio/DirectConvolver.cpp

Issue 2384073002: reflow comments in platform/audio (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« third_party/WebKit/Source/platform/audio/Biquad.cpp ('K') | « third_party/WebKit/Source/platform/audio/DenormalDisabler.h ('k') | third_party/WebKit/Source/platform/audio/Distance.h » ('j') | third_party/WebKit/Source/platform/audio/DownSampler.cpp » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2012 Intel Inc. All rights reserved.	2 * Copyright (C) 2012 Intel Inc. All rights reserved.

3 *	3 *

4 * Redistribution and use in source and binary forms, with or without	4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions	5 * modification, are permitted provided that the following conditions

6 * are met:	6 * are met:

7 *	7 *

8 * 1. Redistributions of source code must retain the above copyright	8 * 1. Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * 2. Redistributions in binary form must reproduce the above copyright	10 * 2. Redistributions in binary form must reproduce the above copyright

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
77 #if CPU(X86)	77 #if CPU(X86)

78 conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1,	78 conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1,

79 framesToProcess, kernelSize);	79 framesToProcess, kernelSize);

80 #else	80 #else

81 vDSP_conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1,	81 vDSP_conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1,

82 framesToProcess, kernelSize);	82 framesToProcess, kernelSize);

83 #endif // CPU(X86)	83 #endif // CPU(X86)

84 #else	84 #else

85 size_t i = 0;	85 size_t i = 0;

86 #if CPU(X86) \|\| CPU(X86_64)	86 #if CPU(X86) \|\| CPU(X86_64)

87 // Convolution using SSE2. Currently only do this if both \|kernelSize\| and \|framesToProcess\|	87 // Convolution using SSE2. Currently only do this if both \|kernelSize\| and

88 // are multiples of 4. If not, use the straightforward loop below.	88 // \|framesToProcess\| are multiples of 4. If not, use the straightforward loop

	89 // below.

89	90

90 if ((kernelSize % 4 == 0) && (framesToProcess % 4 == 0)) {	91 if ((kernelSize % 4 == 0) && (framesToProcess % 4 == 0)) {

91 // AudioFloatArray's are always aligned on at least a 16-byte boundary.	92 // AudioFloatArray's are always aligned on at least a 16-byte boundary.

92 AudioFloatArray kernelBuffer(4 * kernelSize);	93 AudioFloatArray kernelBuffer(4 * kernelSize);

93 __m128* kernelReversed = reinterpret_cast<__m128*>(kernelBuffer.data());	94 __m128* kernelReversed = reinterpret_cast<__m128*>(kernelBuffer.data());

94	95

95 // Reverse the kernel and repeat each value across a vector	96 // Reverse the kernel and repeat each value across a vector

96 for (i = 0; i < kernelSize; ++i) {	97 for (i = 0; i < kernelSize; ++i) {

97 kernelReversed[i] = _mm_set1_ps(kernelP[kernelSize - i - 1]);	98 kernelReversed[i] = _mm_set1_ps(kernelP[kernelSize - i - 1]);

98 }	99 }

99	100

100 float* inputStartP = inputP - kernelSize + 1;	101 float* inputStartP = inputP - kernelSize + 1;

101	102

102 // Do convolution with 4 inputs at a time.	103 // Do convolution with 4 inputs at a time.

103 for (i = 0; i < framesToProcess; i += 4) {	104 for (i = 0; i < framesToProcess; i += 4) {

104 __m128 convolutionSum;	105 __m128 convolutionSum;

105	106

106 convolutionSum = _mm_setzero_ps();	107 convolutionSum = _mm_setzero_ps();

107	108

108 // \|kernelSize\| is a multiple of 4 so we can unroll the loop by 4, manually.	109 // \|kernelSize\| is a multiple of 4 so we can unroll the loop by 4,

	110 // manually.

109 for (size_t k = 0; k < kernelSize; k += 4) {	111 for (size_t k = 0; k < kernelSize; k += 4) {

110 size_t dataOffset = i + k;	112 size_t dataOffset = i + k;

111	113

112 for (size_t m = 0; m < 4; ++m) {	114 for (size_t m = 0; m < 4; ++m) {

113 __m128 sourceBlock;	115 __m128 sourceBlock;

114 __m128 product;	116 __m128 product;

115	117

116 sourceBlock = _mm_loadu_ps(inputStartP + dataOffset + m);	118 sourceBlock = _mm_loadu_ps(inputStartP + dataOffset + m);

117 product = _mm_mul_ps(kernelReversed[k + m], sourceBlock);	119 product = _mm_mul_ps(kernelReversed[k + m], sourceBlock);

118 convolutionSum = _mm_add_ps(convolutionSum, product);	120 convolutionSum = _mm_add_ps(convolutionSum, product);

119 }	121 }

120 }	122 }

121 _mm_storeu_ps(destP + i, convolutionSum);	123 _mm_storeu_ps(destP + i, convolutionSum);

122 }	124 }

123 } else {	125 } else {

124 #endif	126 #endif

125	127

126 // FIXME: The macro can be further optimized to avoid pipeline stalls. One possibility is to maintain 4 separate sums and change the macro to CONVOLVE_FOUR_SAMPLES.	128 // FIXME: The macro can be further optimized to avoid pipeline stalls. One

	129 // possibility is to maintain 4 separate sums and change the macro to

	130 // CONVOLVE_FOUR_SAMPLES.

127 #define CONVOLVE_ONE_SAMPLE \	131 #define CONVOLVE_ONE_SAMPLE \

128 do { \	132 do { \

129 sum += inputP[i - j] * kernelP[j]; \	133 sum += inputP[i - j] * kernelP[j]; \

130 j++; \	134 j++; \

131 } while (0)	135 } while (0)

132	136

133 while (i < framesToProcess) {	137 while (i < framesToProcess) {

134 size_t j = 0;	138 size_t j = 0;

135 float sum = 0;	139 float sum = 0;

136	140

(...skipping 263 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
400	404

401 // Copy 2nd half of input buffer to 1st half.	405 // Copy 2nd half of input buffer to 1st half.

402 memcpy(m_buffer.data(), inputP, sizeof(float) * framesToProcess);	406 memcpy(m_buffer.data(), inputP, sizeof(float) * framesToProcess);

403 }	407 }

404	408

405 void DirectConvolver::reset() {	409 void DirectConvolver::reset() {

406 m_buffer.zero();	410 m_buffer.zero();

407 }	411 }

408	412

409 } // namespace blink	413 } // namespace blink

OLD	NEW