src/core/SkConvolver.cpp - Issue 2500113004: Port convolve functions to SkOpts

Side by Side Diff: src/core/SkConvolver.cpp

Issue 2500113004: Port convolve functions to SkOpts (Closed)

Patch Set: Fix typo Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "SkConvolver.h"	5 #include "SkConvolver.h"

	6 #include "SkOpts.h"

6 #include "SkTArray.h"	7 #include "SkTArray.h"

7	8

8 namespace {	9 namespace {

9

10 // Converts the argument to an 8-bit unsigned value by clamping to the range

11 // 0-255.

12 inline unsigned char ClampTo8(int a) {

13 if (static_cast<unsigned>(a) < 256) {

14 return a; // Avoid the extra check in the common case.

15 }

16 if (a < 0) {

17 return 0;

18 }

19 return 255;

20 }

21

22 // Stores a list of rows in a circular buffer. The usage is you write into it	10 // Stores a list of rows in a circular buffer. The usage is you write into it

23 // by calling AdvanceRow. It will keep track of which row in the buffer it	11 // by calling AdvanceRow. It will keep track of which row in the buffer it

24 // should use next, and the total number of rows added.	12 // should use next, and the total number of rows added.

25 class CircularRowBuffer {	13 class CircularRowBuffer {

26 public:	14 public:

27 // The number of pixels in each row is given in \|sourceRowPixelWidth\|.	15 // The number of pixels in each row is given in \|sourceRowPixelWidth\|.

28 // The maximum number of rows needed in the buffer is \|maxYFilterSize\|	16 // The maximum number of rows needed in the buffer is \|maxYFilterSize\|

29 // (we only need to store enough rows for the biggest filter).	17 // (we only need to store enough rows for the biggest filter).

30 //	18 //

31 // We use the \|firstInputRow\| to compute the coordinates of all of the	19 // We use the \|firstInputRow\| to compute the coordinates of all of the

(...skipping 69 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
101 int fNextRow;	89 int fNextRow;

102	90

103 // The y coordinate of the \|fNextRow\|. This is incremented each time a	91 // The y coordinate of the \|fNextRow\|. This is incremented each time a

104 // new row is appended and does not wrap.	92 // new row is appended and does not wrap.

105 int fNextRowCoordinate;	93 int fNextRowCoordinate;

106	94

107 // Buffer used by GetRowAddresses().	95 // Buffer used by GetRowAddresses().

108 SkTArray<unsigned char*> fRowAddresses;	96 SkTArray<unsigned char*> fRowAddresses;

109 };	97 };

110	98

111 // Convolves horizontally along a single row. The row data is given in

112 // \|srcData\| and continues for the numValues() of the filter.

113 template<bool hasAlpha>

114 void ConvolveHorizontally(const unsigned char* srcData,

115 const SkConvolutionFilter1D& filter,

116 unsigned char* outRow) {

117 // Loop over each pixel on this row in the output image.

118 int numValues = filter.numValues();

119 for (int outX = 0; outX < numValues; outX++) {

120 // Get the filter that determines the current output pixel.

121 int filterOffset, filterLength;

122 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =

123 filter.FilterForValue(outX, &filterOffset, &filterLength);

124

125 // Compute the first pixel in this row that the filter affects. It will

126 // touch \|filterLength\| pixels (4 bytes each) after this.

127 const unsigned char* rowToFilter = &srcData[filterOffset * 4];

128

129 // Apply the filter to the row to get the destination pixel in \|accum\|.

130 int accum[4] = {0};

131 for (int filterX = 0; filterX < filterLength; filterX++) {

132 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];

133 accum[0] += curFilter * rowToFilter[filterX * 4 + 0];

134 accum[1] += curFilter * rowToFilter[filterX * 4 + 1];

135 accum[2] += curFilter * rowToFilter[filterX * 4 + 2];

136 if (hasAlpha) {

137 accum[3] += curFilter * rowToFilter[filterX * 4 + 3];

138 }

139 }

140

141 // Bring this value back in range. All of the filter scaling factors

142 // are in fixed point with kShiftBits bits of fractional part.

143 accum[0] >>= SkConvolutionFilter1D::kShiftBits;

144 accum[1] >>= SkConvolutionFilter1D::kShiftBits;

145 accum[2] >>= SkConvolutionFilter1D::kShiftBits;

146 if (hasAlpha) {

147 accum[3] >>= SkConvolutionFilter1D::kShiftBits;

148 }

149

150 // Store the new pixel.

151 outRow[outX * 4 + 0] = ClampTo8(accum[0]);

152 outRow[outX * 4 + 1] = ClampTo8(accum[1]);

153 outRow[outX * 4 + 2] = ClampTo8(accum[2]);

154 if (hasAlpha) {

155 outRow[outX * 4 + 3] = ClampTo8(accum[3]);

156 }

157 }

158 }

159

160 // There's a bug somewhere here with GCC autovectorization (-ftree-vectorize). We originally

161 // thought this was 32 bit only, but subsequent tests show that some 64 bit gcc compiles

162 // suffer here too.

163 //

164 // Dropping to -O2 disables -ftree-vectorize. GCC 4.6 needs noinline. https://bug.skia.org/2575

165 #if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE)

166 #define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), noinline))

167 #else

168 #define SK_MAYBE_DISABLE_VECTORIZATION

169 #endif

170

171 SK_MAYBE_DISABLE_VECTORIZATION

172 static void ConvolveHorizontallyAlpha(const unsigned char* srcData,

173 const SkConvolutionFilter1D& filter,

174 unsigned char* outRow) {

175 return ConvolveHorizontally<true>(srcData, filter, outRow);

176 }

177

178 SK_MAYBE_DISABLE_VECTORIZATION

179 static void ConvolveHorizontallyNoAlpha(const unsigned char* srcData,

180 const SkConvolutionFilter1D& filter,

181 unsigned char* outRow) {

182 return ConvolveHorizontally<false>(srcData, filter, outRow);

183 }

184

185 #undef SK_MAYBE_DISABLE_VECTORIZATION

186

187

188 // Does vertical convolution to produce one output row. The filter values and

189 // length are given in the first two parameters. These are applied to each

190 // of the rows pointed to in the \|sourceDataRows\| array, with each row

191 // being \|pixelWidth\| wide.

192 //

193 // The output must have room for \|pixelWidth * 4\| bytes.

194 template<bool hasAlpha>

195 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,

196 int filterLength,

197 unsigned char* const* sourceDataRows,

198 int pixelWidth,

199 unsigned char* outRow) {

200 // We go through each column in the output and do a vertical convolution,

201 // generating one output pixel each time.

202 for (int outX = 0; outX < pixelWidth; outX++) {

203 // Compute the number of bytes over in each row that the current column

204 // we're convolving starts at. The pixel will cover the next 4 bytes.

205 int byteOffset = outX * 4;

206

207 // Apply the filter to one column of pixels.

208 int accum[4] = {0};

209 for (int filterY = 0; filterY < filterLength; filterY++) {

210 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];

211 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];

212 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];

213 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];

214 if (hasAlpha) {

215 accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];

216 }

217 }

218

219 // Bring this value back in range. All of the filter scaling factors

220 // are in fixed point with kShiftBits bits of precision.

221 accum[0] >>= SkConvolutionFilter1D::kShiftBits;

222 accum[1] >>= SkConvolutionFilter1D::kShiftBits;

223 accum[2] >>= SkConvolutionFilter1D::kShiftBits;

224 if (hasAlpha) {

225 accum[3] >>= SkConvolutionFilter1D::kShiftBits;

226 }

227

228 // Store the new pixel.

229 outRow[byteOffset + 0] = ClampTo8(accum[0]);

230 outRow[byteOffset + 1] = ClampTo8(accum[1]);

231 outRow[byteOffset + 2] = ClampTo8(accum[2]);

232 if (hasAlpha) {

233 unsigned char alpha = ClampTo8(accum[3]);

234

235 // Make sure the alpha channel doesn't come out smaller than any of the

236 // color channels. We use premultiplied alpha channels, so this should

237 // never happen, but rounding errors will cause this from time to time.

238 // These "impossible" colors will cause overflows (and hence random pixel

239 // values) when the resulting bitmap is drawn to the screen.

240 //

241 // We only need to do this when generating the final output row (here).

242 int maxColorChannel = SkTMax(outRow[byteOffset + 0],

243 SkTMax(outRow[byteOffset + 1],

244 outRow[byteOffset + 2]));

245 if (alpha < maxColorChannel) {

246 outRow[byteOffset + 3] = maxColorChannel;

247 } else {

248 outRow[byteOffset + 3] = alpha;

249 }

250 } else {

251 // No alpha channel, the image is opaque.

252 outRow[byteOffset + 3] = 0xff;

253 }

254 }

255 }

256

257 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,

258 int filterLength,

259 unsigned char* const* sourceDataRows,

260 int pixelWidth,

261 unsigned char* outRow,

262 bool sourceHasAlpha) {

263 if (sourceHasAlpha) {

264 ConvolveVertically<true>(filterValues, filterLength,

265 sourceDataRows, pixelWidth,

266 outRow);

267 } else {

268 ConvolveVertically<false>(filterValues, filterLength,

269 sourceDataRows, pixelWidth,

270 outRow);

271 }

272 }

273

274 } // namespace	99 } // namespace

275	100

276 // SkConvolutionFilter1D ---------------------------------------------------------	101 // SkConvolutionFilter1D ---------------------------------------------------------

277	102

278 SkConvolutionFilter1D::SkConvolutionFilter1D()	103 SkConvolutionFilter1D::SkConvolutionFilter1D()

279 : fMaxFilter(0) {	104 : fMaxFilter(0) {

280 }	105 }

281	106

282 SkConvolutionFilter1D::~SkConvolutionFilter1D() {	107 SkConvolutionFilter1D::~SkConvolutionFilter1D() {

283 }	108 }

(...skipping 55 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
339	164

340 return &fFilterValues[filter.fDataLocation];	165 return &fFilterValues[filter.fDataLocation];

341 }	166 }

342	167

343 bool BGRAConvolve2D(const unsigned char* sourceData,	168 bool BGRAConvolve2D(const unsigned char* sourceData,

344 int sourceByteRowStride,	169 int sourceByteRowStride,

345 bool sourceHasAlpha,	170 bool sourceHasAlpha,

346 const SkConvolutionFilter1D& filterX,	171 const SkConvolutionFilter1D& filterX,

347 const SkConvolutionFilter1D& filterY,	172 const SkConvolutionFilter1D& filterY,

348 int outputByteRowStride,	173 int outputByteRowStride,

349 unsigned char* output,	174 unsigned char* output) {

350 const SkConvolutionProcs& convolveProcs,

351 bool useSimdIfPossible) {

352	175

353 int maxYFilterSize = filterY.maxFilter();	176 int maxYFilterSize = filterY.maxFilter();

354	177

355 // The next row in the input that we will generate a horizontally	178 // The next row in the input that we will generate a horizontally

356 // convolved row for. If the filter doesn't start at the beginning of the	179 // convolved row for. If the filter doesn't start at the beginning of the

357 // image (this is the case when we are only resizing a subset), then we	180 // image (this is the case when we are only resizing a subset), then we

358 // don't want to generate any output rows before that. Compute the starting	181 // don't want to generate any output rows before that. Compute the starting

359 // row for convolution as the first pixel for the first vertical filter.	182 // row for convolution as the first pixel for the first vertical filter.

360 int filterOffset, filterLength;	183 int filterOffset, filterLength;

361 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =	184 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =

362 filterY.FilterForValue(0, &filterOffset, &filterLength);	185 filterY.FilterForValue(0, &filterOffset, &filterLength);

363 int nextXRow = filterOffset;	186 int nextXRow = filterOffset;

364	187

365 // We loop over each row in the input doing a horizontal convolution. This	188 // We loop over each row in the input doing a horizontal convolution. This

366 // will result in a horizontally convolved image. We write the results into	189 // will result in a horizontally convolved image. We write the results into

367 // a circular buffer of convolved rows and do vertical convolution as rows	190 // a circular buffer of convolved rows and do vertical convolution as rows

368 // are available. This prevents us from having to store the entire	191 // are available. This prevents us from having to store the entire

369 // intermediate image and helps cache coherency.	192 // intermediate image and helps cache coherency.

370 // We will need four extra rows to allow horizontal convolution could be done	193 // We will need four extra rows to allow horizontal convolution could be done

371 // simultaneously. We also pad each row in row buffer to be aligned-up to	194 // simultaneously. We also pad each row in row buffer to be aligned-up to

372 // 16 bytes.	195 // 16 bytes.

373 // TODO(jiesun): We do not use aligned load from row buffer in vertical	196 // TODO(jiesun): We do not use aligned load from row buffer in vertical

374 // convolution pass yet. Somehow Windows does not like it.	197 // convolution pass yet. Somehow Windows does not like it.

375 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF;	198 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF;

376 int rowBufferHeight = maxYFilterSize +	199 int rowBufferHeight = maxYFilterSize +

377 (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0);	200 (SkOpts::convolve_4_rows_horizontally != nullptr ? 4 : 0);

378	201

379 // check for too-big allocation requests : crbug.com/528628	202 // check for too-big allocation requests : crbug.com/528628

380 {	203 {

381 int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight);	204 int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight);

382 // need some limit, to avoid over-committing success from malloc, but then	205 // need some limit, to avoid over-committing success from malloc, but then

383 // crashing when we try to actually use the memory.	206 // crashing when we try to actually use the memory.

384 // 100meg seems big enough to allow "normal" zoom factors and image sizes through	207 // 100meg seems big enough to allow "normal" zoom factors and image sizes through

385 // while avoiding the crash seen by the bug (crbug.com/528628)	208 // while avoiding the crash seen by the bug (crbug.com/528628)

386 if (size > 100 * 1024 * 1024) {	209 if (size > 100 * 1024 * 1024) {

387 // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size);	210 // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size);

(...skipping 15 matching lines...) Expand all Loading...
403 int lastFilterOffset, lastFilterLength;	226 int lastFilterOffset, lastFilterLength;

404 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,	227 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,

405 &lastFilterLength);	228 &lastFilterLength);

406	229

407 for (int outY = 0; outY < numOutputRows; outY++) {	230 for (int outY = 0; outY < numOutputRows; outY++) {

408 filterValues = filterY.FilterForValue(outY,	231 filterValues = filterY.FilterForValue(outY,

409 &filterOffset, &filterLength);	232 &filterOffset, &filterLength);

410	233

411 // Generate output rows until we have enough to run the current filter.	234 // Generate output rows until we have enough to run the current filter.

412 while (nextXRow < filterOffset + filterLength) {	235 while (nextXRow < filterOffset + filterLength) {

413 if (convolveProcs.fConvolve4RowsHorizontally &&	236 if (SkOpts::convolve_4_rows_horizontally != nullptr &&

414 nextXRow + 3 < lastFilterOffset + lastFilterLength) {	237 nextXRow + 3 < lastFilterOffset + lastFilterLength) {

415 const unsigned char* src[4];	238 const unsigned char* src[4];

416 unsigned char* outRow[4];	239 unsigned char* outRow[4];

417 for (int i = 0; i < 4; ++i) {	240 for (int i = 0; i < 4; ++i) {

418 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride];	241 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride];

419 outRow[i] = rowBuffer.advanceRow();	242 outRow[i] = rowBuffer.advanceRow();

420 }	243 }

421 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4*rowBufferWidth);	244 SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*rowBufferWidth);

422 nextXRow += 4;	245 nextXRow += 4;

423 } else {	246 } else {

424 if (convolveProcs.fConvolveHorizontally) {	247 SkOpts::convolve_horizontally(

425 convolveProcs.fConvolveHorizontally(

426 &sourceData[(uint64_t)nextXRow * sourceByteRowStride],	248 &sourceData[(uint64_t)nextXRow * sourceByteRowStride],

427 filterX, rowBuffer.advanceRow(), sourceHasAlpha);	249 filterX, rowBuffer.advanceRow(), sourceHasAlpha);

428 } else {

429 if (sourceHasAlpha) {

430 ConvolveHorizontallyAlpha(

431 &sourceData[(uint64_t)nextXRow * sourceByteRowStride],

432 filterX, rowBuffer.advanceRow());

433 } else {

434 ConvolveHorizontallyNoAlpha(

435 &sourceData[(uint64_t)nextXRow * sourceByteRowStride],

436 filterX, rowBuffer.advanceRow());

437 }

438 }

439 nextXRow++;	250 nextXRow++;

440 }	251 }

441 }	252 }

442	253

443 // Compute where in the output image this row of final data will go.	254 // Compute where in the output image this row of final data will go.

444 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];	255 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];

445	256

446 // Get the list of rows that the circular buffer has, in order.	257 // Get the list of rows that the circular buffer has, in order.

447 int firstRowInCircularBuffer;	258 int firstRowInCircularBuffer;

448 unsigned char* const* rowsToConvolve =	259 unsigned char* const* rowsToConvolve =

449 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);	260 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);

450	261

451 // Now compute the start of the subset of those rows that the filter	262 // Now compute the start of the subset of those rows that the filter needs.

452 // needs.

453 unsigned char* const* firstRowForFilter =	263 unsigned char* const* firstRowForFilter =

454 &rowsToConvolve[filterOffset - firstRowInCircularBuffer];	264 &rowsToConvolve[filterOffset - firstRowInCircularBuffer];

455	265

456 if (convolveProcs.fConvolveVertically) {	266 SkOpts::convolve_vertically(filterValues, filterLength,

457 convolveProcs.fConvolveVertically(filterValues, filterLength,	267 firstRowForFilter,

458 firstRowForFilter,	268 filterX.numValues(), curOutputRow,

459 filterX.numValues(), curOutputRow,	269 sourceHasAlpha);

460 sourceHasAlpha);

461 } else {

462 ConvolveVertically(filterValues, filterLength,

463 firstRowForFilter,

464 filterX.numValues(), curOutputRow,

465 sourceHasAlpha);

466 }

467 }	270 }

468 return true;	271 return true;

469 }	272 }

OLD	NEW

« no previous file with comments | « src/core/SkConvolver.h ('k') | src/core/SkOpts.h » ('j') | no next file with comments »