OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "SkConvolver.h" | 5 #include "SkConvolver.h" |
6 #include "SkSize.h" | 6 #include "SkSize.h" |
7 #include "SkTypes.h" | 7 #include "SkTypes.h" |
8 | 8 |
9 namespace { | 9 namespace { |
10 | 10 |
(...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
330 return &fFilterValues[filter.fDataLocation]; | 330 return &fFilterValues[filter.fDataLocation]; |
331 } | 331 } |
332 | 332 |
333 void BGRAConvolve2D(const unsigned char* sourceData, | 333 void BGRAConvolve2D(const unsigned char* sourceData, |
334 int sourceByteRowStride, | 334 int sourceByteRowStride, |
335 bool sourceHasAlpha, | 335 bool sourceHasAlpha, |
336 const SkConvolutionFilter1D& filterX, | 336 const SkConvolutionFilter1D& filterX, |
337 const SkConvolutionFilter1D& filterY, | 337 const SkConvolutionFilter1D& filterY, |
338 int outputByteRowStride, | 338 int outputByteRowStride, |
339 unsigned char* output, | 339 unsigned char* output, |
340 SkConvolutionProcs* convolveProcs, | 340 const SkConvolutionProcs& convolveProcs, |
341 bool useSimdIfPossible) { | 341 bool useSimdIfPossible) { |
342 | 342 |
343 int maxYFilterSize = filterY.maxFilter(); | 343 int maxYFilterSize = filterY.maxFilter(); |
344 | 344 |
345 // The next row in the input that we will generate a horizontally | 345 // The next row in the input that we will generate a horizontally |
346 // convolved row for. If the filter doesn't start at the beginning of the | 346 // convolved row for. If the filter doesn't start at the beginning of the |
347 // image (this is the case when we are only resizing a subset), then we | 347 // image (this is the case when we are only resizing a subset), then we |
348 // don't want to generate any output rows before that. Compute the starting | 348 // don't want to generate any output rows before that. Compute the starting |
349 // row for convolution as the first pixel for the first vertical filter. | 349 // row for convolution as the first pixel for the first vertical filter. |
350 int filterOffset, filterLength; | 350 int filterOffset, filterLength; |
351 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = | 351 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = |
352 filterY.FilterForValue(0, &filterOffset, &filterLength); | 352 filterY.FilterForValue(0, &filterOffset, &filterLength); |
353 int nextXRow = filterOffset; | 353 int nextXRow = filterOffset; |
354 | 354 |
355 // We loop over each row in the input doing a horizontal convolution. This | 355 // We loop over each row in the input doing a horizontal convolution. This |
356 // will result in a horizontally convolved image. We write the results into | 356 // will result in a horizontally convolved image. We write the results into |
357 // a circular buffer of convolved rows and do vertical convolution as rows | 357 // a circular buffer of convolved rows and do vertical convolution as rows |
358 // are available. This prevents us from having to store the entire | 358 // are available. This prevents us from having to store the entire |
359 // intermediate image and helps cache coherency. | 359 // intermediate image and helps cache coherency. |
360 // We will need four extra rows to allow horizontal convolution to be done | 360 // We will need four extra rows to allow horizontal convolution to be done |
361 // simultaneously. We also pad each row in row buffer to be aligned-up to | 361 // simultaneously. We also pad each row in row buffer to be aligned-up to |
362 // 16 bytes. | 362 // 16 bytes. |
363 // TODO(jiesun): We do not use aligned load from row buffer in vertical | 363 // TODO(jiesun): We do not use aligned load from row buffer in vertical |
364 // convolution pass yet. Somehow Windows does not like it. | 364 // convolution pass yet. Somehow Windows does not like it. |
365 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; | 365 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; |
366 int rowBufferHeight = maxYFilterSize + | 366 int rowBufferHeight = maxYFilterSize + |
367 (convolveProcs->fConvolve4RowsHorizontally ? 4 : 0); | 367 (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0); |
368 CircularRowBuffer rowBuffer(rowBufferWidth, | 368 CircularRowBuffer rowBuffer(rowBufferWidth, |
369 rowBufferHeight, | 369 rowBufferHeight, |
370 filterOffset); | 370 filterOffset); |
371 | 371 |
372 // Loop over every possible output row, processing just enough horizontal | 372 // Loop over every possible output row, processing just enough horizontal |
373 // convolutions to run each subsequent vertical convolution. | 373 // convolutions to run each subsequent vertical convolution. |
374 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); | 374 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); |
375 int numOutputRows = filterY.numValues(); | 375 int numOutputRows = filterY.numValues(); |
376 | 376 |
377 // We need to check which is the last line to convolve before we advance 4 | 377 // We need to check which is the last line to convolve before we advance 4 |
378 // lines in one iteration. | 378 // lines in one iteration. |
379 int lastFilterOffset, lastFilterLength; | 379 int lastFilterOffset, lastFilterLength; |
380 | 380 |
381 // SSE2 can access up to 3 extra pixels past the end of the | 381 // SSE2 can access up to 3 extra pixels past the end of the |
382 // buffer. At the bottom of the image, we have to be careful | 382 // buffer. At the bottom of the image, we have to be careful |
383 // not to access data past the end of the buffer. Normally | 383 // not to access data past the end of the buffer. Normally |
384 // we fall back to the C++ implementation for the last row. | 384 // we fall back to the C++ implementation for the last row. |
385 // If the last row is less than 3 pixels wide, we may have to fall | 385 // If the last row is less than 3 pixels wide, we may have to fall |
386 // back to the C++ version for more rows. Compute how many | 386 // back to the C++ version for more rows. Compute how many |
387 // rows we need to avoid the SSE implementation for here. | 387 // rows we need to avoid the SSE implementation for here. |
388 filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, | 388 filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, |
389 &lastFilterLength); | 389 &lastFilterLength); |
390 int avoidSimdRows = 1 + convolveProcs->fExtraHorizontalReads / | 390 int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads / |
391 (lastFilterOffset + lastFilterLength); | 391 (lastFilterOffset + lastFilterLength); |
392 | 392 |
393 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, | 393 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, |
394 &lastFilterLength); | 394 &lastFilterLength); |
395 | 395 |
396 for (int outY = 0; outY < numOutputRows; outY++) { | 396 for (int outY = 0; outY < numOutputRows; outY++) { |
397 filterValues = filterY.FilterForValue(outY, | 397 filterValues = filterY.FilterForValue(outY, |
398 &filterOffset, &filterLength); | 398 &filterOffset, &filterLength); |
399 | 399 |
400 // Generate output rows until we have enough to run the current filter. | 400 // Generate output rows until we have enough to run the current filter. |
401 while (nextXRow < filterOffset + filterLength) { | 401 while (nextXRow < filterOffset + filterLength) { |
402 if (convolveProcs->fConvolve4RowsHorizontally && | 402 if (convolveProcs.fConvolve4RowsHorizontally && |
403 nextXRow + 3 < lastFilterOffset + lastFilterLength - | 403 nextXRow + 3 < lastFilterOffset + lastFilterLength - |
404 avoidSimdRows) { | 404 avoidSimdRows) { |
405 const unsigned char* src[4]; | 405 const unsigned char* src[4]; |
406 unsigned char* outRow[4]; | 406 unsigned char* outRow[4]; |
407 for (int i = 0; i < 4; ++i) { | 407 for (int i = 0; i < 4; ++i) { |
408 src[i] = &sourceData[(nextXRow + i) * sourceByteRowStride]; | 408 src[i] = &sourceData[(nextXRow + i) * sourceByteRowStride]; |
409 outRow[i] = rowBuffer.advanceRow(); | 409 outRow[i] = rowBuffer.advanceRow(); |
410 } | 410 } |
411 convolveProcs->fConvolve4RowsHorizontally(src, filterX, outRow); | 411 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow); |
412 nextXRow += 4; | 412 nextXRow += 4; |
413 } else { | 413 } else { |
414 // Check if we need to avoid SSE2 for this row. | 414 // Check if we need to avoid SSE2 for this row. |
415 if (convolveProcs->fConvolveHorizontally && | 415 if (convolveProcs.fConvolveHorizontally && |
416 nextXRow < lastFilterOffset + lastFilterLength - | 416 nextXRow < lastFilterOffset + lastFilterLength - |
417 avoidSimdRows) { | 417 avoidSimdRows) { |
418 convolveProcs->fConvolveHorizontally( | 418 convolveProcs.fConvolveHorizontally( |
419 &sourceData[nextXRow * sourceByteRowStride], | 419 &sourceData[nextXRow * sourceByteRowStride], |
420 filterX, rowBuffer.advanceRow(), sourceHasAlpha); | 420 filterX, rowBuffer.advanceRow(), sourceHasAlpha); |
421 } else { | 421 } else { |
422 if (sourceHasAlpha) { | 422 if (sourceHasAlpha) { |
423 ConvolveHorizontally<true>( | 423 ConvolveHorizontally<true>( |
424 &sourceData[nextXRow * sourceByteRowStride], | 424 &sourceData[nextXRow * sourceByteRowStride], |
425 filterX, rowBuffer.advanceRow()); | 425 filterX, rowBuffer.advanceRow()); |
426 } else { | 426 } else { |
427 ConvolveHorizontally<false>( | 427 ConvolveHorizontally<false>( |
428 &sourceData[nextXRow * sourceByteRowStride], | 428 &sourceData[nextXRow * sourceByteRowStride], |
(...skipping 10 matching lines...) Expand all Loading... |
439 // Get the list of rows that the circular buffer has, in order. | 439 // Get the list of rows that the circular buffer has, in order. |
440 int firstRowInCircularBuffer; | 440 int firstRowInCircularBuffer; |
441 unsigned char* const* rowsToConvolve = | 441 unsigned char* const* rowsToConvolve = |
442 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); | 442 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); |
443 | 443 |
444 // Now compute the start of the subset of those rows that the filter | 444 // Now compute the start of the subset of those rows that the filter |
445 // needs. | 445 // needs. |
446 unsigned char* const* firstRowForFilter = | 446 unsigned char* const* firstRowForFilter = |
447 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; | 447 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; |
448 | 448 |
449 if (convolveProcs->fConvolveVertically) { | 449 if (convolveProcs.fConvolveVertically) { |
450 convolveProcs->fConvolveVertically(filterValues, filterLength, | 450 convolveProcs.fConvolveVertically(filterValues, filterLength, |
451 firstRowForFilter, | 451 firstRowForFilter, |
452 filterX.numValues(), curOutputRow
, | 452 filterX.numValues(), curOutputRow
, |
453 sourceHasAlpha); | 453 sourceHasAlpha); |
454 } else { | 454 } else { |
455 ConvolveVertically(filterValues, filterLength, | 455 ConvolveVertically(filterValues, filterLength, |
456 firstRowForFilter, | 456 firstRowForFilter, |
457 filterX.numValues(), curOutputRow, | 457 filterX.numValues(), curOutputRow, |
458 sourceHasAlpha); | 458 sourceHasAlpha); |
459 } | 459 } |
460 } | 460 } |
461 } | 461 } |
OLD | NEW |