Chromium Code Reviews| Index: third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp |
| diff --git a/third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp b/third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp |
| index 0a70e73db5f7e4733d051ac4b8799add20c74c29..f33de9a045b648ece7f42431214a3799698c79f5 100644 |
| --- a/third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp |
| +++ b/third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp |
| @@ -33,6 +33,7 @@ |
| #include "SkColorPriv.h" |
| #include "platform/geometry/IntSize.h" |
| #include "platform/graphics/ImageBuffer.h" |
| +#include "platform/image-encoders/RGBAtoRGB.h" |
| #include "wtf/CurrentTime.h" |
| #include "wtf/PtrUtil.h" |
| #include <memory> |
| @@ -45,6 +46,97 @@ extern "C" { |
| namespace blink { |
| +void RGBAtoRGBScalar(const unsigned char* pixels, |
|
msarett1
2016/12/16 13:07:54
libjpeg-turbo actually supports RGBA input (in add
cavalcantii1
2016/12/17 02:35:33
Acknowledged.
|
| + unsigned pixelCount, |
| + unsigned char* output) { |
| + // Per <canvas> spec, composite the input image pixels source-over on black. |
| + for (; pixelCount-- > 0; pixels += 4) { |
| + unsigned char alpha = pixels[3]; |
| + if (alpha != 255) { |
| + *output++ = SkMulDiv255Round(pixels[0], alpha); |
| + *output++ = SkMulDiv255Round(pixels[1], alpha); |
| + *output++ = SkMulDiv255Round(pixels[2], alpha); |
| + } else { |
| + *output++ = pixels[0]; |
| + *output++ = pixels[1]; |
| + *output++ = pixels[2]; |
| + } |
| + } |
| +} |
| + |
| +// TODO(cavalcantii): use regular macro, see https://crbug.com/673067. |
| +#ifdef __ARM_NEON__ |
| +void RGBAtoRGBNeon(const unsigned char* input, |
| + const unsigned pixelCount, |
| + unsigned char* output) { |
| + const unsigned pixelsPerLoad = 16; |
| + const unsigned rgbaStep = pixelsPerLoad * 4, rgbStep = pixelsPerLoad * 3; |
| + // Input registers. |
| + uint8x16x4_t rgba; |
| + // Output registers. |
| + uint8x16x3_t rgb; |
| + // Intermediate registers. |
| + uint16x8_t low, high; |
| + uint8x16_t result; |
| + unsigned counter; |
| + |
| + for (counter = 0; counter + pixelsPerLoad <= pixelCount; |
| + counter += pixelsPerLoad) { |
| + // Reads 16 pixels at once, with each color channel going into a different |
| + // 128-bit register. |
| + rgba = vld4q_u8(input); |
| + |
| + // Extracts the low/high part of the 128 bits, multiplying by the |
| + // respective alpha channel. |
| + low = vmull_u8(vget_low_u8(rgba.val[0]), vget_low_u8(rgba.val[3])); |
| + high = vmull_u8(vget_high_u8(rgba.val[0]), vget_high_u8(rgba.val[3])); |
| + |
| + // Original Skia formula is: (x + (x >> 8)) >> 8, where x = a*b + 128. |
| + // This shifts right with rounding and accumulates in a single |
| + // instruction. |
| + low = vrsraq_n_u16(low, low, 8); |
|
msarett1
2016/12/16 13:07:54
Skia has a NEON implementation of "mul and rounded
cavalcantii1
2016/12/17 02:35:33
I searched for SkSwizzler_opts.h and it seems it i
|
| + high = vrsraq_n_u16(high, high, 8); |
| + |
| + // Now do the final shift and combine both halves into a single vector. |
| + result = vcombine_u8(vqrshrn_n_u16(low, 8), vqrshrn_n_u16(high, 8)); |
| + |
| + // Write back the Red channel to the first 128-bit register. |
| + rgb.val[0] = result; |
| + |
| + // Now the Green channel (don't trust the compiler to unroll the loop). |
|
msarett1
2016/12/16 13:07:54
What about using an inline helper function? Ex: m
cavalcantii1
2016/12/17 02:35:33
I tested moving the pixel manipulation code to a l
|
| + low = vmull_u8(vget_low_u8(rgba.val[1]), vget_low_u8(rgba.val[3])); |
| + high = vmull_u8(vget_high_u8(rgba.val[1]), vget_high_u8(rgba.val[3])); |
| + low = vrsraq_n_u16(low, low, 8); |
| + high = vrsraq_n_u16(high, high, 8); |
| + result = vcombine_u8(vqrshrn_n_u16(low, 8), vqrshrn_n_u16(high, 8)); |
| + rgb.val[1] = result; |
| + |
| + // Finally the Blue channel. |
| + low = vmull_u8(vget_low_u8(rgba.val[2]), vget_low_u8(rgba.val[3])); |
| + high = vmull_u8(vget_high_u8(rgba.val[2]), vget_high_u8(rgba.val[3])); |
| + low = vrsraq_n_u16(low, low, 8); |
| + high = vrsraq_n_u16(high, high, 8); |
| + result = vcombine_u8(vqrshrn_n_u16(low, 8), vqrshrn_n_u16(high, 8)); |
| + rgb.val[2] = result; |
| + |
| + // Write back (interleaved) results to output. |
| + vst3q_u8(output, rgb); |
| + |
| + // Advance to the next elements (this could be avoided by loading with |
| + // post-increment addressing, i.e. "vld4 {vector}, [r1]!"). |
| + input += rgbaStep; |
| + output += rgbStep; |
| + } |
| + |
| + // Handle the tail elements. |
| + unsigned remaining = pixelCount; |
| + remaining -= counter; |
| + if (remaining != 0) { |
| + RGBAtoRGBScalar(input, remaining, output); |
| + } |
| +} |
| +#endif |
| + |
| struct JPEGOutputBuffer : public jpeg_destination_mgr { |
| DISALLOW_NEW(); |
| Vector<unsigned char>* output; |
| @@ -95,25 +187,6 @@ static void handleError(j_common_ptr common) { |
| longjmp(*jumpBufferPtr, -1); |
| } |
| -static void RGBAtoRGB(const unsigned char* pixels, |
| - unsigned pixelCount, |
| - unsigned char* output) { |
| - // Per <canvas> spec, composite the input image pixels source-over on black. |
| - |
| - for (; pixelCount-- > 0; pixels += 4) { |
| - unsigned char alpha = pixels[3]; |
| - if (alpha != 255) { |
| - *output++ = SkMulDiv255Round(pixels[0], alpha); |
| - *output++ = SkMulDiv255Round(pixels[1], alpha); |
| - *output++ = SkMulDiv255Round(pixels[2], alpha); |
| - } else { |
| - *output++ = pixels[0]; |
| - *output++ = pixels[1]; |
| - *output++ = pixels[2]; |
| - } |
| - } |
| -} |
| - |
| static void disableSubsamplingForHighQuality(jpeg_compress_struct* cinfo, |
| int quality) { |
| if (quality < 100) |