Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(370)

Unified Diff: third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp

Issue 2576223002: NEON-ize RGBA to RGB code (Closed)
Patch Set: Copyright, fix Windows build. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp
diff --git a/third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp b/third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp
index 0a70e73db5f7e4733d051ac4b8799add20c74c29..f33de9a045b648ece7f42431214a3799698c79f5 100644
--- a/third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp
+++ b/third_party/WebKit/Source/platform/image-encoders/JPEGImageEncoder.cpp
@@ -33,6 +33,7 @@
#include "SkColorPriv.h"
#include "platform/geometry/IntSize.h"
#include "platform/graphics/ImageBuffer.h"
+#include "platform/image-encoders/RGBAtoRGB.h"
#include "wtf/CurrentTime.h"
#include "wtf/PtrUtil.h"
#include <memory>
@@ -45,6 +46,97 @@ extern "C" {
namespace blink {
+void RGBAtoRGBScalar(const unsigned char* pixels,
msarett1 2016/12/16 13:07:54 libjpeg-turbo actually supports RGBA input (in add
cavalcantii1 2016/12/17 02:35:33 Acknowledged.
+ unsigned pixelCount,
+ unsigned char* output) {
+ // Per <canvas> spec, composite the input image pixels source-over on black.
+ for (; pixelCount-- > 0; pixels += 4) {  // Consumes 4 input bytes and emits 3 output bytes per pixel.
+ unsigned char alpha = pixels[3];  // Fourth byte of each input pixel is its alpha.
+ if (alpha != 255) {  // Not fully opaque: scale each color channel by alpha.
+ *output++ = SkMulDiv255Round(pixels[0], alpha);
+ *output++ = SkMulDiv255Round(pixels[1], alpha);
+ *output++ = SkMulDiv255Round(pixels[2], alpha);
+ } else {  // Fully opaque: copy the three color channels unchanged.
+ *output++ = pixels[0];
+ *output++ = pixels[1];
+ *output++ = pixels[2];
+ }
+ }
+}
+
+// TODO(cavalcantii): use regular macro, see https://crbug.com/673067.
+#ifdef __ARM_NEON__
+void RGBAtoRGBNeon(const unsigned char* input,
+ const unsigned pixelCount,
+ unsigned char* output) {
+ const unsigned pixelsPerLoad = 16;
+ const unsigned rgbaStep = pixelsPerLoad * 4, rgbStep = pixelsPerLoad * 3;
+ // Input registers: one 128-bit register per channel (R, G, B, A).
+ uint8x16x4_t rgba;
+ // Output registers: one 128-bit register per channel (R, G, B).
+ uint8x16x3_t rgb;
+ // Intermediate registers: 16-bit products for the low/high 8 lanes.
+ uint16x8_t low, high;
+ uint8x16_t result;
+ unsigned counter;
+
+ for (counter = 0; counter + pixelsPerLoad <= pixelCount;
+ counter += pixelsPerLoad) {
+ // Reads 16 pixels at once, de-interleaving each color channel into its
+ // own 128-bit register.
+ rgba = vld4q_u8(input);
+
+ // Extracts the low/high halves of the Red register, widening to 16 bits
+ // while multiplying each lane by the alpha of the same pixel.
+ low = vmull_u8(vget_low_u8(rgba.val[0]), vget_low_u8(rgba.val[3]));
+ high = vmull_u8(vget_high_u8(rgba.val[0]), vget_high_u8(rgba.val[3]));
+
+ // Original Skia formula is: (x + (x >> 8)) >> 8, where x = a*b + 128.
+ // This computes v + ((v + 128) >> 8), a rounding shift right followed by
+ // an accumulate, in a single instruction.
+ low = vrsraq_n_u16(low, low, 8);
msarett1 2016/12/16 13:07:54 Skia has a NEON implementation of "mul and rounded
cavalcantii1 2016/12/17 02:35:33 I searched for SkSwizzler_opts.h and it seems it i
+ high = vrsraq_n_u16(high, high, 8);
+
+ // And now to the last shift and combining the vector.
+ result = vcombine_u8(vqrshrn_n_u16(low, 8), vqrshrn_n_u16(high, 8));
+
+ // Write back the Red channel to the first 128 bits register.
+ rgb.val[0] = result;
+
+ // Now the Green channel (don't trust the compiler to unroll the loop).
msarett1 2016/12/16 13:07:54 What about using an inline helper function? Ex: m
cavalcantii1 2016/12/17 02:35:33 I tested moving the pixel manipulation code to a l
+ low = vmull_u8(vget_low_u8(rgba.val[1]), vget_low_u8(rgba.val[3]));
+ high = vmull_u8(vget_high_u8(rgba.val[1]), vget_high_u8(rgba.val[3]));
+ low = vrsraq_n_u16(low, low, 8);
+ high = vrsraq_n_u16(high, high, 8);
+ result = vcombine_u8(vqrshrn_n_u16(low, 8), vqrshrn_n_u16(high, 8));
+ rgb.val[1] = result;
+
+ // Finally the Blue channel.
+ low = vmull_u8(vget_low_u8(rgba.val[2]), vget_low_u8(rgba.val[3]));
+ high = vmull_u8(vget_high_u8(rgba.val[2]), vget_high_u8(rgba.val[3]));
+ low = vrsraq_n_u16(low, low, 8);
+ high = vrsraq_n_u16(high, high, 8);
+ result = vcombine_u8(vqrshrn_n_u16(low, 8), vqrshrn_n_u16(high, 8));
+ rgb.val[2] = result;
+
+ // Write back (interleaved) results to output.
+ vst3q_u8(output, rgb);
+
+ // Advance to next elements (could be avoided loading register with
+ // increment after i.e. "vld4 {vector}, [r1]!").
+ input += rgbaStep;
+ output += rgbStep;
+ }
+
+ // Handle the tail elements.
+ unsigned remaining = pixelCount;
+ remaining -= counter;
+ if (remaining != 0) {
+ RGBAtoRGBScalar(input, remaining, output);
+ }
+}
+#endif
+
struct JPEGOutputBuffer : public jpeg_destination_mgr {
DISALLOW_NEW();
Vector<unsigned char>* output;
@@ -95,25 +187,6 @@ static void handleError(j_common_ptr common) {
longjmp(*jumpBufferPtr, -1);
}
-static void RGBAtoRGB(const unsigned char* pixels,
- unsigned pixelCount,
- unsigned char* output) {
- // Per <canvas> spec, composite the input image pixels source-over on black.
-
- for (; pixelCount-- > 0; pixels += 4) {
- unsigned char alpha = pixels[3];
- if (alpha != 255) {
- *output++ = SkMulDiv255Round(pixels[0], alpha);
- *output++ = SkMulDiv255Round(pixels[1], alpha);
- *output++ = SkMulDiv255Round(pixels[2], alpha);
- } else {
- *output++ = pixels[0];
- *output++ = pixels[1];
- *output++ = pixels[2];
- }
- }
-}
-
static void disableSubsamplingForHighQuality(jpeg_compress_struct* cinfo,
int quality) {
if (quality < 100)

Powered by Google App Engine
This is Rietveld 408576698