Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Unified Diff: src/opts/SkSwizzler_opts.h

Issue 1618003002: Use NEON optimizations for RGB -> RGB(FF) or BGR(FF) in SkSwizzler (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« src/opts/SkOpts_ssse3.cpp ('K') | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/opts/SkSwizzler_opts.h
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h
index b0cf4cad5324eae5eb64fcce75eb755d253a02d5..8e1de35ecbc44dc4f4f2037d63fa6064b70f713e 100644
--- a/src/opts/SkSwizzler_opts.h
+++ b/src/opts/SkSwizzler_opts.h
@@ -60,6 +60,34 @@ static void swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count
}
}
+// Expands count packed 3-byte pixels from src into 4-byte pixels in dst.
+// The three source bytes land in bits 0-7, 8-15 and 16-23 (in the order they
+// are read) and bits 24-31 are set to 0xFF.
+// src is declared as uint32_t[] but is only ever read one byte at a time,
+// three bytes per pixel.
+static void xxx_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
+ int i8 = 0;
+ // Keep the const qualifier when reinterpreting src as bytes.
+ const uint8_t* src8 = (const uint8_t*) src;
msarett 2016/01/21 22:09:07 It's a little strange that we pass in src as a poi
+ for (int i32 = 0; i32 < count; i32++) {
+ uint8_t b = src8[i8++],
+ g = src8[i8++],
+ r = src8[i8++];
+ dst[i32] = (uint32_t) b << 0
+ | (uint32_t) g << 8
+ | (uint32_t) r << 16
+ | (uint32_t)0xFF << 24;
+ }
+}
+
+// Expands count packed 3-byte pixels from src into 4-byte pixels in dst while
+// swapping the first and third source bytes: the third byte read lands in
+// bits 0-7, the second in bits 8-15, the first in bits 16-23, and bits 24-31
+// are set to 0xFF.
+// src is declared as uint32_t[] but is only ever read one byte at a time,
+// three bytes per pixel.
+static void xxx_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
+ int i8 = 0;
+ // Keep the const qualifier when reinterpreting src as bytes.
+ const uint8_t* src8 = (const uint8_t*) src;
+ for (int i32 = 0; i32 < count; i32++) {
+ uint8_t b = src8[i8++],
+ g = src8[i8++],
+ r = src8[i8++];
+ dst[i32] = (uint32_t) r << 0
+ | (uint32_t) g << 8
+ | (uint32_t) b << 16
+ | (uint32_t)0xFF << 24;
+ }
+}
+
#if defined(SK_ARM_HAS_NEON)
// Rounded divide by 255, (x + 127) / 255
@@ -168,6 +196,88 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
swaprb_xxxa_portable(dst, src, count);
}
+// NEON version: expands count packed 3-byte pixels from src into 4-byte pixels
+// in dst, keeping the channel order and writing 0xFF as the fourth byte.
+// Handles 16 pixels per iteration, then at most one group of 8, then hands the
+// remaining pixels to xxx_xxxa_portable.
+// src is declared as uint32_t[] but is read as a packed byte stream.
+static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) {
+ const uint8_t* src8 = (const uint8_t*) src;
+ while (count >= 16) {
+ // Load 16 pixels.
+ uint8x16x3_t bgr = vld3q_u8(src8);
+
+ // Insert an opaque alpha channel.
+ uint8x16x4_t bgra;
+ bgra.val[0] = bgr.val[0];
+ bgra.val[1] = bgr.val[1];
+ bgra.val[2] = bgr.val[2];
+ bgra.val[3] = vdupq_n_u8(0xFF);
+
+ // Store 16 pixels.
+ vst4q_u8((uint8_t*) dst, bgra);
+ src8 += 48;
+ dst += 16;
+ count -= 16;
+ }
+
+ if (count >= 8) {
+ // Load 8 pixels.
+ uint8x8x3_t bgr = vld3_u8(src8);
+
+ // Insert an opaque alpha channel.
+ uint8x8x4_t bgra;
+ bgra.val[0] = bgr.val[0];
+ bgra.val[1] = bgr.val[1];
+ bgra.val[2] = bgr.val[2];
+ bgra.val[3] = vdup_n_u8(0xFF);
+
+ // Store 8 pixels.
+ vst4_u8((uint8_t*) dst, bgra);
+ src8 += 24;
+ dst += 8;
+ count -= 8;
+ }
+
+ // Convert the leftover pixels starting at src8, not src: only src8 was
+ // advanced above, so src still points at the very first input pixel.
+ xxx_xxxa_portable(dst, (const uint32_t*) src8, count);
+}
+
+// NEON version: expands count packed 3-byte pixels from src into 4-byte pixels
+// in dst, swapping the first and third channel of every pixel and writing 0xFF
+// as the fourth byte.
+// Handles 16 pixels per iteration, then at most one group of 8, then hands the
+// remaining pixels to xxx_swaprb_xxxa_portable.
+// src is declared as uint32_t[] but is read as a packed byte stream.
+static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
+ const uint8_t* src8 = (const uint8_t*) src;
+ while (count >= 16) {
+ // Load 16 pixels.
+ uint8x16x3_t bgr = vld3q_u8(src8);
+
+ // Swap r and b and insert an opaque alpha channel.
+ uint8x16x4_t rgba;
+ rgba.val[0] = bgr.val[2];
+ rgba.val[1] = bgr.val[1];
+ rgba.val[2] = bgr.val[0];
+ rgba.val[3] = vdupq_n_u8(0xFF);
+
+ // Store 16 pixels.
+ vst4q_u8((uint8_t*) dst, rgba);
+ src8 += 48;
+ dst += 16;
+ count -= 16;
+ }
+
+ if (count >= 8) {
+ // Load 8 pixels.
+ uint8x8x3_t bgr = vld3_u8(src8);
+
+ // Swap r and b and insert an opaque alpha channel.
+ uint8x8x4_t rgba;
+ rgba.val[0] = bgr.val[2];
+ rgba.val[1] = bgr.val[1];
+ rgba.val[2] = bgr.val[0];
+ rgba.val[3] = vdup_n_u8(0xFF);
+
+ // Store 8 pixels.
+ vst4_u8((uint8_t*) dst, rgba);
+ src8 += 24;
+ dst += 8;
+ count -= 8;
+ }
+
+ // Convert the leftover pixels starting at src8, not src: only src8 was
+ // advanced above, so src still points at the very first input pixel.
+ xxx_swaprb_xxxa_portable(dst, (const uint32_t*) src8, count);
+}
+
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
template <bool kSwapRB>
@@ -264,6 +374,14 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
swaprb_xxxa_portable(dst, src, count);
}
+// No vectorized code in this variant: forwards all count pixels straight to
+// xxx_xxxa_portable.
+static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) {
+ xxx_xxxa_portable(dst, src, count);
+}
+
+// No vectorized code in this variant: forwards all count pixels straight to
+// xxx_swaprb_xxxa_portable.
+static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
+ xxx_swaprb_xxxa_portable(dst, src, count);
+}
+
#else
static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) {
@@ -278,6 +396,14 @@ static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
swaprb_xxxa_portable(dst, src, count);
}
+// Fallback variant: forwards all count pixels straight to xxx_xxxa_portable.
+static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) {
+ xxx_xxxa_portable(dst, src, count);
+}
+
+// Fallback variant: forwards all count pixels straight to
+// xxx_swaprb_xxxa_portable.
+static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
+ xxx_swaprb_xxxa_portable(dst, src, count);
+}
+
#endif
}
« src/opts/SkOpts_ssse3.cpp ('K') | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698