Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/opts/SkSwizzler_opts.h

Issue 1577703006: Optimized premultiplying swizzles for NEON (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Remove unnecessary if statement Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkOpts_neon.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #ifndef SkSwizzler_opts_DEFINED
9 #define SkSwizzler_opts_DEFINED
10
11 #include "SkColorPriv.h"
12
13 namespace SK_OPTS_NS {
14
15 // These variable names in these functions just pretend the input is BGRA.
16 // They work fine with both RGBA and BGRA.
17
/**
 * Premultiplies the three color channels of each 32-bit pixel by its alpha.
 *
 * Alpha is taken from bits 31..24 and copied through unchanged.  Each of the
 * other three bytes c is replaced by (c * a + 127) / 255.  Channel positions
 * are preserved, so the routine does not care whether the input is RGBA or
 * BGRA.
 *
 * Every pixel is read in full before its destination slot is written, so
 * dst == src (in-place conversion) works.
 *
 * @param dst    receives count converted pixels
 * @param src    count source pixels
 * @param count  number of pixels to convert; nothing is done when count <= 0
 */
static void premul_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    for (int i = 0; i < count; i++) {
        const uint32_t px = src[i];
        const uint32_t a = px >> 24;

        // c * a + 127 is at most 255 * 255 + 127 = 65152, so the 32-bit
        // arithmetic cannot overflow and every quotient fits in one byte.
        const uint32_t r = (((px >> 16) & 0xFF) * a + 127) / 255;
        const uint32_t g = (((px >>  8) & 0xFF) * a + 127) / 255;
        const uint32_t b = (((px >>  0) & 0xFF) * a + 127) / 255;

        dst[i] = a << 24
               | r << 16
               | g <<  8
               | b <<  0;
    }
}
33
/**
 * Premultiplies each 32-bit pixel by its alpha and exchanges the two outer
 * color bytes.
 *
 * Alpha is taken from bits 31..24 and copied through unchanged.  Each of the
 * other three bytes c becomes (c * a + 127) / 255; the result for the byte
 * that was at bits 23..16 is stored at bits 7..0 and vice versa, while the
 * byte at bits 15..8 stays where it was.
 *
 * Every pixel is read in full before its destination slot is written, so
 * dst == src (in-place conversion) works.
 *
 * @param dst    receives count converted pixels
 * @param src    count source pixels
 * @param count  number of pixels to convert; nothing is done when count <= 0
 */
static void premul_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    for (int i = 0; i < count; i++) {
        const uint32_t px = src[i];
        const uint32_t a = px >> 24;

        // c * a + 127 is at most 255 * 255 + 127 = 65152, so the 32-bit
        // arithmetic cannot overflow and every quotient fits in one byte.
        const uint32_t r = (((px >> 16) & 0xFF) * a + 127) / 255;
        const uint32_t g = (((px >>  8) & 0xFF) * a + 127) / 255;
        const uint32_t b = (((px >>  0) & 0xFF) * a + 127) / 255;

        // Same layout as the source except that r and b trade places.
        dst[i] = a << 24
               | b << 16
               | g <<  8
               | r <<  0;
    }
}
49
50 #if defined(SK_ARM_HAS_NEON)
51
52 // Rounded divide by 255, (x + 127) / 255
53 static uint8x8_t div255_round(uint16x8_t x) {
54 // result = (x + 127) / 255
55 // result = (x + 127) / 256 + error1
56 //
57 // error1 = (x + 127) / (255 * 256)
58 // error1 = (x + 127) / (256 * 256) + error2
59 //
60 // error2 = (x + 127) / (255 * 256 * 256)
61 //
62 // The maximum value of error2 is too small to matter. Thus:
63 // result = (x + 127) / 256 + (x + 127) / (256 * 256)
64 // result = ((x + 127) / 256 + x + 127) / 256
65 // result = ((x + 127) >> 8 + x + 127) >> 8
66 //
67 // Use >>> to represent "rounded right shift" which, conveniently,
68 // NEON supports in one instruction.
69 // result = ((x >>> 8) + x) >>> 8
70 //
71 // Note that the second right shift is actually performed as an
72 // "add, round, and narrow back to 8-bits" instruction.
73 return vraddhn_u16(x, vrshrq_n_u16(x, 8));
74 }
75
76 // Scale a byte by another, (x * y + 127) / 255
77 static uint8x8_t scale(uint8x8_t x, uint8x8_t y) {
78 return div255_round(vmull_u8(x, y));
79 }
80
81 template <bool kSwapRB>
82 static void premul_xxxa_should_swaprb(uint32_t dst[], const uint32_t src[], int count) {
83 while (count >= 8) {
84 // Load 8 pixels.
85 uint8x8x4_t bgra = vld4_u8((const uint8_t*) src);
86
87 uint8x8_t a = bgra.val[3],
88 r = bgra.val[2],
89 g = bgra.val[1],
90 b = bgra.val[0];
91
92 // Premultiply.
93 r = scale(r, a);
94 g = scale(g, a);
95 b = scale(b, a);
96
97 // Store 8 premultiplied pixels.
98 if (kSwapRB) {
99 bgra.val[2] = b;
100 bgra.val[1] = g;
101 bgra.val[0] = r;
102 } else {
103 bgra.val[2] = r;
104 bgra.val[1] = g;
105 bgra.val[0] = b;
106 }
107 vst4_u8((uint8_t*) dst, bgra);
108 src += 8;
109 dst += 8;
110 count -= 8;
111 }
112
113 // Call portable code to finish up the tail of [0,8) pixels.
114 auto proc = kSwapRB ? premul_swaprb_xxxa_portable : premul_xxxa_portable;
115 proc(dst, src, count);
116 }
117
118 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) {
119 premul_xxxa_should_swaprb<false>(dst, src, count);
120 }
121
122 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
123 premul_xxxa_should_swaprb<true>(dst, src, count);
124 }
125
126 #else
127
128 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) {
129 premul_xxxa_portable(dst, src, count);
130 }
131
132 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
133 premul_swaprb_xxxa_portable(dst, src, count);
134 }
135
136 #endif
137
/**
 * Exchanges the two outer color bytes of each 32-bit pixel without touching
 * the values themselves (no premultiplication).
 *
 * The byte at bits 31..24 (alpha) and the byte at bits 15..8 stay in place;
 * the bytes at bits 23..16 and bits 7..0 trade places.
 *
 * Every pixel is read in full before its destination slot is written, so
 * dst == src (in-place conversion) works.
 *
 * @param dst    receives count converted pixels
 * @param src    count source pixels
 * @param count  number of pixels to convert; nothing is done when count <= 0
 */
static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
    for (int i = 0; i < count; i++) {
        const uint32_t px = src[i];
        dst[i] = (px & 0xFF00FF00u)           // alpha and middle byte unchanged
               | ((px & 0x00FF0000u) >> 16)   // bits 23..16 move down to 7..0
               | ((px & 0x000000FFu) << 16);  // bits 7..0 move up to 23..16
    }
}
150
151 }
152
153 #endif // SkSwizzler_opts_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkOpts_neon.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698