OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
10 | 10 |
(...skipping 29 matching lines...) | |
40 r = (r*a+127)/255; | 40 r = (r*a+127)/255; |
41 g = (g*a+127)/255; | 41 g = (g*a+127)/255; |
42 b = (b*a+127)/255; | 42 b = (b*a+127)/255; |
43 dst[i] = (uint32_t)a << 24 | 43 dst[i] = (uint32_t)a << 24 |
44 | (uint32_t)b << 16 | 44 | (uint32_t)b << 16 |
45 | (uint32_t)g << 8 | 45 | (uint32_t)g << 8 |
46 | (uint32_t)r << 0; | 46 | (uint32_t)r << 0; |
47 } | 47 } |
48 } | 48 } |
49 | 49 |
50 static void swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count ) { | |
51 for (int i = 0; i < count; i++) { | |
52 uint8_t a = src[i] >> 24, | |
53 r = src[i] >> 16, | |
54 g = src[i] >> 8, | |
55 b = src[i] >> 0; | |
56 dst[i] = (uint32_t)a << 24 | |
57 | (uint32_t)b << 16 | |
58 | (uint32_t)g << 8 | |
59 | (uint32_t)r << 0; | |
60 } | |
61 } | |
62 | |
50 #if defined(SK_ARM_HAS_NEON) | 63 #if defined(SK_ARM_HAS_NEON) |
51 | 64 |
52 // Rounded divide by 255, (x + 127) / 255 | 65 // Rounded divide by 255, (x + 127) / 255 |
53 static uint8x8_t div255_round(uint16x8_t x) { | 66 static uint8x8_t div255_round(uint16x8_t x) { |
54 // result = (x + 127) / 255 | 67 // result = (x + 127) / 255 |
55 // result = (x + 127) / 256 + error1 | 68 // result = (x + 127) / 256 + error1 |
56 // | 69 // |
57 // error1 = (x + 127) / (255 * 256) | 70 // error1 = (x + 127) / (255 * 256) |
58 // error1 = (x + 127) / (256 * 256) + error2 | 71 // error1 = (x + 127) / (256 * 256) + error2 |
59 // | 72 // |
(...skipping 56 matching lines...) | |
116 } | 129 } |
117 | 130 |
118 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 131 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
119 premul_xxxa_should_swaprb<false>(dst, src, count); | 132 premul_xxxa_should_swaprb<false>(dst, src, count); |
120 } | 133 } |
121 | 134 |
122 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 135 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
123 premul_xxxa_should_swaprb<true>(dst, src, count); | 136 premul_xxxa_should_swaprb<true>(dst, src, count); |
124 } | 137 } |
125 | 138 |
139 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
140 while (count >= 16) { | |
141 // Load 16 pixels. | |
142 uint8x16x4_t bgra = vld4q_u8((const uint8_t*) src); | |
143 | |
144 // Swap r and b. | |
145 uint8x16_t r = bgra.val[2], | |
[review comment — msarett, 2016/01/14 17:08:25] No matter how I write this code, clang really want… [comment truncated in this view] | |
146 b = bgra.val[0]; | |
147 bgra.val[2] = b; | |
148 bgra.val[0] = r; | |
149 | |
150 // Store 16 pixels. | |
151 vst4q_u8((uint8_t*) dst, bgra); | |
152 src += 16; | |
153 dst += 16; | |
154 count -= 16; | |
155 } | |
156 | |
157 while (count >= 8) { | |
158 // Load 8 pixels. | |
159 uint8x8x4_t bgra = vld4_u8((const uint8_t*) src); | |
160 | |
161 // Swap r and b. | |
162 uint8x8_t r = bgra.val[2], | |
163 b = bgra.val[0]; | |
164 bgra.val[2] = b; | |
165 bgra.val[0] = r; | |
166 | |
167 // Store 8 pixels. | |
168 vst4_u8((uint8_t*) dst, bgra); | |
169 src += 8; | |
170 dst += 8; | |
171 count -= 8; | |
172 } | |
173 | |
174 swaprb_xxxa_portable(dst, src, count); | |
175 } | |
176 | |
126 #else | 177 #else |
127 | 178 |
128 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 179 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
129 premul_xxxa_portable(dst, src, count); | 180 premul_xxxa_portable(dst, src, count); |
130 } | 181 } |
131 | 182 |
132 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 183 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
133 premul_swaprb_xxxa_portable(dst, src, count); | 184 premul_swaprb_xxxa_portable(dst, src, count); |
134 } | 185 } |
135 | 186 |
187 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
188 swaprb_xxxa_portable(dst, src, count); | |
189 } | |
190 | |
136 #endif | 191 #endif |
137 | 192 |
138 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
139 for (int i = 0; i < count; i++) { | |
140 uint8_t a = src[i] >> 24, | |
141 r = src[i] >> 16, | |
142 g = src[i] >> 8, | |
143 b = src[i] >> 0; | |
144 dst[i] = (uint32_t)a << 24 | |
145 | (uint32_t)b << 16 | |
146 | (uint32_t)g << 8 | |
147 | (uint32_t)r << 0; | |
148 } | |
149 } | |
150 | |
151 } | 193 } |
152 | 194 |
153 #endif // SkSwizzler_opts_DEFINED | 195 #endif // SkSwizzler_opts_DEFINED |
OLD | NEW |