Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
| 9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
| 10 | 10 |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 40 r = (r*a+127)/255; | 40 r = (r*a+127)/255; |
| 41 g = (g*a+127)/255; | 41 g = (g*a+127)/255; |
| 42 b = (b*a+127)/255; | 42 b = (b*a+127)/255; |
| 43 dst[i] = (uint32_t)a << 24 | 43 dst[i] = (uint32_t)a << 24 |
| 44 | (uint32_t)b << 16 | 44 | (uint32_t)b << 16 |
| 45 | (uint32_t)g << 8 | 45 | (uint32_t)g << 8 |
| 46 | (uint32_t)r << 0; | 46 | (uint32_t)r << 0; |
| 47 } | 47 } |
| 48 } | 48 } |
| 49 | 49 |
| 50 static void swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) { | |
| 51 for (int i = 0; i < count; i++) { | |
| 52 uint8_t a = src[i] >> 24, | |
| 53 r = src[i] >> 16, | |
| 54 g = src[i] >> 8, | |
| 55 b = src[i] >> 0; | |
| 56 dst[i] = (uint32_t)a << 24 | |
| 57 | (uint32_t)b << 16 | |
| 58 | (uint32_t)g << 8 | |
| 59 | (uint32_t)r << 0; | |
| 60 } | |
| 61 } | |
| 62 | |
| 50 #if defined(SK_ARM_HAS_NEON) | 63 #if defined(SK_ARM_HAS_NEON) |
| 51 | 64 |
| 52 // Rounded divide by 255, (x + 127) / 255 | 65 // Rounded divide by 255, (x + 127) / 255 |
| 53 static uint8x8_t div255_round(uint16x8_t x) { | 66 static uint8x8_t div255_round(uint16x8_t x) { |
| 54 // result = (x + 127) / 255 | 67 // result = (x + 127) / 255 |
| 55 // result = (x + 127) / 256 + error1 | 68 // result = (x + 127) / 256 + error1 |
| 56 // | 69 // |
| 57 // error1 = (x + 127) / (255 * 256) | 70 // error1 = (x + 127) / (255 * 256) |
| 58 // error1 = (x + 127) / (256 * 256) + error2 | 71 // error1 = (x + 127) / (256 * 256) + error2 |
| 59 // | 72 // |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 116 } | 129 } |
| 117 | 130 |
| 118 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 131 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| 119 premul_xxxa_should_swaprb<false>(dst, src, count); | 132 premul_xxxa_should_swaprb<false>(dst, src, count); |
| 120 } | 133 } |
| 121 | 134 |
| 122 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 135 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| 123 premul_xxxa_should_swaprb<true>(dst, src, count); | 136 premul_xxxa_should_swaprb<true>(dst, src, count); |
| 124 } | 137 } |
| 125 | 138 |
| 139 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
| 140 while (count >= 16) { | |
| 141 // Load 16 pixels. | |
| 142 uint8x16x4_t bgra = vld4q_u8((const uint8_t*) src); | |
| 143 | |
| 144 // Swap r and b. | |
| 145 uint8x16_t r = bgra.val[2],<br>[Review comment — msarett, 2016/01/14 17:08:25: "No matter how I write this code, clang really want…" (comment truncated in source)] | |
| 146 b = bgra.val[0]; | |
| 147 bgra.val[2] = b; | |
| 148 bgra.val[0] = r; | |
| 149 | |
| 150 // Store 16 pixels. | |
| 151 vst4q_u8((uint8_t*) dst, bgra); | |
| 152 src += 16; | |
| 153 dst += 16; | |
| 154 count -= 16; | |
| 155 } | |
| 156 | |
| 157 while (count >= 8) { | |
| 158 // Load 8 pixels. | |
| 159 uint8x8x4_t bgra = vld4_u8((const uint8_t*) src); | |
| 160 | |
| 161 // Swap r and b. | |
| 162 uint8x8_t r = bgra.val[2], | |
| 163 b = bgra.val[0]; | |
| 164 bgra.val[2] = b; | |
| 165 bgra.val[0] = r; | |
| 166 | |
| 167 // Store 8 pixels. | |
| 168 vst4_u8((uint8_t*) dst, bgra); | |
| 169 src += 8; | |
| 170 dst += 8; | |
| 171 count -= 8; | |
| 172 } | |
| 173 | |
| 174 swaprb_xxxa_portable(dst, src, count); | |
| 175 } | |
| 176 | |
| 126 #else | 177 #else |
| 127 | 178 |
| 128 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 179 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| 129 premul_xxxa_portable(dst, src, count); | 180 premul_xxxa_portable(dst, src, count); |
| 130 } | 181 } |
| 131 | 182 |
| 132 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 183 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| 133 premul_swaprb_xxxa_portable(dst, src, count); | 184 premul_swaprb_xxxa_portable(dst, src, count); |
| 134 } | 185 } |
| 135 | 186 |
| 187 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
| 188 swaprb_xxxa_portable(dst, src, count); | |
| 189 } | |
| 190 | |
| 136 #endif | 191 #endif |
| 137 | 192 |
| 138 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
| 139 for (int i = 0; i < count; i++) { | |
| 140 uint8_t a = src[i] >> 24, | |
| 141 r = src[i] >> 16, | |
| 142 g = src[i] >> 8, | |
| 143 b = src[i] >> 0; | |
| 144 dst[i] = (uint32_t)a << 24 | |
| 145 | (uint32_t)b << 16 | |
| 146 | (uint32_t)g << 8 | |
| 147 | (uint32_t)r << 0; | |
| 148 } | |
| 149 } | |
| 150 | |
| 151 } | 193 } |
| 152 | 194 |
| 153 #endif // SkSwizzler_opts_DEFINED | 195 #endif // SkSwizzler_opts_DEFINED |
| OLD | NEW |