Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: third_party/libwebp/dsp/lossless_neon.c

Issue 1546003002: libwebp: update to 0.5.0 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebase Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libwebp/dsp/lossless_mips_dsp_r2.c ('k') | third_party/libwebp/dsp/lossless_sse2.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 Google Inc. All Rights Reserved. 1 // Copyright 2014 Google Inc. All Rights Reserved.
2 // 2 //
3 // Use of this source code is governed by a BSD-style license 3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source 4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may 6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree. 7 // be found in the AUTHORS file in the root of the source tree.
8 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
9 // 9 //
10 // NEON variant of methods for lossless decoder 10 // NEON variant of methods for lossless decoder
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 vst1_u8(dst + 8, vtbl4_u8(pixels, shuffle1)); 133 vst1_u8(dst + 8, vtbl4_u8(pixels, shuffle1));
134 vst1_u8(dst + 16, vtbl4_u8(pixels, shuffle2)); 134 vst1_u8(dst + 16, vtbl4_u8(pixels, shuffle2));
135 dst += 8 * 3; 135 dst += 8 * 3;
136 } 136 }
137 VP8LConvertBGRAToRGB_C(src, num_pixels & 7, dst); // left-overs 137 VP8LConvertBGRAToRGB_C(src, num_pixels & 7, dst); // left-overs
138 } 138 }
139 139
140 #endif // !WORK_AROUND_GCC 140 #endif // !WORK_AROUND_GCC
141 141
142 //------------------------------------------------------------------------------ 142 //------------------------------------------------------------------------------
143
144 #ifdef USE_INTRINSICS
145
146 static WEBP_INLINE uint32_t Average2(const uint32_t* const a,
147 const uint32_t* const b) {
148 const uint8x8_t a0 = vreinterpret_u8_u64(vcreate_u64(*a));
149 const uint8x8_t b0 = vreinterpret_u8_u64(vcreate_u64(*b));
150 const uint8x8_t avg = vhadd_u8(a0, b0);
151 return vget_lane_u32(vreinterpret_u32_u8(avg), 0);
152 }
153
154 static WEBP_INLINE uint32_t Average3(const uint32_t* const a,
155 const uint32_t* const b,
156 const uint32_t* const c) {
157 const uint8x8_t a0 = vreinterpret_u8_u64(vcreate_u64(*a));
158 const uint8x8_t b0 = vreinterpret_u8_u64(vcreate_u64(*b));
159 const uint8x8_t c0 = vreinterpret_u8_u64(vcreate_u64(*c));
160 const uint8x8_t avg1 = vhadd_u8(a0, c0);
161 const uint8x8_t avg2 = vhadd_u8(avg1, b0);
162 return vget_lane_u32(vreinterpret_u32_u8(avg2), 0);
163 }
164
165 static WEBP_INLINE uint32_t Average4(const uint32_t* const a,
166 const uint32_t* const b,
167 const uint32_t* const c,
168 const uint32_t* const d) {
169 const uint8x8_t a0 = vreinterpret_u8_u64(vcreate_u64(*a));
170 const uint8x8_t b0 = vreinterpret_u8_u64(vcreate_u64(*b));
171 const uint8x8_t c0 = vreinterpret_u8_u64(vcreate_u64(*c));
172 const uint8x8_t d0 = vreinterpret_u8_u64(vcreate_u64(*d));
173 const uint8x8_t avg1 = vhadd_u8(a0, b0);
174 const uint8x8_t avg2 = vhadd_u8(c0, d0);
175 const uint8x8_t avg3 = vhadd_u8(avg1, avg2);
176 return vget_lane_u32(vreinterpret_u32_u8(avg3), 0);
177 }
178
179 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
180 return Average3(&left, top + 0, top + 1);
181 }
182
183 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
184 return Average2(&left, top - 1);
185 }
186
187 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
188 return Average2(&left, top + 0);
189 }
190
191 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
192 (void)left;
193 return Average2(top - 1, top + 0);
194 }
195
196 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
197 (void)left;
198 return Average2(top + 0, top + 1);
199 }
200
201 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
202 return Average4(&left, top - 1, top + 0, top + 1);
203 }
204
205 //------------------------------------------------------------------------------
206
207 static WEBP_INLINE uint32_t Select(const uint32_t* const c0,
208 const uint32_t* const c1,
209 const uint32_t* const c2) {
210 const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0));
211 const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1));
212 const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2));
213 const uint8x8_t bc = vabd_u8(p1, p2); // |b-c|
214 const uint8x8_t ac = vabd_u8(p0, p2); // |a-c|
215 const int16x4_t sum_bc = vreinterpret_s16_u16(vpaddl_u8(bc));
216 const int16x4_t sum_ac = vreinterpret_s16_u16(vpaddl_u8(ac));
217 const int32x2_t diff = vpaddl_s16(vsub_s16(sum_bc, sum_ac));
218 const int32_t pa_minus_pb = vget_lane_s32(diff, 0);
219 return (pa_minus_pb <= 0) ? *c0 : *c1;
220 }
221
222 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
223 return Select(top + 0, &left, top - 1);
224 }
225
226 static WEBP_INLINE uint32_t ClampedAddSubtractFull(const uint32_t* const c0,
227 const uint32_t* const c1,
228 const uint32_t* const c2) {
229 const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0));
230 const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1));
231 const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2));
232 const uint16x8_t sum0 = vaddl_u8(p0, p1); // add and widen
233 const uint16x8_t sum1 = vqsubq_u16(sum0, vmovl_u8(p2)); // widen and subtract
234 const uint8x8_t out = vqmovn_u16(sum1); // narrow and clamp
235 return vget_lane_u32(vreinterpret_u32_u8(out), 0);
236 }
237
238 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
239 return ClampedAddSubtractFull(&left, top + 0, top - 1);
240 }
241
242 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(const uint32_t* const c0,
243 const uint32_t* const c1,
244 const uint32_t* const c2) {
245 const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0));
246 const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1));
247 const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2));
248 const uint8x8_t avg = vhadd_u8(p0, p1); // Average(c0,c1)
249 const uint8x8_t ab = vshr_n_u8(vqsub_u8(avg, p2), 1); // (a-b)>>1 saturated
250 const uint8x8_t ba = vshr_n_u8(vqsub_u8(p2, avg), 1); // (b-a)>>1 saturated
251 const uint8x8_t out = vqsub_u8(vqadd_u8(avg, ab), ba);
252 return vget_lane_u32(vreinterpret_u32_u8(out), 0);
253 }
254
255 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
256 return ClampedAddSubtractHalf(&left, top + 0, top - 1);
257 }
258
259 //------------------------------------------------------------------------------
260 // Subtract-Green Transform 143 // Subtract-Green Transform
261 144
262 // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use 145 // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
263 // non-standard versions there. 146 // non-standard versions there.
264 #if defined(__APPLE__) && defined(__aarch64__) && \ 147 #if defined(__APPLE__) && defined(__aarch64__) && \
265 defined(__apple_build_version__) && (__apple_build_version__< 6020037) 148 defined(__apple_build_version__) && (__apple_build_version__< 6020037)
266 #define USE_VTBLQ 149 #define USE_VTBLQ
267 #endif 150 #endif
268 151
269 #ifdef USE_VTBLQ 152 #ifdef USE_VTBLQ
(...skipping 11 matching lines...) Expand all
281 // 255 = byte will be zeroed 164 // 255 = byte will be zeroed
282 static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 }; 165 static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 };
283 166
284 static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb, 167 static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
285 const uint8x8_t shuffle) { 168 const uint8x8_t shuffle) {
286 return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle), 169 return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
287 vtbl1_u8(vget_high_u8(argb), shuffle)); 170 vtbl1_u8(vget_high_u8(argb), shuffle));
288 } 171 }
289 #endif // USE_VTBLQ 172 #endif // USE_VTBLQ
290 173
291 static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) { 174 static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
292 const uint32_t* const end = argb_data + (num_pixels & ~3); 175 const uint32_t* const end = argb_data + (num_pixels & ~3);
293 #ifdef USE_VTBLQ 176 #ifdef USE_VTBLQ
294 const uint8x16_t shuffle = vld1q_u8(kGreenShuffle); 177 const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
295 #else 178 #else
296 const uint8x8_t shuffle = vld1_u8(kGreenShuffle); 179 const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
297 #endif 180 #endif
298 for (; argb_data < end; argb_data += 4) { 181 for (; argb_data < end; argb_data += 4) {
299 const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data); 182 const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
300 const uint8x16_t greens = DoGreenShuffle(argb, shuffle); 183 const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
301 vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens));
302 }
303 // fallthrough and finish off with plain-C
304 VP8LSubtractGreenFromBlueAndRed_C(argb_data, num_pixels & 3);
305 }
306
307 static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
308 const uint32_t* const end = argb_data + (num_pixels & ~3);
309 #ifdef USE_VTBLQ
310 const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
311 #else
312 const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
313 #endif
314 for (; argb_data < end; argb_data += 4) {
315 const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
316 const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
317 vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens)); 184 vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens));
318 } 185 }
319 // fallthrough and finish off with plain-C 186 // fallthrough and finish off with plain-C
320 VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3); 187 VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
321 } 188 }
322 189
190 //------------------------------------------------------------------------------
191 // Color Transform
192
193 static void TransformColorInverse(const VP8LMultipliers* const m,
194 uint32_t* argb_data, int num_pixels) {
195 // sign-extended multiplying constants, pre-shifted by 6.
196 #define CST(X) (((int16_t)(m->X << 8)) >> 6)
197 const int16_t rb[8] = {
198 CST(green_to_blue_), CST(green_to_red_),
199 CST(green_to_blue_), CST(green_to_red_),
200 CST(green_to_blue_), CST(green_to_red_),
201 CST(green_to_blue_), CST(green_to_red_)
202 };
203 const int16x8_t mults_rb = vld1q_s16(rb);
204 const int16_t b2[8] = {
205 0, CST(red_to_blue_), 0, CST(red_to_blue_),
206 0, CST(red_to_blue_), 0, CST(red_to_blue_),
207 };
208 const int16x8_t mults_b2 = vld1q_s16(b2);
209 #undef CST
210 #ifdef USE_VTBLQ
211 static const uint8_t kg0g0[16] = {
212 255, 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13
213 };
214 const uint8x16_t shuffle = vld1q_u8(kg0g0);
215 #else
216 static const uint8_t k0g0g[8] = { 255, 1, 255, 1, 255, 5, 255, 5 };
217 const uint8x8_t shuffle = vld1_u8(k0g0g);
218 #endif
219 const uint32x4_t mask_ag = vdupq_n_u32(0xff00ff00u);
220 int i;
221 for (i = 0; i + 4 <= num_pixels; i += 4) {
222 const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i));
223 const uint32x4_t a0g0 = vandq_u32(vreinterpretq_u32_u8(in), mask_ag);
224 // 0 g 0 g
225 const uint8x16_t greens = DoGreenShuffle(in, shuffle);
226 // x dr x db1
227 const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb);
228 // x r' x b'
229 const int8x16_t B = vaddq_s8(vreinterpretq_s8_u8(in),
230 vreinterpretq_s8_s16(A));
231 // r' 0 b' 0
232 const int16x8_t C = vshlq_n_s16(vreinterpretq_s16_s8(B), 8);
233 // x db2 0 0
234 const int16x8_t D = vqdmulhq_s16(C, mults_b2);
235 // 0 x db2 0
236 const uint32x4_t E = vshrq_n_u32(vreinterpretq_u32_s16(D), 8);
237 // r' x b'' 0
238 const int8x16_t F = vaddq_s8(vreinterpretq_s8_u32(E),
239 vreinterpretq_s8_s16(C));
240 // 0 r' 0 b''
241 const uint16x8_t G = vshrq_n_u16(vreinterpretq_u16_s8(F), 8);
242 const uint32x4_t out = vorrq_u32(vreinterpretq_u32_u16(G), a0g0);
243 vst1q_u32(argb_data + i, out);
244 }
245 // Fall-back to C-version for left-overs.
246 VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
247 }
248
323 #undef USE_VTBLQ 249 #undef USE_VTBLQ
324 250
325 #endif // USE_INTRINSICS
326
327 #endif // WEBP_USE_NEON
328
329 //------------------------------------------------------------------------------ 251 //------------------------------------------------------------------------------
252 // Entry point
330 253
331 extern void VP8LDspInitNEON(void); 254 extern void VP8LDspInitNEON(void);
332 255
333 void VP8LDspInitNEON(void) { 256 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitNEON(void) {
334 #if defined(WEBP_USE_NEON)
335 VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA; 257 VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
336 VP8LConvertBGRAToBGR = ConvertBGRAToBGR; 258 VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
337 VP8LConvertBGRAToRGB = ConvertBGRAToRGB; 259 VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
338 260
339 #ifdef USE_INTRINSICS
340 VP8LPredictors[5] = Predictor5;
341 VP8LPredictors[6] = Predictor6;
342 VP8LPredictors[7] = Predictor7;
343 VP8LPredictors[8] = Predictor8;
344 VP8LPredictors[9] = Predictor9;
345 VP8LPredictors[10] = Predictor10;
346 VP8LPredictors[11] = Predictor11;
347 VP8LPredictors[12] = Predictor12;
348 VP8LPredictors[13] = Predictor13;
349
350 VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
351 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed; 261 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
352 #endif 262 VP8LTransformColorInverse = TransformColorInverse;
353
354 #endif // WEBP_USE_NEON
355 } 263 }
356 264
357 //------------------------------------------------------------------------------ 265 #else // !WEBP_USE_NEON
266
267 WEBP_DSP_INIT_STUB(VP8LDspInitNEON)
268
269 #endif // WEBP_USE_NEON
OLDNEW
« no previous file with comments | « third_party/libwebp/dsp/lossless_mips_dsp_r2.c ('k') | third_party/libwebp/dsp/lossless_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698