OLD | NEW |
1 // Copyright 2014 Google Inc. All Rights Reserved. | 1 // Copyright 2014 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // NEON variant of methods for lossless decoder | 10 // NEON variant of methods for lossless decoder |
(...skipping 122 matching lines...)
133 vst1_u8(dst + 8, vtbl4_u8(pixels, shuffle1)); | 133 vst1_u8(dst + 8, vtbl4_u8(pixels, shuffle1)); |
134 vst1_u8(dst + 16, vtbl4_u8(pixels, shuffle2)); | 134 vst1_u8(dst + 16, vtbl4_u8(pixels, shuffle2)); |
135 dst += 8 * 3; | 135 dst += 8 * 3; |
136 } | 136 } |
137 VP8LConvertBGRAToRGB_C(src, num_pixels & 7, dst); // left-overs | 137 VP8LConvertBGRAToRGB_C(src, num_pixels & 7, dst); // left-overs |
138 } | 138 } |
139 | 139 |
140 #endif // !WORK_AROUND_GCC | 140 #endif // !WORK_AROUND_GCC |
141 | 141 |
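The converter above processes eight pixels per iteration with vtbl4_u8 byte shuffles and hands the remaining num_pixels & 7 pixels to VP8LConvertBGRAToRGB_C. A scalar sketch of the per-pixel conversion being vectorized, assuming the usual VP8L layout of one ARGB pixel per uint32_t (blue in the low byte):

  #include <stdint.h>

  // Per-pixel scalar equivalent of ConvertBGRAToRGB(): emit 3 RGB bytes for
  // every 32-bit ARGB input value, dropping alpha.
  static void ConvertBGRAToRGB_Scalar(const uint32_t* src, int num_pixels,
                                      uint8_t* dst) {
    int i;
    for (i = 0; i < num_pixels; ++i) {
      const uint32_t argb = src[i];
      dst[3 * i + 0] = (argb >> 16) & 0xff;  // R
      dst[3 * i + 1] = (argb >>  8) & 0xff;  // G
      dst[3 * i + 2] = (argb >>  0) & 0xff;  // B
    }
  }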
142 //------------------------------------------------------------------------------ | 142 //------------------------------------------------------------------------------ |
143 | |
144 #ifdef USE_INTRINSICS | |
145 | |
146 static WEBP_INLINE uint32_t Average2(const uint32_t* const a, | |
147 const uint32_t* const b) { | |
148 const uint8x8_t a0 = vreinterpret_u8_u64(vcreate_u64(*a)); | |
149 const uint8x8_t b0 = vreinterpret_u8_u64(vcreate_u64(*b)); | |
150 const uint8x8_t avg = vhadd_u8(a0, b0); | |
151 return vget_lane_u32(vreinterpret_u32_u8(avg), 0); | |
152 } | |
153 | |
154 static WEBP_INLINE uint32_t Average3(const uint32_t* const a, | |
155 const uint32_t* const b, | |
156 const uint32_t* const c) { | |
157 const uint8x8_t a0 = vreinterpret_u8_u64(vcreate_u64(*a)); | |
158 const uint8x8_t b0 = vreinterpret_u8_u64(vcreate_u64(*b)); | |
159 const uint8x8_t c0 = vreinterpret_u8_u64(vcreate_u64(*c)); | |
160 const uint8x8_t avg1 = vhadd_u8(a0, c0); | |
161 const uint8x8_t avg2 = vhadd_u8(avg1, b0); | |
162 return vget_lane_u32(vreinterpret_u32_u8(avg2), 0); | |
163 } | |
164 | |
165 static WEBP_INLINE uint32_t Average4(const uint32_t* const a, | |
166 const uint32_t* const b, | |
167 const uint32_t* const c, | |
168 const uint32_t* const d) { | |
169 const uint8x8_t a0 = vreinterpret_u8_u64(vcreate_u64(*a)); | |
170 const uint8x8_t b0 = vreinterpret_u8_u64(vcreate_u64(*b)); | |
171 const uint8x8_t c0 = vreinterpret_u8_u64(vcreate_u64(*c)); | |
172 const uint8x8_t d0 = vreinterpret_u8_u64(vcreate_u64(*d)); | |
173 const uint8x8_t avg1 = vhadd_u8(a0, b0); | |
174 const uint8x8_t avg2 = vhadd_u8(c0, d0); | |
175 const uint8x8_t avg3 = vhadd_u8(avg1, avg2); | |
176 return vget_lane_u32(vreinterpret_u32_u8(avg3), 0); | |
177 } | |
178 | |
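Average2/3/4 above operate on all four channels of a packed pixel at once: vhadd_u8 computes the per-byte halving add (a + b) >> 1 with no intermediate overflow. A scalar per-channel sketch of Average2, with the others expressed in terms of it exactly as the NEON code chains its vhadd_u8 calls:

  #include <stdint.h>

  // Scalar equivalent of Average2(): per 8-bit channel, (a + b) >> 1.
  static uint32_t Average2_Scalar(uint32_t a, uint32_t b) {
    uint32_t out = 0;
    int shift;
    for (shift = 0; shift < 32; shift += 8) {
      const uint32_t ca = (a >> shift) & 0xff;
      const uint32_t cb = (b >> shift) & 0xff;
      out |= ((ca + cb) >> 1) << shift;
    }
    return out;
  }
  // Average3(a, b, c) is then Average2_Scalar(Average2_Scalar(a, c), b), and
  // Average4(a, b, c, d) is Average2_Scalar(Average2_Scalar(a, b),
  //                                         Average2_Scalar(c, d)).

Predictor5 through Predictor10 below simply bind these averages to particular combinations of the left pixel and the top-left/top/top-right neighbours.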
179 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) { | |
180 return Average3(&left, top + 0, top + 1); | |
181 } | |
182 | |
183 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) { | |
184 return Average2(&left, top - 1); | |
185 } | |
186 | |
187 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) { | |
188 return Average2(&left, top + 0); | |
189 } | |
190 | |
191 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) { | |
192 (void)left; | |
193 return Average2(top - 1, top + 0); | |
194 } | |
195 | |
196 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { | |
197 (void)left; | |
198 return Average2(top + 0, top + 1); | |
199 } | |
200 | |
201 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { | |
202 return Average4(&left, top - 1, top + 0, top + 1); | |
203 } | |
204 | |
205 //------------------------------------------------------------------------------ | |
206 | |
207 static WEBP_INLINE uint32_t Select(const uint32_t* const c0, | |
208 const uint32_t* const c1, | |
209 const uint32_t* const c2) { | |
210 const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0)); | |
211 const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1)); | |
212 const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2)); | |
213 const uint8x8_t bc = vabd_u8(p1, p2); // |b-c| | |
214 const uint8x8_t ac = vabd_u8(p0, p2); // |a-c| | |
215 const int16x4_t sum_bc = vreinterpret_s16_u16(vpaddl_u8(bc)); | |
216 const int16x4_t sum_ac = vreinterpret_s16_u16(vpaddl_u8(ac)); | |
217 const int32x2_t diff = vpaddl_s16(vsub_s16(sum_bc, sum_ac)); | |
218 const int32_t pa_minus_pb = vget_lane_s32(diff, 0); | |
219 return (pa_minus_pb <= 0) ? *c0 : *c1; | |
220 } | |
221 | |
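Select() picks whichever of c0 and c1 is closer to the reference pixel c2 by summing absolute per-channel differences; Predictor11 passes c0 = top, c1 = left, c2 = top-left. A scalar equivalent of the NEON reduction:

  #include <stdint.h>
  #include <stdlib.h>  /* abs */

  // Scalar equivalent of Select(): accumulate |c1 - c2| - |c0 - c2| over the
  // four channels and keep c0 when the total is <= 0, c1 otherwise.
  static uint32_t Select_Scalar(uint32_t c0, uint32_t c1, uint32_t c2) {
    int pa_minus_pb = 0;
    int shift;
    for (shift = 0; shift < 32; shift += 8) {
      const int a = (c0 >> shift) & 0xff;
      const int b = (c1 >> shift) & 0xff;
      const int c = (c2 >> shift) & 0xff;
      pa_minus_pb += abs(b - c) - abs(a - c);
    }
    return (pa_minus_pb <= 0) ? c0 : c1;
  }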
222 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { | |
223 return Select(top + 0, &left, top - 1); | |
224 } | |
225 | |
226 static WEBP_INLINE uint32_t ClampedAddSubtractFull(const uint32_t* const c0, | |
227 const uint32_t* const c1, | |
228 const uint32_t* const c2) { | |
229 const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0)); | |
230 const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1)); | |
231 const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2)); | |
232 const uint16x8_t sum0 = vaddl_u8(p0, p1); // add and widen | |
233 const uint16x8_t sum1 = vqsubq_u16(sum0, vmovl_u8(p2)); // widen and subtract | |
234 const uint8x8_t out = vqmovn_u16(sum1); // narrow and clamp | |
235 return vget_lane_u32(vreinterpret_u32_u8(out), 0); | |
236 } | |
237 | |
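ClampedAddSubtractFull() computes, per channel, c0 + c1 - c2 clamped to [0, 255]: the widening add keeps the 9-bit sum exact, the saturating u16 subtract clamps at 0, and the saturating narrow clamps at 255. Scalar sketch (Predictor12 passes c0 = left, c1 = top, c2 = top-left):

  #include <stdint.h>

  // Scalar equivalent of ClampedAddSubtractFull().
  static uint32_t ClampedAddSubtractFull_Scalar(uint32_t c0, uint32_t c1,
                                                uint32_t c2) {
    uint32_t out = 0;
    int shift;
    for (shift = 0; shift < 32; shift += 8) {
      int v = (int)((c0 >> shift) & 0xff) + (int)((c1 >> shift) & 0xff) -
              (int)((c2 >> shift) & 0xff);
      if (v < 0) v = 0;
      if (v > 255) v = 255;
      out |= (uint32_t)v << shift;
    }
    return out;
  }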
238 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { | |
239 return ClampedAddSubtractFull(&left, top + 0, top - 1); | |
240 } | |
241 | |
242 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(const uint32_t* const c0, | |
243 const uint32_t* const c1, | |
244 const uint32_t* const c2) { | |
245 const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0)); | |
246 const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1)); | |
247 const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2)); | |
248 const uint8x8_t avg = vhadd_u8(p0, p1); // Average(c0,c1) | |
249 const uint8x8_t ab = vshr_n_u8(vqsub_u8(avg, p2), 1); // (a-b)>>1 saturated | |
250 const uint8x8_t ba = vshr_n_u8(vqsub_u8(p2, avg), 1); // (b-a)>>1 saturated | |
251 const uint8x8_t out = vqsub_u8(vqadd_u8(avg, ab), ba); | |
252 return vget_lane_u32(vreinterpret_u32_u8(out), 0); | |
253 } | |
254 | |
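ClampedAddSubtractHalf() first averages c0 and c1, then moves that average half of its distance from c2 further away from c2, saturating every intermediate step to [0, 255]. A literal per-byte rendering of the saturated NEON sequence (Predictor13 passes c0 = left, c1 = top, c2 = top-left):

  #include <stdint.h>

  static uint8_t SatSub(int a, int b) { return (a > b) ? (uint8_t)(a - b) : 0; }
  static uint8_t SatAdd(int a, int b) { return (a + b > 255) ? 255 : (uint8_t)(a + b); }

  // Scalar equivalent of the vhadd/vqsub/vqadd chain above.
  static uint32_t ClampedAddSubtractHalf_Scalar(uint32_t c0, uint32_t c1,
                                                uint32_t c2) {
    uint32_t out = 0;
    int shift;
    for (shift = 0; shift < 32; shift += 8) {
      const int a = (c0 >> shift) & 0xff;
      const int b = (c1 >> shift) & 0xff;
      const int c = (c2 >> shift) & 0xff;
      const int avg = (a + b) >> 1;          // vhadd_u8
      const int ab  = SatSub(avg, c) >> 1;   // (avg - c) >> 1, saturated at 0
      const int ba  = SatSub(c, avg) >> 1;   // (c - avg) >> 1, saturated at 0
      out |= (uint32_t)SatSub(SatAdd(avg, ab), ba) << shift;
    }
    return out;
  }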
255 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { | |
256 return ClampedAddSubtractHalf(&left, top + 0, top - 1); | |
257 } | |
258 | |
259 //------------------------------------------------------------------------------ | |
260 // Subtract-Green Transform | 143 // Subtract-Green Transform |
261 | 144 |
262 // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use | 145 // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use |
263 // non-standard versions there. | 146 // non-standard versions there. |
264 #if defined(__APPLE__) && defined(__aarch64__) && \ | 147 #if defined(__APPLE__) && defined(__aarch64__) && \ |
265 defined(__apple_build_version__) && (__apple_build_version__< 6020037) | 148 defined(__apple_build_version__) && (__apple_build_version__< 6020037) |
266 #define USE_VTBLQ | 149 #define USE_VTBLQ |
267 #endif | 150 #endif |
268 | 151 |
269 #ifdef USE_VTBLQ | 152 #ifdef USE_VTBLQ |
(...skipping 11 matching lines...)
281 // 255 = byte will be zeroed | 164 // 255 = byte will be zeroed |
282 static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 }; | 165 static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 }; |
283 | 166 |
284 static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb, | 167 static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb, |
285 const uint8x8_t shuffle) { | 168 const uint8x8_t shuffle) { |
286 return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle), | 169 return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle), |
287 vtbl1_u8(vget_high_u8(argb), shuffle)); | 170 vtbl1_u8(vget_high_u8(argb), shuffle)); |
288 } | 171 } |
289 #endif // USE_VTBLQ | 172 #endif // USE_VTBLQ |
290 | 173 |
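The shuffle works because vtbl1_u8 (and the 16-byte table variant used when USE_VTBLQ is defined) returns 0 for any out-of-range index, which is what the 255 entries in kGreenShuffle exploit: for each pixel the lookup produces the bytes (g, 0, g, 0), lining up with the blue and red positions of the little-endian BGRA byte layout. A scalar model of that lookup:

  #include <stdint.h>

  // Scalar model of the NEON table lookup: each output byte is
  // table[idx[i]], and an index past the end of the table yields 0.
  static void Shuffle_Scalar(const uint8_t* table, int table_size,
                             const uint8_t* idx, int n, uint8_t* dst) {
    int i;
    for (i = 0; i < n; ++i) {
      dst[i] = (idx[i] < table_size) ? table[idx[i]] : 0;
    }
  }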
291 static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) { | 174 static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) { |
292 const uint32_t* const end = argb_data + (num_pixels & ~3); | 175 const uint32_t* const end = argb_data + (num_pixels & ~3); |
293 #ifdef USE_VTBLQ | 176 #ifdef USE_VTBLQ |
294 const uint8x16_t shuffle = vld1q_u8(kGreenShuffle); | 177 const uint8x16_t shuffle = vld1q_u8(kGreenShuffle); |
295 #else | 178 #else |
296 const uint8x8_t shuffle = vld1_u8(kGreenShuffle); | 179 const uint8x8_t shuffle = vld1_u8(kGreenShuffle); |
297 #endif | 180 #endif |
298 for (; argb_data < end; argb_data += 4) { | 181 for (; argb_data < end; argb_data += 4) { |
299 const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data); | 182 const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data); |
300 const uint8x16_t greens = DoGreenShuffle(argb, shuffle); | 183 const uint8x16_t greens = DoGreenShuffle(argb, shuffle); |
301 vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens)); | |
302 } | |
303 // fallthrough and finish off with plain-C | |
304 VP8LSubtractGreenFromBlueAndRed_C(argb_data, num_pixels & 3); | |
305 } | |
306 | |
307 static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) { | |
308 const uint32_t* const end = argb_data + (num_pixels & ~3); | |
309 #ifdef USE_VTBLQ | |
310 const uint8x16_t shuffle = vld1q_u8(kGreenShuffle); | |
311 #else | |
312 const uint8x8_t shuffle = vld1_u8(kGreenShuffle); | |
313 #endif | |
314 for (; argb_data < end; argb_data += 4) { | |
315 const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data); | |
316 const uint8x16_t greens = DoGreenShuffle(argb, shuffle); | |
317 vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens)); | 184 vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens)); |
318 } | 185 } |
319 // fallthrough and finish off with plain-C | 186 // fallthrough and finish off with plain-C |
320 VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3); | 187 VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3); |
321 } | 188 } |
322 | 189 |
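AddGreenToBlueAndRed() is the decoder-side inverse of the subtract-green transform: the green channel is added back, modulo 256, to both blue and red, while green and alpha are left untouched (the per-byte vaddq_u8 wraps naturally). A scalar sketch of the same step:

  #include <stdint.h>

  // Scalar equivalent of AddGreenToBlueAndRed().
  static void AddGreenToBlueAndRed_Scalar(uint32_t* argb_data, int num_pixels) {
    int i;
    for (i = 0; i < num_pixels; ++i) {
      const uint32_t argb = argb_data[i];
      const uint32_t green = (argb >> 8) & 0xff;
      uint32_t red_blue = argb & 0x00ff00ffu;
      red_blue += (green << 16) | green;  // add green to red and blue...
      red_blue &= 0x00ff00ffu;            // ...and keep the mod-256 wrap-around
      argb_data[i] = (argb & 0xff00ff00u) | red_blue;
    }
  }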
| 190 //------------------------------------------------------------------------------ |
| 191 // Color Transform |
| 192 |
| 193 static void TransformColorInverse(const VP8LMultipliers* const m, |
| 194 uint32_t* argb_data, int num_pixels) { |
| 195 // sign-extended multiplying constants, pre-shifted by 6. |
| 196 #define CST(X) (((int16_t)(m->X << 8)) >> 6) |
| 197 const int16_t rb[8] = { |
| 198 CST(green_to_blue_), CST(green_to_red_), |
| 199 CST(green_to_blue_), CST(green_to_red_), |
| 200 CST(green_to_blue_), CST(green_to_red_), |
| 201 CST(green_to_blue_), CST(green_to_red_) |
| 202 }; |
| 203 const int16x8_t mults_rb = vld1q_s16(rb); |
| 204 const int16_t b2[8] = { |
| 205 0, CST(red_to_blue_), 0, CST(red_to_blue_), |
| 206 0, CST(red_to_blue_), 0, CST(red_to_blue_), |
| 207 }; |
| 208 const int16x8_t mults_b2 = vld1q_s16(b2); |
| 209 #undef CST |
| 210 #ifdef USE_VTBLQ |
| 211 static const uint8_t kg0g0[16] = { |
| 212 255, 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13 |
| 213 }; |
| 214 const uint8x16_t shuffle = vld1q_u8(kg0g0); |
| 215 #else |
| 216 static const uint8_t k0g0g[8] = { 255, 1, 255, 1, 255, 5, 255, 5 }; |
| 217 const uint8x8_t shuffle = vld1_u8(k0g0g); |
| 218 #endif |
| 219 const uint32x4_t mask_ag = vdupq_n_u32(0xff00ff00u); |
| 220 int i; |
| 221 for (i = 0; i + 4 <= num_pixels; i += 4) { |
| 222 const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i)); |
| 223 const uint32x4_t a0g0 = vandq_u32(vreinterpretq_u32_u8(in), mask_ag); |
| 224 // 0 g 0 g |
| 225 const uint8x16_t greens = DoGreenShuffle(in, shuffle); |
| 226 // x dr x db1 |
| 227 const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb); |
| 228 // x r' x b' |
| 229 const int8x16_t B = vaddq_s8(vreinterpretq_s8_u8(in), |
| 230 vreinterpretq_s8_s16(A)); |
| 231 // r' 0 b' 0 |
| 232 const int16x8_t C = vshlq_n_s16(vreinterpretq_s16_s8(B), 8); |
| 233 // x db2 0 0 |
| 234 const int16x8_t D = vqdmulhq_s16(C, mults_b2); |
| 235 // 0 x db2 0 |
| 236 const uint32x4_t E = vshrq_n_u32(vreinterpretq_u32_s16(D), 8); |
| 237 // r' x b'' 0 |
| 238 const int8x16_t F = vaddq_s8(vreinterpretq_s8_u32(E), |
| 239 vreinterpretq_s8_s16(C)); |
| 240 // 0 r' 0 b'' |
| 241 const uint16x8_t G = vshrq_n_u16(vreinterpretq_u16_s8(F), 8); |
| 242 const uint32x4_t out = vorrq_u32(vreinterpretq_u32_u16(G), a0g0); |
| 243 vst1q_u32(argb_data + i, out); |
| 244 } |
| 245 // Fall-back to C-version for left-overs. |
| 246 VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i); |
| 247 } |
| 248 |
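TransformColorInverse() undoes the VP8L color transform. CST(X) sign-extends the stored 8-bit multiplier and scales it by 4, so that vqdmulhq_s16, which returns (2 * a * b) >> 16, applied to a green value placed in the high byte of a 16-bit lane (g << 8) yields ((int8_t)m * (int8_t)g) >> 5, the standard color-transform delta. A scalar sketch of the whole inverse step, written from the transform's definition rather than copied from VP8LTransformColorInverse_C:

  #include <stdint.h>

  // Color-transform delta: product of two values interpreted as signed bytes,
  // arithmetically shifted right by 5.
  static int ColorDelta(uint8_t t, uint8_t c) {
    return ((int8_t)t * (int8_t)c) >> 5;
  }

  // Scalar sketch of TransformColorInverse(): red is corrected from green,
  // blue from green and from the already-corrected red; alpha and green pass
  // through unchanged.
  static void TransformColorInverse_Scalar(const VP8LMultipliers* const m,
                                           uint32_t* argb_data, int num_pixels) {
    int i;
    for (i = 0; i < num_pixels; ++i) {
      const uint32_t argb = argb_data[i];
      const uint8_t green = (argb >> 8) & 0xff;
      int red  = (argb >> 16) & 0xff;
      int blue = (argb >>  0) & 0xff;
      red  = (red  + ColorDelta(m->green_to_red_, green)) & 0xff;
      blue = (blue + ColorDelta(m->green_to_blue_, green)) & 0xff;
      blue = (blue + ColorDelta(m->red_to_blue_, (uint8_t)red)) & 0xff;
      argb_data[i] = (argb & 0xff00ff00u) |
                     ((uint32_t)red << 16) | (uint32_t)blue;
    }
  }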
323 #undef USE_VTBLQ | 249 #undef USE_VTBLQ |
324 | 250 |
325 #endif // USE_INTRINSICS | |
326 | |
327 #endif // WEBP_USE_NEON | |
328 | |
329 //------------------------------------------------------------------------------ | 251 //------------------------------------------------------------------------------ |
| 252 // Entry point |
330 | 253 |
331 extern void VP8LDspInitNEON(void); | 254 extern void VP8LDspInitNEON(void); |
332 | 255 |
333 void VP8LDspInitNEON(void) { | 256 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitNEON(void) { |
334 #if defined(WEBP_USE_NEON) | |
335 VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA; | 257 VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA; |
336 VP8LConvertBGRAToBGR = ConvertBGRAToBGR; | 258 VP8LConvertBGRAToBGR = ConvertBGRAToBGR; |
337 VP8LConvertBGRAToRGB = ConvertBGRAToRGB; | 259 VP8LConvertBGRAToRGB = ConvertBGRAToRGB; |
338 | 260 |
339 #ifdef USE_INTRINSICS | |
340 VP8LPredictors[5] = Predictor5; | |
341 VP8LPredictors[6] = Predictor6; | |
342 VP8LPredictors[7] = Predictor7; | |
343 VP8LPredictors[8] = Predictor8; | |
344 VP8LPredictors[9] = Predictor9; | |
345 VP8LPredictors[10] = Predictor10; | |
346 VP8LPredictors[11] = Predictor11; | |
347 VP8LPredictors[12] = Predictor12; | |
348 VP8LPredictors[13] = Predictor13; | |
349 | |
350 VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed; | |
351 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed; | 261 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed; |
352 #endif | 262 VP8LTransformColorInverse = TransformColorInverse; |
353 | |
354 #endif // WEBP_USE_NEON | |
355 } | 263 } |
356 | 264 |
357 //------------------------------------------------------------------------------ | 265 #else // !WEBP_USE_NEON |
| 266 |
| 267 WEBP_DSP_INIT_STUB(VP8LDspInitNEON) |
| 268 |
| 269 #endif // WEBP_USE_NEON |
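The NEW layout guards the entire implementation with WEBP_USE_NEON at the top of the file and, when NEON is unavailable, falls back to WEBP_DSP_INIT_STUB so callers can still invoke VP8LDspInitNEON() unconditionally. Roughly what that stub provides (a sketch; the actual macro in dsp.h may differ in details such as the TSAN annotation):

  // Approximate expansion of WEBP_DSP_INIT_STUB(VP8LDspInitNEON) on builds
  // without NEON support: an init function that installs nothing.
  extern void VP8LDspInitNEON(void);
  void VP8LDspInitNEON(void) {}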