third_party/libwebp/dsp/lossless_mips_dsp_r2.c - Issue 2651883004: libwebp-0.6.0-rc1

Side by Side Diff: third_party/libwebp/dsp/lossless_mips_dsp_r2.c

Issue 2651883004: libwebp-0.6.0-rc1 (Closed)

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2014 Google Inc. All Rights Reserved.	1 // Copyright 2014 Google Inc. All Rights Reserved.

2 //	2 //

3 // Use of this source code is governed by a BSD-style license	3 // Use of this source code is governed by a BSD-style license

4 // that can be found in the COPYING file in the root of the source	4 // that can be found in the COPYING file in the root of the source

5 // tree. An additional intellectual property rights grant can be found	5 // tree. An additional intellectual property rights grant can be found

6 // in the file PATENTS. All contributing project authors may	6 // in the file PATENTS. All contributing project authors may

7 // be found in the AUTHORS file in the root of the source tree.	7 // be found in the AUTHORS file in the root of the source tree.

8 // -----------------------------------------------------------------------------	8 // -----------------------------------------------------------------------------

9 //	9 //

10 // Image transforms and color space conversion methods for lossless decoder.	10 // Image transforms and color space conversion methods for lossless decoder.

11 //	11 //

12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)	12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)

13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com)	13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com)

14	14

15 #include "./dsp.h"	15 #include "./dsp.h"

16	16

17 #if defined(WEBP_USE_MIPS_DSP_R2)	17 #if defined(WEBP_USE_MIPS_DSP_R2)

18	18

19 #include "./lossless.h"	19 #include "./lossless.h"

	20 #include "./lossless_common.h"

20	21

21 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \	22 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \

22 static void FUNC_NAME(const TYPE* src, \	23 static void FUNC_NAME(const TYPE* src, \

23 const uint32_t* const color_map, \	24 const uint32_t* const color_map, \

24 TYPE* dst, int y_start, int y_end, \	25 TYPE* dst, int y_start, int y_end, \

25 int width) { \	26 int width) { \

26 int y; \	27 int y; \

27 for (y = y_start; y < y_end; ++y) { \	28 for (y = y_start; y < y_end; ++y) { \

28 int x; \	29 int x; \

29 for (x = 0; x < (width >> 2); ++x) { \	30 for (x = 0; x < (width >> 2); ++x) { \

(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
220 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {	221 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {

221 return ClampedAddSubtractFull(left, top[0], top[-1]);	222 return ClampedAddSubtractFull(left, top[0], top[-1]);

222 }	223 }

223	224

224 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {	225 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {

225 return ClampedAddSubtractHalf(left, top[0], top[-1]);	226 return ClampedAddSubtractHalf(left, top[0], top[-1]);

226 }	227 }

227	228

228 // Add green to blue and red channels (i.e. perform the inverse transform of	229 // Add green to blue and red channels (i.e. perform the inverse transform of

229 // 'subtract green').	230 // 'subtract green').

230 static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {	231 static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,

	232 uint32_t* dst) {

231 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;	233 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

232 uint32_t* const p_loop1_end = data + (num_pixels & ~3);	234 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);

233 uint32_t* const p_loop2_end = data + num_pixels;	235 const uint32_t* const p_loop2_end = src + num_pixels;

234 __asm__ volatile (	236 __asm__ volatile (

235 ".set push \n\t"	237 ".set push \n\t"

236 ".set noreorder \n\t"	238 ".set noreorder \n\t"

237 "beq %[data], %[p_loop1_end], 3f \n\t"	239 "beq %[src], %[p_loop1_end], 3f \n\t"

238 " nop \n\t"	240 " nop \n\t"

239 "0: \n\t"	241 "0: \n\t"

240 "lw %[temp0], 0(%[data]) \n\t"	242 "lw %[temp0], 0(%[src]) \n\t"

241 "lw %[temp1], 4(%[data]) \n\t"	243 "lw %[temp1], 4(%[src]) \n\t"

242 "lw %[temp2], 8(%[data]) \n\t"	244 "lw %[temp2], 8(%[src]) \n\t"

243 "lw %[temp3], 12(%[data]) \n\t"	245 "lw %[temp3], 12(%[src]) \n\t"

244 "ext %[temp4], %[temp0], 8, 8 \n\t"	246 "ext %[temp4], %[temp0], 8, 8 \n\t"

245 "ext %[temp5], %[temp1], 8, 8 \n\t"	247 "ext %[temp5], %[temp1], 8, 8 \n\t"

246 "ext %[temp6], %[temp2], 8, 8 \n\t"	248 "ext %[temp6], %[temp2], 8, 8 \n\t"

247 "ext %[temp7], %[temp3], 8, 8 \n\t"	249 "ext %[temp7], %[temp3], 8, 8 \n\t"

248 "addiu %[data], %[data], 16 \n\t"	250 "addiu %[src], %[src], 16 \n\t"

	251 "addiu %[dst], %[dst], 16 \n\t"

249 "replv.ph %[temp4], %[temp4] \n\t"	252 "replv.ph %[temp4], %[temp4] \n\t"

250 "replv.ph %[temp5], %[temp5] \n\t"	253 "replv.ph %[temp5], %[temp5] \n\t"

251 "replv.ph %[temp6], %[temp6] \n\t"	254 "replv.ph %[temp6], %[temp6] \n\t"

252 "replv.ph %[temp7], %[temp7] \n\t"	255 "replv.ph %[temp7], %[temp7] \n\t"

253 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"	256 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"

254 "addu.qb %[temp1], %[temp1], %[temp5] \n\t"	257 "addu.qb %[temp1], %[temp1], %[temp5] \n\t"

255 "addu.qb %[temp2], %[temp2], %[temp6] \n\t"	258 "addu.qb %[temp2], %[temp2], %[temp6] \n\t"

256 "addu.qb %[temp3], %[temp3], %[temp7] \n\t"	259 "addu.qb %[temp3], %[temp3], %[temp7] \n\t"

257 "sw %[temp0], -16(%[data]) \n\t"	260 "sw %[temp0], -16(%[dst]) \n\t"

258 "sw %[temp1], -12(%[data]) \n\t"	261 "sw %[temp1], -12(%[dst]) \n\t"

259 "sw %[temp2], -8(%[data]) \n\t"	262 "sw %[temp2], -8(%[dst]) \n\t"

260 "bne %[data], %[p_loop1_end], 0b \n\t"	263 "bne %[src], %[p_loop1_end], 0b \n\t"

261 " sw %[temp3], -4(%[data]) \n\t"	264 " sw %[temp3], -4(%[dst]) \n\t"

262 "3: \n\t"	265 "3: \n\t"

263 "beq %[data], %[p_loop2_end], 2f \n\t"	266 "beq %[src], %[p_loop2_end], 2f \n\t"

264 " nop \n\t"	267 " nop \n\t"

265 "1: \n\t"	268 "1: \n\t"

266 "lw %[temp0], 0(%[data]) \n\t"	269 "lw %[temp0], 0(%[src]) \n\t"

267 "addiu %[data], %[data], 4 \n\t"	270 "addiu %[src], %[src], 4 \n\t"

	271 "addiu %[dst], %[dst], 4 \n\t"

268 "ext %[temp4], %[temp0], 8, 8 \n\t"	272 "ext %[temp4], %[temp0], 8, 8 \n\t"

269 "replv.ph %[temp4], %[temp4] \n\t"	273 "replv.ph %[temp4], %[temp4] \n\t"

270 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"	274 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"

271 "bne %[data], %[p_loop2_end], 1b \n\t"	275 "bne %[src], %[p_loop2_end], 1b \n\t"

272 " sw %[temp0], -4(%[data]) \n\t"	276 " sw %[temp0], -4(%[dst]) \n\t"

273 "2: \n\t"	277 "2: \n\t"

274 ".set pop \n\t"	278 ".set pop \n\t"

275 : [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),	279 : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),

276 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),	280 [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),

277 [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)	281 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),

	282 [temp7]"=&r"(temp7)

278 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)	283 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)

279 : "memory"	284 : "memory"

280 );	285 );

281 }	286 }

282	287

283 static void TransformColorInverse(const VP8LMultipliers* const m,	288 static void TransformColorInverse(const VP8LMultipliers* const m,

284 uint32_t* data, int num_pixels) {	289 const uint32_t* src, int num_pixels,

	290 uint32_t* dst) {

285 int temp0, temp1, temp2, temp3, temp4, temp5;	291 int temp0, temp1, temp2, temp3, temp4, temp5;

286 uint32_t argb, argb1, new_red;	292 uint32_t argb, argb1, new_red;

287 const uint32_t G_to_R = m->green_to_red_;	293 const uint32_t G_to_R = m->green_to_red_;

288 const uint32_t G_to_B = m->green_to_blue_;	294 const uint32_t G_to_B = m->green_to_blue_;

289 const uint32_t R_to_B = m->red_to_blue_;	295 const uint32_t R_to_B = m->red_to_blue_;

290 uint32_t* const p_loop_end = data + (num_pixels & ~1);	296 const uint32_t* const p_loop_end = src + (num_pixels & ~1);

291 __asm__ volatile (	297 __asm__ volatile (

292 ".set push \n\t"	298 ".set push \n\t"

293 ".set noreorder \n\t"	299 ".set noreorder \n\t"

294 "beq %[data], %[p_loop_end], 1f \n\t"	300 "beq %[src], %[p_loop_end], 1f \n\t"

295 " nop \n\t"	301 " nop \n\t"

296 "replv.ph %[temp0], %[G_to_R] \n\t"	302 "replv.ph %[temp0], %[G_to_R] \n\t"

297 "replv.ph %[temp1], %[G_to_B] \n\t"	303 "replv.ph %[temp1], %[G_to_B] \n\t"

298 "replv.ph %[temp2], %[R_to_B] \n\t"	304 "replv.ph %[temp2], %[R_to_B] \n\t"

299 "shll.ph %[temp0], %[temp0], 8 \n\t"	305 "shll.ph %[temp0], %[temp0], 8 \n\t"

300 "shll.ph %[temp1], %[temp1], 8 \n\t"	306 "shll.ph %[temp1], %[temp1], 8 \n\t"

301 "shll.ph %[temp2], %[temp2], 8 \n\t"	307 "shll.ph %[temp2], %[temp2], 8 \n\t"

302 "shra.ph %[temp0], %[temp0], 8 \n\t"	308 "shra.ph %[temp0], %[temp0], 8 \n\t"

303 "shra.ph %[temp1], %[temp1], 8 \n\t"	309 "shra.ph %[temp1], %[temp1], 8 \n\t"

304 "shra.ph %[temp2], %[temp2], 8 \n\t"	310 "shra.ph %[temp2], %[temp2], 8 \n\t"

305 "0: \n\t"	311 "0: \n\t"

306 "lw %[argb], 0(%[data]) \n\t"	312 "lw %[argb], 0(%[src]) \n\t"

307 "lw %[argb1], 4(%[data]) \n\t"	313 "lw %[argb1], 4(%[src]) \n\t"

308 "addiu %[data], %[data], 8 \n\t"	314 "sw %[argb], 0(%[dst]) \n\t"

	315 "sw %[argb1], 4(%[dst]) \n\t"

	316 "addiu %[src], %[src], 8 \n\t"

	317 "addiu %[dst], %[dst], 8 \n\t"

309 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"	318 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"

310 "preceu.ph.qbra %[temp3], %[temp3] \n\t"	319 "preceu.ph.qbra %[temp3], %[temp3] \n\t"

311 "shll.ph %[temp3], %[temp3], 8 \n\t"	320 "shll.ph %[temp3], %[temp3], 8 \n\t"

312 "shra.ph %[temp3], %[temp3], 8 \n\t"	321 "shra.ph %[temp3], %[temp3], 8 \n\t"

313 "mul.ph %[temp5], %[temp3], %[temp0] \n\t"	322 "mul.ph %[temp5], %[temp3], %[temp0] \n\t"

314 "mul.ph %[temp3], %[temp3], %[temp1] \n\t"	323 "mul.ph %[temp3], %[temp3], %[temp1] \n\t"

315 "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"	324 "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"

316 "ins %[argb1], %[argb], 16, 16 \n\t"	325 "ins %[argb1], %[argb], 16, 16 \n\t"

317 "shra.ph %[temp5], %[temp5], 5 \n\t"	326 "shra.ph %[temp5], %[temp5], 5 \n\t"

318 "shra.ph %[temp3], %[temp3], 5 \n\t"	327 "shra.ph %[temp3], %[temp3], 5 \n\t"

319 "addu.ph %[new_red], %[new_red], %[temp5] \n\t"	328 "addu.ph %[new_red], %[new_red], %[temp5] \n\t"

320 "addu.ph %[argb1], %[argb1], %[temp3] \n\t"	329 "addu.ph %[argb1], %[argb1], %[temp3] \n\t"

321 "preceu.ph.qbra %[temp5], %[new_red] \n\t"	330 "preceu.ph.qbra %[temp5], %[new_red] \n\t"

322 "shll.ph %[temp4], %[temp5], 8 \n\t"	331 "shll.ph %[temp4], %[temp5], 8 \n\t"

323 "shra.ph %[temp4], %[temp4], 8 \n\t"	332 "shra.ph %[temp4], %[temp4], 8 \n\t"

324 "mul.ph %[temp4], %[temp4], %[temp2] \n\t"	333 "mul.ph %[temp4], %[temp4], %[temp2] \n\t"

325 "sb %[temp5], -2(%[data]) \n\t"	334 "sb %[temp5], -2(%[dst]) \n\t"

326 "sra %[temp5], %[temp5], 16 \n\t"	335 "sra %[temp5], %[temp5], 16 \n\t"

327 "shra.ph %[temp4], %[temp4], 5 \n\t"	336 "shra.ph %[temp4], %[temp4], 5 \n\t"

328 "addu.ph %[argb1], %[argb1], %[temp4] \n\t"	337 "addu.ph %[argb1], %[argb1], %[temp4] \n\t"

329 "preceu.ph.qbra %[temp3], %[argb1] \n\t"	338 "preceu.ph.qbra %[temp3], %[argb1] \n\t"

330 "sb %[temp5], -6(%[data]) \n\t"	339 "sb %[temp5], -6(%[dst]) \n\t"

331 "sb %[temp3], -4(%[data]) \n\t"	340 "sb %[temp3], -4(%[dst]) \n\t"

332 "sra %[temp3], %[temp3], 16 \n\t"	341 "sra %[temp3], %[temp3], 16 \n\t"

333 "bne %[data], %[p_loop_end], 0b \n\t"	342 "bne %[src], %[p_loop_end], 0b \n\t"

334 " sb %[temp3], -8(%[data]) \n\t"	343 " sb %[temp3], -8(%[dst]) \n\t"

335 "1: \n\t"	344 "1: \n\t"

336 ".set pop \n\t"	345 ".set pop \n\t"

337 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),	346 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),

338 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),	347 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),

339 [new_red]"=&r"(new_red), [argb]"=&r"(argb),	348 [new_red]"=&r"(new_red), [argb]"=&r"(argb),

340 [argb1]"=&r"(argb1), [data]"+&r"(data)	349 [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)

341 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),	350 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),

342 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)	351 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)

343 : "memory", "hi", "lo"	352 : "memory", "hi", "lo"

344 );	353 );

345	354

346 // Fall-back to C-version for left-overs.	355 // Fall-back to C-version for left-overs.

347 if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);	356 if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);

348 }	357 }

349	358

350 static void ConvertBGRAToRGB(const uint32_t* src,	359 static void ConvertBGRAToRGB(const uint32_t* src,

351 int num_pixels, uint8_t* dst) {	360 int num_pixels, uint8_t* dst) {

352 int temp0, temp1, temp2, temp3;	361 int temp0, temp1, temp2, temp3;

353 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);	362 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);

354 const uint32_t* const p_loop2_end = src + num_pixels;	363 const uint32_t* const p_loop2_end = src + num_pixels;

355 __asm__ volatile (	364 __asm__ volatile (

356 ".set push \n\t"	365 ".set push \n\t"

357 ".set noreorder \n\t"	366 ".set noreorder \n\t"

(...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
671 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;	680 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;

672 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;	681 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;

673 VP8LConvertBGRAToBGR = ConvertBGRAToBGR;	682 VP8LConvertBGRAToBGR = ConvertBGRAToBGR;

674 }	683 }

675	684

676 #else // !WEBP_USE_MIPS_DSP_R2	685 #else // !WEBP_USE_MIPS_DSP_R2

677	686

678 WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)	687 WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)

679	688

680 #endif // WEBP_USE_MIPS_DSP_R2	689 #endif // WEBP_USE_MIPS_DSP_R2

OLD	NEW

« no previous file with comments | « third_party/libwebp/dsp/lossless_enc_sse41.c ('k') | third_party/libwebp/dsp/lossless_msa.c » ('j') | no next file with comments »