Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(450)

Side by Side Diff: third_party/libwebp/dsp/lossless_mips_dsp_r2.c

Issue 2651883004: libwebp-0.6.0-rc1 (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libwebp/dsp/lossless_enc_sse41.c ('k') | third_party/libwebp/dsp/lossless_msa.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 Google Inc. All Rights Reserved. 1 // Copyright 2014 Google Inc. All Rights Reserved.
2 // 2 //
3 // Use of this source code is governed by a BSD-style license 3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source 4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may 6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree. 7 // be found in the AUTHORS file in the root of the source tree.
8 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
9 // 9 //
10 // Image transforms and color space conversion methods for lossless decoder. 10 // Image transforms and color space conversion methods for lossless decoder.
11 // 11 //
12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) 12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com) 13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com)
14 14
15 #include "./dsp.h" 15 #include "./dsp.h"
16 16
17 #if defined(WEBP_USE_MIPS_DSP_R2) 17 #if defined(WEBP_USE_MIPS_DSP_R2)
18 18
19 #include "./lossless.h" 19 #include "./lossless.h"
20 #include "./lossless_common.h"
20 21
21 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \ 22 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
22 static void FUNC_NAME(const TYPE* src, \ 23 static void FUNC_NAME(const TYPE* src, \
23 const uint32_t* const color_map, \ 24 const uint32_t* const color_map, \
24 TYPE* dst, int y_start, int y_end, \ 25 TYPE* dst, int y_start, int y_end, \
25 int width) { \ 26 int width) { \
26 int y; \ 27 int y; \
27 for (y = y_start; y < y_end; ++y) { \ 28 for (y = y_start; y < y_end; ++y) { \
28 int x; \ 29 int x; \
29 for (x = 0; x < (width >> 2); ++x) { \ 30 for (x = 0; x < (width >> 2); ++x) { \
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
220 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { 221 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
221 return ClampedAddSubtractFull(left, top[0], top[-1]); 222 return ClampedAddSubtractFull(left, top[0], top[-1]);
222 } 223 }
223 224
224 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { 225 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
225 return ClampedAddSubtractHalf(left, top[0], top[-1]); 226 return ClampedAddSubtractHalf(left, top[0], top[-1]);
226 } 227 }
227 228
228 // Add green to blue and red channels (i.e. perform the inverse transform of 229 // Add green to blue and red channels (i.e. perform the inverse transform of
229 // 'subtract green'). 230 // 'subtract green').
230 static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) { 231 static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
232 uint32_t* dst) {
231 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 233 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
232 uint32_t* const p_loop1_end = data + (num_pixels & ~3); 234 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
233 uint32_t* const p_loop2_end = data + num_pixels; 235 const uint32_t* const p_loop2_end = src + num_pixels;
234 __asm__ volatile ( 236 __asm__ volatile (
235 ".set push \n\t" 237 ".set push \n\t"
236 ".set noreorder \n\t" 238 ".set noreorder \n\t"
237 "beq %[data], %[p_loop1_end], 3f \n\t" 239 "beq %[src], %[p_loop1_end], 3f \n\t"
238 " nop \n\t" 240 " nop \n\t"
239 "0: \n\t" 241 "0: \n\t"
240 "lw %[temp0], 0(%[data]) \n\t" 242 "lw %[temp0], 0(%[src]) \n\t"
241 "lw %[temp1], 4(%[data]) \n\t" 243 "lw %[temp1], 4(%[src]) \n\t"
242 "lw %[temp2], 8(%[data]) \n\t" 244 "lw %[temp2], 8(%[src]) \n\t"
243 "lw %[temp3], 12(%[data]) \n\t" 245 "lw %[temp3], 12(%[src]) \n\t"
244 "ext %[temp4], %[temp0], 8, 8 \n\t" 246 "ext %[temp4], %[temp0], 8, 8 \n\t"
245 "ext %[temp5], %[temp1], 8, 8 \n\t" 247 "ext %[temp5], %[temp1], 8, 8 \n\t"
246 "ext %[temp6], %[temp2], 8, 8 \n\t" 248 "ext %[temp6], %[temp2], 8, 8 \n\t"
247 "ext %[temp7], %[temp3], 8, 8 \n\t" 249 "ext %[temp7], %[temp3], 8, 8 \n\t"
248 "addiu %[data], %[data], 16 \n\t" 250 "addiu %[src], %[src], 16 \n\t"
251 "addiu %[dst], %[dst], 16 \n\t"
249 "replv.ph %[temp4], %[temp4] \n\t" 252 "replv.ph %[temp4], %[temp4] \n\t"
250 "replv.ph %[temp5], %[temp5] \n\t" 253 "replv.ph %[temp5], %[temp5] \n\t"
251 "replv.ph %[temp6], %[temp6] \n\t" 254 "replv.ph %[temp6], %[temp6] \n\t"
252 "replv.ph %[temp7], %[temp7] \n\t" 255 "replv.ph %[temp7], %[temp7] \n\t"
253 "addu.qb %[temp0], %[temp0], %[temp4] \n\t" 256 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
254 "addu.qb %[temp1], %[temp1], %[temp5] \n\t" 257 "addu.qb %[temp1], %[temp1], %[temp5] \n\t"
255 "addu.qb %[temp2], %[temp2], %[temp6] \n\t" 258 "addu.qb %[temp2], %[temp2], %[temp6] \n\t"
256 "addu.qb %[temp3], %[temp3], %[temp7] \n\t" 259 "addu.qb %[temp3], %[temp3], %[temp7] \n\t"
257 "sw %[temp0], -16(%[data]) \n\t" 260 "sw %[temp0], -16(%[dst]) \n\t"
258 "sw %[temp1], -12(%[data]) \n\t" 261 "sw %[temp1], -12(%[dst]) \n\t"
259 "sw %[temp2], -8(%[data]) \n\t" 262 "sw %[temp2], -8(%[dst]) \n\t"
260 "bne %[data], %[p_loop1_end], 0b \n\t" 263 "bne %[src], %[p_loop1_end], 0b \n\t"
261 " sw %[temp3], -4(%[data]) \n\t" 264 " sw %[temp3], -4(%[dst]) \n\t"
262 "3: \n\t" 265 "3: \n\t"
263 "beq %[data], %[p_loop2_end], 2f \n\t" 266 "beq %[src], %[p_loop2_end], 2f \n\t"
264 " nop \n\t" 267 " nop \n\t"
265 "1: \n\t" 268 "1: \n\t"
266 "lw %[temp0], 0(%[data]) \n\t" 269 "lw %[temp0], 0(%[src]) \n\t"
267 "addiu %[data], %[data], 4 \n\t" 270 "addiu %[src], %[src], 4 \n\t"
271 "addiu %[dst], %[dst], 4 \n\t"
268 "ext %[temp4], %[temp0], 8, 8 \n\t" 272 "ext %[temp4], %[temp0], 8, 8 \n\t"
269 "replv.ph %[temp4], %[temp4] \n\t" 273 "replv.ph %[temp4], %[temp4] \n\t"
270 "addu.qb %[temp0], %[temp0], %[temp4] \n\t" 274 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
271 "bne %[data], %[p_loop2_end], 1b \n\t" 275 "bne %[src], %[p_loop2_end], 1b \n\t"
272 " sw %[temp0], -4(%[data]) \n\t" 276 " sw %[temp0], -4(%[dst]) \n\t"
273 "2: \n\t" 277 "2: \n\t"
274 ".set pop \n\t" 278 ".set pop \n\t"
275 : [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 279 : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
276 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), 280 [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
277 [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7) 281 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
282 [temp7]"=&r"(temp7)
278 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) 283 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
279 : "memory" 284 : "memory"
280 ); 285 );
281 } 286 }
282 287
283 static void TransformColorInverse(const VP8LMultipliers* const m, 288 static void TransformColorInverse(const VP8LMultipliers* const m,
284 uint32_t* data, int num_pixels) { 289 const uint32_t* src, int num_pixels,
290 uint32_t* dst) {
285 int temp0, temp1, temp2, temp3, temp4, temp5; 291 int temp0, temp1, temp2, temp3, temp4, temp5;
286 uint32_t argb, argb1, new_red; 292 uint32_t argb, argb1, new_red;
287 const uint32_t G_to_R = m->green_to_red_; 293 const uint32_t G_to_R = m->green_to_red_;
288 const uint32_t G_to_B = m->green_to_blue_; 294 const uint32_t G_to_B = m->green_to_blue_;
289 const uint32_t R_to_B = m->red_to_blue_; 295 const uint32_t R_to_B = m->red_to_blue_;
290 uint32_t* const p_loop_end = data + (num_pixels & ~1); 296 const uint32_t* const p_loop_end = src + (num_pixels & ~1);
291 __asm__ volatile ( 297 __asm__ volatile (
292 ".set push \n\t" 298 ".set push \n\t"
293 ".set noreorder \n\t" 299 ".set noreorder \n\t"
294 "beq %[data], %[p_loop_end], 1f \n\t" 300 "beq %[src], %[p_loop_end], 1f \n\t"
295 " nop \n\t" 301 " nop \n\t"
296 "replv.ph %[temp0], %[G_to_R] \n\t" 302 "replv.ph %[temp0], %[G_to_R] \n\t"
297 "replv.ph %[temp1], %[G_to_B] \n\t" 303 "replv.ph %[temp1], %[G_to_B] \n\t"
298 "replv.ph %[temp2], %[R_to_B] \n\t" 304 "replv.ph %[temp2], %[R_to_B] \n\t"
299 "shll.ph %[temp0], %[temp0], 8 \n\t" 305 "shll.ph %[temp0], %[temp0], 8 \n\t"
300 "shll.ph %[temp1], %[temp1], 8 \n\t" 306 "shll.ph %[temp1], %[temp1], 8 \n\t"
301 "shll.ph %[temp2], %[temp2], 8 \n\t" 307 "shll.ph %[temp2], %[temp2], 8 \n\t"
302 "shra.ph %[temp0], %[temp0], 8 \n\t" 308 "shra.ph %[temp0], %[temp0], 8 \n\t"
303 "shra.ph %[temp1], %[temp1], 8 \n\t" 309 "shra.ph %[temp1], %[temp1], 8 \n\t"
304 "shra.ph %[temp2], %[temp2], 8 \n\t" 310 "shra.ph %[temp2], %[temp2], 8 \n\t"
305 "0: \n\t" 311 "0: \n\t"
306 "lw %[argb], 0(%[data]) \n\t" 312 "lw %[argb], 0(%[src]) \n\t"
307 "lw %[argb1], 4(%[data]) \n\t" 313 "lw %[argb1], 4(%[src]) \n\t"
308 "addiu %[data], %[data], 8 \n\t" 314 "sw %[argb], 0(%[dst]) \n\t"
315 "sw %[argb1], 4(%[dst]) \n\t"
316 "addiu %[src], %[src], 8 \n\t"
317 "addiu %[dst], %[dst], 8 \n\t"
309 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t" 318 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
310 "preceu.ph.qbra %[temp3], %[temp3] \n\t" 319 "preceu.ph.qbra %[temp3], %[temp3] \n\t"
311 "shll.ph %[temp3], %[temp3], 8 \n\t" 320 "shll.ph %[temp3], %[temp3], 8 \n\t"
312 "shra.ph %[temp3], %[temp3], 8 \n\t" 321 "shra.ph %[temp3], %[temp3], 8 \n\t"
313 "mul.ph %[temp5], %[temp3], %[temp0] \n\t" 322 "mul.ph %[temp5], %[temp3], %[temp0] \n\t"
314 "mul.ph %[temp3], %[temp3], %[temp1] \n\t" 323 "mul.ph %[temp3], %[temp3], %[temp1] \n\t"
315 "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t" 324 "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
316 "ins %[argb1], %[argb], 16, 16 \n\t" 325 "ins %[argb1], %[argb], 16, 16 \n\t"
317 "shra.ph %[temp5], %[temp5], 5 \n\t" 326 "shra.ph %[temp5], %[temp5], 5 \n\t"
318 "shra.ph %[temp3], %[temp3], 5 \n\t" 327 "shra.ph %[temp3], %[temp3], 5 \n\t"
319 "addu.ph %[new_red], %[new_red], %[temp5] \n\t" 328 "addu.ph %[new_red], %[new_red], %[temp5] \n\t"
320 "addu.ph %[argb1], %[argb1], %[temp3] \n\t" 329 "addu.ph %[argb1], %[argb1], %[temp3] \n\t"
321 "preceu.ph.qbra %[temp5], %[new_red] \n\t" 330 "preceu.ph.qbra %[temp5], %[new_red] \n\t"
322 "shll.ph %[temp4], %[temp5], 8 \n\t" 331 "shll.ph %[temp4], %[temp5], 8 \n\t"
323 "shra.ph %[temp4], %[temp4], 8 \n\t" 332 "shra.ph %[temp4], %[temp4], 8 \n\t"
324 "mul.ph %[temp4], %[temp4], %[temp2] \n\t" 333 "mul.ph %[temp4], %[temp4], %[temp2] \n\t"
325 "sb %[temp5], -2(%[data]) \n\t" 334 "sb %[temp5], -2(%[dst]) \n\t"
326 "sra %[temp5], %[temp5], 16 \n\t" 335 "sra %[temp5], %[temp5], 16 \n\t"
327 "shra.ph %[temp4], %[temp4], 5 \n\t" 336 "shra.ph %[temp4], %[temp4], 5 \n\t"
328 "addu.ph %[argb1], %[argb1], %[temp4] \n\t" 337 "addu.ph %[argb1], %[argb1], %[temp4] \n\t"
329 "preceu.ph.qbra %[temp3], %[argb1] \n\t" 338 "preceu.ph.qbra %[temp3], %[argb1] \n\t"
330 "sb %[temp5], -6(%[data]) \n\t" 339 "sb %[temp5], -6(%[dst]) \n\t"
331 "sb %[temp3], -4(%[data]) \n\t" 340 "sb %[temp3], -4(%[dst]) \n\t"
332 "sra %[temp3], %[temp3], 16 \n\t" 341 "sra %[temp3], %[temp3], 16 \n\t"
333 "bne %[data], %[p_loop_end], 0b \n\t" 342 "bne %[src], %[p_loop_end], 0b \n\t"
334 " sb %[temp3], -8(%[data]) \n\t" 343 " sb %[temp3], -8(%[dst]) \n\t"
335 "1: \n\t" 344 "1: \n\t"
336 ".set pop \n\t" 345 ".set pop \n\t"
337 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 346 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
338 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 347 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
339 [new_red]"=&r"(new_red), [argb]"=&r"(argb), 348 [new_red]"=&r"(new_red), [argb]"=&r"(argb),
340 [argb1]"=&r"(argb1), [data]"+&r"(data) 349 [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
341 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B), 350 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
342 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end) 351 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
343 : "memory", "hi", "lo" 352 : "memory", "hi", "lo"
344 ); 353 );
345 354
346 // Fall-back to C-version for left-overs. 355 // Fall-back to C-version for left-overs.
347 if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1); 356 if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
348 } 357 }
349 358
350 static void ConvertBGRAToRGB(const uint32_t* src, 359 static void ConvertBGRAToRGB(const uint32_t* src,
351 int num_pixels, uint8_t* dst) { 360 int num_pixels, uint8_t* dst) {
352 int temp0, temp1, temp2, temp3; 361 int temp0, temp1, temp2, temp3;
353 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); 362 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
354 const uint32_t* const p_loop2_end = src + num_pixels; 363 const uint32_t* const p_loop2_end = src + num_pixels;
355 __asm__ volatile ( 364 __asm__ volatile (
356 ".set push \n\t" 365 ".set push \n\t"
357 ".set noreorder \n\t" 366 ".set noreorder \n\t"
(...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after
671 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444; 680 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
672 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565; 681 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
673 VP8LConvertBGRAToBGR = ConvertBGRAToBGR; 682 VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
674 } 683 }
675 684
676 #else // !WEBP_USE_MIPS_DSP_R2 685 #else // !WEBP_USE_MIPS_DSP_R2
677 686
678 WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2) 687 WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
679 688
680 #endif // WEBP_USE_MIPS_DSP_R2 689 #endif // WEBP_USE_MIPS_DSP_R2
OLD | NEW
« no previous file with comments | « third_party/libwebp/dsp/lossless_enc_sse41.c ('k') | third_party/libwebp/dsp/lossless_msa.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698