| OLD | NEW |
| 1 // Copyright 2014 Google Inc. All Rights Reserved. | 1 // Copyright 2014 Google Inc. All Rights Reserved. |
| 2 // | 2 // |
| 3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
| 4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
| 5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
| 6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
| 7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
| 8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
| 9 // | 9 // |
| 10 // Image transforms and color space conversion methods for lossless decoder. | 10 // Image transforms and color space conversion methods for lossless decoder. |
| 11 // | 11 // |
| 12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) | 12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) |
| 13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com) | 13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com) |
| 14 | 14 |
| 15 #include "./dsp.h" | 15 #include "./dsp.h" |
| 16 | 16 |
| 17 #if defined(WEBP_USE_MIPS_DSP_R2) | 17 #if defined(WEBP_USE_MIPS_DSP_R2) |
| 18 | 18 |
| 19 #include "./lossless.h" | 19 #include "./lossless.h" |
| 20 #include "./lossless_common.h" |
| 20 | 21 |
| 21 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \ | 22 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \ |
| 22 static void FUNC_NAME(const TYPE* src, \ | 23 static void FUNC_NAME(const TYPE* src, \ |
| 23 const uint32_t* const color_map, \ | 24 const uint32_t* const color_map, \ |
| 24 TYPE* dst, int y_start, int y_end, \ | 25 TYPE* dst, int y_start, int y_end, \ |
| 25 int width) { \ | 26 int width) { \ |
| 26 int y; \ | 27 int y; \ |
| 27 for (y = y_start; y < y_end; ++y) { \ | 28 for (y = y_start; y < y_end; ++y) { \ |
| 28 int x; \ | 29 int x; \ |
| 29 for (x = 0; x < (width >> 2); ++x) { \ | 30 for (x = 0; x < (width >> 2); ++x) { \ |
| (...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// Predictor #12: forwards 'left' together with two samples read from the
// 'top' row to ClampedAddSubtractFull() and returns its result unchanged.
// 'top' must be readable at index -1.
// NOTE(review): top[0] / top[-1] are presumably the pixels directly above
// and above-left of the current position -- confirm against the caller.
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
  const uint32_t above = top[0];
  const uint32_t above_left = top[-1];
  const uint32_t prediction = ClampedAddSubtractFull(left, above, above_left);
  return prediction;
}
| 223 | 224 |
// Predictor #13: forwards 'left' together with two samples read from the
// 'top' row to ClampedAddSubtractHalf() and returns its result unchanged.
// 'top' must be readable at index -1.
// NOTE(review): top[0] / top[-1] are presumably the pixels directly above
// and above-left of the current position -- confirm against the caller.
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  const uint32_t above = top[0];
  const uint32_t above_left = top[-1];
  const uint32_t prediction = ClampedAddSubtractHalf(left, above, above_left);
  return prediction;
}
| 227 | 228 |
// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
//
// Reads 'num_pixels' 32-bit pixels from 'src' and writes the transformed
// pixels to 'dst'. For every pixel, bits 8..15 (the green channel) are added,
// modulo 256, to the bytes in bits 0..7 and bits 16..23; the bytes in bits
// 8..15 and 24..31 are copied through unchanged.
//
// The work is split in two loops: a main loop handling four pixels per
// iteration (the first 'num_pixels & ~3' pixels), then a one-pixel-at-a-time
// loop for the remainder. Each loop is skipped by a leading 'beq' when it has
// nothing to do, so num_pixels == 0 performs no memory access.
//
// Every pixel is loaded before the corresponding store is issued.
// NOTE(review): this looks safe for in-place use (src == dst); other
// overlapping layouts are not obviously safe -- confirm with the callers.
static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
                                 uint32_t* dst) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  // End of the 4-pixel unrolled loop and end of the whole input, respectively.
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    // noreorder: branch delay slots are filled by hand below. The instruction
    // written with a leading space after each branch is its delay slot.
    ".set noreorder \n\t"
    // Skip the unrolled loop when there are fewer than four pixels.
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    // Load four pixels.
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    // Extract the 8-bit field at bit 8 (green) of each pixel.
    "ext %[temp4], %[temp0], 8, 8 \n\t"
    "ext %[temp5], %[temp1], 8, 8 \n\t"
    "ext %[temp6], %[temp2], 8, 8 \n\t"
    "ext %[temp7], %[temp3], 8, 8 \n\t"
    // Advance both pointers now; the stores below use negative offsets.
    "addiu %[src], %[src], 16 \n\t"
    "addiu %[dst], %[dst], 16 \n\t"
    // Replicate green into both halfwords: 0x000000GG -> 0x00GG00GG.
    "replv.ph %[temp4], %[temp4] \n\t"
    "replv.ph %[temp5], %[temp5] \n\t"
    "replv.ph %[temp6], %[temp6] \n\t"
    "replv.ph %[temp7], %[temp7] \n\t"
    // Byte-wise wrapping add: only bytes 0 and 2 of each pixel change.
    "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
    "addu.qb %[temp1], %[temp1], %[temp5] \n\t"
    "addu.qb %[temp2], %[temp2], %[temp6] \n\t"
    "addu.qb %[temp3], %[temp3], %[temp7] \n\t"
    "sw %[temp0], -16(%[dst]) \n\t"
    "sw %[temp1], -12(%[dst]) \n\t"
    "sw %[temp2], -8(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    // Delay slot: the fourth store executes whether or not the branch is taken.
    " sw %[temp3], -4(%[dst]) \n\t"
  "3: \n\t"
    // Skip the remainder loop when num_pixels is a multiple of four.
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    // Same computation as above, one pixel per iteration.
    "lw %[temp0], 0(%[src]) \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "addiu %[dst], %[dst], 4 \n\t"
    "ext %[temp4], %[temp0], 8, 8 \n\t"
    "replv.ph %[temp4], %[temp4] \n\t"
    "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    // Delay slot: store the pixel just computed.
    " sw %[temp0], -4(%[dst]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    // 'dst' and 'src' are read-write operands: the asm advances both.
    : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
      [temp7]"=&r"(temp7)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
| 282 | 287 |
// Inverse color transform: reads 'num_pixels' pixels from 'src' and writes the
// result to 'dst', using the three multipliers found in 'm' (green_to_red_,
// green_to_blue_ and red_to_blue_), each treated as a signed 8-bit value.
//
// The assembly loop handles two pixels per iteration, using packed halfword
// arithmetic. For each pixel it computes, modulo 256:
//   new_red  = red  + ((int8)green * (int8)green_to_red_ >> 5)
//   new_blue = blue + ((int8)green * (int8)green_to_blue_ >> 5)
//                   + ((int8)new_red * (int8)red_to_blue_ >> 5)
// The whole source pixel is first copied to 'dst', then the two recomputed
// bytes are patched in with byte stores, so the other two bytes of each pixel
// pass through unchanged. A trailing odd pixel is delegated to
// VP8LTransformColorInverse_C().
//
// NOTE(review): the 'sb' offsets place red at byte 2 and blue at byte 0 of
// each 32-bit pixel, which assumes a little-endian target -- confirm.
static void TransformColorInverse(const VP8LMultipliers* const m,
                                  const uint32_t* src, int num_pixels,
                                  uint32_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  uint32_t argb, argb1, new_red;
  const uint32_t G_to_R = m->green_to_red_;
  const uint32_t G_to_B = m->green_to_blue_;
  const uint32_t R_to_B = m->red_to_blue_;
  // The assembly loop only covers an even number of pixels.
  const uint32_t* const p_loop_end = src + (num_pixels & ~1);
  __asm__ volatile (
    ".set push \n\t"
    // noreorder: branch delay slots are filled by hand. The instruction
    // written with a leading space after each branch is its delay slot.
    ".set noreorder \n\t"
    // Nothing to do in assembly when num_pixels < 2.
    "beq %[src], %[p_loop_end], 1f \n\t"
    " nop \n\t"
    // Replicate each multiplier into both halfwords, then sign-extend its low
    // byte to 16 bits (shift left by 8, arithmetic shift right by 8).
    "replv.ph %[temp0], %[G_to_R] \n\t"
    "replv.ph %[temp1], %[G_to_B] \n\t"
    "replv.ph %[temp2], %[R_to_B] \n\t"
    "shll.ph %[temp0], %[temp0], 8 \n\t"
    "shll.ph %[temp1], %[temp1], 8 \n\t"
    "shll.ph %[temp2], %[temp2], 8 \n\t"
    "shra.ph %[temp0], %[temp0], 8 \n\t"
    "shra.ph %[temp1], %[temp1], 8 \n\t"
    "shra.ph %[temp2], %[temp2], 8 \n\t"
  "0: \n\t"
    // Load two pixels and copy them verbatim to the output; two bytes of each
    // are overwritten by the byte stores further down.
    "lw %[argb], 0(%[src]) \n\t"
    "lw %[argb1], 4(%[src]) \n\t"
    "sw %[argb], 0(%[dst]) \n\t"
    "sw %[argb1], 4(%[dst]) \n\t"
    // Advance both pointers now; later stores use negative offsets.
    "addiu %[src], %[src], 8 \n\t"
    "addiu %[dst], %[dst], 8 \n\t"
    // Gather the high byte of every halfword, keep the two green bytes as
    // halfwords, and sign-extend them: temp3 = { (int8)G0, (int8)G1 }.
    "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
    "preceu.ph.qbra %[temp3], %[temp3] \n\t"
    "shll.ph %[temp3], %[temp3], 8 \n\t"
    "shra.ph %[temp3], %[temp3], 8 \n\t"
    // temp5 = green * G_to_R, temp3 = green * G_to_B (per halfword).
    "mul.ph %[temp5], %[temp3], %[temp0] \n\t"
    "mul.ph %[temp3], %[temp3], %[temp1] \n\t"
    // new_red = upper halfwords of both pixels (red in the low byte of each);
    // argb1   = lower halfwords of both pixels (blue in the low byte of each).
    "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
    "ins %[argb1], %[argb], 16, 16 \n\t"
    // Scale the products (>> 5) and add them to red and blue respectively.
    "shra.ph %[temp5], %[temp5], 5 \n\t"
    "shra.ph %[temp3], %[temp3], 5 \n\t"
    "addu.ph %[new_red], %[new_red], %[temp5] \n\t"
    "addu.ph %[argb1], %[argb1], %[temp3] \n\t"
    // temp5 = the two new red bytes, zero-extended to halfwords.
    "preceu.ph.qbra %[temp5], %[new_red] \n\t"
    // temp4 = sign-extended new red, times R_to_B.
    "shll.ph %[temp4], %[temp5], 8 \n\t"
    "shra.ph %[temp4], %[temp4], 8 \n\t"
    "mul.ph %[temp4], %[temp4], %[temp2] \n\t"
    // Red of the second pixel.
    "sb %[temp5], -2(%[dst]) \n\t"
    "sra %[temp5], %[temp5], 16 \n\t"
    // Add the scaled red contribution to blue.
    "shra.ph %[temp4], %[temp4], 5 \n\t"
    "addu.ph %[argb1], %[argb1], %[temp4] \n\t"
    // temp3 = the two new blue bytes, zero-extended to halfwords.
    "preceu.ph.qbra %[temp3], %[argb1] \n\t"
    // Red of the first pixel, then blue of the second pixel.
    "sb %[temp5], -6(%[dst]) \n\t"
    "sb %[temp3], -4(%[dst]) \n\t"
    "sra %[temp3], %[temp3], 16 \n\t"
    "bne %[src], %[p_loop_end], 0b \n\t"
    // Delay slot: blue of the first pixel, stored on every iteration.
    " sb %[temp3], -8(%[dst]) \n\t"
  "1: \n\t"
    ".set pop \n\t"
    // 'dst' and 'src' are read-write operands: the asm advances both.
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [new_red]"=&r"(new_red), [argb]"=&r"(argb),
      [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
    : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
      [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
    : "memory", "hi", "lo"
  );

  // Fall-back to C-version for left-overs.
  // 'src' and 'dst' were advanced by the assembly above, so they now point at
  // the remaining odd pixel, if any.
  if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
}
| 349 | 358 |
| 350 static void ConvertBGRAToRGB(const uint32_t* src, | 359 static void ConvertBGRAToRGB(const uint32_t* src, |
| 351 int num_pixels, uint8_t* dst) { | 360 int num_pixels, uint8_t* dst) { |
| 352 int temp0, temp1, temp2, temp3; | 361 int temp0, temp1, temp2, temp3; |
| 353 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); | 362 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 354 const uint32_t* const p_loop2_end = src + num_pixels; | 363 const uint32_t* const p_loop2_end = src + num_pixels; |
| 355 __asm__ volatile ( | 364 __asm__ volatile ( |
| 356 ".set push \n\t" | 365 ".set push \n\t" |
| 357 ".set noreorder \n\t" | 366 ".set noreorder \n\t" |
| (...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 671 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444; | 680 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444; |
| 672 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565; | 681 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565; |
| 673 VP8LConvertBGRAToBGR = ConvertBGRAToBGR; | 682 VP8LConvertBGRAToBGR = ConvertBGRAToBGR; |
| 674 } | 683 } |
| 675 | 684 |
| 676 #else // !WEBP_USE_MIPS_DSP_R2 | 685 #else // !WEBP_USE_MIPS_DSP_R2 |
| 677 | 686 |
// Build without WEBP_USE_MIPS_DSP_R2: none of the code above is compiled.
// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; it
// presumably emits an empty VP8LDspInitMIPSdspR2() so that references to the
// init function still link -- confirm against the macro definition.
WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
| 679 | 688 |
| 680 #endif // WEBP_USE_MIPS_DSP_R2 | 689 #endif // WEBP_USE_MIPS_DSP_R2 |
| OLD | NEW |