OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 Google Inc. All Rights Reserved. |
| 2 // |
| 3 // Use of this source code is governed by a BSD-style license |
| 4 // that can be found in the COPYING file in the root of the source |
| 5 // tree. An additional intellectual property rights grant can be found |
| 6 // in the file PATENTS. All contributing project authors may |
| 7 // be found in the AUTHORS file in the root of the source tree. |
| 8 // ----------------------------------------------------------------------------- |
| 9 // |
| 10 // Image transforms and color space conversion methods for lossless decoder. |
| 11 // |
| 12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) |
| 13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com) |
| 14 |
| 15 #include "./dsp.h" |
| 16 |
| 17 #if defined(WEBP_USE_MIPS_DSP_R2) |
| 18 |
| 19 #include "./lossless.h" |
| 20 |
// MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) defines a static
// function FUNC_NAME(src, color_map, dst, y_start, y_end, width) that maps
// every source element through 'color_map' for rows y_start .. y_end - 1.
// 'src' and 'dst' simply keep advancing; no per-row stride is applied here.
// Each row is handled in two parts:
//  - (width >> 2) iterations of inline assembly, four elements at a time.
//    The assembler '.ifc' directives select the code matching TYPE:
//      uint8_t:  load four bytes as indices, look each one up in color_map
//                and store bits 15..8 of the looked-up word as a byte;
//      uint32_t: load four words, use bits 15..8 of each as the index and
//                store the looked-up word unchanged.
//    Indices are multiplied by 4 (sll by 2) because 'lwx' takes a byte offset
//    from the color_map base.
//  - (width & 3) left-over elements handled in C via GET_INDEX / GET_VALUE.
// NOTE(review): the assembly hard-codes the index/value extraction, so it
// presumably has to agree with the GET_INDEX / GET_VALUE helpers passed in
// (they are defined outside this file) -- confirm before changing either.
| 21 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \ |
| 22 static void FUNC_NAME(const TYPE* src, \ |
| 23 const uint32_t* const color_map, \ |
| 24 TYPE* dst, int y_start, int y_end, \ |
| 25 int width) { \ |
| 26 int y; \ |
| 27 for (y = y_start; y < y_end; ++y) { \ |
| 28 int x; \ |
| 29 for (x = 0; x < (width >> 2); ++x) { \ |
| 30 int tmp1, tmp2, tmp3, tmp4; \ |
| 31 __asm__ volatile ( \ |
| 32 ".ifc " #TYPE ", uint8_t \n\t" \ |
| 33 "lbu %[tmp1], 0(%[src]) \n\t" \ |
| 34 "lbu %[tmp2], 1(%[src]) \n\t" \ |
| 35 "lbu %[tmp3], 2(%[src]) \n\t" \ |
| 36 "lbu %[tmp4], 3(%[src]) \n\t" \ |
| 37 "addiu %[src], %[src], 4 \n\t" \ |
| 38 ".endif \n\t" \ |
| 39 ".ifc " #TYPE ", uint32_t \n\t" \ |
| 40 "lw %[tmp1], 0(%[src]) \n\t" \ |
| 41 "lw %[tmp2], 4(%[src]) \n\t" \ |
| 42 "lw %[tmp3], 8(%[src]) \n\t" \ |
| 43 "lw %[tmp4], 12(%[src]) \n\t" \ |
| 44 "ext %[tmp1], %[tmp1], 8, 8 \n\t" \ |
| 45 "ext %[tmp2], %[tmp2], 8, 8 \n\t" \ |
| 46 "ext %[tmp3], %[tmp3], 8, 8 \n\t" \ |
| 47 "ext %[tmp4], %[tmp4], 8, 8 \n\t" \ |
| 48 "addiu %[src], %[src], 16 \n\t" \ |
| 49 ".endif \n\t" \ |
| 50 "sll %[tmp1], %[tmp1], 2 \n\t" \ |
| 51 "sll %[tmp2], %[tmp2], 2 \n\t" \ |
| 52 "sll %[tmp3], %[tmp3], 2 \n\t" \ |
| 53 "sll %[tmp4], %[tmp4], 2 \n\t" \ |
| 54 "lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \ |
| 55 "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \ |
| 56 "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \ |
| 57 "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \ |
| 58 ".ifc " #TYPE ", uint8_t \n\t" \ |
| 59 "ext %[tmp1], %[tmp1], 8, 8 \n\t" \ |
| 60 "ext %[tmp2], %[tmp2], 8, 8 \n\t" \ |
| 61 "ext %[tmp3], %[tmp3], 8, 8 \n\t" \ |
| 62 "ext %[tmp4], %[tmp4], 8, 8 \n\t" \ |
| 63 "sb %[tmp1], 0(%[dst]) \n\t" \ |
| 64 "sb %[tmp2], 1(%[dst]) \n\t" \ |
| 65 "sb %[tmp3], 2(%[dst]) \n\t" \ |
| 66 "sb %[tmp4], 3(%[dst]) \n\t" \ |
| 67 "addiu %[dst], %[dst], 4 \n\t" \ |
| 68 ".endif \n\t" \ |
| 69 ".ifc " #TYPE ", uint32_t \n\t" \ |
| 70 "sw %[tmp1], 0(%[dst]) \n\t" \ |
| 71 "sw %[tmp2], 4(%[dst]) \n\t" \ |
| 72 "sw %[tmp3], 8(%[dst]) \n\t" \ |
| 73 "sw %[tmp4], 12(%[dst]) \n\t" \ |
| 74 "addiu %[dst], %[dst], 16 \n\t" \ |
| 75 ".endif \n\t" \ |
| 76 : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), \ |
| 77 [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst) \ |
| 78 : [color_map]"r"(color_map) \ |
| 79 : "memory" \ |
| 80 ); \ |
| 81 } \ |
| 82 for (x = 0; x < (width & 3); ++x) { \ |
| 83 *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \ |
| 84 } \ |
| 85 } \ |
| 86 } |
| 87 |
// Instantiate the two color-mapping routines: MapARGB operates on 32-bit
// elements (uint32_t) and MapAlpha on 8-bit elements (uint8_t).
| 88 MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue) |
| 89 MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue) |
| 90 |
// The generator macro is not used past this point.
| 91 #undef MAP_COLOR_FUNCS |
| 92 |
// Returns, independently for each of the four 8-bit lanes of the packed
// arguments, c0 + c1 - c2 clamped to [0, 255].
// The lanes are widened to 16-bit halfwords so the intermediate result may
// become negative or exceed 255; the clamp is done by a saturating left shift
// followed by a saturating halfword-to-byte pack.
| 93 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, |
| 94 uint32_t c2) { |
| 95 int temp0, temp1, temp2, temp3, temp4, temp5; |
| 96 __asm__ volatile ( |
// Zero-extend bytes to halfwords: temp1/temp2 hold the low/high byte pair of
// c0, temp3/temp4 those of c1 and temp5/temp0 those of c2.
| 97 "preceu.ph.qbr %[temp1], %[c0] \n\t" |
| 98 "preceu.ph.qbl %[temp2], %[c0] \n\t" |
| 99 "preceu.ph.qbr %[temp3], %[c1] \n\t" |
| 100 "preceu.ph.qbl %[temp4], %[c1] \n\t" |
| 101 "preceu.ph.qbr %[temp5], %[c2] \n\t" |
| 102 "preceu.ph.qbl %[temp0], %[c2] \n\t" |
// temp3/temp4 = c1 - c2, then temp1/temp2 = c0 + (c1 - c2), per halfword.
| 103 "subq.ph %[temp3], %[temp3], %[temp5] \n\t" |
| 104 "subq.ph %[temp4], %[temp4], %[temp0] \n\t" |
| 105 "addq.ph %[temp1], %[temp1], %[temp3] \n\t" |
| 106 "addq.ph %[temp2], %[temp2], %[temp4] \n\t" |
// Multiply by 128 with signed saturation (values above 255 hit the 16-bit
// maximum), then pack back to bytes with unsigned saturation so that negative
// halfwords become 0 and too-large ones become 255.
| 107 "shll_s.ph %[temp1], %[temp1], 7 \n\t" |
| 108 "shll_s.ph %[temp2], %[temp2], 7 \n\t" |
| 109 "precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t" |
| 110 : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
| 111 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5) |
| 112 : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2) |
| 113 : "memory" |
| 114 ); |
| 115 return temp2; |
| 116 } |
| 117 |
// Returns, independently for each 8-bit lane, a + (a - c2) / 2 clamped to
// [0, 255], where a = (c0 + c1) >> 1 is the truncating per-byte average of c0
// and c1 and the division by two rounds toward zero.
| 118 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, |
| 119 uint32_t c2) { |
| 120 int temp0, temp1, temp2, temp3, temp4, temp5; |
| 121 __asm__ volatile ( |
// temp5 = per-byte average of c0 and c1.
| 122 "adduh.qb %[temp5], %[c0], %[c1] \n\t" |
// Zero-extend to halfwords: temp1/temp2 = low/high byte pair of the average,
// temp3/temp4 = low/high byte pair of c2.
| 123 "preceu.ph.qbr %[temp3], %[c2] \n\t" |
| 124 "preceu.ph.qbr %[temp1], %[temp5] \n\t" |
| 125 "preceu.ph.qbl %[temp2], %[temp5] \n\t" |
| 126 "preceu.ph.qbl %[temp4], %[c2] \n\t" |
// temp3/temp4 = average - c2.
| 127 "subq.ph %[temp3], %[temp1], %[temp3] \n\t" |
| 128 "subq.ph %[temp4], %[temp2], %[temp4] \n\t" |
// Add the sign bit (1 for negative differences) before the arithmetic shift
// right by one, so that the halving rounds toward zero.
| 129 "shrl.ph %[temp5], %[temp3], 15 \n\t" |
| 130 "shrl.ph %[temp0], %[temp4], 15 \n\t" |
| 131 "addq.ph %[temp3], %[temp3], %[temp5] \n\t" |
| 132 "addq.ph %[temp4], %[temp0], %[temp4] \n\t" |
| 133 "shra.ph %[temp3], %[temp3], 1 \n\t" |
| 134 "shra.ph %[temp4], %[temp4], 1 \n\t" |
// average + halved difference, then clamp to [0, 255] and re-pack to bytes
// using the saturating shift / saturating pack pair.
| 135 "addq.ph %[temp1], %[temp1], %[temp3] \n\t" |
| 136 "addq.ph %[temp2], %[temp2], %[temp4] \n\t" |
| 137 "shll_s.ph %[temp1], %[temp1], 7 \n\t" |
| 138 "shll_s.ph %[temp2], %[temp2], 7 \n\t" |
| 139 "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t" |
| 140 : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
| 141 [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5) |
| 142 : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2) |
| 143 : "memory" |
| 144 ); |
| 145 return temp1; |
| 146 } |
| 147 |
// Chooses between 'a' and 'b' based on their distance to 'c': returns 'a'
// when the sum over the four bytes of |b - c| is less than or equal to the
// sum over the four bytes of |a - c|, and 'b' otherwise.
| 148 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { |
| 149 int temp0, temp1, temp2, temp3, temp4, temp5; |
| 150 __asm__ volatile ( |
// Per-byte compare c < b; the two picks that follow consume the resulting
// condition bits: temp1 = per-byte max(b, c), temp2 = per-byte min(b, c).
| 151 "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t" |
| 152 "pick.qb %[temp1], %[b], %[c] \n\t" |
| 153 "pick.qb %[temp2], %[c], %[b] \n\t" |
// Same for a and c: temp4 = per-byte max(a, c), temp5 = per-byte min(a, c).
| 154 "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t" |
| 155 "pick.qb %[temp4], %[a], %[c] \n\t" |
| 156 "pick.qb %[temp5], %[c], %[a] \n\t" |
// max - min gives the per-byte absolute differences; raddu.w.qb sums the
// four bytes of each into a single word.
| 157 "subu.qb %[temp3], %[temp1], %[temp2] \n\t" |
| 158 "subu.qb %[temp0], %[temp4], %[temp5] \n\t" |
| 159 "raddu.w.qb %[temp3], %[temp3] \n\t" |
| 160 "raddu.w.qb %[temp0], %[temp0] \n\t" |
// temp3 = sum|b - c| - sum|a - c|; temp0 = (temp3 <= 0).
// When temp0 is zero (difference strictly positive) 'a' is replaced by 'b'.
| 161 "subu %[temp3], %[temp3], %[temp0] \n\t" |
| 162 "slti %[temp0], %[temp3], 0x1 \n\t" |
| 163 "movz %[a], %[b], %[temp0] \n\t" |
| 164 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), |
| 165 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0), |
| 166 [a]"+&r"(a) |
| 167 : [b]"r"(b), [c]"r"(c) |
| 168 ); |
| 169 return a; |
| 170 } |
| 171 |
// Per-byte average of two packed 32-bit values: each 8-bit lane of the result
// is (a0 + a1) >> 1, computed by a single adduh.qb (unsigned add and halve,
// no rounding, no carry between lanes).
| 172 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { |
| 173 __asm__ volatile ( |
| 174 "adduh.qb %[a0], %[a0], %[a1] \n\t" |
| 175 : [a0]"+r"(a0) |
| 176 : [a1]"r"(a1) |
| 177 ); |
| 178 return a0; |
| 179 } |
| 180 |
| 181 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { |
| 182 return Average2(Average2(a0, a2), a1); |
| 183 } |
| 184 |
| 185 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, |
| 186 uint32_t a2, uint32_t a3) { |
| 187 return Average2(Average2(a0, a1), Average2(a2, a3)); |
| 188 } |
| 189 |
// Predictor 5: Average3 of 'left', top[0] and top[1].
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_next = top[1];
  return Average3(left, top_cur, top_next);
}
| 193 |
// Predictor 6: Average2 of 'left' and top[-1].
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
  const uint32_t top_prev = top[-1];
  return Average2(left, top_prev);
}
| 197 |
// Predictor 7: Average2 of 'left' and top[0].
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  return Average2(left, top_cur);
}
| 201 |
// Predictor 8: Average2 of top[-1] and top[0]; 'left' is not used.
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
  const uint32_t top_prev = top[-1];
  const uint32_t top_cur = top[0];
  (void)left;
  return Average2(top_prev, top_cur);
}
| 206 |
// Predictor 9: Average2 of top[0] and top[1]; 'left' is not used.
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_next = top[1];
  (void)left;
  return Average2(top_cur, top_next);
}
| 211 |
// Predictor 10: Average4 of 'left', top[-1], top[0] and top[1].
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
  const uint32_t top_prev = top[-1];
  const uint32_t top_cur = top[0];
  const uint32_t top_next = top[1];
  return Average4(left, top_prev, top_cur, top_next);
}
| 215 |
// Predictor 11: Select() applied to top[0], 'left' and top[-1], in that
// argument order.
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_prev = top[-1];
  return Select(top_cur, left, top_prev);
}
| 219 |
// Predictor 12: ClampedAddSubtractFull() of 'left', top[0] and top[-1].
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_prev = top[-1];
  return ClampedAddSubtractFull(left, top_cur, top_prev);
}
| 223 |
// Predictor 13: ClampedAddSubtractHalf() of 'left', top[0] and top[-1].
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  const uint32_t top_cur = top[0];
  const uint32_t top_prev = top[-1];
  return ClampedAddSubtractHalf(left, top_cur, top_prev);
}
| 227 |
| 228 // Add green to blue and red channels (i.e. perform the inverse transform of |
| 229 // 'subtract green'). |
// Works in place on 'num_pixels' 32-bit pixels starting at 'data': bits 15..8
// of each pixel are added, modulo 256, to bits 7..0 and to bits 23..16; bits
// 31..24 and 15..8 are left unchanged.
// The first loop handles four pixels per iteration (up to p_loop1_end), the
// second loop handles the remaining (num_pixels & 3) pixels one by one.
| 230 static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) { |
| 231 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; |
| 232 uint32_t* const p_loop1_end = data + (num_pixels & ~3); |
| 233 uint32_t* const p_loop2_end = data + num_pixels; |
| 234 __asm__ volatile ( |
// noreorder: the instruction written right after each branch is its delay
// slot and executes whether or not the branch is taken.
| 235 ".set push \n\t" |
| 236 ".set noreorder \n\t" |
// Skip the 4-pixel loop when there is no complete group of four.
| 237 "beq %[data], %[p_loop1_end], 3f \n\t" |
| 238 " nop \n\t" |
| 239 "0: \n\t" |
| 240 "lw %[temp0], 0(%[data]) \n\t" |
| 241 "lw %[temp1], 4(%[data]) \n\t" |
| 242 "lw %[temp2], 8(%[data]) \n\t" |
| 243 "lw %[temp3], 12(%[data]) \n\t" |
// Isolate bits 15..8 of each pixel.
| 244 "ext %[temp4], %[temp0], 8, 8 \n\t" |
| 245 "ext %[temp5], %[temp1], 8, 8 \n\t" |
| 246 "ext %[temp6], %[temp2], 8, 8 \n\t" |
| 247 "ext %[temp7], %[temp3], 8, 8 \n\t" |
| 248 "addiu %[data], %[data], 16 \n\t" |
// Replicate that byte into both halfwords (0x00GG00GG) ...
| 249 "replv.ph %[temp4], %[temp4] \n\t" |
| 250 "replv.ph %[temp5], %[temp5] \n\t" |
| 251 "replv.ph %[temp6], %[temp6] \n\t" |
| 252 "replv.ph %[temp7], %[temp7] \n\t" |
// ... and add it byte-wise (wrapping, no carry between bytes).
| 253 "addu.qb %[temp0], %[temp0], %[temp4] \n\t" |
| 254 "addu.qb %[temp1], %[temp1], %[temp5] \n\t" |
| 255 "addu.qb %[temp2], %[temp2], %[temp6] \n\t" |
| 256 "addu.qb %[temp3], %[temp3], %[temp7] \n\t" |
// 'data' has already been advanced, hence the negative store offsets. The
// fourth store sits in the branch delay slot.
| 257 "sw %[temp0], -16(%[data]) \n\t" |
| 258 "sw %[temp1], -12(%[data]) \n\t" |
| 259 "sw %[temp2], -8(%[data]) \n\t" |
| 260 "bne %[data], %[p_loop1_end], 0b \n\t" |
| 261 " sw %[temp3], -4(%[data]) \n\t" |
| 262 "3: \n\t" |
// Tail: same operation, one pixel per iteration.
| 263 "beq %[data], %[p_loop2_end], 2f \n\t" |
| 264 " nop \n\t" |
| 265 "1: \n\t" |
| 266 "lw %[temp0], 0(%[data]) \n\t" |
| 267 "addiu %[data], %[data], 4 \n\t" |
| 268 "ext %[temp4], %[temp0], 8, 8 \n\t" |
| 269 "replv.ph %[temp4], %[temp4] \n\t" |
| 270 "addu.qb %[temp0], %[temp0], %[temp4] \n\t" |
| 271 "bne %[data], %[p_loop2_end], 1b \n\t" |
| 272 " sw %[temp0], -4(%[data]) \n\t" |
| 273 "2: \n\t" |
| 274 ".set pop \n\t" |
| 275 : [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), |
| 276 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), |
| 277 [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7) |
| 278 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) |
| 279 : "memory" |
| 280 ); |
| 281 } |
| 282 |
// Applies the inverse color transform in place to 'num_pixels' 32-bit pixels
// at 'data', using the three multipliers read from 'm'. Pixels are handled two
// at a time in assembly; an odd last pixel is handed to
// VP8LTransformColorInverse_C. Per pixel, with the multipliers and the color
// bytes treated as signed 8-bit values and results stored modulo 256:
//   red  (bits 23..16) += (green_to_red_  * green) >> 5
//   blue (bits 7..0)   += (green_to_blue_ * green) >> 5
//   blue               += (red_to_blue_   * updated red) >> 5
// Only the red and blue bytes are written back (byte stores).
// NOTE(review): the byte-store offsets assume blue is the lowest-addressed
// byte of each pixel (little-endian layout) -- confirm for the target.
| 283 static void TransformColorInverse(const VP8LMultipliers* const m, |
| 284 uint32_t* data, int num_pixels) { |
| 285 int temp0, temp1, temp2, temp3, temp4, temp5; |
| 286 uint32_t argb, argb1, new_red; |
| 287 const uint32_t G_to_R = m->green_to_red_; |
| 288 const uint32_t G_to_B = m->green_to_blue_; |
| 289 const uint32_t R_to_B = m->red_to_blue_; |
// End of the region covered by the two-pixel loop (even pixel count).
| 290 uint32_t* const p_loop_end = data + (num_pixels & ~1); |
| 291 __asm__ volatile ( |
// noreorder: the instruction following each branch is its delay slot.
| 292 ".set push \n\t" |
| 293 ".set noreorder \n\t" |
| 294 "beq %[data], %[p_loop_end], 1f \n\t" |
| 295 " nop \n\t" |
// Replicate each multiplier into both halfwords, then sign-extend its low
// 8 bits (shift left by 8, arithmetic shift right by 8).
| 296 "replv.ph %[temp0], %[G_to_R] \n\t" |
| 297 "replv.ph %[temp1], %[G_to_B] \n\t" |
| 298 "replv.ph %[temp2], %[R_to_B] \n\t" |
| 299 "shll.ph %[temp0], %[temp0], 8 \n\t" |
| 300 "shll.ph %[temp1], %[temp1], 8 \n\t" |
| 301 "shll.ph %[temp2], %[temp2], 8 \n\t" |
| 302 "shra.ph %[temp0], %[temp0], 8 \n\t" |
| 303 "shra.ph %[temp1], %[temp1], 8 \n\t" |
| 304 "shra.ph %[temp2], %[temp2], 8 \n\t" |
| 305 "0: \n\t" |
// Load a pair of pixels and advance; later stores use negative offsets.
| 306 "lw %[argb], 0(%[data]) \n\t" |
| 307 "lw %[argb1], 4(%[data]) \n\t" |
| 308 "addiu %[data], %[data], 8 \n\t" |
// temp3 = sign-extended green of both pixels, one per halfword
// (first pixel in the upper halfword, second pixel in the lower one).
| 309 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t" |
| 310 "preceu.ph.qbra %[temp3], %[temp3] \n\t" |
| 311 "shll.ph %[temp3], %[temp3], 8 \n\t" |
| 312 "shra.ph %[temp3], %[temp3], 8 \n\t" |
// temp5 = green * green_to_red, temp3 = green * green_to_blue.
| 313 "mul.ph %[temp5], %[temp3], %[temp0] \n\t" |
| 314 "mul.ph %[temp3], %[temp3], %[temp1] \n\t" |
// new_red = upper halfwords (bits 31..16) of both pixels,
// argb1   = lower halfwords (bits 15..0) of both pixels.
| 315 "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t" |
| 316 "ins %[argb1], %[argb], 16, 16 \n\t" |
| 317 "shra.ph %[temp5], %[temp5], 5 \n\t" |
| 318 "shra.ph %[temp3], %[temp3], 5 \n\t" |
// Add the green-derived deltas; only the low byte of each halfword is used
// afterwards.
| 319 "addu.ph %[new_red], %[new_red], %[temp5] \n\t" |
| 320 "addu.ph %[argb1], %[argb1], %[temp3] \n\t" |
// temp5 = updated red bytes; temp4 = their sign-extended copy times
// red_to_blue.
| 321 "preceu.ph.qbra %[temp5], %[new_red] \n\t" |
| 322 "shll.ph %[temp4], %[temp5], 8 \n\t" |
| 323 "shra.ph %[temp4], %[temp4], 8 \n\t" |
| 324 "mul.ph %[temp4], %[temp4], %[temp2] \n\t" |
// Store red of the second pixel, then move red of the first pixel into the
// low byte for its store further down.
| 325 "sb %[temp5], -2(%[data]) \n\t" |
| 326 "sra %[temp5], %[temp5], 16 \n\t" |
| 327 "shra.ph %[temp4], %[temp4], 5 \n\t" |
| 328 "addu.ph %[argb1], %[argb1], %[temp4] \n\t" |
// temp3 = final blue bytes of both pixels.
| 329 "preceu.ph.qbra %[temp3], %[argb1] \n\t" |
| 330 "sb %[temp5], -6(%[data]) \n\t" |
| 331 "sb %[temp3], -4(%[data]) \n\t" |
| 332 "sra %[temp3], %[temp3], 16 \n\t" |
// The store of the first pixel's blue byte sits in the branch delay slot.
| 333 "bne %[data], %[p_loop_end], 0b \n\t" |
| 334 " sb %[temp3], -8(%[data]) \n\t" |
| 335 "1: \n\t" |
| 336 ".set pop \n\t" |
| 337 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
| 338 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), |
| 339 [new_red]"=&r"(new_red), [argb]"=&r"(argb), |
| 340 [argb1]"=&r"(argb1), [data]"+&r"(data) |
| 341 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B), |
| 342 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end) |
| 343 : "memory", "hi", "lo" |
| 344 ); |
| 345 |
// 'data' was advanced by the assembly above, so it now points at the one
// remaining pixel (if any).
| 346 // Fall-back to C-version for left-overs. |
| 347 if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1); |
| 348 } |
| 349 |
// Converts 'num_pixels' 32-bit pixels at 'src' into 3 bytes per pixel at
// 'dst'. The first loop turns four pixels (16 source bytes) into 12
// destination bytes per iteration; the second loop handles the remaining
// (num_pixels & 3) pixels one at a time, writing bits 23..16, then 15..8,
// then 7..0 of each pixel at increasing addresses.
// 'dst' is written only through usw/ush/sb, so it needs no word alignment.
// NOTE(review): the byte order described above assumes a little-endian
// target -- confirm.
| 350 static void ConvertBGRAToRGB(const uint32_t* src, |
| 351 int num_pixels, uint8_t* dst) { |
| 352 int temp0, temp1, temp2, temp3; |
| 353 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 354 const uint32_t* const p_loop2_end = src + num_pixels; |
| 355 __asm__ volatile ( |
// noreorder: the instruction following each branch is its delay slot.
| 356 ".set push \n\t" |
| 357 ".set noreorder \n\t" |
| 358 "beq %[src], %[p_loop1_end], 3f \n\t" |
| 359 " nop \n\t" |
| 360 "0: \n\t" |
| 361 "lw %[temp3], 12(%[src]) \n\t" |
| 362 "lw %[temp2], 8(%[src]) \n\t" |
| 363 "lw %[temp1], 4(%[src]) \n\t" |
| 364 "lw %[temp0], 0(%[src]) \n\t" |
// Drop the top byte of every pixel and pack the remaining 12 bytes into
// temp0, temp2 and temp3, byte-swapping/rotating them into output order.
| 365 "ins %[temp3], %[temp2], 24, 8 \n\t" |
| 366 "sll %[temp2], %[temp2], 8 \n\t" |
| 367 "rotr %[temp3], %[temp3], 16 \n\t" |
| 368 "ins %[temp2], %[temp1], 0, 16 \n\t" |
| 369 "sll %[temp1], %[temp1], 8 \n\t" |
| 370 "wsbh %[temp3], %[temp3] \n\t" |
| 371 "balign %[temp0], %[temp1], 1 \n\t" |
| 372 "wsbh %[temp2], %[temp2] \n\t" |
| 373 "wsbh %[temp0], %[temp0] \n\t" |
| 374 "usw %[temp3], 8(%[dst]) \n\t" |
| 375 "rotr %[temp0], %[temp0], 16 \n\t" |
| 376 "usw %[temp2], 4(%[dst]) \n\t" |
| 377 "addiu %[src], %[src], 16 \n\t" |
| 378 "usw %[temp0], 0(%[dst]) \n\t" |
// dst advances by 12 bytes in the delay slot.
| 379 "bne %[src], %[p_loop1_end], 0b \n\t" |
| 380 " addiu %[dst], %[dst], 12 \n\t" |
| 381 "3: \n\t" |
// Tail: one pixel (3 output bytes) per iteration.
| 382 "beq %[src], %[p_loop2_end], 2f \n\t" |
| 383 " nop \n\t" |
| 384 "1: \n\t" |
| 385 "lw %[temp0], 0(%[src]) \n\t" |
| 386 "addiu %[src], %[src], 4 \n\t" |
// Swap the two low bytes so the halfword store emits bits 15..8 first.
| 387 "wsbh %[temp1], %[temp0] \n\t" |
| 388 "addiu %[dst], %[dst], 3 \n\t" |
| 389 "ush %[temp1], -2(%[dst]) \n\t" |
// Bits 23..16 go to the first of the three output bytes (delay-slot store).
| 390 "sra %[temp0], %[temp0], 16 \n\t" |
| 391 "bne %[src], %[p_loop2_end], 1b \n\t" |
| 392 " sb %[temp0], -3(%[dst]) \n\t" |
| 393 "2: \n\t" |
| 394 ".set pop \n\t" |
| 395 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
| 396 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src) |
| 397 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) |
| 398 : "memory" |
| 399 ); |
| 400 } |
| 401 |
// Converts 'num_pixels' 32-bit pixels at 'src' into 4 bytes per pixel at
// 'dst'. Each pixel word is byte-swapped within its halfwords (wsbh) and then
// rotated left by 8 bits (balign with identical operands), so the stored
// bytes are, at increasing addresses, bits 23..16, 15..8, 7..0 and 31..24 of
// the source pixel.
// The first loop handles four pixels per iteration, the second loop the
// remaining (num_pixels & 3) pixels. Stores use usw, so 'dst' may be
// unaligned.
// NOTE(review): the byte order described above assumes a little-endian
// target -- confirm.
| 402 static void ConvertBGRAToRGBA(const uint32_t* src, |
| 403 int num_pixels, uint8_t* dst) { |
| 404 int temp0, temp1, temp2, temp3; |
| 405 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 406 const uint32_t* const p_loop2_end = src + num_pixels; |
| 407 __asm__ volatile ( |
// noreorder: the instruction following each branch is its delay slot.
| 408 ".set push \n\t" |
| 409 ".set noreorder \n\t" |
| 410 "beq %[src], %[p_loop1_end], 3f \n\t" |
| 411 " nop \n\t" |
| 412 "0: \n\t" |
| 413 "lw %[temp0], 0(%[src]) \n\t" |
| 414 "lw %[temp1], 4(%[src]) \n\t" |
| 415 "lw %[temp2], 8(%[src]) \n\t" |
| 416 "lw %[temp3], 12(%[src]) \n\t" |
| 417 "wsbh %[temp0], %[temp0] \n\t" |
| 418 "wsbh %[temp1], %[temp1] \n\t" |
| 419 "wsbh %[temp2], %[temp2] \n\t" |
| 420 "wsbh %[temp3], %[temp3] \n\t" |
| 421 "addiu %[src], %[src], 16 \n\t" |
| 422 "balign %[temp0], %[temp0], 1 \n\t" |
| 423 "balign %[temp1], %[temp1], 1 \n\t" |
| 424 "balign %[temp2], %[temp2], 1 \n\t" |
| 425 "balign %[temp3], %[temp3], 1 \n\t" |
| 426 "usw %[temp0], 0(%[dst]) \n\t" |
| 427 "usw %[temp1], 4(%[dst]) \n\t" |
| 428 "usw %[temp2], 8(%[dst]) \n\t" |
| 429 "usw %[temp3], 12(%[dst]) \n\t" |
// dst advances by 16 bytes in the delay slot.
| 430 "bne %[src], %[p_loop1_end], 0b \n\t" |
| 431 " addiu %[dst], %[dst], 16 \n\t" |
| 432 "3: \n\t" |
// Tail: one pixel per iteration.
| 433 "beq %[src], %[p_loop2_end], 2f \n\t" |
| 434 " nop \n\t" |
| 435 "1: \n\t" |
| 436 "lw %[temp0], 0(%[src]) \n\t" |
| 437 "wsbh %[temp0], %[temp0] \n\t" |
| 438 "addiu %[src], %[src], 4 \n\t" |
| 439 "balign %[temp0], %[temp0], 1 \n\t" |
| 440 "usw %[temp0], 0(%[dst]) \n\t" |
| 441 "bne %[src], %[p_loop2_end], 1b \n\t" |
| 442 " addiu %[dst], %[dst], 4 \n\t" |
| 443 "2: \n\t" |
| 444 ".set pop \n\t" |
| 445 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
| 446 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src) |
| 447 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) |
| 448 : "memory" |
| 449 ); |
| 450 } |
| 451 |
// Converts 'num_pixels' 32-bit pixels at 'src' into 2 bytes per pixel at
// 'dst', keeping the top 4 bits of each 8-bit channel. For every pixel two
// bytes are formed:
//   - one with the top nibble of bits 23..16 in its high nibble and the top
//     nibble of bits 15..8 in its low nibble;
//   - one with the top nibble of bits 7..0 in its high nibble and the top
//     nibble of bits 31..24 in its low nibble.
// When WEBP_SWAP_16BIT_CSP is defined the two bytes are stored as produced by
// the pack; otherwise they are swapped first (wsbh).
// The first loop handles four pixels (8 output bytes) per iteration, the
// second loop the remaining (num_pixels & 3) pixels. Stores are usw/ush, so
// 'dst' may be unaligned.
| 452 static void ConvertBGRAToRGBA4444(const uint32_t* src, |
| 453 int num_pixels, uint8_t* dst) { |
| 454 int temp0, temp1, temp2, temp3, temp4, temp5; |
| 455 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 456 const uint32_t* const p_loop2_end = src + num_pixels; |
| 457 __asm__ volatile ( |
// noreorder: the instruction following each branch is its delay slot.
| 458 ".set push \n\t" |
| 459 ".set noreorder \n\t" |
| 460 "beq %[src], %[p_loop1_end], 3f \n\t" |
| 461 " nop \n\t" |
| 462 "0: \n\t" |
| 463 "lw %[temp0], 0(%[src]) \n\t" |
| 464 "lw %[temp1], 4(%[src]) \n\t" |
| 465 "lw %[temp2], 8(%[src]) \n\t" |
| 466 "lw %[temp3], 12(%[src]) \n\t" |
// For each pixel: pull out the top nibbles of bits 31..24 and 15..8 and
// insert them as the low nibbles of byte 0 and byte 2 respectively. The work
// on the four pixels is interleaved.
| 467 "ext %[temp4], %[temp0], 28, 4 \n\t" |
| 468 "ext %[temp5], %[temp0], 12, 4 \n\t" |
| 469 "ins %[temp0], %[temp4], 0, 4 \n\t" |
| 470 "ext %[temp4], %[temp1], 28, 4 \n\t" |
| 471 "ins %[temp0], %[temp5], 16, 4 \n\t" |
| 472 "ext %[temp5], %[temp1], 12, 4 \n\t" |
| 473 "ins %[temp1], %[temp4], 0, 4 \n\t" |
| 474 "ext %[temp4], %[temp2], 28, 4 \n\t" |
| 475 "ins %[temp1], %[temp5], 16, 4 \n\t" |
| 476 "ext %[temp5], %[temp2], 12, 4 \n\t" |
| 477 "ins %[temp2], %[temp4], 0, 4 \n\t" |
| 478 "ext %[temp4], %[temp3], 28, 4 \n\t" |
| 479 "ins %[temp2], %[temp5], 16, 4 \n\t" |
| 480 "ext %[temp5], %[temp3], 12, 4 \n\t" |
| 481 "ins %[temp3], %[temp4], 0, 4 \n\t" |
// precr.qb.ph keeps bytes 2 and 0 of each operand, packing two pixels per
// word.
| 482 "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t" |
| 483 "ins %[temp3], %[temp5], 16, 4 \n\t" |
| 484 "addiu %[src], %[src], 16 \n\t" |
| 485 "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t" |
| 486 #ifdef WEBP_SWAP_16BIT_CSP |
| 487 "usw %[temp1], 0(%[dst]) \n\t" |
| 488 "usw %[temp3], 4(%[dst]) \n\t" |
| 489 #else |
| 490 "wsbh %[temp1], %[temp1] \n\t" |
| 491 "wsbh %[temp3], %[temp3] \n\t" |
| 492 "usw %[temp1], 0(%[dst]) \n\t" |
| 493 "usw %[temp3], 4(%[dst]) \n\t" |
| 494 #endif |
// dst advances by 8 bytes in the delay slot.
| 495 "bne %[src], %[p_loop1_end], 0b \n\t" |
| 496 " addiu %[dst], %[dst], 8 \n\t" |
| 497 "3: \n\t" |
// Tail: one pixel (2 output bytes) per iteration.
| 498 "beq %[src], %[p_loop2_end], 2f \n\t" |
| 499 " nop \n\t" |
| 500 "1: \n\t" |
| 501 "lw %[temp0], 0(%[src]) \n\t" |
| 502 "ext %[temp4], %[temp0], 28, 4 \n\t" |
| 503 "ext %[temp5], %[temp0], 12, 4 \n\t" |
| 504 "ins %[temp0], %[temp4], 0, 4 \n\t" |
| 505 "ins %[temp0], %[temp5], 16, 4 \n\t" |
| 506 "addiu %[src], %[src], 4 \n\t" |
| 507 "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t" |
| 508 #ifdef WEBP_SWAP_16BIT_CSP |
| 509 "ush %[temp0], 0(%[dst]) \n\t" |
| 510 #else |
| 511 "wsbh %[temp0], %[temp0] \n\t" |
| 512 "ush %[temp0], 0(%[dst]) \n\t" |
| 513 #endif |
| 514 "bne %[src], %[p_loop2_end], 1b \n\t" |
| 515 " addiu %[dst], %[dst], 2 \n\t" |
| 516 "2: \n\t" |
| 517 ".set pop \n\t" |
| 518 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
| 519 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), |
| 520 [dst]"+&r"(dst), [src]"+&r"(src) |
| 521 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) |
| 522 : "memory" |
| 523 ); |
| 524 } |
| 525 |
// Converts 'num_pixels' 32-bit pixels at 'src' into 2 bytes per pixel at
// 'dst'. For every pixel a 16-bit value is assembled whose bits 15..11 are the
// top 5 bits of source bits 23..16, bits 10..5 the top 6 bits of source bits
// 15..8, and bits 4..0 the top 5 bits of source bits 7..0.
// When WEBP_SWAP_16BIT_CSP is defined that value is stored as is; otherwise
// its two bytes are swapped first (wsbh).
// The first loop handles four pixels (8 output bytes) per iteration, the
// second loop the remaining (num_pixels & 3) pixels. Stores are usw/ush, so
// 'dst' may be unaligned.
| 526 static void ConvertBGRAToRGB565(const uint32_t* src, |
| 527 int num_pixels, uint8_t* dst) { |
| 528 int temp0, temp1, temp2, temp3, temp4, temp5; |
| 529 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 530 const uint32_t* const p_loop2_end = src + num_pixels; |
| 531 __asm__ volatile ( |
// noreorder: the instruction following each branch is its delay slot.
| 532 ".set push \n\t" |
| 533 ".set noreorder \n\t" |
| 534 "beq %[src], %[p_loop1_end], 3f \n\t" |
| 535 " nop \n\t" |
| 536 "0: \n\t" |
| 537 "lw %[temp0], 0(%[src]) \n\t" |
| 538 "lw %[temp1], 4(%[src]) \n\t" |
| 539 "lw %[temp2], 8(%[src]) \n\t" |
| 540 "lw %[temp3], 12(%[src]) \n\t" |
// Per pixel: start from source bits 23..8, overwrite the low 11 bits with
// source bits 15..5, then the low 5 bits with source bits 7..3. Registers are
// reused across the four pixels; results end up in temp4, temp0, temp1 and
// temp2 for pixels 0..3.
| 541 "ext %[temp4], %[temp0], 8, 16 \n\t" |
| 542 "ext %[temp5], %[temp0], 5, 11 \n\t" |
| 543 "ext %[temp0], %[temp0], 3, 5 \n\t" |
| 544 "ins %[temp4], %[temp5], 0, 11 \n\t" |
| 545 "ext %[temp5], %[temp1], 5, 11 \n\t" |
| 546 "ins %[temp4], %[temp0], 0, 5 \n\t" |
| 547 "ext %[temp0], %[temp1], 8, 16 \n\t" |
| 548 "ext %[temp1], %[temp1], 3, 5 \n\t" |
| 549 "ins %[temp0], %[temp5], 0, 11 \n\t" |
| 550 "ext %[temp5], %[temp2], 5, 11 \n\t" |
| 551 "ins %[temp0], %[temp1], 0, 5 \n\t" |
| 552 "ext %[temp1], %[temp2], 8, 16 \n\t" |
| 553 "ext %[temp2], %[temp2], 3, 5 \n\t" |
| 554 "ins %[temp1], %[temp5], 0, 11 \n\t" |
| 555 "ext %[temp5], %[temp3], 5, 11 \n\t" |
| 556 "ins %[temp1], %[temp2], 0, 5 \n\t" |
| 557 "ext %[temp2], %[temp3], 8, 16 \n\t" |
| 558 "ext %[temp3], %[temp3], 3, 5 \n\t" |
| 559 "ins %[temp2], %[temp5], 0, 11 \n\t" |
// append: shift the destination left by 16 and put the other pixel's 16-bit
// value in the low half, i.e. two pixels per word.
| 560 "append %[temp0], %[temp4], 16 \n\t" |
| 561 "ins %[temp2], %[temp3], 0, 5 \n\t" |
| 562 "addiu %[src], %[src], 16 \n\t" |
| 563 "append %[temp2], %[temp1], 16 \n\t" |
| 564 #ifdef WEBP_SWAP_16BIT_CSP |
| 565 "usw %[temp0], 0(%[dst]) \n\t" |
| 566 "usw %[temp2], 4(%[dst]) \n\t" |
| 567 #else |
| 568 "wsbh %[temp0], %[temp0] \n\t" |
| 569 "wsbh %[temp2], %[temp2] \n\t" |
| 570 "usw %[temp0], 0(%[dst]) \n\t" |
| 571 "usw %[temp2], 4(%[dst]) \n\t" |
| 572 #endif |
// dst advances by 8 bytes in the delay slot.
| 573 "bne %[src], %[p_loop1_end], 0b \n\t" |
| 574 " addiu %[dst], %[dst], 8 \n\t" |
| 575 "3: \n\t" |
// Tail: one pixel (2 output bytes) per iteration.
| 576 "beq %[src], %[p_loop2_end], 2f \n\t" |
| 577 " nop \n\t" |
| 578 "1: \n\t" |
| 579 "lw %[temp0], 0(%[src]) \n\t" |
| 580 "ext %[temp4], %[temp0], 8, 16 \n\t" |
| 581 "ext %[temp5], %[temp0], 5, 11 \n\t" |
| 582 "ext %[temp0], %[temp0], 3, 5 \n\t" |
| 583 "ins %[temp4], %[temp5], 0, 11 \n\t" |
| 584 "addiu %[src], %[src], 4 \n\t" |
| 585 "ins %[temp4], %[temp0], 0, 5 \n\t" |
| 586 #ifdef WEBP_SWAP_16BIT_CSP |
| 587 "ush %[temp4], 0(%[dst]) \n\t" |
| 588 #else |
| 589 "wsbh %[temp4], %[temp4] \n\t" |
| 590 "ush %[temp4], 0(%[dst]) \n\t" |
| 591 #endif |
| 592 "bne %[src], %[p_loop2_end], 1b \n\t" |
| 593 " addiu %[dst], %[dst], 2 \n\t" |
| 594 "2: \n\t" |
| 595 ".set pop \n\t" |
| 596 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
| 597 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), |
| 598 [dst]"+&r"(dst), [src]"+&r"(src) |
| 599 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) |
| 600 : "memory" |
| 601 ); |
| 602 } |
| 603 |
// Converts 'num_pixels' 32-bit pixels at 'src' into 3 bytes per pixel at
// 'dst' by dropping bits 31..24 of every pixel: the stored bytes are, at
// increasing addresses, bits 7..0, 15..8 and 23..16.
// The first loop packs four pixels (16 source bytes) into three words (12
// destination bytes) per iteration; the second loop handles the remaining
// (num_pixels & 3) pixels. Stores are usw/ush/sb, so 'dst' may be unaligned.
// NOTE(review): the byte order described above assumes a little-endian
// target -- confirm.
| 604 static void ConvertBGRAToBGR(const uint32_t* src, |
| 605 int num_pixels, uint8_t* dst) { |
| 606 int temp0, temp1, temp2, temp3; |
| 607 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 608 const uint32_t* const p_loop2_end = src + num_pixels; |
| 609 __asm__ volatile ( |
// noreorder: the instruction following each branch is its delay slot.
| 610 ".set push \n\t" |
| 611 ".set noreorder \n\t" |
| 612 "beq %[src], %[p_loop1_end], 3f \n\t" |
| 613 " nop \n\t" |
| 614 "0: \n\t" |
| 615 "lw %[temp0], 0(%[src]) \n\t" |
| 616 "lw %[temp1], 4(%[src]) \n\t" |
| 617 "lw %[temp2], 8(%[src]) \n\t" |
| 618 "lw %[temp3], 12(%[src]) \n\t" |
// Word 0: three low bytes of pixel 0 plus the low byte of pixel 1.
| 619 "ins %[temp0], %[temp1], 24, 8 \n\t" |
// Word 1: bits 23..8 of pixel 1 plus the low halfword of pixel 2.
| 620 "sra %[temp1], %[temp1], 8 \n\t" |
| 621 "ins %[temp1], %[temp2], 16, 16 \n\t" |
// Word 2: bits 23..16 of pixel 2 plus the three low bytes of pixel 3.
| 622 "sll %[temp2], %[temp2], 8 \n\t" |
| 623 "balign %[temp3], %[temp2], 1 \n\t" |
| 624 "addiu %[src], %[src], 16 \n\t" |
| 625 "usw %[temp0], 0(%[dst]) \n\t" |
| 626 "usw %[temp1], 4(%[dst]) \n\t" |
| 627 "usw %[temp3], 8(%[dst]) \n\t" |
// dst advances by 12 bytes in the delay slot.
| 628 "bne %[src], %[p_loop1_end], 0b \n\t" |
| 629 " addiu %[dst], %[dst], 12 \n\t" |
| 630 "3: \n\t" |
// Tail: one pixel per iteration -- low halfword first, then bits 23..16
// (stored from the delay slot).
| 631 "beq %[src], %[p_loop2_end], 2f \n\t" |
| 632 " nop \n\t" |
| 633 "1: \n\t" |
| 634 "lw %[temp0], 0(%[src]) \n\t" |
| 635 "addiu %[src], %[src], 4 \n\t" |
| 636 "addiu %[dst], %[dst], 3 \n\t" |
| 637 "ush %[temp0], -3(%[dst]) \n\t" |
| 638 "sra %[temp0], %[temp0], 16 \n\t" |
| 639 "bne %[src], %[p_loop2_end], 1b \n\t" |
| 640 " sb %[temp0], -1(%[dst]) \n\t" |
| 641 "2: \n\t" |
| 642 ".set pop \n\t" |
| 643 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
| 644 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src) |
| 645 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) |
| 646 : "memory" |
| 647 ); |
| 648 } |
| 649 |
| 650 //------------------------------------------------------------------------------ |
| 651 // Entry point |
| 652 |
| 653 extern void VP8LDspInitMIPSdspR2(void); |
| 654 |
| 655 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) { |
| 656 VP8LMapColor32b = MapARGB; |
| 657 VP8LMapColor8b = MapAlpha; |
| 658 VP8LPredictors[5] = Predictor5; |
| 659 VP8LPredictors[6] = Predictor6; |
| 660 VP8LPredictors[7] = Predictor7; |
| 661 VP8LPredictors[8] = Predictor8; |
| 662 VP8LPredictors[9] = Predictor9; |
| 663 VP8LPredictors[10] = Predictor10; |
| 664 VP8LPredictors[11] = Predictor11; |
| 665 VP8LPredictors[12] = Predictor12; |
| 666 VP8LPredictors[13] = Predictor13; |
| 667 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed; |
| 668 VP8LTransformColorInverse = TransformColorInverse; |
| 669 VP8LConvertBGRAToRGB = ConvertBGRAToRGB; |
| 670 VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA; |
| 671 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444; |
| 672 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565; |
| 673 VP8LConvertBGRAToBGR = ConvertBGRAToBGR; |
| 674 } |
| 675 |
| 676 #else // !WEBP_USE_MIPS_DSP_R2 |
| 677 |
// Used when the MIPS DSP R2 code path is compiled out.
// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; presumably
// it emits an empty VP8LDspInitMIPSdspR2() so the symbol still exists --
// confirm against its definition.
| 678 WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2) |
| 679 |
| 680 #endif // WEBP_USE_MIPS_DSP_R2 |
OLD | NEW |