OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 Google Inc. All Rights Reserved. |
| 2 // |
| 3 // Use of this source code is governed by a BSD-style license |
| 4 // that can be found in the COPYING file in the root of the source |
| 5 // tree. An additional intellectual property rights grant can be found |
| 6 // in the file PATENTS. All contributing project authors may |
| 7 // be found in the AUTHORS file in the root of the source tree. |
| 8 // ----------------------------------------------------------------------------- |
| 9 // |
| 10 // Image transform methods for lossless encoder. |
| 11 // |
| 12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) |
| 13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com) |
| 14 |
| 15 #include "./dsp.h" |
| 16 |
| 17 #if defined(WEBP_USE_MIPS_DSP_R2) |
| 18 |
| 19 #include "./lossless.h" |
| 20 |
| 21 static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, |
| 22 int num_pixels) { |
| 23 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; |
| 24 uint32_t* const p_loop1_end = argb_data + (num_pixels & ~3); |
| 25 uint32_t* const p_loop2_end = p_loop1_end + (num_pixels & 3); |
| 26 __asm__ volatile ( |
| 27 ".set push \n\t" |
| 28 ".set noreorder \n\t" |
| 29 "beq %[argb_data], %[p_loop1_end], 3f \n\t" |
| 30 " nop \n\t" |
| 31 "0: \n\t" |
| 32 "lw %[temp0], 0(%[argb_data]) \n\t" |
| 33 "lw %[temp1], 4(%[argb_data]) \n\t" |
| 34 "lw %[temp2], 8(%[argb_data]) \n\t" |
| 35 "lw %[temp3], 12(%[argb_data]) \n\t" |
| 36 "ext %[temp4], %[temp0], 8, 8 \n\t" |
| 37 "ext %[temp5], %[temp1], 8, 8 \n\t" |
| 38 "ext %[temp6], %[temp2], 8, 8 \n\t" |
| 39 "ext %[temp7], %[temp3], 8, 8 \n\t" |
| 40 "addiu %[argb_data], %[argb_data], 16 \n\t" |
| 41 "replv.ph %[temp4], %[temp4] \n\t" |
| 42 "replv.ph %[temp5], %[temp5] \n\t" |
| 43 "replv.ph %[temp6], %[temp6] \n\t" |
| 44 "replv.ph %[temp7], %[temp7] \n\t" |
| 45 "subu.qb %[temp0], %[temp0], %[temp4] \n\t" |
| 46 "subu.qb %[temp1], %[temp1], %[temp5] \n\t" |
| 47 "subu.qb %[temp2], %[temp2], %[temp6] \n\t" |
| 48 "subu.qb %[temp3], %[temp3], %[temp7] \n\t" |
| 49 "sw %[temp0], -16(%[argb_data]) \n\t" |
| 50 "sw %[temp1], -12(%[argb_data]) \n\t" |
| 51 "sw %[temp2], -8(%[argb_data]) \n\t" |
| 52 "bne %[argb_data], %[p_loop1_end], 0b \n\t" |
| 53 " sw %[temp3], -4(%[argb_data]) \n\t" |
| 54 "3: \n\t" |
| 55 "beq %[argb_data], %[p_loop2_end], 2f \n\t" |
| 56 " nop \n\t" |
| 57 "1: \n\t" |
| 58 "lw %[temp0], 0(%[argb_data]) \n\t" |
| 59 "addiu %[argb_data], %[argb_data], 4 \n\t" |
| 60 "ext %[temp4], %[temp0], 8, 8 \n\t" |
| 61 "replv.ph %[temp4], %[temp4] \n\t" |
| 62 "subu.qb %[temp0], %[temp0], %[temp4] \n\t" |
| 63 "bne %[argb_data], %[p_loop2_end], 1b \n\t" |
| 64 " sw %[temp0], -4(%[argb_data]) \n\t" |
| 65 "2: \n\t" |
| 66 ".set pop \n\t" |
| 67 : [argb_data]"+&r"(argb_data), [temp0]"=&r"(temp0), |
| 68 [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), |
| 69 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), |
| 70 [temp7]"=&r"(temp7) |
| 71 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) |
| 72 : "memory" |
| 73 ); |
| 74 } |
| 75 |
| 76 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, |
| 77 int8_t color) { |
| 78 return (uint32_t)((int)(color_pred) * color) >> 5; |
| 79 } |
| 80 |
| 81 static void TransformColor(const VP8LMultipliers* const m, uint32_t* data, |
| 82 int num_pixels) { |
| 83 int temp0, temp1, temp2, temp3, temp4, temp5; |
| 84 uint32_t argb, argb1, new_red, new_red1; |
| 85 const uint32_t G_to_R = m->green_to_red_; |
| 86 const uint32_t G_to_B = m->green_to_blue_; |
| 87 const uint32_t R_to_B = m->red_to_blue_; |
| 88 uint32_t* const p_loop_end = data + (num_pixels & ~1); |
| 89 __asm__ volatile ( |
| 90 ".set push \n\t" |
| 91 ".set noreorder \n\t" |
| 92 "beq %[data], %[p_loop_end], 1f \n\t" |
| 93 " nop \n\t" |
| 94 "replv.ph %[temp0], %[G_to_R] \n\t" |
| 95 "replv.ph %[temp1], %[G_to_B] \n\t" |
| 96 "replv.ph %[temp2], %[R_to_B] \n\t" |
| 97 "shll.ph %[temp0], %[temp0], 8 \n\t" |
| 98 "shll.ph %[temp1], %[temp1], 8 \n\t" |
| 99 "shll.ph %[temp2], %[temp2], 8 \n\t" |
| 100 "shra.ph %[temp0], %[temp0], 8 \n\t" |
| 101 "shra.ph %[temp1], %[temp1], 8 \n\t" |
| 102 "shra.ph %[temp2], %[temp2], 8 \n\t" |
| 103 "0: \n\t" |
| 104 "lw %[argb], 0(%[data]) \n\t" |
| 105 "lw %[argb1], 4(%[data]) \n\t" |
| 106 "lhu %[new_red], 2(%[data]) \n\t" |
| 107 "lhu %[new_red1], 6(%[data]) \n\t" |
| 108 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t" |
| 109 "precr.qb.ph %[temp4], %[argb], %[argb1] \n\t" |
| 110 "preceu.ph.qbra %[temp3], %[temp3] \n\t" |
| 111 "preceu.ph.qbla %[temp4], %[temp4] \n\t" |
| 112 "shll.ph %[temp3], %[temp3], 8 \n\t" |
| 113 "shll.ph %[temp4], %[temp4], 8 \n\t" |
| 114 "shra.ph %[temp3], %[temp3], 8 \n\t" |
| 115 "shra.ph %[temp4], %[temp4], 8 \n\t" |
| 116 "mul.ph %[temp5], %[temp3], %[temp0] \n\t" |
| 117 "mul.ph %[temp3], %[temp3], %[temp1] \n\t" |
| 118 "mul.ph %[temp4], %[temp4], %[temp2] \n\t" |
| 119 "addiu %[data], %[data], 8 \n\t" |
| 120 "ins %[new_red1], %[new_red], 16, 16 \n\t" |
| 121 "ins %[argb1], %[argb], 16, 16 \n\t" |
| 122 "shra.ph %[temp5], %[temp5], 5 \n\t" |
| 123 "shra.ph %[temp3], %[temp3], 5 \n\t" |
| 124 "shra.ph %[temp4], %[temp4], 5 \n\t" |
| 125 "subu.ph %[new_red1], %[new_red1], %[temp5] \n\t" |
| 126 "subu.ph %[argb1], %[argb1], %[temp3] \n\t" |
| 127 "preceu.ph.qbra %[temp5], %[new_red1] \n\t" |
| 128 "subu.ph %[argb1], %[argb1], %[temp4] \n\t" |
| 129 "preceu.ph.qbra %[temp3], %[argb1] \n\t" |
| 130 "sb %[temp5], -2(%[data]) \n\t" |
| 131 "sb %[temp3], -4(%[data]) \n\t" |
| 132 "sra %[temp5], %[temp5], 16 \n\t" |
| 133 "sra %[temp3], %[temp3], 16 \n\t" |
| 134 "sb %[temp5], -6(%[data]) \n\t" |
| 135 "bne %[data], %[p_loop_end], 0b \n\t" |
| 136 " sb %[temp3], -8(%[data]) \n\t" |
| 137 "1: \n\t" |
| 138 ".set pop \n\t" |
| 139 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), |
| 140 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), |
| 141 [new_red1]"=&r"(new_red1), [new_red]"=&r"(new_red), |
| 142 [argb]"=&r"(argb), [argb1]"=&r"(argb1), [data]"+&r"(data) |
| 143 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B), |
| 144 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end) |
| 145 : "memory", "hi", "lo" |
| 146 ); |
| 147 |
| 148 if (num_pixels & 1) { |
| 149 const uint32_t argb_ = data[0]; |
| 150 const uint32_t green = argb_ >> 8; |
| 151 const uint32_t red = argb_ >> 16; |
| 152 uint32_t new_blue = argb_; |
| 153 new_red = red; |
| 154 new_red -= ColorTransformDelta(m->green_to_red_, green); |
| 155 new_red &= 0xff; |
| 156 new_blue -= ColorTransformDelta(m->green_to_blue_, green); |
| 157 new_blue -= ColorTransformDelta(m->red_to_blue_, red); |
| 158 new_blue &= 0xff; |
| 159 data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue); |
| 160 } |
| 161 } |
| 162 |
| 163 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, |
| 164 uint8_t red_to_blue, |
| 165 uint32_t argb) { |
| 166 const uint32_t green = argb >> 8; |
| 167 const uint32_t red = argb >> 16; |
| 168 uint8_t new_blue = argb; |
| 169 new_blue -= ColorTransformDelta(green_to_blue, green); |
| 170 new_blue -= ColorTransformDelta(red_to_blue, red); |
| 171 return (new_blue & 0xff); |
| 172 } |
| 173 |
// Accumulates a histogram of transformed blue values over a tile.
//   argb:        first pixel of the tile.
//   stride:      distance, in pixels, between the starts of successive rows.
//   tile_width:  pixels per row; tile_height: number of rows.
//   green_to_blue, red_to_blue: multipliers of the transform.
//   histo:       incremented at the transformed blue value of every pixel;
//                indices are always in 0..255.
// Pixels are handled in pairs by the assembly; an odd last column falls back
// to TransformColorBlue().
// NOTE(review): the paired path multiplies by the low 16 bits of each
// multiplier while the odd-column path narrows it to 8 bits, so the two agree
// only for multipliers in [-128, 127] -- confirm callers stay in that range.
static void CollectColorBlueTransforms(const uint32_t* argb, int stride,
                                       int tile_width, int tile_height,
                                       int green_to_blue, int red_to_blue,
                                       int histo[]) {
  // Duplicate the low 16 bits of each multiplier in both halfwords. This is
  // done on unsigned values: left-shifting a negative int is undefined.
  const uint32_t rtb =
      ((uint32_t)red_to_blue << 16) | ((uint32_t)red_to_blue & 0xffffu);
  const uint32_t gtb =
      ((uint32_t)green_to_blue << 16) | ((uint32_t)green_to_blue & 0xffffu);
  // Keeps the low byte of each halfword.
  const uint32_t mask = 0xff00ffu;
  const int num_pairs = tile_width >> 1;
  while (tile_height-- > 0) {
    int x;
    const uint32_t* p_argb = argb;
    argb += stride;
    for (x = 0; x < num_pairs; ++x) {
      int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
      __asm__ volatile (
        "lw %[temp0], 0(%[p_argb]) \n\t"
        "lw %[temp1], 4(%[p_argb]) \n\t"
        // temp2 <- bytes at bits 16..23 and 0..7 of both pixels.
        "precr.qb.ph %[temp2], %[temp0], %[temp1] \n\t"
        // temp1 <- low halfwords (bits 0..15) of both pixels.
        "ins %[temp1], %[temp0], 16, 16 \n\t"
        // Sign-extended red (temp2) and green (temp3), one per halfword.
        "shra.ph %[temp2], %[temp2], 8 \n\t"
        "shra.ph %[temp3], %[temp1], 8 \n\t"
        "mul.ph %[temp5], %[temp2], %[rtb] \n\t"
        "mul.ph %[temp6], %[temp3], %[gtb] \n\t"
        // Blue of both pixels, one per halfword.
        "and %[temp4], %[temp1], %[mask] \n\t"
        "addiu %[p_argb], %[p_argb], 8 \n\t"
        "shra.ph %[temp5], %[temp5], 5 \n\t"
        "shra.ph %[temp6], %[temp6], 5 \n\t"
        "subu.qb %[temp2], %[temp4], %[temp5] \n\t"
        "subu.qb %[temp2], %[temp2], %[temp6] \n\t"
        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
          [temp5]"=&r"(temp5), [temp6]"=&r"(temp6)
        : [rtb]"r"(rtb), [gtb]"r"(gtb), [mask]"r"(mask)
        : "memory", "hi", "lo"
      );
      // The low byte of each halfword of temp2 is one transformed blue value.
      ++histo[(uint8_t)(temp2 >> 16)];
      ++histo[(uint8_t)temp2];
    }
    if (tile_width & 1) {
      ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)];
    }
  }
}
| 216 |
| 217 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, |
| 218 uint32_t argb) { |
| 219 const uint32_t green = argb >> 8; |
| 220 uint32_t new_red = argb >> 16; |
| 221 new_red -= ColorTransformDelta(green_to_red, green); |
| 222 return (new_red & 0xff); |
| 223 } |
| 224 |
// Accumulates a histogram of transformed red values over a tile.
//   argb:        first pixel of the tile.
//   stride:      distance, in pixels, between the starts of successive rows.
//   tile_width:  pixels per row; tile_height: number of rows.
//   green_to_red: multiplier of the transform.
//   histo:       incremented at the transformed red value of every pixel;
//                indices are always in 0..255.
// Pixels are handled in pairs by the assembly; an odd last column falls back
// to TransformColorRed().
// NOTE(review): the paired path multiplies by the low 16 bits of the
// multiplier while the odd-column path narrows it to 8 bits, so the two agree
// only for multipliers in [-128, 127] -- confirm callers stay in that range.
static void CollectColorRedTransforms(const uint32_t* argb, int stride,
                                      int tile_width, int tile_height,
                                      int green_to_red, int histo[]) {
  // Duplicate the low 16 bits of the multiplier in both halfwords. This is
  // done on unsigned values: left-shifting a negative int is undefined.
  const uint32_t gtr =
      ((uint32_t)green_to_red << 16) | ((uint32_t)green_to_red & 0xffffu);
  const int num_pairs = tile_width >> 1;
  while (tile_height-- > 0) {
    int x;
    const uint32_t* p_argb = argb;
    argb += stride;
    for (x = 0; x < num_pairs; ++x) {
      int temp0, temp1, temp2, temp3, temp4;
      __asm__ volatile (
        "lw %[temp0], 0(%[p_argb]) \n\t"
        "lw %[temp1], 4(%[p_argb]) \n\t"
        // temp4 <- upper halfwords (bits 16..31) of both pixels.
        "precrq.ph.w %[temp4], %[temp0], %[temp1] \n\t"
        // temp1 <- low halfwords (bits 0..15) of both pixels.
        "ins %[temp1], %[temp0], 16, 16 \n\t"
        // Sign-extended green of both pixels, one per halfword.
        "shra.ph %[temp3], %[temp1], 8 \n\t"
        "mul.ph %[temp2], %[temp3], %[gtr] \n\t"
        "addiu %[p_argb], %[p_argb], 8 \n\t"
        "shra.ph %[temp2], %[temp2], 5 \n\t"
        "subu.qb %[temp2], %[temp4], %[temp2] \n\t"
        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
        : [gtr]"r"(gtr)
        : "memory", "hi", "lo"
      );
      // The low byte of each halfword of temp2 is one transformed red value.
      ++histo[(uint8_t)(temp2 >> 16)];
      ++histo[(uint8_t)temp2];
    }
    if (tile_width & 1) {
      ++histo[TransformColorRed(green_to_red, *p_argb)];
    }
  }
}
| 258 |
| 259 //------------------------------------------------------------------------------ |
| 260 // Entry point |
| 261 |
| 262 extern void VP8LEncDspInitMIPSdspR2(void); |
| 263 |
| 264 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPSdspR2(void) { |
| 265 VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed; |
| 266 VP8LTransformColor = TransformColor; |
| 267 VP8LCollectColorBlueTransforms = CollectColorBlueTransforms; |
| 268 VP8LCollectColorRedTransforms = CollectColorRedTransforms; |
| 269 } |
| 270 |
| 271 #else // !WEBP_USE_MIPS_DSP_R2 |
| 272 |
// Branch taken when WEBP_USE_MIPS_DSP_R2 is not defined. WEBP_DSP_INIT_STUB is
// defined outside this file; presumably it emits a no-op
// VP8LEncDspInitMIPSdspR2() so the symbol still exists -- TODO confirm.
| 273 WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPSdspR2) |
| 274 |
| 275 #endif // WEBP_USE_MIPS_DSP_R2 |
OLD | NEW |