third_party/libwebp/dsp/lossless_mips_dsp_r2.c - Issue 1546003002: libwebp: update to 0.5.0

Side by Side Diff: third_party/libwebp/dsp/lossless_mips_dsp_r2.c

Issue 1546003002: libwebp: update to 0.5.0 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: rebase Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2014 Google Inc. All Rights Reserved.

	2 //

	3 // Use of this source code is governed by a BSD-style license

	4 // that can be found in the COPYING file in the root of the source

	5 // tree. An additional intellectual property rights grant can be found

	6 // in the file PATENTS. All contributing project authors may

	7 // be found in the AUTHORS file in the root of the source tree.

	8 // -----------------------------------------------------------------------------

	9 //

	10 // Image transforms and color space conversion methods for lossless decoder.

	11 //

	12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)

	13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com)

	14

	15 #include "./dsp.h"

	16

	17 #if defined(WEBP_USE_MIPS_DSP_R2)

	18

	19 #include "./lossless.h"

	20

	// MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) defines
	//   static void FUNC_NAME(const TYPE* src, const uint32_t* const color_map,
	//                         TYPE* dst, int y_start, int y_end, int width);
	// For every row in [y_start, y_end) it maps 'width' samples read from 'src'
	// through 'color_map' and writes them to 'dst'. 'src' and 'dst' are advanced
	// continuously: they are never rewound between rows.
	//  - Groups of four samples (width >> 2 per row) are handled by the inline
	//    assembly. The '.ifc' directives compare the stringified TYPE at assembly
	//    time and keep only the uint8_t variant (byte loads/stores, the stored
	//    value is bits 8..15 of the looked-up entry) or the uint32_t variant
	//    (word loads/stores, the index is bits 8..15 of the source word).
	//  - The remaining (width & 3) samples of each row are handled in C through
	//    GET_INDEX / GET_VALUE; both source and destination must be dereferenced
	//    there, one sample at a time.
	#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
	static void FUNC_NAME(const TYPE* src, \
	                      const uint32_t* const color_map, \
	                      TYPE* dst, int y_start, int y_end, \
	                      int width) { \
	  int y; \
	  for (y = y_start; y < y_end; ++y) { \
	    int x; \
	    for (x = 0; x < (width >> 2); ++x) { \
	      int tmp1, tmp2, tmp3, tmp4; \
	      __asm__ volatile ( \
	      ".ifc " #TYPE ", uint8_t \n\t" \
	        "lbu %[tmp1], 0(%[src]) \n\t" \
	        "lbu %[tmp2], 1(%[src]) \n\t" \
	        "lbu %[tmp3], 2(%[src]) \n\t" \
	        "lbu %[tmp4], 3(%[src]) \n\t" \
	        "addiu %[src], %[src], 4 \n\t" \
	      ".endif \n\t" \
	      ".ifc " #TYPE ", uint32_t \n\t" \
	        "lw %[tmp1], 0(%[src]) \n\t" \
	        "lw %[tmp2], 4(%[src]) \n\t" \
	        "lw %[tmp3], 8(%[src]) \n\t" \
	        "lw %[tmp4], 12(%[src]) \n\t" \
	        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
	        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
	        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
	        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
	        "addiu %[src], %[src], 16 \n\t" \
	      ".endif \n\t" \
	        "sll %[tmp1], %[tmp1], 2 \n\t" \
	        "sll %[tmp2], %[tmp2], 2 \n\t" \
	        "sll %[tmp3], %[tmp3], 2 \n\t" \
	        "sll %[tmp4], %[tmp4], 2 \n\t" \
	        "lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \
	        "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
	        "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
	        "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
	      ".ifc " #TYPE ", uint8_t \n\t" \
	        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
	        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
	        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
	        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
	        "sb %[tmp1], 0(%[dst]) \n\t" \
	        "sb %[tmp2], 1(%[dst]) \n\t" \
	        "sb %[tmp3], 2(%[dst]) \n\t" \
	        "sb %[tmp4], 3(%[dst]) \n\t" \
	        "addiu %[dst], %[dst], 4 \n\t" \
	      ".endif \n\t" \
	      ".ifc " #TYPE ", uint32_t \n\t" \
	        "sw %[tmp1], 0(%[dst]) \n\t" \
	        "sw %[tmp2], 4(%[dst]) \n\t" \
	        "sw %[tmp3], 8(%[dst]) \n\t" \
	        "sw %[tmp4], 12(%[dst]) \n\t" \
	        "addiu %[dst], %[dst], 16 \n\t" \
	      ".endif \n\t" \
	        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), \
	          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst) \
	        : [color_map]"r"(color_map) \
	        : "memory" \
	      ); \
	    } \
	    for (x = 0; x < (width & 3); ++x) { \
	      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
	    } \
	  } \
	}

	MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
	MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

	#undef MAP_COLOR_FUNCS

	92

	93 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,

	94 uint32_t c2) {

	95 int temp0, temp1, temp2, temp3, temp4, temp5;

	96 __asm__ volatile (

	97 "preceu.ph.qbr %[temp1], %[c0] \n\t"

	98 "preceu.ph.qbl %[temp2], %[c0] \n\t"

	99 "preceu.ph.qbr %[temp3], %[c1] \n\t"

	100 "preceu.ph.qbl %[temp4], %[c1] \n\t"

	101 "preceu.ph.qbr %[temp5], %[c2] \n\t"

	102 "preceu.ph.qbl %[temp0], %[c2] \n\t"

	103 "subq.ph %[temp3], %[temp3], %[temp5] \n\t"

	104 "subq.ph %[temp4], %[temp4], %[temp0] \n\t"

	105 "addq.ph %[temp1], %[temp1], %[temp3] \n\t"

	106 "addq.ph %[temp2], %[temp2], %[temp4] \n\t"

	107 "shll_s.ph %[temp1], %[temp1], 7 \n\t"

	108 "shll_s.ph %[temp2], %[temp2], 7 \n\t"

	109 "precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t"

	110 : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),

	111 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)

	112 : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)

	113 : "memory"

	114 );

	115 return temp2;

	116 }

	117

	118 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,

	119 uint32_t c2) {

	120 int temp0, temp1, temp2, temp3, temp4, temp5;

	121 __asm__ volatile (

	122 "adduh.qb %[temp5], %[c0], %[c1] \n\t"

	123 "preceu.ph.qbr %[temp3], %[c2] \n\t"

	124 "preceu.ph.qbr %[temp1], %[temp5] \n\t"

	125 "preceu.ph.qbl %[temp2], %[temp5] \n\t"

	126 "preceu.ph.qbl %[temp4], %[c2] \n\t"

	127 "subq.ph %[temp3], %[temp1], %[temp3] \n\t"

	128 "subq.ph %[temp4], %[temp2], %[temp4] \n\t"

	129 "shrl.ph %[temp5], %[temp3], 15 \n\t"

	130 "shrl.ph %[temp0], %[temp4], 15 \n\t"

	131 "addq.ph %[temp3], %[temp3], %[temp5] \n\t"

	132 "addq.ph %[temp4], %[temp0], %[temp4] \n\t"

	133 "shra.ph %[temp3], %[temp3], 1 \n\t"

	134 "shra.ph %[temp4], %[temp4], 1 \n\t"

	135 "addq.ph %[temp1], %[temp1], %[temp3] \n\t"

	136 "addq.ph %[temp2], %[temp2], %[temp4] \n\t"

	137 "shll_s.ph %[temp1], %[temp1], 7 \n\t"

	138 "shll_s.ph %[temp2], %[temp2], 7 \n\t"

	139 "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t"

	140 : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),

	141 [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)

	142 : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)

	143 : "memory"

	144 );

	145 return temp1;

	146 }

	147

	148 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {

	149 int temp0, temp1, temp2, temp3, temp4, temp5;

	150 __asm__ volatile (

	151 "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t"

	152 "pick.qb %[temp1], %[b], %[c] \n\t"

	153 "pick.qb %[temp2], %[c], %[b] \n\t"

	154 "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t"

	155 "pick.qb %[temp4], %[a], %[c] \n\t"

	156 "pick.qb %[temp5], %[c], %[a] \n\t"

	157 "subu.qb %[temp3], %[temp1], %[temp2] \n\t"

	158 "subu.qb %[temp0], %[temp4], %[temp5] \n\t"

	159 "raddu.w.qb %[temp3], %[temp3] \n\t"

	160 "raddu.w.qb %[temp0], %[temp0] \n\t"

	161 "subu %[temp3], %[temp3], %[temp0] \n\t"

	162 "slti %[temp0], %[temp3], 0x1 \n\t"

	163 "movz %[a], %[b], %[temp0] \n\t"

	164 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),

	165 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),

	166 [a]"+&r"(a)

	167 : [b]"r"(b), [c]"r"(c)

	168 );

	169 return a;

	170 }

	171

	172 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {

	173 __asm__ volatile (

	174 "adduh.qb %[a0], %[a0], %[a1] \n\t"

	175 : [a0]"+r"(a0)

	176 : [a1]"r"(a1)

	177 );

	178 return a0;

	179 }

	180

	181 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {

	182 return Average2(Average2(a0, a2), a1);

	183 }

	184

	185 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,

	186 uint32_t a2, uint32_t a3) {

	187 return Average2(Average2(a0, a1), Average2(a2, a3));

	188 }

	189

	// Predictor 5: Average3 of left, top[0] and top[1].
	static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
	  const uint32_t t = top[0];
	  const uint32_t tr = top[1];
	  return Average3(left, t, tr);
	}

	193

	// Predictor 6: Average2 of left and top[-1].
	static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
	  const uint32_t tl = top[-1];
	  return Average2(left, tl);
	}

	197

	// Predictor 7: Average2 of left and top[0].
	static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
	  const uint32_t t = top[0];
	  return Average2(left, t);
	}

	201

	// Predictor 8: Average2 of top[-1] and top[0]; 'left' is not used.
	static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
	  const uint32_t tl = top[-1];
	  const uint32_t t = top[0];
	  (void)left;
	  return Average2(tl, t);
	}

	206

	// Predictor 9: Average2 of top[0] and top[1]; 'left' is not used.
	static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
	  const uint32_t t = top[0];
	  const uint32_t tr = top[1];
	  (void)left;
	  return Average2(t, tr);
	}

	211

	// Predictor 10: Average4 of left, top[-1], top[0] and top[1].
	static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
	  const uint32_t tl = top[-1];
	  const uint32_t t = top[0];
	  const uint32_t tr = top[1];
	  return Average4(left, tl, t, tr);
	}

	215

	// Predictor 11: Select() between top[0] and left, with top[-1] as reference.
	static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
	  const uint32_t t = top[0];
	  const uint32_t tl = top[-1];
	  return Select(t, left, tl);
	}

	219

	// Predictor 12: ClampedAddSubtractFull(left, top[0], top[-1]).
	static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
	  const uint32_t t = top[0];
	  const uint32_t tl = top[-1];
	  return ClampedAddSubtractFull(left, t, tl);
	}

	223

	// Predictor 13: ClampedAddSubtractHalf(left, top[0], top[-1]).
	static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
	  const uint32_t t = top[0];
	  const uint32_t tl = top[-1];
	  return ClampedAddSubtractHalf(left, t, tl);
	}

	227

	228 // Add green to blue and red channels (i.e. perform the inverse transform of

	229 // 'subtract green').

	230 static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {

	231 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;

	232 uint32_t* const p_loop1_end = data + (num_pixels & ~3);

	233 uint32_t* const p_loop2_end = data + num_pixels;

	234 __asm__ volatile (

	235 ".set push \n\t"

	236 ".set noreorder \n\t"

	237 "beq %[data], %[p_loop1_end], 3f \n\t"

	238 " nop \n\t"

	239 "0: \n\t"

	240 "lw %[temp0], 0(%[data]) \n\t"

	241 "lw %[temp1], 4(%[data]) \n\t"

	242 "lw %[temp2], 8(%[data]) \n\t"

	243 "lw %[temp3], 12(%[data]) \n\t"

	244 "ext %[temp4], %[temp0], 8, 8 \n\t"

	245 "ext %[temp5], %[temp1], 8, 8 \n\t"

	246 "ext %[temp6], %[temp2], 8, 8 \n\t"

	247 "ext %[temp7], %[temp3], 8, 8 \n\t"

	248 "addiu %[data], %[data], 16 \n\t"

	249 "replv.ph %[temp4], %[temp4] \n\t"

	250 "replv.ph %[temp5], %[temp5] \n\t"

	251 "replv.ph %[temp6], %[temp6] \n\t"

	252 "replv.ph %[temp7], %[temp7] \n\t"

	253 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"

	254 "addu.qb %[temp1], %[temp1], %[temp5] \n\t"

	255 "addu.qb %[temp2], %[temp2], %[temp6] \n\t"

	256 "addu.qb %[temp3], %[temp3], %[temp7] \n\t"

	257 "sw %[temp0], -16(%[data]) \n\t"

	258 "sw %[temp1], -12(%[data]) \n\t"

	259 "sw %[temp2], -8(%[data]) \n\t"

	260 "bne %[data], %[p_loop1_end], 0b \n\t"

	261 " sw %[temp3], -4(%[data]) \n\t"

	262 "3: \n\t"

	263 "beq %[data], %[p_loop2_end], 2f \n\t"

	264 " nop \n\t"

	265 "1: \n\t"

	266 "lw %[temp0], 0(%[data]) \n\t"

	267 "addiu %[data], %[data], 4 \n\t"

	268 "ext %[temp4], %[temp0], 8, 8 \n\t"

	269 "replv.ph %[temp4], %[temp4] \n\t"

	270 "addu.qb %[temp0], %[temp0], %[temp4] \n\t"

	271 "bne %[data], %[p_loop2_end], 1b \n\t"

	272 " sw %[temp0], -4(%[data]) \n\t"

	273 "2: \n\t"

	274 ".set pop \n\t"

	275 : [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),

	276 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),

	277 [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)

	278 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)

	279 : "memory"

	280 );

	281 }

	282

	283 static void TransformColorInverse(const VP8LMultipliers* const m,

	284 uint32_t* data, int num_pixels) {

	285 int temp0, temp1, temp2, temp3, temp4, temp5;

	286 uint32_t argb, argb1, new_red;

	287 const uint32_t G_to_R = m->green_to_red_;

	288 const uint32_t G_to_B = m->green_to_blue_;

	289 const uint32_t R_to_B = m->red_to_blue_;

	290 uint32_t* const p_loop_end = data + (num_pixels & ~1);

	291 __asm__ volatile (

	292 ".set push \n\t"

	293 ".set noreorder \n\t"

	294 "beq %[data], %[p_loop_end], 1f \n\t"

	295 " nop \n\t"

	296 "replv.ph %[temp0], %[G_to_R] \n\t"

	297 "replv.ph %[temp1], %[G_to_B] \n\t"

	298 "replv.ph %[temp2], %[R_to_B] \n\t"

	299 "shll.ph %[temp0], %[temp0], 8 \n\t"

	300 "shll.ph %[temp1], %[temp1], 8 \n\t"

	301 "shll.ph %[temp2], %[temp2], 8 \n\t"

	302 "shra.ph %[temp0], %[temp0], 8 \n\t"

	303 "shra.ph %[temp1], %[temp1], 8 \n\t"

	304 "shra.ph %[temp2], %[temp2], 8 \n\t"

	305 "0: \n\t"

	306 "lw %[argb], 0(%[data]) \n\t"

	307 "lw %[argb1], 4(%[data]) \n\t"

	308 "addiu %[data], %[data], 8 \n\t"

	309 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"

	310 "preceu.ph.qbra %[temp3], %[temp3] \n\t"

	311 "shll.ph %[temp3], %[temp3], 8 \n\t"

	312 "shra.ph %[temp3], %[temp3], 8 \n\t"

	313 "mul.ph %[temp5], %[temp3], %[temp0] \n\t"

	314 "mul.ph %[temp3], %[temp3], %[temp1] \n\t"

	315 "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"

	316 "ins %[argb1], %[argb], 16, 16 \n\t"

	317 "shra.ph %[temp5], %[temp5], 5 \n\t"

	318 "shra.ph %[temp3], %[temp3], 5 \n\t"

	319 "addu.ph %[new_red], %[new_red], %[temp5] \n\t"

	320 "addu.ph %[argb1], %[argb1], %[temp3] \n\t"

	321 "preceu.ph.qbra %[temp5], %[new_red] \n\t"

	322 "shll.ph %[temp4], %[temp5], 8 \n\t"

	323 "shra.ph %[temp4], %[temp4], 8 \n\t"

	324 "mul.ph %[temp4], %[temp4], %[temp2] \n\t"

	325 "sb %[temp5], -2(%[data]) \n\t"

	326 "sra %[temp5], %[temp5], 16 \n\t"

	327 "shra.ph %[temp4], %[temp4], 5 \n\t"

	328 "addu.ph %[argb1], %[argb1], %[temp4] \n\t"

	329 "preceu.ph.qbra %[temp3], %[argb1] \n\t"

	330 "sb %[temp5], -6(%[data]) \n\t"

	331 "sb %[temp3], -4(%[data]) \n\t"

	332 "sra %[temp3], %[temp3], 16 \n\t"

	333 "bne %[data], %[p_loop_end], 0b \n\t"

	334 " sb %[temp3], -8(%[data]) \n\t"

	335 "1: \n\t"

	336 ".set pop \n\t"

	337 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),

	338 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),

	339 [new_red]"=&r"(new_red), [argb]"=&r"(argb),

	340 [argb1]"=&r"(argb1), [data]"+&r"(data)

	341 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),

	342 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)

	343 : "memory", "hi", "lo"

	344 );

	345

	346 // Fall-back to C-version for left-overs.

	347 if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);

	348 }

	349

	350 static void ConvertBGRAToRGB(const uint32_t* src,

	351 int num_pixels, uint8_t* dst) {

	352 int temp0, temp1, temp2, temp3;

	353 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);

	354 const uint32_t* const p_loop2_end = src + num_pixels;

	355 __asm__ volatile (

	356 ".set push \n\t"

	357 ".set noreorder \n\t"

	358 "beq %[src], %[p_loop1_end], 3f \n\t"

	359 " nop \n\t"

	360 "0: \n\t"

	361 "lw %[temp3], 12(%[src]) \n\t"

	362 "lw %[temp2], 8(%[src]) \n\t"

	363 "lw %[temp1], 4(%[src]) \n\t"

	364 "lw %[temp0], 0(%[src]) \n\t"

	365 "ins %[temp3], %[temp2], 24, 8 \n\t"

	366 "sll %[temp2], %[temp2], 8 \n\t"

	367 "rotr %[temp3], %[temp3], 16 \n\t"

	368 "ins %[temp2], %[temp1], 0, 16 \n\t"

	369 "sll %[temp1], %[temp1], 8 \n\t"

	370 "wsbh %[temp3], %[temp3] \n\t"

	371 "balign %[temp0], %[temp1], 1 \n\t"

	372 "wsbh %[temp2], %[temp2] \n\t"

	373 "wsbh %[temp0], %[temp0] \n\t"

	374 "usw %[temp3], 8(%[dst]) \n\t"

	375 "rotr %[temp0], %[temp0], 16 \n\t"

	376 "usw %[temp2], 4(%[dst]) \n\t"

	377 "addiu %[src], %[src], 16 \n\t"

	378 "usw %[temp0], 0(%[dst]) \n\t"

	379 "bne %[src], %[p_loop1_end], 0b \n\t"

	380 " addiu %[dst], %[dst], 12 \n\t"

	381 "3: \n\t"

	382 "beq %[src], %[p_loop2_end], 2f \n\t"

	383 " nop \n\t"

	384 "1: \n\t"

	385 "lw %[temp0], 0(%[src]) \n\t"

	386 "addiu %[src], %[src], 4 \n\t"

	387 "wsbh %[temp1], %[temp0] \n\t"

	388 "addiu %[dst], %[dst], 3 \n\t"

	389 "ush %[temp1], -2(%[dst]) \n\t"

	390 "sra %[temp0], %[temp0], 16 \n\t"

	391 "bne %[src], %[p_loop2_end], 1b \n\t"

	392 " sb %[temp0], -3(%[dst]) \n\t"

	393 "2: \n\t"

	394 ".set pop \n\t"

	395 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),

	396 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)

	397 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)

	398 : "memory"

	399 );

	400 }

	401

	402 static void ConvertBGRAToRGBA(const uint32_t* src,

	403 int num_pixels, uint8_t* dst) {

	404 int temp0, temp1, temp2, temp3;

	405 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);

	406 const uint32_t* const p_loop2_end = src + num_pixels;

	407 __asm__ volatile (

	408 ".set push \n\t"

	409 ".set noreorder \n\t"

	410 "beq %[src], %[p_loop1_end], 3f \n\t"

	411 " nop \n\t"

	412 "0: \n\t"

	413 "lw %[temp0], 0(%[src]) \n\t"

	414 "lw %[temp1], 4(%[src]) \n\t"

	415 "lw %[temp2], 8(%[src]) \n\t"

	416 "lw %[temp3], 12(%[src]) \n\t"

	417 "wsbh %[temp0], %[temp0] \n\t"

	418 "wsbh %[temp1], %[temp1] \n\t"

	419 "wsbh %[temp2], %[temp2] \n\t"

	420 "wsbh %[temp3], %[temp3] \n\t"

	421 "addiu %[src], %[src], 16 \n\t"

	422 "balign %[temp0], %[temp0], 1 \n\t"

	423 "balign %[temp1], %[temp1], 1 \n\t"

	424 "balign %[temp2], %[temp2], 1 \n\t"

	425 "balign %[temp3], %[temp3], 1 \n\t"

	426 "usw %[temp0], 0(%[dst]) \n\t"

	427 "usw %[temp1], 4(%[dst]) \n\t"

	428 "usw %[temp2], 8(%[dst]) \n\t"

	429 "usw %[temp3], 12(%[dst]) \n\t"

	430 "bne %[src], %[p_loop1_end], 0b \n\t"

	431 " addiu %[dst], %[dst], 16 \n\t"

	432 "3: \n\t"

	433 "beq %[src], %[p_loop2_end], 2f \n\t"

	434 " nop \n\t"

	435 "1: \n\t"

	436 "lw %[temp0], 0(%[src]) \n\t"

	437 "wsbh %[temp0], %[temp0] \n\t"

	438 "addiu %[src], %[src], 4 \n\t"

	439 "balign %[temp0], %[temp0], 1 \n\t"

	440 "usw %[temp0], 0(%[dst]) \n\t"

	441 "bne %[src], %[p_loop2_end], 1b \n\t"

	442 " addiu %[dst], %[dst], 4 \n\t"

	443 "2: \n\t"

	444 ".set pop \n\t"

	445 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),

	446 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)

	447 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)

	448 : "memory"

	449 );

	450 }

	451

	452 static void ConvertBGRAToRGBA4444(const uint32_t* src,

	453 int num_pixels, uint8_t* dst) {

	454 int temp0, temp1, temp2, temp3, temp4, temp5;

	455 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);

	456 const uint32_t* const p_loop2_end = src + num_pixels;

	457 __asm__ volatile (

	458 ".set push \n\t"

	459 ".set noreorder \n\t"

	460 "beq %[src], %[p_loop1_end], 3f \n\t"

	461 " nop \n\t"

	462 "0: \n\t"

	463 "lw %[temp0], 0(%[src]) \n\t"

	464 "lw %[temp1], 4(%[src]) \n\t"

	465 "lw %[temp2], 8(%[src]) \n\t"

	466 "lw %[temp3], 12(%[src]) \n\t"

	467 "ext %[temp4], %[temp0], 28, 4 \n\t"

	468 "ext %[temp5], %[temp0], 12, 4 \n\t"

	469 "ins %[temp0], %[temp4], 0, 4 \n\t"

	470 "ext %[temp4], %[temp1], 28, 4 \n\t"

	471 "ins %[temp0], %[temp5], 16, 4 \n\t"

	472 "ext %[temp5], %[temp1], 12, 4 \n\t"

	473 "ins %[temp1], %[temp4], 0, 4 \n\t"

	474 "ext %[temp4], %[temp2], 28, 4 \n\t"

	475 "ins %[temp1], %[temp5], 16, 4 \n\t"

	476 "ext %[temp5], %[temp2], 12, 4 \n\t"

	477 "ins %[temp2], %[temp4], 0, 4 \n\t"

	478 "ext %[temp4], %[temp3], 28, 4 \n\t"

	479 "ins %[temp2], %[temp5], 16, 4 \n\t"

	480 "ext %[temp5], %[temp3], 12, 4 \n\t"

	481 "ins %[temp3], %[temp4], 0, 4 \n\t"

	482 "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t"

	483 "ins %[temp3], %[temp5], 16, 4 \n\t"

	484 "addiu %[src], %[src], 16 \n\t"

	485 "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"

	486 #ifdef WEBP_SWAP_16BIT_CSP

	487 "usw %[temp1], 0(%[dst]) \n\t"

	488 "usw %[temp3], 4(%[dst]) \n\t"

	489 #else

	490 "wsbh %[temp1], %[temp1] \n\t"

	491 "wsbh %[temp3], %[temp3] \n\t"

	492 "usw %[temp1], 0(%[dst]) \n\t"

	493 "usw %[temp3], 4(%[dst]) \n\t"

	494 #endif

	495 "bne %[src], %[p_loop1_end], 0b \n\t"

	496 " addiu %[dst], %[dst], 8 \n\t"

	497 "3: \n\t"

	498 "beq %[src], %[p_loop2_end], 2f \n\t"

	499 " nop \n\t"

	500 "1: \n\t"

	501 "lw %[temp0], 0(%[src]) \n\t"

	502 "ext %[temp4], %[temp0], 28, 4 \n\t"

	503 "ext %[temp5], %[temp0], 12, 4 \n\t"

	504 "ins %[temp0], %[temp4], 0, 4 \n\t"

	505 "ins %[temp0], %[temp5], 16, 4 \n\t"

	506 "addiu %[src], %[src], 4 \n\t"

	507 "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t"

	508 #ifdef WEBP_SWAP_16BIT_CSP

	509 "ush %[temp0], 0(%[dst]) \n\t"

	510 #else

	511 "wsbh %[temp0], %[temp0] \n\t"

	512 "ush %[temp0], 0(%[dst]) \n\t"

	513 #endif

	514 "bne %[src], %[p_loop2_end], 1b \n\t"

	515 " addiu %[dst], %[dst], 2 \n\t"

	516 "2: \n\t"

	517 ".set pop \n\t"

	518 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),

	519 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),

	520 [dst]"+&r"(dst), [src]"+&r"(src)

	521 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)

	522 : "memory"

	523 );

	524 }

	525

	526 static void ConvertBGRAToRGB565(const uint32_t* src,

	527 int num_pixels, uint8_t* dst) {

	528 int temp0, temp1, temp2, temp3, temp4, temp5;

	529 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);

	530 const uint32_t* const p_loop2_end = src + num_pixels;

	531 __asm__ volatile (

	532 ".set push \n\t"

	533 ".set noreorder \n\t"

	534 "beq %[src], %[p_loop1_end], 3f \n\t"

	535 " nop \n\t"

	536 "0: \n\t"

	537 "lw %[temp0], 0(%[src]) \n\t"

	538 "lw %[temp1], 4(%[src]) \n\t"

	539 "lw %[temp2], 8(%[src]) \n\t"

	540 "lw %[temp3], 12(%[src]) \n\t"

	541 "ext %[temp4], %[temp0], 8, 16 \n\t"

	542 "ext %[temp5], %[temp0], 5, 11 \n\t"

	543 "ext %[temp0], %[temp0], 3, 5 \n\t"

	544 "ins %[temp4], %[temp5], 0, 11 \n\t"

	545 "ext %[temp5], %[temp1], 5, 11 \n\t"

	546 "ins %[temp4], %[temp0], 0, 5 \n\t"

	547 "ext %[temp0], %[temp1], 8, 16 \n\t"

	548 "ext %[temp1], %[temp1], 3, 5 \n\t"

	549 "ins %[temp0], %[temp5], 0, 11 \n\t"

	550 "ext %[temp5], %[temp2], 5, 11 \n\t"

	551 "ins %[temp0], %[temp1], 0, 5 \n\t"

	552 "ext %[temp1], %[temp2], 8, 16 \n\t"

	553 "ext %[temp2], %[temp2], 3, 5 \n\t"

	554 "ins %[temp1], %[temp5], 0, 11 \n\t"

	555 "ext %[temp5], %[temp3], 5, 11 \n\t"

	556 "ins %[temp1], %[temp2], 0, 5 \n\t"

	557 "ext %[temp2], %[temp3], 8, 16 \n\t"

	558 "ext %[temp3], %[temp3], 3, 5 \n\t"

	559 "ins %[temp2], %[temp5], 0, 11 \n\t"

	560 "append %[temp0], %[temp4], 16 \n\t"

	561 "ins %[temp2], %[temp3], 0, 5 \n\t"

	562 "addiu %[src], %[src], 16 \n\t"

	563 "append %[temp2], %[temp1], 16 \n\t"

	564 #ifdef WEBP_SWAP_16BIT_CSP

	565 "usw %[temp0], 0(%[dst]) \n\t"

	566 "usw %[temp2], 4(%[dst]) \n\t"

	567 #else

	568 "wsbh %[temp0], %[temp0] \n\t"

	569 "wsbh %[temp2], %[temp2] \n\t"

	570 "usw %[temp0], 0(%[dst]) \n\t"

	571 "usw %[temp2], 4(%[dst]) \n\t"

	572 #endif

	573 "bne %[src], %[p_loop1_end], 0b \n\t"

	574 " addiu %[dst], %[dst], 8 \n\t"

	575 "3: \n\t"

	576 "beq %[src], %[p_loop2_end], 2f \n\t"

	577 " nop \n\t"

	578 "1: \n\t"

	579 "lw %[temp0], 0(%[src]) \n\t"

	580 "ext %[temp4], %[temp0], 8, 16 \n\t"

	581 "ext %[temp5], %[temp0], 5, 11 \n\t"

	582 "ext %[temp0], %[temp0], 3, 5 \n\t"

	583 "ins %[temp4], %[temp5], 0, 11 \n\t"

	584 "addiu %[src], %[src], 4 \n\t"

	585 "ins %[temp4], %[temp0], 0, 5 \n\t"

	586 #ifdef WEBP_SWAP_16BIT_CSP

	587 "ush %[temp4], 0(%[dst]) \n\t"

	588 #else

	589 "wsbh %[temp4], %[temp4] \n\t"

	590 "ush %[temp4], 0(%[dst]) \n\t"

	591 #endif

	592 "bne %[src], %[p_loop2_end], 1b \n\t"

	593 " addiu %[dst], %[dst], 2 \n\t"

	594 "2: \n\t"

	595 ".set pop \n\t"

	596 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),

	597 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),

	598 [dst]"+&r"(dst), [src]"+&r"(src)

	599 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)

	600 : "memory"

	601 );

	602 }

	603

	604 static void ConvertBGRAToBGR(const uint32_t* src,

	605 int num_pixels, uint8_t* dst) {

	606 int temp0, temp1, temp2, temp3;

	607 const uint32_t* const p_loop1_end = src + (num_pixels & ~3);

	608 const uint32_t* const p_loop2_end = src + num_pixels;

	609 __asm__ volatile (

	610 ".set push \n\t"

	611 ".set noreorder \n\t"

	612 "beq %[src], %[p_loop1_end], 3f \n\t"

	613 " nop \n\t"

	614 "0: \n\t"

	615 "lw %[temp0], 0(%[src]) \n\t"

	616 "lw %[temp1], 4(%[src]) \n\t"

	617 "lw %[temp2], 8(%[src]) \n\t"

	618 "lw %[temp3], 12(%[src]) \n\t"

	619 "ins %[temp0], %[temp1], 24, 8 \n\t"

	620 "sra %[temp1], %[temp1], 8 \n\t"

	621 "ins %[temp1], %[temp2], 16, 16 \n\t"

	622 "sll %[temp2], %[temp2], 8 \n\t"

	623 "balign %[temp3], %[temp2], 1 \n\t"

	624 "addiu %[src], %[src], 16 \n\t"

	625 "usw %[temp0], 0(%[dst]) \n\t"

	626 "usw %[temp1], 4(%[dst]) \n\t"

	627 "usw %[temp3], 8(%[dst]) \n\t"

	628 "bne %[src], %[p_loop1_end], 0b \n\t"

	629 " addiu %[dst], %[dst], 12 \n\t"

	630 "3: \n\t"

	631 "beq %[src], %[p_loop2_end], 2f \n\t"

	632 " nop \n\t"

	633 "1: \n\t"

	634 "lw %[temp0], 0(%[src]) \n\t"

	635 "addiu %[src], %[src], 4 \n\t"

	636 "addiu %[dst], %[dst], 3 \n\t"

	637 "ush %[temp0], -3(%[dst]) \n\t"

	638 "sra %[temp0], %[temp0], 16 \n\t"

	639 "bne %[src], %[p_loop2_end], 1b \n\t"

	640 " sb %[temp0], -1(%[dst]) \n\t"

	641 "2: \n\t"

	642 ".set pop \n\t"

	643 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),

	644 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)

	645 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)

	646 : "memory"

	647 );

	648 }

	649

	650 //------------------------------------------------------------------------------

	651 // Entry point

	652

	653 extern void VP8LDspInitMIPSdspR2(void);

	654

	655 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {

	656 VP8LMapColor32b = MapARGB;

	657 VP8LMapColor8b = MapAlpha;

	658 VP8LPredictors[5] = Predictor5;

	659 VP8LPredictors[6] = Predictor6;

	660 VP8LPredictors[7] = Predictor7;

	661 VP8LPredictors[8] = Predictor8;

	662 VP8LPredictors[9] = Predictor9;

	663 VP8LPredictors[10] = Predictor10;

	664 VP8LPredictors[11] = Predictor11;

	665 VP8LPredictors[12] = Predictor12;

	666 VP8LPredictors[13] = Predictor13;

	667 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;

	668 VP8LTransformColorInverse = TransformColorInverse;

	669 VP8LConvertBGRAToRGB = ConvertBGRAToRGB;

	670 VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;

	671 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;

	672 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;

	673 VP8LConvertBGRAToBGR = ConvertBGRAToBGR;

	674 }

	675

	676 #else // !WEBP_USE_MIPS_DSP_R2

	677

	678 WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)

	679

	680 #endif // WEBP_USE_MIPS_DSP_R2

OLD	NEW

« no previous file with comments | « third_party/libwebp/dsp/lossless_mips32.c ('k') | third_party/libwebp/dsp/lossless_neon.c » ('j') | no next file with comments »