third_party/libwebp/dsp/rescaler_mips_dsp_r2.c - Issue 1546003002: libwebp: update to 0.5.0

Side by Side Diff: third_party/libwebp/dsp/rescaler_mips_dsp_r2.c

Issue 1546003002: libwebp: update to 0.5.0 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: rebase Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2014 Google Inc. All Rights Reserved.

	2 //

	3 // Use of this source code is governed by a BSD-style license

	4 // that can be found in the COPYING file in the root of the source

	5 // tree. An additional intellectual property rights grant can be found

	6 // in the file PATENTS. All contributing project authors may

	7 // be found in the AUTHORS file in the root of the source tree.

	8 // -----------------------------------------------------------------------------

	9 //

	10 // MIPS version of rescaling functions

	11 //

	12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)

	13

	14 #include "./dsp.h"

	15

	16 #if defined(WEBP_USE_MIPS_DSP_R2)

	17

	18 #include <assert.h>

	19 #include "../utils/rescaler.h"

	20

	// Rounding bias added before the final right shift in MULT_FIX: half of
	// WEBP_RESCALER_ONE.
	21 #define ROUNDER (WEBP_RESCALER_ONE >> 1)

	// Fixed-point multiply: widens x to 64 bits, multiplies by y, adds ROUNDER
	// and shifts right by WEBP_RESCALER_RFIX.
	// NOTE: only x is cast. If y is a negative signed integer, the usual
	// arithmetic conversions turn it into a huge 64-bit value, so callers should
	// pass an unsigned y.
	22 #define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)

	23

	24 //------------------------------------------------------------------------------

	25 // Row export

	26

	27 static void ExportRowShrink(WebPRescaler* const wrk) {

	28 int i;

	29 const int x_out_max = wrk->dst_width * wrk->num_channels;

	30 uint8_t* dst = wrk->dst;

	31 rescaler_t* irow = wrk->irow;

	32 const rescaler_t* frow = wrk->frow;

	33 const int yscale = wrk->fy_scale * (-wrk->y_accum);

	34 int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;

	35 const int temp7 = (int)wrk->fxy_scale;

	36 const int temp6 = (x_out_max & ~0x3) << 2;

	37 assert(!WebPRescalerOutputDone(wrk));

	38 assert(wrk->y_accum <= 0);

	39 assert(!wrk->y_expand);

	40 assert(wrk->fxy_scale != 0);

	41 if (yscale) {

	42 if (x_out_max >= 4) {

	43 int temp8, temp9, temp10, temp11;

	44 __asm__ volatile (

	45 "li %[temp3], 0x10000 \n\t"

	46 "li %[temp4], 0x8000 \n\t"

	47 "addu %[loop_end], %[frow], %[temp6] \n\t"

	48 "1: \n\t"

	49 "lw %[temp0], 0(%[frow]) \n\t"

	50 "lw %[temp1], 4(%[frow]) \n\t"

	51 "lw %[temp2], 8(%[frow]) \n\t"

	52 "lw %[temp5], 12(%[frow]) \n\t"

	53 "mult $ac0, %[temp3], %[temp4] \n\t"

	54 "maddu $ac0, %[temp0], %[yscale] \n\t"

	55 "mult $ac1, %[temp3], %[temp4] \n\t"

	56 "maddu $ac1, %[temp1], %[yscale] \n\t"

	57 "mult $ac2, %[temp3], %[temp4] \n\t"

	58 "maddu $ac2, %[temp2], %[yscale] \n\t"

	59 "mult $ac3, %[temp3], %[temp4] \n\t"

	60 "maddu $ac3, %[temp5], %[yscale] \n\t"

	61 "addiu %[frow], %[frow], 16 \n\t"

	62 "mfhi %[temp0], $ac0 \n\t"

	63 "mfhi %[temp1], $ac1 \n\t"

	64 "mfhi %[temp2], $ac2 \n\t"

	65 "mfhi %[temp5], $ac3 \n\t"

	66 "lw %[temp8], 0(%[irow]) \n\t"

	67 "lw %[temp9], 4(%[irow]) \n\t"

	68 "lw %[temp10], 8(%[irow]) \n\t"

	69 "lw %[temp11], 12(%[irow]) \n\t"

	70 "addiu %[dst], %[dst], 4 \n\t"

	71 "addiu %[irow], %[irow], 16 \n\t"

	72 "subu %[temp8], %[temp8], %[temp0] \n\t"

	73 "subu %[temp9], %[temp9], %[temp1] \n\t"

	74 "subu %[temp10], %[temp10], %[temp2] \n\t"

	75 "subu %[temp11], %[temp11], %[temp5] \n\t"

	76 "mult $ac0, %[temp3], %[temp4] \n\t"

	77 "maddu $ac0, %[temp8], %[temp7] \n\t"

	78 "mult $ac1, %[temp3], %[temp4] \n\t"

	79 "maddu $ac1, %[temp9], %[temp7] \n\t"

	80 "mult $ac2, %[temp3], %[temp4] \n\t"

	81 "maddu $ac2, %[temp10], %[temp7] \n\t"

	82 "mult $ac3, %[temp3], %[temp4] \n\t"

	83 "maddu $ac3, %[temp11], %[temp7] \n\t"

	84 "mfhi %[temp8], $ac0 \n\t"

	85 "mfhi %[temp9], $ac1 \n\t"

	86 "mfhi %[temp10], $ac2 \n\t"

	87 "mfhi %[temp11], $ac3 \n\t"

	88 "sw %[temp0], -16(%[irow]) \n\t"

	89 "sw %[temp1], -12(%[irow]) \n\t"

	90 "sw %[temp2], -8(%[irow]) \n\t"

	91 "sw %[temp5], -4(%[irow]) \n\t"

	92 "sb %[temp8], -4(%[dst]) \n\t"

	93 "sb %[temp9], -3(%[dst]) \n\t"

	94 "sb %[temp10], -2(%[dst]) \n\t"

	95 "sb %[temp11], -1(%[dst]) \n\t"

	96 "bne %[frow], %[loop_end], 1b \n\t"

	97 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),

	98 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),

	99 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),

	100 [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),

	101 [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)

	102 : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)

	103 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",

	104 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"

	105 );

	106 }

	107 for (i = 0; i < (x_out_max & 0x3); ++i) {

	108 const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale);

	109 const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);

	110 assert(v >= 0 && v <= 255);

	111 *dst++ = v;

	112 *irow++ = frac; // new fractional start

	113 }

	114 } else {

	115 if (x_out_max >= 4) {

	116 __asm__ volatile (

	117 "li %[temp3], 0x10000 \n\t"

	118 "li %[temp4], 0x8000 \n\t"

	119 "addu %[loop_end], %[irow], %[temp6] \n\t"

	120 "1: \n\t"

	121 "lw %[temp0], 0(%[irow]) \n\t"

	122 "lw %[temp1], 4(%[irow]) \n\t"

	123 "lw %[temp2], 8(%[irow]) \n\t"

	124 "lw %[temp5], 12(%[irow]) \n\t"

	125 "addiu %[dst], %[dst], 4 \n\t"

	126 "addiu %[irow], %[irow], 16 \n\t"

	127 "mult $ac0, %[temp3], %[temp4] \n\t"

	128 "maddu $ac0, %[temp0], %[temp7] \n\t"

	129 "mult $ac1, %[temp3], %[temp4] \n\t"

	130 "maddu $ac1, %[temp1], %[temp7] \n\t"

	131 "mult $ac2, %[temp3], %[temp4] \n\t"

	132 "maddu $ac2, %[temp2], %[temp7] \n\t"

	133 "mult $ac3, %[temp3], %[temp4] \n\t"

	134 "maddu $ac3, %[temp5], %[temp7] \n\t"

	135 "mfhi %[temp0], $ac0 \n\t"

	136 "mfhi %[temp1], $ac1 \n\t"

	137 "mfhi %[temp2], $ac2 \n\t"

	138 "mfhi %[temp5], $ac3 \n\t"

	139 "sw $zero, -16(%[irow]) \n\t"

	140 "sw $zero, -12(%[irow]) \n\t"

	141 "sw $zero, -8(%[irow]) \n\t"

	142 "sw $zero, -4(%[irow]) \n\t"

	143 "sb %[temp0], -4(%[dst]) \n\t"

	144 "sb %[temp1], -3(%[dst]) \n\t"

	145 "sb %[temp2], -2(%[dst]) \n\t"

	146 "sb %[temp5], -1(%[dst]) \n\t"

	147 "bne %[irow], %[loop_end], 1b \n\t"

	148 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),

	149 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),

	150 [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)

	151 : [temp7]"r"(temp7), [temp6]"r"(temp6)

	152 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",

	153 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"

	154 );

	155 }

	156 for (i = 0; i < (x_out_max & 0x3); ++i) {

	157 const int v = (int)MULT_FIX(*irow, wrk->fxy_scale);

	158 assert(v >= 0 && v <= 255);

	159 *dst++ = v;

	160 *irow++ = 0;

	161 }

	162 }

	163 }

	164

	165 static void ExportRowExpand(WebPRescaler* const wrk) {

	166 int i;

	167 uint8_t* dst = wrk->dst;

	168 rescaler_t* irow = wrk->irow;

	169 const int x_out_max = wrk->dst_width * wrk->num_channels;

	170 const rescaler_t* frow = wrk->frow;

	171 int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;

	172 const int temp6 = (x_out_max & ~0x3) << 2;

	173 const int temp7 = (int)wrk->fy_scale;

	174 assert(!WebPRescalerOutputDone(wrk));

	175 assert(wrk->y_accum <= 0);

	176 assert(wrk->y_expand);

	177 assert(wrk->y_sub != 0);

	178 if (wrk->y_accum == 0) {

	179 if (x_out_max >= 4) {

	180 __asm__ volatile (

	181 "li %[temp4], 0x10000 \n\t"

	182 "li %[temp5], 0x8000 \n\t"

	183 "addu %[loop_end], %[frow], %[temp6] \n\t"

	184 "1: \n\t"

	185 "lw %[temp0], 0(%[frow]) \n\t"

	186 "lw %[temp1], 4(%[frow]) \n\t"

	187 "lw %[temp2], 8(%[frow]) \n\t"

	188 "lw %[temp3], 12(%[frow]) \n\t"

	189 "addiu %[dst], %[dst], 4 \n\t"

	190 "addiu %[frow], %[frow], 16 \n\t"

	191 "mult $ac0, %[temp4], %[temp5] \n\t"

	192 "maddu $ac0, %[temp0], %[temp7] \n\t"

	193 "mult $ac1, %[temp4], %[temp5] \n\t"

	194 "maddu $ac1, %[temp1], %[temp7] \n\t"

	195 "mult $ac2, %[temp4], %[temp5] \n\t"

	196 "maddu $ac2, %[temp2], %[temp7] \n\t"

	197 "mult $ac3, %[temp4], %[temp5] \n\t"

	198 "maddu $ac3, %[temp3], %[temp7] \n\t"

	199 "mfhi %[temp0], $ac0 \n\t"

	200 "mfhi %[temp1], $ac1 \n\t"

	201 "mfhi %[temp2], $ac2 \n\t"

	202 "mfhi %[temp3], $ac3 \n\t"

	203 "sb %[temp0], -4(%[dst]) \n\t"

	204 "sb %[temp1], -3(%[dst]) \n\t"

	205 "sb %[temp2], -2(%[dst]) \n\t"

	206 "sb %[temp3], -1(%[dst]) \n\t"

	207 "bne %[frow], %[loop_end], 1b \n\t"

	208 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),

	209 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),

	210 [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)

	211 : [temp7]"r"(temp7), [temp6]"r"(temp6)

	212 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",

	213 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"

	214 );

	215 }

	216 for (i = 0; i < (x_out_max & 0x3); ++i) {

	217 const uint32_t J = *frow++;

	218 const int v = (int)MULT_FIX(J, wrk->fy_scale);

	219 assert(v >= 0 && v <= 255);

	220 *dst++ = v;

	221 }

	222 } else {

	223 const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);

	224 const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);

	225 if (x_out_max >= 4) {

	226 int temp8, temp9, temp10, temp11;

	227 __asm__ volatile (

	228 "li %[temp8], 0x10000 \n\t"

	229 "li %[temp9], 0x8000 \n\t"

	230 "addu %[loop_end], %[frow], %[temp6] \n\t"

	231 "1: \n\t"

	232 "lw %[temp0], 0(%[frow]) \n\t"

	233 "lw %[temp1], 4(%[frow]) \n\t"

	234 "lw %[temp2], 8(%[frow]) \n\t"

	235 "lw %[temp3], 12(%[frow]) \n\t"

	236 "lw %[temp4], 0(%[irow]) \n\t"

	237 "lw %[temp5], 4(%[irow]) \n\t"

	238 "lw %[temp10], 8(%[irow]) \n\t"

	239 "lw %[temp11], 12(%[irow]) \n\t"

	240 "addiu %[dst], %[dst], 4 \n\t"

	241 "mult $ac0, %[temp8], %[temp9] \n\t"

	242 "maddu $ac0, %[A], %[temp0] \n\t"

	243 "maddu $ac0, %[B], %[temp4] \n\t"

	244 "mult $ac1, %[temp8], %[temp9] \n\t"

	245 "maddu $ac1, %[A], %[temp1] \n\t"

	246 "maddu $ac1, %[B], %[temp5] \n\t"

	247 "mult $ac2, %[temp8], %[temp9] \n\t"

	248 "maddu $ac2, %[A], %[temp2] \n\t"

	249 "maddu $ac2, %[B], %[temp10] \n\t"

	250 "mult $ac3, %[temp8], %[temp9] \n\t"

	251 "maddu $ac3, %[A], %[temp3] \n\t"

	252 "maddu $ac3, %[B], %[temp11] \n\t"

	253 "addiu %[frow], %[frow], 16 \n\t"

	254 "addiu %[irow], %[irow], 16 \n\t"

	255 "mfhi %[temp0], $ac0 \n\t"

	256 "mfhi %[temp1], $ac1 \n\t"

	257 "mfhi %[temp2], $ac2 \n\t"

	258 "mfhi %[temp3], $ac3 \n\t"

	259 "mult $ac0, %[temp8], %[temp9] \n\t"

	260 "maddu $ac0, %[temp0], %[temp7] \n\t"

	261 "mult $ac1, %[temp8], %[temp9] \n\t"

	262 "maddu $ac1, %[temp1], %[temp7] \n\t"

	263 "mult $ac2, %[temp8], %[temp9] \n\t"

	264 "maddu $ac2, %[temp2], %[temp7] \n\t"

	265 "mult $ac3, %[temp8], %[temp9] \n\t"

	266 "maddu $ac3, %[temp3], %[temp7] \n\t"

	267 "mfhi %[temp0], $ac0 \n\t"

	268 "mfhi %[temp1], $ac1 \n\t"

	269 "mfhi %[temp2], $ac2 \n\t"

	270 "mfhi %[temp3], $ac3 \n\t"

	271 "sb %[temp0], -4(%[dst]) \n\t"

	272 "sb %[temp1], -3(%[dst]) \n\t"

	273 "sb %[temp2], -2(%[dst]) \n\t"

	274 "sb %[temp3], -1(%[dst]) \n\t"

	275 "bne %[frow], %[loop_end], 1b \n\t"

	276 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),

	277 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),

	278 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),

	279 [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),

	280 [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)

	281 : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)

	282 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",

	283 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"

	284 );

	285 }

	286 for (i = 0; i < (x_out_max & 0x3); ++i) {

	287 const uint64_t I = (uint64_t)A * *frow++

	288 + (uint64_t)B * *irow++;

	289 const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);

	290 const int v = (int)MULT_FIX(J, wrk->fy_scale);

	291 assert(v >= 0 && v <= 255);

	292 *dst++ = v;

	293 }

	294 }

	295 }

	296

	297 #undef MULT_FIX

	298 #undef ROUNDER

	299

	300 //------------------------------------------------------------------------------

	301 // Entry point

	302

	303 extern void WebPRescalerDspInitMIPSdspR2(void);

	304

	305 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {

	306 WebPRescalerExportRowExpand = ExportRowExpand;

	307 WebPRescalerExportRowShrink = ExportRowShrink;

	308 }

	309

	310 #else // !WEBP_USE_MIPS_DSP_R2

	311

	// Build without WEBP_USE_MIPS_DSP_R2: presumably this macro emits a no-op
	// definition of the init function so the symbol still exists -- confirm
	// against the macro's definition (expected in dsp.h).
	312 WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)

	313

	314 #endif // WEBP_USE_MIPS_DSP_R2

OLD	NEW

« no previous file with comments | « third_party/libwebp/dsp/rescaler_mips32.c ('k') | third_party/libwebp/dsp/rescaler_neon.c » ('j') | no next file with comments »