third_party/libwebp/utils/rescaler.c - Issue 1422493004: libwebp: update to 0.4.4

Side by Side Diff: third_party/libwebp/utils/rescaler.c

Issue 1422493004: libwebp: update to 0.4.4 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2012 Google Inc. All Rights Reserved.	1 // Copyright 2012 Google Inc. All Rights Reserved.

2 //	2 //

3 // Use of this source code is governed by a BSD-style license	3 // Use of this source code is governed by a BSD-style license

4 // that can be found in the COPYING file in the root of the source	4 // that can be found in the COPYING file in the root of the source

5 // tree. An additional intellectual property rights grant can be found	5 // tree. An additional intellectual property rights grant can be found

6 // in the file PATENTS. All contributing project authors may	6 // in the file PATENTS. All contributing project authors may

7 // be found in the AUTHORS file in the root of the source tree.	7 // be found in the AUTHORS file in the root of the source tree.

8 // -----------------------------------------------------------------------------	8 // -----------------------------------------------------------------------------

9 //	9 //

10 // Rescaling functions	10 // Rescaling functions

11 //	11 //

12 // Author: Skal (pascal.massimino@gmail.com)	12 // Author: Skal (pascal.massimino@gmail.com)

13	13

14 #include <assert.h>	14 #include <assert.h>

15 #include <stdlib.h>	15 #include <stdlib.h>

	16 #include <string.h>

16 #include "./rescaler.h"	17 #include "./rescaler.h"

17 #include "../dsp/dsp.h"	18 #include "../dsp/dsp.h"

18	19

19 //------------------------------------------------------------------------------	20 //------------------------------------------------------------------------------

20 // Implementations of critical functions ImportRow / ExportRow	21 // Implementations of critical functions ImportRow / ExportRow

21	22

22 void (*WebPRescalerImportRow)(WebPRescaler* const wrk,	23 // Import a row of data and save its contribution in the rescaler.

23 const uint8_t* const src, int channel) = NULL;	24 // 'channel' denotes the channel number to be imported. 'Expand' corresponds to

24 void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL;	25 // the wrk->x_expand case. Otherwise, 'Shrink' is to be used.

	26 typedef void (*WebPRescalerImportRowFunc)(WebPRescaler* const wrk,

	27 const uint8_t* src);

	28 static WebPRescalerImportRowFunc WebPRescalerImportRowExpand;

	29 static WebPRescalerImportRowFunc WebPRescalerImportRowShrink;

25	30

26 #define RFIX 30	31 // Export one row (starting at x_out position) from rescaler.

27 #define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)	32 // 'Expand' corresponds to the wrk->y_expand case.

	33 // Otherwise 'Shrink' is to be used

	34 typedef void (*WebPRescalerExportRowFunc)(WebPRescaler* const wrk);

	35 static WebPRescalerExportRowFunc WebPRescalerExportRowExpand;

	36 static WebPRescalerExportRowFunc WebPRescalerExportRowShrink;

28	37

29 static void ImportRowC(WebPRescaler* const wrk,	38 #define WEBP_RESCALER_RFIX 32 // fixed-point precision for multiplies

30 const uint8_t* const src, int channel) {	39 #define WEBP_RESCALER_ONE (1ull << WEBP_RESCALER_RFIX)

	40 #define WEBP_RESCALER_FRAC(x, y) \

	41 ((uint32_t)(((uint64_t)(x) << WEBP_RESCALER_RFIX) / (y)))

	42 #define ROUNDER (WEBP_RESCALER_ONE >> 1)

	43 #define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)

	44

	45 static void ImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {

31 const int x_stride = wrk->num_channels;	46 const int x_stride = wrk->num_channels;

32 const int x_out_max = wrk->dst_width * wrk->num_channels;	47 const int x_out_max = wrk->dst_width * wrk->num_channels;

33 int x_in = channel;	48 int channel;

34 int x_out;	49 assert(!WebPRescalerInputDone(wrk));

35 int accum = 0;	50 assert(wrk->x_expand);

36 if (!wrk->x_expand) {	51 for (channel = 0; channel < x_stride; ++channel) {

37 int sum = 0;	52 int x_in = channel;

38 for (x_out = channel; x_out < x_out_max; x_out += x_stride) {	53 int x_out = channel;

	54 // simple bilinear interpolation

	55 int accum = wrk->x_add;

	56 int left = src[x_in];

	57 int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left;

	58 x_in += x_stride;

	59 while (1) {

	60 wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;

	61 x_out += x_stride;

	62 if (x_out >= x_out_max) break;

	63 accum -= wrk->x_sub;

	64 if (accum < 0) {

	65 left = right;

	66 x_in += x_stride;

	67 assert(x_in < wrk->src_width * x_stride);

	68 right = src[x_in];

	69 accum += wrk->x_add;

	70 }

	71 }

	72 assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);

	73 }

	74 }

	75

	76 static void ImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {

	77 const int x_stride = wrk->num_channels;

	78 const int x_out_max = wrk->dst_width * wrk->num_channels;

	79 int channel;

	80 assert(!WebPRescalerInputDone(wrk));

	81 assert(!wrk->x_expand);

	82 for (channel = 0; channel < x_stride; ++channel) {

	83 int x_in = channel;

	84 int x_out = channel;

	85 uint32_t sum = 0;

	86 int accum = 0;

	87 while (x_out < x_out_max) {

	88 uint32_t base = 0;

39 accum += wrk->x_add;	89 accum += wrk->x_add;

40 for (; accum > 0; accum -= wrk->x_sub) {	90 while (accum > 0) {

41 sum += src[x_in];	91 accum -= wrk->x_sub;

	92 assert(x_in < wrk->src_width * x_stride);

	93 base = src[x_in];

	94 sum += base;

42 x_in += x_stride;	95 x_in += x_stride;

43 }	96 }

44 { // Emit next horizontal pixel.	97 { // Emit next horizontal pixel.

45 const int32_t base = src[x_in];	98 const rescaler_t frac = base * (-accum);

46 const int32_t frac = base * (-accum);	99 wrk->frow[x_out] = sum * wrk->x_sub - frac;

47 x_in += x_stride;

48 wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;

49 // fresh fractional start for next pixel	100 // fresh fractional start for next pixel

50 sum = (int)MULT_FIX(frac, wrk->fx_scale);	101 sum = (int)MULT_FIX(frac, wrk->fx_scale);

51 }	102 }

	103 x_out += x_stride;

52 }	104 }

53 } else { // simple bilinear interpolation	105 assert(accum == 0);

54 int left = src[channel], right = src[channel];

55 for (x_out = channel; x_out < x_out_max; x_out += x_stride) {

56 if (accum < 0) {

57 left = right;

58 x_in += x_stride;

59 right = src[x_in];

60 accum += wrk->x_add;

61 }

62 wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;

63 accum -= wrk->x_sub;

64 }

65 }

66 // Accumulate the contribution of the new row.

67 for (x_out = channel; x_out < x_out_max; x_out += x_stride) {

68 wrk->irow[x_out] += wrk->frow[x_out];

69 }	106 }

70 }	107 }

71	108

72 static void ExportRowC(WebPRescaler* const wrk, int x_out) {	109 //------------------------------------------------------------------------------

	110 // Row export

	111

	112 static void ExportRowExpandC(WebPRescaler* const wrk) {

	113 int x_out;

	114 uint8_t* const dst = wrk->dst;

	115 rescaler_t* const irow = wrk->irow;

	116 const int x_out_max = wrk->dst_width * wrk->num_channels;

	117 const rescaler_t* const frow = wrk->frow;

	118 assert(!WebPRescalerOutputDone(wrk));

	119 assert(wrk->y_accum <= 0);

	120 assert(wrk->y_expand);

	121 assert(wrk->y_sub != 0);

	122 if (wrk->y_accum == 0) {

	123 for (x_out = 0; x_out < x_out_max; ++x_out) {

	124 const uint32_t J = frow[x_out];

	125 const int v = (int)MULT_FIX(J, wrk->fy_scale);

	126 assert(v >= 0 && v <= 255);

	127 dst[x_out] = v;

	128 }

	129 } else {

	130 const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);

	131 const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);

	132 for (x_out = 0; x_out < x_out_max; ++x_out) {

	133 const uint64_t I = (uint64_t)A * frow[x_out]

	134 + (uint64_t)B * irow[x_out];

	135 const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);

	136 const int v = (int)MULT_FIX(J, wrk->fy_scale);

	137 assert(v >= 0 && v <= 255);

	138 dst[x_out] = v;

	139 }

	140 }

	141 }

	142

	143 static void ExportRowShrinkC(WebPRescaler* const wrk) {

	144 int x_out;

	145 uint8_t* const dst = wrk->dst;

	146 rescaler_t* const irow = wrk->irow;

	147 const int x_out_max = wrk->dst_width * wrk->num_channels;

	148 const rescaler_t* const frow = wrk->frow;

	149 const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);

	150 assert(!WebPRescalerOutputDone(wrk));

	151 assert(wrk->y_accum <= 0);

	152 assert(!wrk->y_expand);

	153 if (yscale) {

	154 for (x_out = 0; x_out < x_out_max; ++x_out) {

	155 const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale);

	156 const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);

	157 assert(v >= 0 && v <= 255);

	158 dst[x_out] = v;

	159 irow[x_out] = frac; // new fractional start

	160 }

	161 } else {

	162 for (x_out = 0; x_out < x_out_max; ++x_out) {

	163 const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale);

	164 assert(v >= 0 && v <= 255);

	165 dst[x_out] = v;

	166 irow[x_out] = 0;

	167 }

	168 }

	169 }

	170

	171 //------------------------------------------------------------------------------

	172 // Main entry calls

	173

	174 void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) {

	175 assert(!WebPRescalerInputDone(wrk));

	176 if (!wrk->x_expand) {

	177 WebPRescalerImportRowShrink(wrk, src);

	178 } else {

	179 WebPRescalerImportRowExpand(wrk, src);

	180 }

	181 }

	182

	183 void WebPRescalerExportRow(WebPRescaler* const wrk) {

73 if (wrk->y_accum <= 0) {	184 if (wrk->y_accum <= 0) {

74 uint8_t* const dst = wrk->dst;	185 assert(!WebPRescalerOutputDone(wrk));

75 int32_t* const irow = wrk->irow;	186 if (wrk->y_expand) {

76 const int32_t* const frow = wrk->frow;	187 WebPRescalerExportRowExpand(wrk);

77 const int yscale = wrk->fy_scale * (-wrk->y_accum);	188 } else if (wrk->fxy_scale) {

78 const int x_out_max = wrk->dst_width * wrk->num_channels;	189 WebPRescalerExportRowShrink(wrk);

79 for (; x_out < x_out_max; ++x_out) {	190 } else { // very special case for src = dst = 1x1

80 const int frac = (int)MULT_FIX(frow[x_out], yscale);	191 int i;

81 const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);	192 assert(wrk->src_width == 1 && wrk->dst_width <= 2);

82 dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;	193 assert(wrk->src_height == 1 && wrk->dst_height == 1);

83 irow[x_out] = frac; // new fractional start	194 for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) {

	195 wrk->dst[i] = wrk->irow[i];

	196 wrk->irow[i] = 0;

	197 }

84 }	198 }

85 wrk->y_accum += wrk->y_add;	199 wrk->y_accum += wrk->y_add;

86 wrk->dst += wrk->dst_stride;	200 wrk->dst += wrk->dst_stride;

	201 ++wrk->dst_y;

87 }	202 }

88 }	203 }

89	204

90 //------------------------------------------------------------------------------	205 //------------------------------------------------------------------------------

91 // MIPS version	206 // MIPS version

92	207

93 #if defined(WEBP_USE_MIPS32)	208 #if defined(WEBP_USE_MIPS32)

94	209

95 static void ImportRowMIPS(WebPRescaler* const wrk,	210 static void ImportRowShrinkMIPS(WebPRescaler* const wrk, const uint8_t* src) {

96 const uint8_t* const src, int channel) {

97 const int x_stride = wrk->num_channels;	211 const int x_stride = wrk->num_channels;

98 const int x_out_max = wrk->dst_width * wrk->num_channels;	212 const int x_out_max = wrk->dst_width * wrk->num_channels;

99 const int fx_scale = wrk->fx_scale;	213 const int fx_scale = wrk->fx_scale;

100 const int x_add = wrk->x_add;	214 const int x_add = wrk->x_add;

101 const int x_sub = wrk->x_sub;	215 const int x_sub = wrk->x_sub;

102 int* frow = wrk->frow + channel;

103 int* irow = wrk->irow + channel;

104 const uint8_t* src1 = src + channel;

105 int temp1, temp2, temp3;

106 int base, frac, sum;

107 int accum, accum1;

108 const int x_stride1 = x_stride << 2;	216 const int x_stride1 = x_stride << 2;

109 int loop_c = x_out_max - channel;	217 int channel;

	218 assert(!wrk->x_expand);

	219 assert(!WebPRescalerInputDone(wrk));

110	220

111 if (!wrk->x_expand) {	221 for (channel = 0; channel < x_stride; ++channel) {

	222 const uint8_t* src1 = src + channel;

	223 rescaler_t* frow = wrk->frow + channel;

	224 int temp1, temp2, temp3;

	225 int base, frac, sum;

	226 int accum, accum1;

	227 int loop_c = x_out_max - channel;

	228

112 __asm__ volatile (	229 __asm__ volatile (

113 "li %[temp1], 0x8000 \n\t"	230 "li %[temp1], 0x8000 \n\t"

114 "li %[temp2], 0x10000 \n\t"	231 "li %[temp2], 0x10000 \n\t"

115 "li %[sum], 0 \n\t"	232 "li %[sum], 0 \n\t"

116 "li %[accum], 0 \n\t"	233 "li %[accum], 0 \n\t"

117 "1: \n\t"	234 "1: \n\t"

118 "addu %[accum], %[accum], %[x_add] \n\t"	235 "addu %[accum], %[accum], %[x_add] \n\t"

	236 "li %[base], 0 \n\t"

119 "blez %[accum], 3f \n\t"	237 "blez %[accum], 3f \n\t"

120 "2: \n\t"	238 "2: \n\t"

121 "lbu %[temp3], 0(%[src1]) \n\t"	239 "lbu %[base], 0(%[src1]) \n\t"

122 "subu %[accum], %[accum], %[x_sub] \n\t"	240 "subu %[accum], %[accum], %[x_sub] \n\t"

123 "addu %[src1], %[src1], %[x_stride] \n\t"	241 "addu %[src1], %[src1], %[x_stride] \n\t"

124 "addu %[sum], %[sum], %[temp3] \n\t"	242 "addu %[sum], %[sum], %[base] \n\t"

125 "bgtz %[accum], 2b \n\t"	243 "bgtz %[accum], 2b \n\t"

126 "3: \n\t"	244 "3: \n\t"

127 "lbu %[base], 0(%[src1]) \n\t"

128 "addu %[src1], %[src1], %[x_stride] \n\t"

129 "negu %[accum1], %[accum] \n\t"	245 "negu %[accum1], %[accum] \n\t"

130 "mul %[frac], %[base], %[accum1] \n\t"	246 "mul %[frac], %[base], %[accum1] \n\t"

131 "addu %[temp3], %[sum], %[base] \n\t"	247 "mul %[temp3], %[sum], %[x_sub] \n\t"

132 "mul %[temp3], %[temp3], %[x_sub] \n\t"

133 "lw %[base], 0(%[irow]) \n\t"

134 "subu %[loop_c], %[loop_c], %[x_stride] \n\t"	248 "subu %[loop_c], %[loop_c], %[x_stride] \n\t"

135 "sll %[accum1], %[frac], 2 \n\t"

136 "mult %[temp1], %[temp2] \n\t"	249 "mult %[temp1], %[temp2] \n\t"

137 "madd %[accum1], %[fx_scale] \n\t"	250 "maddu %[frac], %[fx_scale] \n\t"

138 "mfhi %[sum] \n\t"	251 "mfhi %[sum] \n\t"

139 "subu %[temp3], %[temp3], %[frac] \n\t"	252 "subu %[temp3], %[temp3], %[frac] \n\t"

140 "sw %[temp3], 0(%[frow]) \n\t"	253 "sw %[temp3], 0(%[frow]) \n\t"

141 "add %[base], %[base], %[temp3] \n\t"

142 "sw %[base], 0(%[irow]) \n\t"

143 "addu %[irow], %[irow], %[x_stride1] \n\t"

144 "addu %[frow], %[frow], %[x_stride1] \n\t"	254 "addu %[frow], %[frow], %[x_stride1] \n\t"

145 "bgtz %[loop_c], 1b \n\t"	255 "bgtz %[loop_c], 1b \n\t"

146	256 : [accum]"=&r"(accum), [src1]"+r"(src1), [temp3]"=&r"(temp3),

147 : [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3),	257 [sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac),

148 [sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac),	258 [frow]"+r"(frow), [accum1]"=&r"(accum1),

149 [frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1),	259 [temp2]"=&r"(temp2), [temp1]"=&r"(temp1)

150 [temp2] "=&r" (temp2), [temp1] "=&r" (temp1)	260 : [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale),

151 : [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale),	261 [x_sub]"r"(x_sub), [x_add]"r"(x_add),

152 [x_sub] "r" (x_sub), [x_add] "r" (x_add),	262 [loop_c]"r"(loop_c), [x_stride1]"r"(x_stride1)

153 [loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1)	263 : "memory", "hi", "lo"

	264 );

	265 assert(accum == 0);

	266 }

	267 }

	268

	269 static void ImportRowExpandMIPS(WebPRescaler* const wrk, const uint8_t* src) {

	270 const int x_stride = wrk->num_channels;

	271 const int x_out_max = wrk->dst_width * wrk->num_channels;

	272 const int x_add = wrk->x_add;

	273 const int x_sub = wrk->x_sub;

	274 const int src_width = wrk->src_width;

	275 const int x_stride1 = x_stride << 2;

	276 int channel;

	277 assert(wrk->x_expand);

	278 assert(!WebPRescalerInputDone(wrk));

	279

	280 for (channel = 0; channel < x_stride; ++channel) {

	281 const uint8_t* src1 = src + channel;

	282 rescaler_t* frow = wrk->frow + channel;

	283 int temp1, temp2, temp3, temp4;

	284 int frac;

	285 int accum;

	286 int x_out = channel;

	287

	288 __asm__ volatile (

	289 "addiu %[temp3], %[src_width], -1 \n\t"

	290 "lbu %[temp2], 0(%[src1]) \n\t"

	291 "addu %[src1], %[src1], %[x_stride] \n\t"

	292 "bgtz %[temp3], 0f \n\t"

	293 "addiu %[temp1], %[temp2], 0 \n\t"

	294 "b 3f \n\t"

	295 "0: \n\t"

	296 "lbu %[temp1], 0(%[src1]) \n\t"

	297 "3: \n\t"

	298 "addiu %[accum], %[x_add], 0 \n\t"

	299 "1: \n\t"

	300 "subu %[temp3], %[temp2], %[temp1] \n\t"

	301 "mul %[temp3], %[temp3], %[accum] \n\t"

	302 "mul %[temp4], %[temp1], %[x_add] \n\t"

	303 "addu %[temp3], %[temp4], %[temp3] \n\t"

	304 "sw %[temp3], 0(%[frow]) \n\t"

	305 "addu %[frow], %[frow], %[x_stride1] \n\t"

	306 "addu %[x_out], %[x_out], %[x_stride] \n\t"

	307 "subu %[temp3], %[x_out], %[x_out_max] \n\t"

	308 "bgez %[temp3], 2f \n\t"

	309 "subu %[accum], %[accum], %[x_sub] \n\t"

	310 "bgez %[accum], 4f \n\t"

	311 "addiu %[temp2], %[temp1], 0 \n\t"

	312 "addu %[src1], %[src1], %[x_stride] \n\t"

	313 "lbu %[temp1], 0(%[src1]) \n\t"

	314 "addu %[accum], %[accum], %[x_add] \n\t"

	315 "4: \n\t"

	316 "b 1b \n\t"

	317 "2: \n\t"

	318 : [src1]"+r"(src1), [accum]"=&r"(accum), [temp1]"=&r"(temp1),

	319 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),

	320 [x_out]"+r"(x_out), [frac]"=&r"(frac), [frow]"+r"(frow)

	321 : [x_stride]"r"(x_stride), [x_add]"r"(x_add), [x_sub]"r"(x_sub),

	322 [x_stride1]"r"(x_stride1), [src_width]"r"(src_width),

	323 [x_out_max]"r"(x_out_max)

	324 : "memory", "hi", "lo"

	325 );

	326 assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);

	327 }

	328 }

	329

	330 //------------------------------------------------------------------------------

	331 // Row export

	332

	333 static void ExportRowExpandMIPS(WebPRescaler* const wrk) {

	334 uint8_t* dst = wrk->dst;

	335 rescaler_t* irow = wrk->irow;

	336 const int x_out_max = wrk->dst_width * wrk->num_channels;

	337 const rescaler_t* frow = wrk->frow;

	338 int temp0, temp1, temp3, temp4, temp5, loop_end;

	339 const int temp2 = (int)wrk->fy_scale;

	340 const int temp6 = x_out_max << 2;

	341 assert(!WebPRescalerOutputDone(wrk));

	342 assert(wrk->y_accum <= 0);

	343 assert(wrk->y_expand);

	344 assert(wrk->y_sub != 0);

	345 if (wrk->y_accum == 0) {

	346 __asm__ volatile (

	347 "li %[temp3], 0x10000 \n\t"

	348 "li %[temp4], 0x8000 \n\t"

	349 "addu %[loop_end], %[frow], %[temp6] \n\t"

	350 "1: \n\t"

	351 "lw %[temp0], 0(%[frow]) \n\t"

	352 "addiu %[dst], %[dst], 1 \n\t"

	353 "addiu %[frow], %[frow], 4 \n\t"

	354 "mult %[temp3], %[temp4] \n\t"

	355 "maddu %[temp0], %[temp2] \n\t"

	356 "mfhi %[temp5] \n\t"

	357 "sb %[temp5], -1(%[dst]) \n\t"

	358 "bne %[frow], %[loop_end], 1b \n\t"

	359 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),

	360 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),

	361 [dst]"+r"(dst), [loop_end]"=&r"(loop_end)

	362 : [temp2]"r"(temp2), [temp6]"r"(temp6)

154 : "memory", "hi", "lo"	363 : "memory", "hi", "lo"

155 );	364 );

156 } else {	365 } else {

157 __asm__ volatile (	366 const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);

158 "lbu %[temp1], 0(%[src1]) \n\t"	367 const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);

159 "move %[temp2], %[temp1] \n\t"	368 __asm__ volatile (

160 "li %[accum], 0 \n\t"	369 "li %[temp3], 0x10000 \n\t"

161 "1: \n\t"	370 "li %[temp4], 0x8000 \n\t"

162 "bgez %[accum], 2f \n\t"	371 "addu %[loop_end], %[frow], %[temp6] \n\t"

163 "move %[temp2], %[temp1] \n\t"	372 "1: \n\t"

164 "addu %[src1], %[x_stride] \n\t"	373 "lw %[temp0], 0(%[frow]) \n\t"

165 "lbu %[temp1], 0(%[src1]) \n\t"	374 "lw %[temp1], 0(%[irow]) \n\t"

166 "addu %[accum], %[x_add] \n\t"	375 "addiu %[dst], %[dst], 1 \n\t"

167 "2: \n\t"	376 "mult %[temp3], %[temp4] \n\t"

168 "subu %[temp3], %[temp2], %[temp1] \n\t"	377 "maddu %[A], %[temp0] \n\t"

169 "mul %[temp3], %[temp3], %[accum] \n\t"	378 "maddu %[B], %[temp1] \n\t"

170 "mul %[base], %[temp1], %[x_add] \n\t"	379 "addiu %[frow], %[frow], 4 \n\t"

171 "subu %[accum], %[accum], %[x_sub] \n\t"	380 "addiu %[irow], %[irow], 4 \n\t"

172 "lw %[frac], 0(%[irow]) \n\t"	381 "mfhi %[temp5] \n\t"

173 "subu %[loop_c], %[loop_c], %[x_stride] \n\t"	382 "mult %[temp3], %[temp4] \n\t"

174 "addu %[temp3], %[base], %[temp3] \n\t"	383 "maddu %[temp5], %[temp2] \n\t"

175 "sw %[temp3], 0(%[frow]) \n\t"	384 "mfhi %[temp5] \n\t"

176 "addu %[frow], %[x_stride1] \n\t"	385 "sb %[temp5], -1(%[dst]) \n\t"

177 "addu %[frac], %[temp3] \n\t"	386 "bne %[frow], %[loop_end], 1b \n\t"

178 "sw %[frac], 0(%[irow]) \n\t"	387 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),

179 "addu %[irow], %[x_stride1] \n\t"	388 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),

180 "bgtz %[loop_c], 1b \n\t"	389 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)

181	390 : [temp2]"r"(temp2), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)

182 : [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1),	391 : "memory", "hi", "lo"

183 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base),	392 );

184 [frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow)	393 }

185 : [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub),	394 }

186 [x_stride1] "r" (x_stride1), [loop_c] "r" (loop_c)	395

187 : "memory", "hi", "lo"	396 static void ExportRowShrinkMIPS(WebPRescaler* const wrk) {

188 );	397 const int x_out_max = wrk->dst_width * wrk->num_channels;

189 }	398 uint8_t* dst = wrk->dst;

190 }	399 rescaler_t* irow = wrk->irow;

191	400 const rescaler_t* frow = wrk->frow;

192 static void ExportRowMIPS(WebPRescaler* const wrk, int x_out) {	401 const int yscale = wrk->fy_scale * (-wrk->y_accum);

193 if (wrk->y_accum <= 0) {	402 int temp0, temp1, temp3, temp4, temp5, loop_end;

194 uint8_t* const dst = wrk->dst;	403 const int temp2 = (int)wrk->fxy_scale;

195 int32_t* const irow = wrk->irow;	404 const int temp6 = x_out_max << 2;

196 const int32_t* const frow = wrk->frow;	405

197 const int yscale = wrk->fy_scale * (-wrk->y_accum);	406 assert(!WebPRescalerOutputDone(wrk));

198 const int x_out_max = wrk->dst_width * wrk->num_channels;	407 assert(wrk->y_accum <= 0);

199 // if wrk->fxy_scale can fit into 32 bits use optimized code,	408 assert(!wrk->y_expand);

200 // otherwise use C code	409 assert(wrk->fxy_scale != 0);

201 if ((wrk->fxy_scale >> 32) == 0) {	410 if (yscale) {

202 int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end;	411 __asm__ volatile (

203 const int temp2 = (int)(wrk->fxy_scale);	412 "li %[temp3], 0x10000 \n\t"

204 const int temp8 = x_out_max << 2;	413 "li %[temp4], 0x8000 \n\t"

205 uint8_t* dst_t = (uint8_t*)dst;	414 "addu %[loop_end], %[frow], %[temp6] \n\t"

206 int32_t* irow_t = (int32_t*)irow;	415 "1: \n\t"

207 const int32_t* frow_t = (const int32_t*)frow;	416 "lw %[temp0], 0(%[frow]) \n\t"

208	417 "mult %[temp3], %[temp4] \n\t"

209 __asm__ volatile(	418 "addiu %[frow], %[frow], 4 \n\t"

210 "addiu %[temp6], $zero, -256 \n\t"	419 "maddu %[temp0], %[yscale] \n\t"

211 "addiu %[temp7], $zero, 255 \n\t"	420 "mfhi %[temp1] \n\t"

212 "li %[temp3], 0x10000 \n\t"	421 "lw %[temp0], 0(%[irow]) \n\t"

213 "li %[temp4], 0x8000 \n\t"	422 "addiu %[dst], %[dst], 1 \n\t"

214 "addu %[loop_end], %[frow_t], %[temp8] \n\t"	423 "addiu %[irow], %[irow], 4 \n\t"

215 "1: \n\t"	424 "subu %[temp0], %[temp0], %[temp1] \n\t"

216 "lw %[temp0], 0(%[frow_t]) \n\t"	425 "mult %[temp3], %[temp4] \n\t"

217 "mult %[temp3], %[temp4] \n\t"	426 "maddu %[temp0], %[temp2] \n\t"

218 "addiu %[frow_t], %[frow_t], 4 \n\t"	427 "mfhi %[temp5] \n\t"

219 "sll %[temp0], %[temp0], 2 \n\t"	428 "sw %[temp1], -4(%[irow]) \n\t"

220 "madd %[temp0], %[yscale] \n\t"	429 "sb %[temp5], -1(%[dst]) \n\t"

221 "mfhi %[temp1] \n\t"	430 "bne %[frow], %[loop_end], 1b \n\t"

222 "lw %[temp0], 0(%[irow_t]) \n\t"	431 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),

223 "addiu %[dst_t], %[dst_t], 1 \n\t"	432 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),

224 "addiu %[irow_t], %[irow_t], 4 \n\t"	433 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)

225 "subu %[temp0], %[temp0], %[temp1] \n\t"	434 : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp6]"r"(temp6)

226 "mult %[temp3], %[temp4] \n\t"	435 : "memory", "hi", "lo"

227 "sll %[temp0], %[temp0], 2 \n\t"	436 );

228 "madd %[temp0], %[temp2] \n\t"	437 } else {

229 "mfhi %[temp5] \n\t"	438 __asm__ volatile (

230 "sw %[temp1], -4(%[irow_t]) \n\t"	439 "li %[temp3], 0x10000 \n\t"

231 "and %[temp0], %[temp5], %[temp6] \n\t"	440 "li %[temp4], 0x8000 \n\t"

232 "slti %[temp1], %[temp5], 0 \n\t"	441 "addu %[loop_end], %[irow], %[temp6] \n\t"

233 "beqz %[temp0], 2f \n\t"	442 "1: \n\t"

234 "xor %[temp5], %[temp5], %[temp5] \n\t"	443 "lw %[temp0], 0(%[irow]) \n\t"

235 "movz %[temp5], %[temp7], %[temp1] \n\t"	444 "addiu %[dst], %[dst], 1 \n\t"

236 "2: \n\t"	445 "addiu %[irow], %[irow], 4 \n\t"

237 "sb %[temp5], -1(%[dst_t]) \n\t"	446 "mult %[temp3], %[temp4] \n\t"

238 "bne %[frow_t], %[loop_end], 1b \n\t"	447 "maddu %[temp0], %[temp2] \n\t"

239	448 "mfhi %[temp5] \n\t"

240 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),	449 "sw $zero, -4(%[irow]) \n\t"

241 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),	450 "sb %[temp5], -1(%[dst]) \n\t"

242 [temp7]"=&r"(temp7), [frow_t]"+r"(frow_t), [irow_t]"+r"(irow_t),	451 "bne %[irow], %[loop_end], 1b \n\t"

243 [dst_t]"+r"(dst_t), [loop_end]"=&r"(loop_end)	452 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),

244 : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8)	453 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),

245 : "memory", "hi", "lo"	454 [dst]"+r"(dst), [loop_end]"=&r"(loop_end)

246 );	455 : [temp2]"r"(temp2), [temp6]"r"(temp6)

247 wrk->y_accum += wrk->y_add;	456 : "memory", "hi", "lo"

248 wrk->dst += wrk->dst_stride;	457 );

249 } else {	458 }

250 ExportRowC(wrk, x_out);	459 }

251 }	460

252 }

253 }

254 #endif // WEBP_USE_MIPS32	461 #endif // WEBP_USE_MIPS32

255	462

256 //------------------------------------------------------------------------------	463 //------------------------------------------------------------------------------

257	464

258 void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,	465 void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,

259 uint8_t* const dst, int dst_width, int dst_height,	466 uint8_t* const dst,

260 int dst_stride, int num_channels, int x_add, int x_sub,	467 int dst_width, int dst_height, int dst_stride,

261 int y_add, int y_sub, int32_t* const work) {	468 int num_channels, rescaler_t* const work) {

	469 const int x_add = src_width, x_sub = dst_width;

	470 const int y_add = src_height, y_sub = dst_height;

262 wrk->x_expand = (src_width < dst_width);	471 wrk->x_expand = (src_width < dst_width);

	472 wrk->y_expand = (src_height < dst_height);

263 wrk->src_width = src_width;	473 wrk->src_width = src_width;

264 wrk->src_height = src_height;	474 wrk->src_height = src_height;

265 wrk->dst_width = dst_width;	475 wrk->dst_width = dst_width;

266 wrk->dst_height = dst_height;	476 wrk->dst_height = dst_height;

	477 wrk->src_y = 0;

	478 wrk->dst_y = 0;

267 wrk->dst = dst;	479 wrk->dst = dst;

268 wrk->dst_stride = dst_stride;	480 wrk->dst_stride = dst_stride;

269 wrk->num_channels = num_channels;	481 wrk->num_channels = num_channels;

	482

270 // for 'x_expand', we use bilinear interpolation	483 // for 'x_expand', we use bilinear interpolation

271 wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;	484 wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add;

272 wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;	485 wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;

273 wrk->y_accum = y_add;	486 if (!wrk->x_expand) { // fx_scale is not used otherwise

274 wrk->y_add = y_add;	487 wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub);

275 wrk->y_sub = y_sub;	488 }

276 wrk->fx_scale = (1 << RFIX) / x_sub;	489 // vertical scaling parameters

277 wrk->fy_scale = (1 << RFIX) / y_sub;	490 wrk->y_add = wrk->y_expand ? y_add - 1 : y_add;

278 wrk->fxy_scale = wrk->x_expand ?	491 wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub;

279 ((int64_t)dst_height << RFIX) / (x_sub * src_height) :	492 wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add;

280 ((int64_t)dst_height << RFIX) / (x_add * src_height);	493 if (!wrk->y_expand) {

	494 // this is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast.

	495 const uint64_t ratio =

	496 (uint64_t)dst_height * WEBP_RESCALER_ONE / (wrk->x_add * wrk->y_add);

	497 if (ratio != (uint32_t)ratio) {

	498 // We can't represent the ratio with the current fixed-point precision.

	499 // => We special-case fxy_scale = 0, in WebPRescalerExportRow().

	500 wrk->fxy_scale = 0;

	501 } else {

	502 wrk->fxy_scale = (uint32_t)ratio;

	503 }

	504 wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub);

	505 } else {

	506 wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add);

	507 // wrk->fxy_scale is unused here.

	508 }

281 wrk->irow = work;	509 wrk->irow = work;

282 wrk->frow = work + num_channels * dst_width;	510 wrk->frow = work + num_channels * dst_width;

283	511 memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));

284 if (WebPRescalerImportRow == NULL) {	512

285 WebPRescalerImportRow = ImportRowC;	513 if (WebPRescalerImportRowExpand == NULL) {

286 WebPRescalerExportRow = ExportRowC;	514 WebPRescalerImportRowExpand = ImportRowExpandC;

	515 WebPRescalerImportRowShrink = ImportRowShrinkC;

	516 WebPRescalerExportRowExpand = ExportRowExpandC;

	517 WebPRescalerExportRowShrink = ExportRowShrinkC;

287 if (VP8GetCPUInfo != NULL) {	518 if (VP8GetCPUInfo != NULL) {

288 #if defined(WEBP_USE_MIPS32)	519 #if defined(WEBP_USE_MIPS32)

289 if (VP8GetCPUInfo(kMIPS32)) {	520 if (VP8GetCPUInfo(kMIPS32)) {

290 WebPRescalerImportRow = ImportRowMIPS;	521 WebPRescalerImportRowExpand = ImportRowExpandMIPS;

291 WebPRescalerExportRow = ExportRowMIPS;	522 WebPRescalerImportRowShrink = ImportRowShrinkMIPS;

	523 WebPRescalerExportRowExpand = ExportRowExpandMIPS;

	524 WebPRescalerExportRowShrink = ExportRowShrinkMIPS;

292 }	525 }

293 #endif	526 #endif

294 }	527 }

295 }	528 }

296 }	529 }

297	530

298 #undef MULT_FIX	531 #undef MULT_FIX

299 #undef RFIX	532 #undef WEBP_RESCALER_RFIX

	533 #undef WEBP_RESCALER_ONE

	534 #undef WEBP_RESCALER_FRAC

	535 #undef ROUNDER

300	536

301 //------------------------------------------------------------------------------	537 //------------------------------------------------------------------------------

302 // all-in-one calls	538 // all-in-one calls

303	539

304 int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) {	540 int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) {

305 const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub;	541 const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub;

306 return (num_lines > max_num_lines) ? max_num_lines : num_lines;	542 return (num_lines > max_num_lines) ? max_num_lines : num_lines;

307 }	543 }

308	544

309 int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,	545 int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,

310 const uint8_t* src, int src_stride) {	546 const uint8_t* src, int src_stride) {

311 int total_imported = 0;	547 int total_imported = 0;

312 while (total_imported < num_lines && wrk->y_accum > 0) {	548 while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) {

313 int channel;	549 if (wrk->y_expand) {

314 for (channel = 0; channel < wrk->num_channels; ++channel) {	550 rescaler_t* const tmp = wrk->irow;

315 WebPRescalerImportRow(wrk, src, channel);	551 wrk->irow = wrk->frow;

	552 wrk->frow = tmp;

316 }	553 }

	554 WebPRescalerImportRow(wrk, src);

	555 if (!wrk->y_expand) { // Accumulate the contribution of the new row.

	556 int x;

	557 for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) {

	558 wrk->irow[x] += wrk->frow[x];

	559 }

	560 }

	561 ++wrk->src_y;

317 src += src_stride;	562 src += src_stride;

318 ++total_imported;	563 ++total_imported;

319 wrk->y_accum -= wrk->y_sub;	564 wrk->y_accum -= wrk->y_sub;

320 }	565 }

321 return total_imported;	566 return total_imported;

322 }	567 }

323	568

324 int WebPRescalerExport(WebPRescaler* const rescaler) {	569 int WebPRescalerExport(WebPRescaler* const rescaler) {

325 int total_exported = 0;	570 int total_exported = 0;

326 while (WebPRescalerHasPendingOutput(rescaler)) {	571 while (WebPRescalerHasPendingOutput(rescaler)) {

327 WebPRescalerExportRow(rescaler, 0);	572 WebPRescalerExportRow(rescaler);

328 ++total_exported;	573 ++total_exported;

329 }	574 }

330 return total_exported;	575 return total_exported;

331 }	576 }

332	577

333 //------------------------------------------------------------------------------	578 //------------------------------------------------------------------------------

OLD	NEW

« no previous file with comments | « third_party/libwebp/utils/rescaler.h ('k') | no next file » | no next file with comments »