source/libvpx/vp9/encoder/vp9_dct.c - Issue 54923004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/encoder/vp9_dct.c

Issue 54923004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11

12 #include <assert.h>	11 #include <assert.h>

13 #include <math.h>	12 #include <math.h>

	13

14 #include "./vpx_config.h"	14 #include "./vpx_config.h"

15 #include "vp9/common/vp9_systemdependent.h"	15 #include "./vp9_rtcd.h"

16	16

17 #include "vp9/common/vp9_blockd.h"	17 #include "vp9/common/vp9_blockd.h"

18 #include "vp9/common/vp9_idct.h"	18 #include "vp9/common/vp9_idct.h"

	19 #include "vp9/common/vp9_systemdependent.h"

19	20

20 static void fdct4_1d(int16_t *input, int16_t *output) {	21 #include "vp9/encoder/vp9_dct.h"

	22

	23 static void fdct4(const int16_t *input, int16_t *output) {

21 int16_t step[4];	24 int16_t step[4];

22 int temp1, temp2;	25 int temp1, temp2;

23	26

24 step[0] = input[0] + input[3];	27 step[0] = input[0] + input[3];

25 step[1] = input[1] + input[2];	28 step[1] = input[1] + input[2];

26 step[2] = input[1] - input[2];	29 step[2] = input[1] - input[2];

27 step[3] = input[0] - input[3];	30 step[3] = input[0] - input[3];

28	31

29 temp1 = (step[0] + step[1]) * cospi_16_64;	32 temp1 = (step[0] + step[1]) * cospi_16_64;

30 temp2 = (step[0] - step[1]) * cospi_16_64;	33 temp2 = (step[0] - step[1]) * cospi_16_64;

31 output[0] = dct_const_round_shift(temp1);	34 output[0] = dct_const_round_shift(temp1);

32 output[2] = dct_const_round_shift(temp2);	35 output[2] = dct_const_round_shift(temp2);

33 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;	36 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;

34 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;	37 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;

35 output[1] = dct_const_round_shift(temp1);	38 output[1] = dct_const_round_shift(temp1);

36 output[3] = dct_const_round_shift(temp2);	39 output[3] = dct_const_round_shift(temp2);

37 }	40 }

38	41

39 void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {	42 void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {

40 // The 2D transform is done with two passes which are actually pretty	43 // The 2D transform is done with two passes which are actually pretty

41 // similar. In the first one, we transform the columns and transpose	44 // similar. In the first one, we transform the columns and transpose

42 // the results. In the second one, we transform the rows. To achieve that,	45 // the results. In the second one, we transform the rows. To achieve that,

43 // as the first pass results are transposed, we tranpose the columns (that	46 // as the first pass results are transposed, we tranpose the columns (that

44 // is the transposed rows) and transpose the results (so that it goes back	47 // is the transposed rows) and transpose the results (so that it goes back

45 // in normal/row positions).	48 // in normal/row positions).

46 const int stride = pitch >> 1;

47 int pass;	49 int pass;

48 // We need an intermediate buffer between passes.	50 // We need an intermediate buffer between passes.

49 int16_t intermediate[4 * 4];	51 int16_t intermediate[4 * 4];

50 int16_t *in = input;	52 const int16_t *in = input;

51 int16_t *out = intermediate;	53 int16_t *out = intermediate;

52 // Do the two transform/transpose passes	54 // Do the two transform/transpose passes

53 for (pass = 0; pass < 2; ++pass) {	55 for (pass = 0; pass < 2; ++pass) {

54 /*canbe16*/ int input[4];	56 /*canbe16*/ int input[4];

55 /*canbe16*/ int step[4];	57 /*canbe16*/ int step[4];

56 /*needs32*/ int temp1, temp2;	58 /*needs32*/ int temp1, temp2;

57 int i;	59 int i;

58 for (i = 0; i < 4; ++i) {	60 for (i = 0; i < 4; ++i) {

59 // Load inputs.	61 // Load inputs.

60 if (0 == pass) {	62 if (0 == pass) {

61 input[0] = in[0 * stride] << 4;	63 input[0] = in[0 * stride] * 16;

62 input[1] = in[1 * stride] << 4;	64 input[1] = in[1 * stride] * 16;

63 input[2] = in[2 * stride] << 4;	65 input[2] = in[2 * stride] * 16;

64 input[3] = in[3 * stride] << 4;	66 input[3] = in[3 * stride] * 16;

65 if (i == 0 && input[0]) {	67 if (i == 0 && input[0]) {

66 input[0] += 1;	68 input[0] += 1;

67 }	69 }

68 } else {	70 } else {

69 input[0] = in[0 * 4];	71 input[0] = in[0 * 4];

70 input[1] = in[1 * 4];	72 input[1] = in[1 * 4];

71 input[2] = in[2 * 4];	73 input[2] = in[2 * 4];

72 input[3] = in[3 * 4];	74 input[3] = in[3 * 4];

73 }	75 }

74 // Transform.	76 // Transform.

(...skipping 20 matching lines...) Expand all Loading...
95	97

96 {	98 {

97 int i, j;	99 int i, j;

98 for (i = 0; i < 4; ++i) {	100 for (i = 0; i < 4; ++i) {

99 for (j = 0; j < 4; ++j)	101 for (j = 0; j < 4; ++j)

100 output[j + i * 4] = (output[j + i * 4] + 1) >> 2;	102 output[j + i * 4] = (output[j + i * 4] + 1) >> 2;

101 }	103 }

102 }	104 }

103 }	105 }

104	106

105 static void fadst4_1d(int16_t *input, int16_t *output) {	107 static void fadst4(const int16_t *input, int16_t *output) {

106 int x0, x1, x2, x3;	108 int x0, x1, x2, x3;

107 int s0, s1, s2, s3, s4, s5, s6, s7;	109 int s0, s1, s2, s3, s4, s5, s6, s7;

108	110

109 x0 = input[0];	111 x0 = input[0];

110 x1 = input[1];	112 x1 = input[1];

111 x2 = input[2];	113 x2 = input[2];

112 x3 = input[3];	114 x3 = input[3];

113	115

114 if (!(x0 | x1 | x2 | x3)) {	116 if (!(x0 | x1 | x2 | x3)) {

115 output[0] = output[1] = output[2] = output[3] = 0;	117 output[0] = output[1] = output[2] = output[3] = 0;

(...skipping 20 matching lines...) Expand all Loading...
136 s3 = x2 - x0 + x3;	138 s3 = x2 - x0 + x3;

137	139

138 // 1-D transform scaling factor is sqrt(2).	140 // 1-D transform scaling factor is sqrt(2).

139 output[0] = dct_const_round_shift(s0);	141 output[0] = dct_const_round_shift(s0);

140 output[1] = dct_const_round_shift(s1);	142 output[1] = dct_const_round_shift(s1);

141 output[2] = dct_const_round_shift(s2);	143 output[2] = dct_const_round_shift(s2);

142 output[3] = dct_const_round_shift(s3);	144 output[3] = dct_const_round_shift(s3);

143 }	145 }

144	146

145 static const transform_2d FHT_4[] = {	147 static const transform_2d FHT_4[] = {

146 { fdct4_1d, fdct4_1d }, // DCT_DCT = 0	148 { fdct4, fdct4 }, // DCT_DCT = 0

147 { fadst4_1d, fdct4_1d }, // ADST_DCT = 1	149 { fadst4, fdct4 }, // ADST_DCT = 1

148 { fdct4_1d, fadst4_1d }, // DCT_ADST = 2	150 { fdct4, fadst4 }, // DCT_ADST = 2

149 { fadst4_1d, fadst4_1d } // ADST_ADST = 3	151 { fadst4, fadst4 } // ADST_ADST = 3

150 };	152 };

151	153

152 void vp9_short_fht4x4_c(int16_t *input, int16_t *output,	154 void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,

153 int pitch, TX_TYPE tx_type) {	155 int stride, int tx_type) {

154 int16_t out[4 * 4];	156 int16_t out[4 * 4];

155 int16_t *outptr = &out[0];	157 int16_t *outptr = &out[0];

156 int i, j;	158 int i, j;

157 int16_t temp_in[4], temp_out[4];	159 int16_t temp_in[4], temp_out[4];

158 const transform_2d ht = FHT_4[tx_type];	160 const transform_2d ht = FHT_4[tx_type];

159	161

160 // Columns	162 // Columns

161 for (i = 0; i < 4; ++i) {	163 for (i = 0; i < 4; ++i) {

162 for (j = 0; j < 4; ++j)	164 for (j = 0; j < 4; ++j)

163 temp_in[j] = input[j * pitch + i] << 4;	165 temp_in[j] = input[j * stride + i] * 16;

164 if (i == 0 && temp_in[0])	166 if (i == 0 && temp_in[0])

165 temp_in[0] += 1;	167 temp_in[0] += 1;

166 ht.cols(temp_in, temp_out);	168 ht.cols(temp_in, temp_out);

167 for (j = 0; j < 4; ++j)	169 for (j = 0; j < 4; ++j)

168 outptr[j * 4 + i] = temp_out[j];	170 outptr[j * 4 + i] = temp_out[j];

169 }	171 }

170	172

171 // Rows	173 // Rows

172 for (i = 0; i < 4; ++i) {	174 for (i = 0; i < 4; ++i) {

173 for (j = 0; j < 4; ++j)	175 for (j = 0; j < 4; ++j)

174 temp_in[j] = out[j + i * 4];	176 temp_in[j] = out[j + i * 4];

175 ht.rows(temp_in, temp_out);	177 ht.rows(temp_in, temp_out);

176 for (j = 0; j < 4; ++j)	178 for (j = 0; j < 4; ++j)

177 output[j + i * 4] = (temp_out[j] + 1) >> 2;	179 output[j + i * 4] = (temp_out[j] + 1) >> 2;

178 }	180 }

179 }	181 }

180	182

181 void vp9_short_fdct8x4_c(int16_t *input, int16_t *output, int pitch) {	183 static void fdct8(const int16_t *input, int16_t *output) {

182 vp9_short_fdct4x4_c(input, output, pitch);

183 vp9_short_fdct4x4_c(input + 4, output + 16, pitch);

184 }

185

186 static void fdct8_1d(int16_t *input, int16_t *output) {

187 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;	184 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;

188 /*needs32*/ int t0, t1, t2, t3;	185 /*needs32*/ int t0, t1, t2, t3;

189 /*canbe16*/ int x0, x1, x2, x3;	186 /*canbe16*/ int x0, x1, x2, x3;

190	187

191 // stage 1	188 // stage 1

192 s0 = input[0] + input[7];	189 s0 = input[0] + input[7];

193 s1 = input[1] + input[6];	190 s1 = input[1] + input[6];

194 s2 = input[2] + input[5];	191 s2 = input[2] + input[5];

195 s3 = input[3] + input[4];	192 s3 = input[3] + input[4];

196 s4 = input[3] - input[4];	193 s4 = input[3] - input[4];

197 s5 = input[2] - input[5];	194 s5 = input[2] - input[5];

198 s6 = input[1] - input[6];	195 s6 = input[1] - input[6];

199 s7 = input[0] - input[7];	196 s7 = input[0] - input[7];

200	197

201 // fdct4_1d(step, step);	198 // fdct4(step, step);

202 x0 = s0 + s3;	199 x0 = s0 + s3;

203 x1 = s1 + s2;	200 x1 = s1 + s2;

204 x2 = s1 - s2;	201 x2 = s1 - s2;

205 x3 = s0 - s3;	202 x3 = s0 - s3;

206 t0 = (x0 + x1) * cospi_16_64;	203 t0 = (x0 + x1) * cospi_16_64;

207 t1 = (x0 - x1) * cospi_16_64;	204 t1 = (x0 - x1) * cospi_16_64;

208 t2 = x2 * cospi_24_64 + x3 * cospi_8_64;	205 t2 = x2 * cospi_24_64 + x3 * cospi_8_64;

209 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;	206 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;

210 output[0] = dct_const_round_shift(t0);	207 output[0] = dct_const_round_shift(t0);

211 output[2] = dct_const_round_shift(t2);	208 output[2] = dct_const_round_shift(t2);

(...skipping 16 matching lines...) Expand all Loading...
228 t0 = x0 * cospi_28_64 + x3 * cospi_4_64;	225 t0 = x0 * cospi_28_64 + x3 * cospi_4_64;

229 t1 = x1 * cospi_12_64 + x2 * cospi_20_64;	226 t1 = x1 * cospi_12_64 + x2 * cospi_20_64;

230 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;	227 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;

231 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;	228 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;

232 output[1] = dct_const_round_shift(t0);	229 output[1] = dct_const_round_shift(t0);

233 output[3] = dct_const_round_shift(t2);	230 output[3] = dct_const_round_shift(t2);

234 output[5] = dct_const_round_shift(t1);	231 output[5] = dct_const_round_shift(t1);

235 output[7] = dct_const_round_shift(t3);	232 output[7] = dct_const_round_shift(t3);

236 }	233 }

237	234

238 void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int pitch) {	235 void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {

239 const int stride = pitch >> 1;

240 int i, j;	236 int i, j;

241 int16_t intermediate[64];	237 int16_t intermediate[64];

242	238

243 // Transform columns	239 // Transform columns

244 {	240 {

245 int16_t *output = intermediate;	241 int16_t *output = intermediate;

246 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;	242 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;

247 /*needs32*/ int t0, t1, t2, t3;	243 /*needs32*/ int t0, t1, t2, t3;

248 /*canbe16*/ int x0, x1, x2, x3;	244 /*canbe16*/ int x0, x1, x2, x3;

249	245

250 int i;	246 int i;

251 for (i = 0; i < 8; i++) {	247 for (i = 0; i < 8; i++) {

252 // stage 1	248 // stage 1

253 s0 = (input[0 * stride] + input[7 * stride]) << 2;	249 s0 = (input[0 * stride] + input[7 * stride]) * 4;

254 s1 = (input[1 * stride] + input[6 * stride]) << 2;	250 s1 = (input[1 * stride] + input[6 * stride]) * 4;

255 s2 = (input[2 * stride] + input[5 * stride]) << 2;	251 s2 = (input[2 * stride] + input[5 * stride]) * 4;

256 s3 = (input[3 * stride] + input[4 * stride]) << 2;	252 s3 = (input[3 * stride] + input[4 * stride]) * 4;

257 s4 = (input[3 * stride] - input[4 * stride]) << 2;	253 s4 = (input[3 * stride] - input[4 * stride]) * 4;

258 s5 = (input[2 * stride] - input[5 * stride]) << 2;	254 s5 = (input[2 * stride] - input[5 * stride]) * 4;

259 s6 = (input[1 * stride] - input[6 * stride]) << 2;	255 s6 = (input[1 * stride] - input[6 * stride]) * 4;

260 s7 = (input[0 * stride] - input[7 * stride]) << 2;	256 s7 = (input[0 * stride] - input[7 * stride]) * 4;

261	257

262 // fdct4_1d(step, step);	258 // fdct4(step, step);

263 x0 = s0 + s3;	259 x0 = s0 + s3;

264 x1 = s1 + s2;	260 x1 = s1 + s2;

265 x2 = s1 - s2;	261 x2 = s1 - s2;

266 x3 = s0 - s3;	262 x3 = s0 - s3;

267 t0 = (x0 + x1) * cospi_16_64;	263 t0 = (x0 + x1) * cospi_16_64;

268 t1 = (x0 - x1) * cospi_16_64;	264 t1 = (x0 - x1) * cospi_16_64;

269 t2 = x2 * cospi_24_64 + x3 * cospi_8_64;	265 t2 = x2 * cospi_24_64 + x3 * cospi_8_64;

270 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;	266 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;

271 output[0 * 8] = dct_const_round_shift(t0);	267 output[0 * 8] = dct_const_round_shift(t0);

272 output[2 * 8] = dct_const_round_shift(t2);	268 output[2 * 8] = dct_const_round_shift(t2);

(...skipping 21 matching lines...) Expand all Loading...
294 output[3 * 8] = dct_const_round_shift(t2);	290 output[3 * 8] = dct_const_round_shift(t2);

295 output[5 * 8] = dct_const_round_shift(t1);	291 output[5 * 8] = dct_const_round_shift(t1);

296 output[7 * 8] = dct_const_round_shift(t3);	292 output[7 * 8] = dct_const_round_shift(t3);

297 input++;	293 input++;

298 output++;	294 output++;

299 }	295 }

300 }	296 }

301	297

302 // Rows	298 // Rows

303 for (i = 0; i < 8; ++i) {	299 for (i = 0; i < 8; ++i) {

304 fdct8_1d(&intermediate[i * 8], &final_output[i * 8]);	300 fdct8(&intermediate[i * 8], &final_output[i * 8]);

305 for (j = 0; j < 8; ++j)	301 for (j = 0; j < 8; ++j)

306 final_output[j + i * 8] /= 2;	302 final_output[j + i * 8] /= 2;

307 }	303 }

308 }	304 }

309	305

310 void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) {	306 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {

311 // The 2D transform is done with two passes which are actually pretty	307 // The 2D transform is done with two passes which are actually pretty

312 // similar. In the first one, we transform the columns and transpose	308 // similar. In the first one, we transform the columns and transpose

313 // the results. In the second one, we transform the rows. To achieve that,	309 // the results. In the second one, we transform the rows. To achieve that,

314 // as the first pass results are transposed, we tranpose the columns (that	310 // as the first pass results are transposed, we tranpose the columns (that

315 // is the transposed rows) and transpose the results (so that it goes back	311 // is the transposed rows) and transpose the results (so that it goes back

316 // in normal/row positions).	312 // in normal/row positions).

317 const int stride = pitch >> 1;

318 int pass;	313 int pass;

319 // We need an intermediate buffer between passes.	314 // We need an intermediate buffer between passes.

320 int16_t intermediate[256];	315 int16_t intermediate[256];

321 int16_t *in = input;	316 const int16_t *in = input;

322 int16_t *out = intermediate;	317 int16_t *out = intermediate;

323 // Do the two transform/transpose passes	318 // Do the two transform/transpose passes

324 for (pass = 0; pass < 2; ++pass) {	319 for (pass = 0; pass < 2; ++pass) {

325 /*canbe16*/ int step1[8];	320 /*canbe16*/ int step1[8];

326 /*canbe16*/ int step2[8];	321 /*canbe16*/ int step2[8];

327 /*canbe16*/ int step3[8];	322 /*canbe16*/ int step3[8];

328 /*canbe16*/ int input[8];	323 /*canbe16*/ int input[8];

329 /*needs32*/ int temp1, temp2;	324 /*needs32*/ int temp1, temp2;

330 int i;	325 int i;

331 for (i = 0; i < 16; i++) {	326 for (i = 0; i < 16; i++) {

332 if (0 == pass) {	327 if (0 == pass) {

333 // Calculate input for the first 8 results.	328 // Calculate input for the first 8 results.

334 input[0] = (in[0 * stride] + in[15 * stride]) << 2;	329 input[0] = (in[0 * stride] + in[15 * stride]) * 4;

335 input[1] = (in[1 * stride] + in[14 * stride]) << 2;	330 input[1] = (in[1 * stride] + in[14 * stride]) * 4;

336 input[2] = (in[2 * stride] + in[13 * stride]) << 2;	331 input[2] = (in[2 * stride] + in[13 * stride]) * 4;

337 input[3] = (in[3 * stride] + in[12 * stride]) << 2;	332 input[3] = (in[3 * stride] + in[12 * stride]) * 4;

338 input[4] = (in[4 * stride] + in[11 * stride]) << 2;	333 input[4] = (in[4 * stride] + in[11 * stride]) * 4;

339 input[5] = (in[5 * stride] + in[10 * stride]) << 2;	334 input[5] = (in[5 * stride] + in[10 * stride]) * 4;

340 input[6] = (in[6 * stride] + in[ 9 * stride]) << 2;	335 input[6] = (in[6 * stride] + in[ 9 * stride]) * 4;

341 input[7] = (in[7 * stride] + in[ 8 * stride]) << 2;	336 input[7] = (in[7 * stride] + in[ 8 * stride]) * 4;

342 // Calculate input for the next 8 results.	337 // Calculate input for the next 8 results.

343 step1[0] = (in[7 * stride] - in[ 8 * stride]) << 2;	338 step1[0] = (in[7 * stride] - in[ 8 * stride]) * 4;

344 step1[1] = (in[6 * stride] - in[ 9 * stride]) << 2;	339 step1[1] = (in[6 * stride] - in[ 9 * stride]) * 4;

345 step1[2] = (in[5 * stride] - in[10 * stride]) << 2;	340 step1[2] = (in[5 * stride] - in[10 * stride]) * 4;

346 step1[3] = (in[4 * stride] - in[11 * stride]) << 2;	341 step1[3] = (in[4 * stride] - in[11 * stride]) * 4;

347 step1[4] = (in[3 * stride] - in[12 * stride]) << 2;	342 step1[4] = (in[3 * stride] - in[12 * stride]) * 4;

348 step1[5] = (in[2 * stride] - in[13 * stride]) << 2;	343 step1[5] = (in[2 * stride] - in[13 * stride]) * 4;

349 step1[6] = (in[1 * stride] - in[14 * stride]) << 2;	344 step1[6] = (in[1 * stride] - in[14 * stride]) * 4;

350 step1[7] = (in[0 * stride] - in[15 * stride]) << 2;	345 step1[7] = (in[0 * stride] - in[15 * stride]) * 4;

351 } else {	346 } else {

352 // Calculate input for the first 8 results.	347 // Calculate input for the first 8 results.

353 input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);	348 input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);

354 input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);	349 input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);

355 input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);	350 input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);

356 input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);	351 input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);

357 input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);	352 input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);

358 input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);	353 input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);

359 input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);	354 input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);

360 input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);	355 input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);

361 // Calculate input for the next 8 results.	356 // Calculate input for the next 8 results.

362 step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);	357 step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);

363 step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);	358 step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);

364 step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);	359 step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);

365 step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);	360 step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);

366 step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);	361 step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);

367 step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);	362 step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);

368 step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);	363 step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);

369 step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);	364 step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);

370 }	365 }

371 // Work on the first eight values; fdct8_1d(input, even_results);	366 // Work on the first eight values; fdct8(input, even_results);

372 {	367 {

373 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;	368 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;

374 /*needs32*/ int t0, t1, t2, t3;	369 /*needs32*/ int t0, t1, t2, t3;

375 /*canbe16*/ int x0, x1, x2, x3;	370 /*canbe16*/ int x0, x1, x2, x3;

376	371

377 // stage 1	372 // stage 1

378 s0 = input[0] + input[7];	373 s0 = input[0] + input[7];

379 s1 = input[1] + input[6];	374 s1 = input[1] + input[6];

380 s2 = input[2] + input[5];	375 s2 = input[2] + input[5];

381 s3 = input[3] + input[4];	376 s3 = input[3] + input[4];

382 s4 = input[3] - input[4];	377 s4 = input[3] - input[4];

383 s5 = input[2] - input[5];	378 s5 = input[2] - input[5];

384 s6 = input[1] - input[6];	379 s6 = input[1] - input[6];

385 s7 = input[0] - input[7];	380 s7 = input[0] - input[7];

386	381

387 // fdct4_1d(step, step);	382 // fdct4(step, step);

388 x0 = s0 + s3;	383 x0 = s0 + s3;

389 x1 = s1 + s2;	384 x1 = s1 + s2;

390 x2 = s1 - s2;	385 x2 = s1 - s2;

391 x3 = s0 - s3;	386 x3 = s0 - s3;

392 t0 = (x0 + x1) * cospi_16_64;	387 t0 = (x0 + x1) * cospi_16_64;

393 t1 = (x0 - x1) * cospi_16_64;	388 t1 = (x0 - x1) * cospi_16_64;

394 t2 = x3 * cospi_8_64 + x2 * cospi_24_64;	389 t2 = x3 * cospi_8_64 + x2 * cospi_24_64;

395 t3 = x3 * cospi_24_64 - x2 * cospi_8_64;	390 t3 = x3 * cospi_24_64 - x2 * cospi_8_64;

396 out[0] = dct_const_round_shift(t0);	391 out[0] = dct_const_round_shift(t0);

397 out[4] = dct_const_round_shift(t2);	392 out[4] = dct_const_round_shift(t2);

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
479 // Do next column (which is a transposed row in second/horizontal pass)	474 // Do next column (which is a transposed row in second/horizontal pass)

480 in++;	475 in++;

481 out += 16;	476 out += 16;

482 }	477 }

483 // Setup in/out for next pass.	478 // Setup in/out for next pass.

484 in = intermediate;	479 in = intermediate;

485 out = output;	480 out = output;

486 }	481 }

487 }	482 }

488	483

489 static void fadst8_1d(int16_t *input, int16_t *output) {	484 static void fadst8(const int16_t *input, int16_t *output) {

490 int s0, s1, s2, s3, s4, s5, s6, s7;	485 int s0, s1, s2, s3, s4, s5, s6, s7;

491	486

492 int x0 = input[7];	487 int x0 = input[7];

493 int x1 = input[0];	488 int x1 = input[0];

494 int x2 = input[5];	489 int x2 = input[5];

495 int x3 = input[2];	490 int x3 = input[2];

496 int x4 = input[3];	491 int x4 = input[3];

497 int x5 = input[4];	492 int x5 = input[4];

498 int x6 = input[1];	493 int x6 = input[1];

499 int x7 = input[6];	494 int x7 = input[6];

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
551 output[1] = - x4;	546 output[1] = - x4;

552 output[2] = x6;	547 output[2] = x6;

553 output[3] = - x2;	548 output[3] = - x2;

554 output[4] = x3;	549 output[4] = x3;

555 output[5] = - x7;	550 output[5] = - x7;

556 output[6] = x5;	551 output[6] = x5;

557 output[7] = - x1;	552 output[7] = - x1;

558 }	553 }

559	554

560 static const transform_2d FHT_8[] = {	555 static const transform_2d FHT_8[] = {

561 { fdct8_1d, fdct8_1d }, // DCT_DCT = 0	556 { fdct8, fdct8 }, // DCT_DCT = 0

562 { fadst8_1d, fdct8_1d }, // ADST_DCT = 1	557 { fadst8, fdct8 }, // ADST_DCT = 1

563 { fdct8_1d, fadst8_1d }, // DCT_ADST = 2	558 { fdct8, fadst8 }, // DCT_ADST = 2

564 { fadst8_1d, fadst8_1d } // ADST_ADST = 3	559 { fadst8, fadst8 } // ADST_ADST = 3

565 };	560 };

566	561

567 void vp9_short_fht8x8_c(int16_t *input, int16_t *output,	562 void vp9_short_fht8x8_c(const int16_t *input, int16_t *output,

568 int pitch, TX_TYPE tx_type) {	563 int stride, int tx_type) {

569 int16_t out[64];	564 int16_t out[64];

570 int16_t *outptr = &out[0];	565 int16_t *outptr = &out[0];

571 int i, j;	566 int i, j;

572 int16_t temp_in[8], temp_out[8];	567 int16_t temp_in[8], temp_out[8];

573 const transform_2d ht = FHT_8[tx_type];	568 const transform_2d ht = FHT_8[tx_type];

574	569

575 // Columns	570 // Columns

576 for (i = 0; i < 8; ++i) {	571 for (i = 0; i < 8; ++i) {

577 for (j = 0; j < 8; ++j)	572 for (j = 0; j < 8; ++j)

578 temp_in[j] = input[j * pitch + i] << 2;	573 temp_in[j] = input[j * stride + i] * 4;

579 ht.cols(temp_in, temp_out);	574 ht.cols(temp_in, temp_out);

580 for (j = 0; j < 8; ++j)	575 for (j = 0; j < 8; ++j)

581 outptr[j * 8 + i] = temp_out[j];	576 outptr[j * 8 + i] = temp_out[j];

582 }	577 }

583	578

584 // Rows	579 // Rows

585 for (i = 0; i < 8; ++i) {	580 for (i = 0; i < 8; ++i) {

586 for (j = 0; j < 8; ++j)	581 for (j = 0; j < 8; ++j)

587 temp_in[j] = out[j + i * 8];	582 temp_in[j] = out[j + i * 8];

588 ht.rows(temp_in, temp_out);	583 ht.rows(temp_in, temp_out);

589 for (j = 0; j < 8; ++j)	584 for (j = 0; j < 8; ++j)

590 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;	585 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;

591 }	586 }

592 }	587 }

593	588

594 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per	589 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per

595 pixel. */	590 pixel. */

596 void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {	591 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) {

597 int i;	592 int i;

598 int a1, b1, c1, d1, e1;	593 int a1, b1, c1, d1, e1;

599 short *ip = input;	594 const int16_t *ip = input;

600 short *op = output;	595 int16_t *op = output;

601 int pitch_short = pitch >> 1;

602	596

603 for (i = 0; i < 4; i++) {	597 for (i = 0; i < 4; i++) {

604 a1 = ip[0 * pitch_short];	598 a1 = ip[0 * stride];

605 b1 = ip[1 * pitch_short];	599 b1 = ip[1 * stride];

606 c1 = ip[2 * pitch_short];	600 c1 = ip[2 * stride];

607 d1 = ip[3 * pitch_short];	601 d1 = ip[3 * stride];

608	602

609 a1 += b1;	603 a1 += b1;

610 d1 = d1 - c1;	604 d1 = d1 - c1;

611 e1 = (a1 - d1) >> 1;	605 e1 = (a1 - d1) >> 1;

612 b1 = e1 - b1;	606 b1 = e1 - b1;

613 c1 = e1 - c1;	607 c1 = e1 - c1;

614 a1 -= c1;	608 a1 -= c1;

615 d1 += b1;	609 d1 += b1;

616 op[0] = a1;	610 op[0] = a1;

617 op[4] = c1;	611 op[4] = c1;

(...skipping 12 matching lines...) Expand all Loading...
630 c1 = ip[2];	624 c1 = ip[2];

631 d1 = ip[3];	625 d1 = ip[3];

632	626

633 a1 += b1;	627 a1 += b1;

634 d1 -= c1;	628 d1 -= c1;

635 e1 = (a1 - d1) >> 1;	629 e1 = (a1 - d1) >> 1;

636 b1 = e1 - b1;	630 b1 = e1 - b1;

637 c1 = e1 - c1;	631 c1 = e1 - c1;

638 a1 -= c1;	632 a1 -= c1;

639 d1 += b1;	633 d1 += b1;

640 op[0] = a1 << WHT_UPSCALE_FACTOR;	634 op[0] = a1 * UNIT_QUANT_FACTOR;

641 op[1] = c1 << WHT_UPSCALE_FACTOR;	635 op[1] = c1 * UNIT_QUANT_FACTOR;

642 op[2] = d1 << WHT_UPSCALE_FACTOR;	636 op[2] = d1 * UNIT_QUANT_FACTOR;

643 op[3] = b1 << WHT_UPSCALE_FACTOR;	637 op[3] = b1 * UNIT_QUANT_FACTOR;

644	638

645 ip += 4;	639 ip += 4;

646 op += 4;	640 op += 4;

647 }	641 }

648 }	642 }

649	643

650 void vp9_short_walsh8x4_c(short *input, short *output, int pitch) {

651 vp9_short_walsh4x4_c(input, output, pitch);

652 vp9_short_walsh4x4_c(input + 4, output + 16, pitch);

653 }

654

655

656 // Rewrote to use same algorithm as others.	644 // Rewrote to use same algorithm as others.

657 static void fdct16_1d(int16_t in[16], int16_t out[16]) {	645 static void fdct16(const int16_t in[16], int16_t out[16]) {

658 /*canbe16*/ int step1[8];	646 /*canbe16*/ int step1[8];

659 /*canbe16*/ int step2[8];	647 /*canbe16*/ int step2[8];

660 /*canbe16*/ int step3[8];	648 /*canbe16*/ int step3[8];

661 /*canbe16*/ int input[8];	649 /*canbe16*/ int input[8];

662 /*needs32*/ int temp1, temp2;	650 /*needs32*/ int temp1, temp2;

663	651

664 // step 1	652 // step 1

665 input[0] = in[0] + in[15];	653 input[0] = in[0] + in[15];

666 input[1] = in[1] + in[14];	654 input[1] = in[1] + in[14];

667 input[2] = in[2] + in[13];	655 input[2] = in[2] + in[13];

668 input[3] = in[3] + in[12];	656 input[3] = in[3] + in[12];

669 input[4] = in[4] + in[11];	657 input[4] = in[4] + in[11];

670 input[5] = in[5] + in[10];	658 input[5] = in[5] + in[10];

671 input[6] = in[6] + in[ 9];	659 input[6] = in[6] + in[ 9];

672 input[7] = in[7] + in[ 8];	660 input[7] = in[7] + in[ 8];

673	661

674 step1[0] = in[7] - in[ 8];	662 step1[0] = in[7] - in[ 8];

675 step1[1] = in[6] - in[ 9];	663 step1[1] = in[6] - in[ 9];

676 step1[2] = in[5] - in[10];	664 step1[2] = in[5] - in[10];

677 step1[3] = in[4] - in[11];	665 step1[3] = in[4] - in[11];

678 step1[4] = in[3] - in[12];	666 step1[4] = in[3] - in[12];

679 step1[5] = in[2] - in[13];	667 step1[5] = in[2] - in[13];

680 step1[6] = in[1] - in[14];	668 step1[6] = in[1] - in[14];

681 step1[7] = in[0] - in[15];	669 step1[7] = in[0] - in[15];

682	670

683 // fdct8_1d(step, step);	671 // fdct8(step, step);

684 {	672 {

685 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;	673 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;

686 /*needs32*/ int t0, t1, t2, t3;	674 /*needs32*/ int t0, t1, t2, t3;

687 /*canbe16*/ int x0, x1, x2, x3;	675 /*canbe16*/ int x0, x1, x2, x3;

688	676

689 // stage 1	677 // stage 1

690 s0 = input[0] + input[7];	678 s0 = input[0] + input[7];

691 s1 = input[1] + input[6];	679 s1 = input[1] + input[6];

692 s2 = input[2] + input[5];	680 s2 = input[2] + input[5];

693 s3 = input[3] + input[4];	681 s3 = input[3] + input[4];

694 s4 = input[3] - input[4];	682 s4 = input[3] - input[4];

695 s5 = input[2] - input[5];	683 s5 = input[2] - input[5];

696 s6 = input[1] - input[6];	684 s6 = input[1] - input[6];

697 s7 = input[0] - input[7];	685 s7 = input[0] - input[7];

698	686

699 // fdct4_1d(step, step);	687 // fdct4(step, step);

700 x0 = s0 + s3;	688 x0 = s0 + s3;

701 x1 = s1 + s2;	689 x1 = s1 + s2;

702 x2 = s1 - s2;	690 x2 = s1 - s2;

703 x3 = s0 - s3;	691 x3 = s0 - s3;

704 t0 = (x0 + x1) * cospi_16_64;	692 t0 = (x0 + x1) * cospi_16_64;

705 t1 = (x0 - x1) * cospi_16_64;	693 t1 = (x0 - x1) * cospi_16_64;

706 t2 = x3 * cospi_8_64 + x2 * cospi_24_64;	694 t2 = x3 * cospi_8_64 + x2 * cospi_24_64;

707 t3 = x3 * cospi_24_64 - x2 * cospi_8_64;	695 t3 = x3 * cospi_24_64 - x2 * cospi_8_64;

708 out[0] = dct_const_round_shift(t0);	696 out[0] = dct_const_round_shift(t0);

709 out[4] = dct_const_round_shift(t2);	697 out[4] = dct_const_round_shift(t2);

(...skipping 78 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
788 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;	776 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;

789 out[3] = dct_const_round_shift(temp1);	777 out[3] = dct_const_round_shift(temp1);

790 out[11] = dct_const_round_shift(temp2);	778 out[11] = dct_const_round_shift(temp2);

791	779

792 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;	780 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;

793 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;	781 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;

794 out[7] = dct_const_round_shift(temp1);	782 out[7] = dct_const_round_shift(temp1);

795 out[15] = dct_const_round_shift(temp2);	783 out[15] = dct_const_round_shift(temp2);

796 }	784 }

797	785

798 void fadst16_1d(int16_t *input, int16_t *output) {	786 static void fadst16(const int16_t *input, int16_t *output) {

799 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;	787 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;

800	788

801 int x0 = input[15];	789 int x0 = input[15];

802 int x1 = input[0];	790 int x1 = input[0];

803 int x2 = input[13];	791 int x2 = input[13];

804 int x3 = input[2];	792 int x3 = input[2];

805 int x4 = input[11];	793 int x4 = input[11];

806 int x5 = input[4];	794 int x5 = input[4];

807 int x6 = input[9];	795 int x6 = input[9];

808 int x7 = input[6];	796 int x7 = input[6];

(...skipping 142 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
951 output[9] = x11;	939 output[9] = x11;

952 output[10] = x15;	940 output[10] = x15;

953 output[11] = x7;	941 output[11] = x7;

954 output[12] = x5;	942 output[12] = x5;

955 output[13] = - x13;	943 output[13] = - x13;

956 output[14] = x9;	944 output[14] = x9;

957 output[15] = - x1;	945 output[15] = - x1;

958 }	946 }

959	947

960 static const transform_2d FHT_16[] = {	948 static const transform_2d FHT_16[] = {

961 { fdct16_1d, fdct16_1d }, // DCT_DCT = 0	949 { fdct16, fdct16 }, // DCT_DCT = 0

962 { fadst16_1d, fdct16_1d }, // ADST_DCT = 1	950 { fadst16, fdct16 }, // ADST_DCT = 1

963 { fdct16_1d, fadst16_1d }, // DCT_ADST = 2	951 { fdct16, fadst16 }, // DCT_ADST = 2

964 { fadst16_1d, fadst16_1d } // ADST_ADST = 3	952 { fadst16, fadst16 } // ADST_ADST = 3

965 };	953 };

966	954

967 void vp9_short_fht16x16_c(int16_t *input, int16_t *output,	955 void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,

968 int pitch, TX_TYPE tx_type) {	956 int stride, int tx_type) {

969 int16_t out[256];	957 int16_t out[256];

970 int16_t *outptr = &out[0];	958 int16_t *outptr = &out[0];

971 int i, j;	959 int i, j;

972 int16_t temp_in[16], temp_out[16];	960 int16_t temp_in[16], temp_out[16];

973 const transform_2d ht = FHT_16[tx_type];	961 const transform_2d ht = FHT_16[tx_type];

974	962

975 // Columns	963 // Columns

976 for (i = 0; i < 16; ++i) {	964 for (i = 0; i < 16; ++i) {

977 for (j = 0; j < 16; ++j)	965 for (j = 0; j < 16; ++j)

978 temp_in[j] = input[j * pitch + i] << 2;	966 temp_in[j] = input[j * stride + i] * 4;

979 ht.cols(temp_in, temp_out);	967 ht.cols(temp_in, temp_out);

980 for (j = 0; j < 16; ++j)	968 for (j = 0; j < 16; ++j)

981 outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;	969 outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;

982 // outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;	970 // outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;

983 }	971 }

984	972

985 // Rows	973 // Rows

986 for (i = 0; i < 16; ++i) {	974 for (i = 0; i < 16; ++i) {

987 for (j = 0; j < 16; ++j)	975 for (j = 0; j < 16; ++j)

988 temp_in[j] = out[j + i * 16];	976 temp_in[j] = out[j + i * 16];

989 ht.rows(temp_in, temp_out);	977 ht.rows(temp_in, temp_out);

990 for (j = 0; j < 16; ++j)	978 for (j = 0; j < 16; ++j)

991 output[j + i * 16] = temp_out[j];	979 output[j + i * 16] = temp_out[j];

992 }	980 }

993 }	981 }

994	982

995 static INLINE int dct_32_round(int input) {	983 static INLINE int dct_32_round(int input) {

996 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);	984 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);

997 assert(-131072 <= rv && rv <= 131071);	985 assert(-131072 <= rv && rv <= 131071);

998 return rv;	986 return rv;

999 }	987 }

1000	988

1001 static INLINE int half_round_shift(int input) {	989 static INLINE int half_round_shift(int input) {

1002 int rv = (input + 1 + (input < 0)) >> 2;	990 int rv = (input + 1 + (input < 0)) >> 2;

1003 return rv;	991 return rv;

1004 }	992 }

1005	993

1006 static void dct32_1d(int *input, int *output, int round) {	994 static void dct32_1d(const int *input, int *output, int round) {

1007 int step[32];	995 int step[32];

1008 // Stage 1	996 // Stage 1

1009 step[0] = input[0] + input[(32 - 1)];	997 step[0] = input[0] + input[(32 - 1)];

1010 step[1] = input[1] + input[(32 - 2)];	998 step[1] = input[1] + input[(32 - 2)];

1011 step[2] = input[2] + input[(32 - 3)];	999 step[2] = input[2] + input[(32 - 3)];

1012 step[3] = input[3] + input[(32 - 4)];	1000 step[3] = input[3] + input[(32 - 4)];

1013 step[4] = input[4] + input[(32 - 5)];	1001 step[4] = input[4] + input[(32 - 5)];

1014 step[5] = input[5] + input[(32 - 6)];	1002 step[5] = input[5] + input[(32 - 6)];

1015 step[6] = input[6] + input[(32 - 7)];	1003 step[6] = input[6] + input[(32 - 7)];

1016 step[7] = input[7] + input[(32 - 8)];	1004 step[7] = input[7] + input[(32 - 8)];

(...skipping 302 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1319 output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64);	1307 output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64);

1320 output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64);	1308 output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64);

1321 output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64);	1309 output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64);

1322 output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64);	1310 output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64);

1323 output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64);	1311 output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64);

1324 output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64);	1312 output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64);

1325 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);	1313 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);

1326 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);	1314 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);

1327 }	1315 }

1328	1316

1329 void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {	1317 void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {

1330 int shortpitch = pitch >> 1;

1331 int i, j;	1318 int i, j;

1332 int output[32 * 32];	1319 int output[32 * 32];

1333	1320

1334 // Columns	1321 // Columns

1335 for (i = 0; i < 32; ++i) {	1322 for (i = 0; i < 32; ++i) {

1336 int temp_in[32], temp_out[32];	1323 int temp_in[32], temp_out[32];

1337 for (j = 0; j < 32; ++j)	1324 for (j = 0; j < 32; ++j)

1338 temp_in[j] = input[j * shortpitch + i] << 2;	1325 temp_in[j] = input[j * stride + i] * 4;

1339 dct32_1d(temp_in, temp_out, 0);	1326 dct32_1d(temp_in, temp_out, 0);

1340 for (j = 0; j < 32; ++j)	1327 for (j = 0; j < 32; ++j)

1341 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;	1328 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;

1342 }	1329 }

1343	1330

1344 // Rows	1331 // Rows

1345 for (i = 0; i < 32; ++i) {	1332 for (i = 0; i < 32; ++i) {

1346 int temp_in[32], temp_out[32];	1333 int temp_in[32], temp_out[32];

1347 for (j = 0; j < 32; ++j)	1334 for (j = 0; j < 32; ++j)

1348 temp_in[j] = output[j + i * 32];	1335 temp_in[j] = output[j + i * 32];

1349 dct32_1d(temp_in, temp_out, 0);	1336 dct32_1d(temp_in, temp_out, 0);

1350 for (j = 0; j < 32; ++j)	1337 for (j = 0; j < 32; ++j)

1351 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;	1338 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;

1352 }	1339 }

1353 }	1340 }

1354	1341

1355 // Note that although we use dct_32_round in dct32_1d computation flow,	1342 // Note that although we use dct_32_round in dct32_1d computation flow,

1356 // this 2d fdct32x32 for rate-distortion optimization loop is operating	1343 // this 2d fdct32x32 for rate-distortion optimization loop is operating

1357 // within 16 bits precision.	1344 // within 16 bits precision.

1358 void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) {	1345 void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {

1359 int shortpitch = pitch >> 1;

1360 int i, j;	1346 int i, j;

1361 int output[32 * 32];	1347 int output[32 * 32];

1362	1348

1363 // Columns	1349 // Columns

1364 for (i = 0; i < 32; ++i) {	1350 for (i = 0; i < 32; ++i) {

1365 int temp_in[32], temp_out[32];	1351 int temp_in[32], temp_out[32];

1366 for (j = 0; j < 32; ++j)	1352 for (j = 0; j < 32; ++j)

1367 temp_in[j] = input[j * shortpitch + i] << 2;	1353 temp_in[j] = input[j * stride + i] * 4;

1368 dct32_1d(temp_in, temp_out, 0);	1354 dct32_1d(temp_in, temp_out, 0);

1369 for (j = 0; j < 32; ++j)	1355 for (j = 0; j < 32; ++j)

1370 // TODO(cd): see quality impact of only doing	1356 // TODO(cd): see quality impact of only doing

1371 // output[j * 32 + i] = (temp_out[j] + 1) >> 2;	1357 // output[j * 32 + i] = (temp_out[j] + 1) >> 2;

1372 // PS: also change code in vp9/encoder/x86/vp9_dct_sse2.c	1358 // PS: also change code in vp9/encoder/x86/vp9_dct_sse2.c

1373 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;	1359 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;

1374 }	1360 }

1375	1361

1376 // Rows	1362 // Rows

1377 for (i = 0; i < 32; ++i) {	1363 for (i = 0; i < 32; ++i) {

1378 int temp_in[32], temp_out[32];	1364 int temp_in[32], temp_out[32];

1379 for (j = 0; j < 32; ++j)	1365 for (j = 0; j < 32; ++j)

1380 temp_in[j] = output[j + i * 32];	1366 temp_in[j] = output[j + i * 32];

1381 dct32_1d(temp_in, temp_out, 1);	1367 dct32_1d(temp_in, temp_out, 1);

1382 for (j = 0; j < 32; ++j)	1368 for (j = 0; j < 32; ++j)

1383 out[j + i * 32] = temp_out[j];	1369 out[j + i * 32] = temp_out[j];

1384 }	1370 }

1385 }	1371 }

	1372

	1373 void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,

	1374 int stride) {

	1375 if (tx_type == DCT_DCT)

	1376 vp9_fdct4x4(input, output, stride);

	1377 else

	1378 vp9_short_fht4x4(input, output, stride, tx_type);

	1379 }

	1380

	1381 void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,

	1382 int stride) {

	1383 if (tx_type == DCT_DCT)

	1384 vp9_fdct8x8(input, output, stride);

	1385 else

	1386 vp9_short_fht8x8(input, output, stride, tx_type);

	1387 }

	1388

	1389 void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,

	1390 int stride) {

	1391 if (tx_type == DCT_DCT)

	1392 vp9_fdct16x16(input, output, stride);

	1393 else

	1394 vp9_short_fht16x16(input, output, stride, tx_type);

	1395 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/encoder/vp9_dct.h ('k') | source/libvpx/vp9/encoder/vp9_encodeframe.h » ('j') | no next file with comments »