| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <math.h> | 11 #include <math.h> |
| 12 #include <string.h> | 12 #include <string.h> |
| 13 | 13 |
| 14 #include "vpx_dsp/inv_txfm.h" | 14 #include "vpx_dsp/inv_txfm.h" |
| 15 | 15 |
| 16 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 16 void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 17 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 17 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
| 18 0.5 shifts per pixel. */ | 18 0.5 shifts per pixel. */ |
| 19 int i; | 19 int i; |
| 20 tran_low_t output[16]; | 20 tran_low_t output[16]; |
| 21 tran_high_t a1, b1, c1, d1, e1; | 21 tran_high_t a1, b1, c1, d1, e1; |
| 22 const tran_low_t *ip = input; | 22 const tran_low_t *ip = input; |
| 23 tran_low_t *op = output; | 23 tran_low_t *op = output; |
| 24 | 24 |
| 25 for (i = 0; i < 4; i++) { | 25 for (i = 0; i < 4; i++) { |
| 26 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 26 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 58 dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); | 58 dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); |
| 59 dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1); | 59 dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1); |
| 60 dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1); | 60 dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1); |
| 61 dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1); | 61 dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1); |
| 62 | 62 |
| 63 ip++; | 63 ip++; |
| 64 dest++; | 64 dest++; |
| 65 } | 65 } |
| 66 } | 66 } |
| 67 | 67 |
| 68 void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { | 68 void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { |
| 69 int i; | 69 int i; |
| 70 tran_high_t a1, e1; | 70 tran_high_t a1, e1; |
| 71 tran_low_t tmp[4]; | 71 tran_low_t tmp[4]; |
| 72 const tran_low_t *ip = in; | 72 const tran_low_t *ip = in; |
| 73 tran_low_t *op = tmp; | 73 tran_low_t *op = tmp; |
| 74 | 74 |
| 75 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 75 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
| 76 e1 = a1 >> 1; | 76 e1 = a1 >> 1; |
| 77 a1 -= e1; | 77 a1 -= e1; |
| 78 op[0] = WRAPLOW(a1, 8); | 78 op[0] = WRAPLOW(a1, 8); |
| (...skipping 25 matching lines...) Expand all Loading... |
| 104 step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); | 104 step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); |
| 105 step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); | 105 step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); |
| 106 | 106 |
| 107 // stage 2 | 107 // stage 2 |
| 108 output[0] = WRAPLOW(step[0] + step[3], 8); | 108 output[0] = WRAPLOW(step[0] + step[3], 8); |
| 109 output[1] = WRAPLOW(step[1] + step[2], 8); | 109 output[1] = WRAPLOW(step[1] + step[2], 8); |
| 110 output[2] = WRAPLOW(step[1] - step[2], 8); | 110 output[2] = WRAPLOW(step[1] - step[2], 8); |
| 111 output[3] = WRAPLOW(step[0] - step[3], 8); | 111 output[3] = WRAPLOW(step[0] - step[3], 8); |
| 112 } | 112 } |
| 113 | 113 |
| 114 void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 114 void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 115 tran_low_t out[4 * 4]; | 115 tran_low_t out[4 * 4]; |
| 116 tran_low_t *outptr = out; | 116 tran_low_t *outptr = out; |
| 117 int i, j; | 117 int i, j; |
| 118 tran_low_t temp_in[4], temp_out[4]; | 118 tran_low_t temp_in[4], temp_out[4]; |
| 119 | 119 |
| 120 // Rows | 120 // Rows |
| 121 for (i = 0; i < 4; ++i) { | 121 for (i = 0; i < 4; ++i) { |
| 122 idct4_c(input, outptr); | 122 idct4_c(input, outptr); |
| 123 input += 4; | 123 input += 4; |
| 124 outptr += 4; | 124 outptr += 4; |
| 125 } | 125 } |
| 126 | 126 |
| 127 // Columns | 127 // Columns |
| 128 for (i = 0; i < 4; ++i) { | 128 for (i = 0; i < 4; ++i) { |
| 129 for (j = 0; j < 4; ++j) | 129 for (j = 0; j < 4; ++j) |
| 130 temp_in[j] = out[j * 4 + i]; | 130 temp_in[j] = out[j * 4 + i]; |
| 131 idct4_c(temp_in, temp_out); | 131 idct4_c(temp_in, temp_out); |
| 132 for (j = 0; j < 4; ++j) { | 132 for (j = 0; j < 4; ++j) { |
| 133 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 133 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 134 ROUND_POWER_OF_TWO(temp_out[j], 4)); | 134 ROUND_POWER_OF_TWO(temp_out[j], 4)); |
| 135 } | 135 } |
| 136 } | 136 } |
| 137 } | 137 } |
| 138 | 138 |
| 139 void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, | 139 void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, |
| 140 int dest_stride) { | 140 int dest_stride) { |
| 141 int i; | 141 int i; |
| 142 tran_high_t a1; | 142 tran_high_t a1; |
| 143 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 143 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
| 144 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 144 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
| 145 a1 = ROUND_POWER_OF_TWO(out, 4); | 145 a1 = ROUND_POWER_OF_TWO(out, 4); |
| 146 | 146 |
| 147 for (i = 0; i < 4; i++) { | 147 for (i = 0; i < 4; i++) { |
| 148 dest[0] = clip_pixel_add(dest[0], a1); | 148 dest[0] = clip_pixel_add(dest[0], a1); |
| 149 dest[1] = clip_pixel_add(dest[1], a1); | 149 dest[1] = clip_pixel_add(dest[1], a1); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 191 output[0] = WRAPLOW(step1[0] + step1[7], 8); | 191 output[0] = WRAPLOW(step1[0] + step1[7], 8); |
| 192 output[1] = WRAPLOW(step1[1] + step1[6], 8); | 192 output[1] = WRAPLOW(step1[1] + step1[6], 8); |
| 193 output[2] = WRAPLOW(step1[2] + step1[5], 8); | 193 output[2] = WRAPLOW(step1[2] + step1[5], 8); |
| 194 output[3] = WRAPLOW(step1[3] + step1[4], 8); | 194 output[3] = WRAPLOW(step1[3] + step1[4], 8); |
| 195 output[4] = WRAPLOW(step1[3] - step1[4], 8); | 195 output[4] = WRAPLOW(step1[3] - step1[4], 8); |
| 196 output[5] = WRAPLOW(step1[2] - step1[5], 8); | 196 output[5] = WRAPLOW(step1[2] - step1[5], 8); |
| 197 output[6] = WRAPLOW(step1[1] - step1[6], 8); | 197 output[6] = WRAPLOW(step1[1] - step1[6], 8); |
| 198 output[7] = WRAPLOW(step1[0] - step1[7], 8); | 198 output[7] = WRAPLOW(step1[0] - step1[7], 8); |
| 199 } | 199 } |
| 200 | 200 |
| 201 void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 201 void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 202 tran_low_t out[8 * 8]; | 202 tran_low_t out[8 * 8]; |
| 203 tran_low_t *outptr = out; | 203 tran_low_t *outptr = out; |
| 204 int i, j; | 204 int i, j; |
| 205 tran_low_t temp_in[8], temp_out[8]; | 205 tran_low_t temp_in[8], temp_out[8]; |
| 206 | 206 |
| 207 // First transform rows | 207 // First transform rows |
| 208 for (i = 0; i < 8; ++i) { | 208 for (i = 0; i < 8; ++i) { |
| 209 idct8_c(input, outptr); | 209 idct8_c(input, outptr); |
| 210 input += 8; | 210 input += 8; |
| 211 outptr += 8; | 211 outptr += 8; |
| 212 } | 212 } |
| 213 | 213 |
| 214 // Then transform columns | 214 // Then transform columns |
| 215 for (i = 0; i < 8; ++i) { | 215 for (i = 0; i < 8; ++i) { |
| 216 for (j = 0; j < 8; ++j) | 216 for (j = 0; j < 8; ++j) |
| 217 temp_in[j] = out[j * 8 + i]; | 217 temp_in[j] = out[j * 8 + i]; |
| 218 idct8_c(temp_in, temp_out); | 218 idct8_c(temp_in, temp_out); |
| 219 for (j = 0; j < 8; ++j) { | 219 for (j = 0; j < 8; ++j) { |
| 220 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 220 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 221 ROUND_POWER_OF_TWO(temp_out[j], 5)); | 221 ROUND_POWER_OF_TWO(temp_out[j], 5)); |
| 222 } | 222 } |
| 223 } | 223 } |
| 224 } | 224 } |
| 225 | 225 |
| 226 void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 226 void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 227 int i, j; | 227 int i, j; |
| 228 tran_high_t a1; | 228 tran_high_t a1; |
| 229 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 229 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
| 230 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 230 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
| 231 a1 = ROUND_POWER_OF_TWO(out, 5); | 231 a1 = ROUND_POWER_OF_TWO(out, 5); |
| 232 for (j = 0; j < 8; ++j) { | 232 for (j = 0; j < 8; ++j) { |
| 233 for (i = 0; i < 8; ++i) | 233 for (i = 0; i < 8; ++i) |
| 234 dest[i] = clip_pixel_add(dest[i], a1); | 234 dest[i] = clip_pixel_add(dest[i], a1); |
| 235 dest += stride; | 235 dest += stride; |
| 236 } | 236 } |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 343 output[0] = WRAPLOW(x0, 8); | 343 output[0] = WRAPLOW(x0, 8); |
| 344 output[1] = WRAPLOW(-x4, 8); | 344 output[1] = WRAPLOW(-x4, 8); |
| 345 output[2] = WRAPLOW(x6, 8); | 345 output[2] = WRAPLOW(x6, 8); |
| 346 output[3] = WRAPLOW(-x2, 8); | 346 output[3] = WRAPLOW(-x2, 8); |
| 347 output[4] = WRAPLOW(x3, 8); | 347 output[4] = WRAPLOW(x3, 8); |
| 348 output[5] = WRAPLOW(-x7, 8); | 348 output[5] = WRAPLOW(-x7, 8); |
| 349 output[6] = WRAPLOW(x5, 8); | 349 output[6] = WRAPLOW(x5, 8); |
| 350 output[7] = WRAPLOW(-x1, 8); | 350 output[7] = WRAPLOW(-x1, 8); |
| 351 } | 351 } |
| 352 | 352 |
| 353 void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 353 void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 354 tran_low_t out[8 * 8] = { 0 }; | 354 tran_low_t out[8 * 8] = { 0 }; |
| 355 tran_low_t *outptr = out; | 355 tran_low_t *outptr = out; |
| 356 int i, j; | 356 int i, j; |
| 357 tran_low_t temp_in[8], temp_out[8]; | 357 tran_low_t temp_in[8], temp_out[8]; |
| 358 | 358 |
| 359 // First transform rows | 359 // First transform rows |
| 360 // only first 4 row has non-zero coefs | 360 // only first 4 row has non-zero coefs |
| 361 for (i = 0; i < 4; ++i) { | 361 for (i = 0; i < 4; ++i) { |
| 362 idct8_c(input, outptr); | 362 idct8_c(input, outptr); |
| 363 input += 8; | 363 input += 8; |
| (...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 534 output[8] = WRAPLOW(step2[7] - step2[8], 8); | 534 output[8] = WRAPLOW(step2[7] - step2[8], 8); |
| 535 output[9] = WRAPLOW(step2[6] - step2[9], 8); | 535 output[9] = WRAPLOW(step2[6] - step2[9], 8); |
| 536 output[10] = WRAPLOW(step2[5] - step2[10], 8); | 536 output[10] = WRAPLOW(step2[5] - step2[10], 8); |
| 537 output[11] = WRAPLOW(step2[4] - step2[11], 8); | 537 output[11] = WRAPLOW(step2[4] - step2[11], 8); |
| 538 output[12] = WRAPLOW(step2[3] - step2[12], 8); | 538 output[12] = WRAPLOW(step2[3] - step2[12], 8); |
| 539 output[13] = WRAPLOW(step2[2] - step2[13], 8); | 539 output[13] = WRAPLOW(step2[2] - step2[13], 8); |
| 540 output[14] = WRAPLOW(step2[1] - step2[14], 8); | 540 output[14] = WRAPLOW(step2[1] - step2[14], 8); |
| 541 output[15] = WRAPLOW(step2[0] - step2[15], 8); | 541 output[15] = WRAPLOW(step2[0] - step2[15], 8); |
| 542 } | 542 } |
| 543 | 543 |
| 544 void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, | 544 void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, |
| 545 int stride) { | 545 int stride) { |
| 546 tran_low_t out[16 * 16]; | 546 tran_low_t out[16 * 16]; |
| 547 tran_low_t *outptr = out; | 547 tran_low_t *outptr = out; |
| 548 int i, j; | 548 int i, j; |
| 549 tran_low_t temp_in[16], temp_out[16]; | 549 tran_low_t temp_in[16], temp_out[16]; |
| 550 | 550 |
| 551 // First transform rows | 551 // First transform rows |
| 552 for (i = 0; i < 16; ++i) { | 552 for (i = 0; i < 16; ++i) { |
| 553 idct16_c(input, outptr); | 553 idct16_c(input, outptr); |
| 554 input += 16; | 554 input += 16; |
| (...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 732 output[8] = WRAPLOW(x3, 8); | 732 output[8] = WRAPLOW(x3, 8); |
| 733 output[9] = WRAPLOW(x11, 8); | 733 output[9] = WRAPLOW(x11, 8); |
| 734 output[10] = WRAPLOW(x15, 8); | 734 output[10] = WRAPLOW(x15, 8); |
| 735 output[11] = WRAPLOW(x7, 8); | 735 output[11] = WRAPLOW(x7, 8); |
| 736 output[12] = WRAPLOW(x5, 8); | 736 output[12] = WRAPLOW(x5, 8); |
| 737 output[13] = WRAPLOW(-x13, 8); | 737 output[13] = WRAPLOW(-x13, 8); |
| 738 output[14] = WRAPLOW(x9, 8); | 738 output[14] = WRAPLOW(x9, 8); |
| 739 output[15] = WRAPLOW(-x1, 8); | 739 output[15] = WRAPLOW(-x1, 8); |
| 740 } | 740 } |
| 741 | 741 |
| 742 void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, | 742 void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, |
| 743 int stride) { | 743 int stride) { |
| 744 tran_low_t out[16 * 16] = { 0 }; | 744 tran_low_t out[16 * 16] = { 0 }; |
| 745 tran_low_t *outptr = out; | 745 tran_low_t *outptr = out; |
| 746 int i, j; | 746 int i, j; |
| 747 tran_low_t temp_in[16], temp_out[16]; | 747 tran_low_t temp_in[16], temp_out[16]; |
| 748 | 748 |
| 749 // First transform rows. Since all non-zero dct coefficients are in | 749 // First transform rows. Since all non-zero dct coefficients are in |
| 750 // upper-left 4x4 area, we only need to calculate first 4 rows here. | 750 // upper-left 4x4 area, we only need to calculate first 4 rows here. |
| 751 for (i = 0; i < 4; ++i) { | 751 for (i = 0; i < 4; ++i) { |
| 752 idct16_c(input, outptr); | 752 idct16_c(input, outptr); |
| 753 input += 16; | 753 input += 16; |
| 754 outptr += 16; | 754 outptr += 16; |
| 755 } | 755 } |
| 756 | 756 |
| 757 // Then transform columns | 757 // Then transform columns |
| 758 for (i = 0; i < 16; ++i) { | 758 for (i = 0; i < 16; ++i) { |
| 759 for (j = 0; j < 16; ++j) | 759 for (j = 0; j < 16; ++j) |
| 760 temp_in[j] = out[j*16 + i]; | 760 temp_in[j] = out[j*16 + i]; |
| 761 idct16_c(temp_in, temp_out); | 761 idct16_c(temp_in, temp_out); |
| 762 for (j = 0; j < 16; ++j) { | 762 for (j = 0; j < 16; ++j) { |
| 763 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 763 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 764 ROUND_POWER_OF_TWO(temp_out[j], 6)); | 764 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 765 } | 765 } |
| 766 } | 766 } |
| 767 } | 767 } |
| 768 | 768 |
| 769 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 769 void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 770 int i, j; | 770 int i, j; |
| 771 tran_high_t a1; | 771 tran_high_t a1; |
| 772 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 772 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
| 773 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 773 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
| 774 a1 = ROUND_POWER_OF_TWO(out, 6); | 774 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 775 for (j = 0; j < 16; ++j) { | 775 for (j = 0; j < 16; ++j) { |
| 776 for (i = 0; i < 16; ++i) | 776 for (i = 0; i < 16; ++i) |
| 777 dest[i] = clip_pixel_add(dest[i], a1); | 777 dest[i] = clip_pixel_add(dest[i], a1); |
| 778 dest += stride; | 778 dest += stride; |
| 779 } | 779 } |
| (...skipping 359 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1139 output[24] = WRAPLOW(step1[7] - step1[24], 8); | 1139 output[24] = WRAPLOW(step1[7] - step1[24], 8); |
| 1140 output[25] = WRAPLOW(step1[6] - step1[25], 8); | 1140 output[25] = WRAPLOW(step1[6] - step1[25], 8); |
| 1141 output[26] = WRAPLOW(step1[5] - step1[26], 8); | 1141 output[26] = WRAPLOW(step1[5] - step1[26], 8); |
| 1142 output[27] = WRAPLOW(step1[4] - step1[27], 8); | 1142 output[27] = WRAPLOW(step1[4] - step1[27], 8); |
| 1143 output[28] = WRAPLOW(step1[3] - step1[28], 8); | 1143 output[28] = WRAPLOW(step1[3] - step1[28], 8); |
| 1144 output[29] = WRAPLOW(step1[2] - step1[29], 8); | 1144 output[29] = WRAPLOW(step1[2] - step1[29], 8); |
| 1145 output[30] = WRAPLOW(step1[1] - step1[30], 8); | 1145 output[30] = WRAPLOW(step1[1] - step1[30], 8); |
| 1146 output[31] = WRAPLOW(step1[0] - step1[31], 8); | 1146 output[31] = WRAPLOW(step1[0] - step1[31], 8); |
| 1147 } | 1147 } |
| 1148 | 1148 |
| 1149 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, | 1149 void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, |
| 1150 int stride) { | 1150 int stride) { |
| 1151 tran_low_t out[32 * 32]; | 1151 tran_low_t out[32 * 32]; |
| 1152 tran_low_t *outptr = out; | 1152 tran_low_t *outptr = out; |
| 1153 int i, j; | 1153 int i, j; |
| 1154 tran_low_t temp_in[32], temp_out[32]; | 1154 tran_low_t temp_in[32], temp_out[32]; |
| 1155 | 1155 |
| 1156 // Rows | 1156 // Rows |
| 1157 for (i = 0; i < 32; ++i) { | 1157 for (i = 0; i < 32; ++i) { |
| 1158 int16_t zero_coeff[16]; | 1158 int16_t zero_coeff[16]; |
| 1159 for (j = 0; j < 16; ++j) | 1159 for (j = 0; j < 16; ++j) |
| (...skipping 18 matching lines...) Expand all Loading... |
| 1178 for (j = 0; j < 32; ++j) | 1178 for (j = 0; j < 32; ++j) |
| 1179 temp_in[j] = out[j * 32 + i]; | 1179 temp_in[j] = out[j * 32 + i]; |
| 1180 idct32_c(temp_in, temp_out); | 1180 idct32_c(temp_in, temp_out); |
| 1181 for (j = 0; j < 32; ++j) { | 1181 for (j = 0; j < 32; ++j) { |
| 1182 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 1182 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 1183 ROUND_POWER_OF_TWO(temp_out[j], 6)); | 1183 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 1184 } | 1184 } |
| 1185 } | 1185 } |
| 1186 } | 1186 } |
| 1187 | 1187 |
| 1188 void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, | 1188 void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, |
| 1189 int stride) { | 1189 int stride) { |
| 1190 tran_low_t out[32 * 32] = {0}; | 1190 tran_low_t out[32 * 32] = {0}; |
| 1191 tran_low_t *outptr = out; | 1191 tran_low_t *outptr = out; |
| 1192 int i, j; | 1192 int i, j; |
| 1193 tran_low_t temp_in[32], temp_out[32]; | 1193 tran_low_t temp_in[32], temp_out[32]; |
| 1194 | 1194 |
| 1195 // Rows | 1195 // Rows |
| 1196 // only upper-left 8x8 has non-zero coeff | 1196 // only upper-left 8x8 has non-zero coeff |
| 1197 for (i = 0; i < 8; ++i) { | 1197 for (i = 0; i < 8; ++i) { |
| 1198 idct32_c(input, outptr); | 1198 idct32_c(input, outptr); |
| 1199 input += 32; | 1199 input += 32; |
| 1200 outptr += 32; | 1200 outptr += 32; |
| 1201 } | 1201 } |
| 1202 | 1202 |
| 1203 // Columns | 1203 // Columns |
| 1204 for (i = 0; i < 32; ++i) { | 1204 for (i = 0; i < 32; ++i) { |
| 1205 for (j = 0; j < 32; ++j) | 1205 for (j = 0; j < 32; ++j) |
| 1206 temp_in[j] = out[j * 32 + i]; | 1206 temp_in[j] = out[j * 32 + i]; |
| 1207 idct32_c(temp_in, temp_out); | 1207 idct32_c(temp_in, temp_out); |
| 1208 for (j = 0; j < 32; ++j) { | 1208 for (j = 0; j < 32; ++j) { |
| 1209 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 1209 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 1210 ROUND_POWER_OF_TWO(temp_out[j], 6)); | 1210 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 1211 } | 1211 } |
| 1212 } | 1212 } |
| 1213 } | 1213 } |
| 1214 | 1214 |
| 1215 void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 1215 void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 1216 int i, j; | 1216 int i, j; |
| 1217 tran_high_t a1; | 1217 tran_high_t a1; |
| 1218 | 1218 |
| 1219 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 1219 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
| 1220 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 1220 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
| 1221 a1 = ROUND_POWER_OF_TWO(out, 6); | 1221 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 1222 | 1222 |
| 1223 for (j = 0; j < 32; ++j) { | 1223 for (j = 0; j < 32; ++j) { |
| 1224 for (i = 0; i < 32; ++i) | 1224 for (i = 0; i < 32; ++i) |
| 1225 dest[i] = clip_pixel_add(dest[i], a1); | 1225 dest[i] = clip_pixel_add(dest[i], a1); |
| 1226 dest += stride; | 1226 dest += stride; |
| 1227 } | 1227 } |
| 1228 } | 1228 } |
| 1229 | 1229 |
| 1230 #if CONFIG_VP9_HIGHBITDEPTH | 1230 #if CONFIG_VP9_HIGHBITDEPTH |
| 1231 void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1231 void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1232 int stride, int bd) { | 1232 int stride, int bd) { |
| 1233 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 1233 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
| 1234 0.5 shifts per pixel. */ | 1234 0.5 shifts per pixel. */ |
| 1235 int i; | 1235 int i; |
| 1236 tran_low_t output[16]; | 1236 tran_low_t output[16]; |
| 1237 tran_high_t a1, b1, c1, d1, e1; | 1237 tran_high_t a1, b1, c1, d1, e1; |
| 1238 const tran_low_t *ip = input; | 1238 const tran_low_t *ip = input; |
| 1239 tran_low_t *op = output; | 1239 tran_low_t *op = output; |
| 1240 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1240 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1241 | 1241 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1275 dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); | 1275 dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); |
| 1276 dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); | 1276 dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); |
| 1277 dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); | 1277 dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); |
| 1278 dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); | 1278 dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); |
| 1279 | 1279 |
| 1280 ip++; | 1280 ip++; |
| 1281 dest++; | 1281 dest++; |
| 1282 } | 1282 } |
| 1283 } | 1283 } |
| 1284 | 1284 |
| 1285 void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, | 1285 void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, |
| 1286 int dest_stride, int bd) { | 1286 int dest_stride, int bd) { |
| 1287 int i; | 1287 int i; |
| 1288 tran_high_t a1, e1; | 1288 tran_high_t a1, e1; |
| 1289 tran_low_t tmp[4]; | 1289 tran_low_t tmp[4]; |
| 1290 const tran_low_t *ip = in; | 1290 const tran_low_t *ip = in; |
| 1291 tran_low_t *op = tmp; | 1291 tran_low_t *op = tmp; |
| 1292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1293 (void) bd; | 1293 (void) bd; |
| 1294 | 1294 |
| 1295 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 1295 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
| (...skipping 12 matching lines...) Expand all Loading... |
| 1308 dest[dest_stride * 1], e1, bd); | 1308 dest[dest_stride * 1], e1, bd); |
| 1309 dest[dest_stride * 2] = highbd_clip_pixel_add( | 1309 dest[dest_stride * 2] = highbd_clip_pixel_add( |
| 1310 dest[dest_stride * 2], e1, bd); | 1310 dest[dest_stride * 2], e1, bd); |
| 1311 dest[dest_stride * 3] = highbd_clip_pixel_add( | 1311 dest[dest_stride * 3] = highbd_clip_pixel_add( |
| 1312 dest[dest_stride * 3], e1, bd); | 1312 dest[dest_stride * 3], e1, bd); |
| 1313 ip++; | 1313 ip++; |
| 1314 dest++; | 1314 dest++; |
| 1315 } | 1315 } |
| 1316 } | 1316 } |
| 1317 | 1317 |
// High-bitdepth 1-D 4-point inverse DCT. Reads 4 coefficients from |input|
// and writes 4 reconstructed values to |output|; |bd| is the bit depth used
// for intermediate clamping inside WRAPLOW/highbd_dct_const_round_shift.
void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t step[4];
  tran_high_t temp1, temp2;
  (void) bd;  // NOTE(review): redundant — bd is used below; harmless to keep.
  // stage 1: butterflies on the even (0,2) and odd (1,3) coefficients.
  temp1 = (input[0] + input[2]) * cospi_16_64;
  temp2 = (input[0] - input[2]) * cospi_16_64;
  step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
  temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
  temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
  step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);

  // stage 2: recombine the stage-1 results into the output samples.
  output[0] = WRAPLOW(step[0] + step[3], bd);
  output[1] = WRAPLOW(step[1] + step[2], bd);
  output[2] = WRAPLOW(step[1] - step[2], bd);
  output[3] = WRAPLOW(step[0] - step[3], bd);
}
| 1338 | 1338 |
| 1339 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1339 void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1340 int stride, int bd) { | 1340 int stride, int bd) { |
| 1341 tran_low_t out[4 * 4]; | 1341 tran_low_t out[4 * 4]; |
| 1342 tran_low_t *outptr = out; | 1342 tran_low_t *outptr = out; |
| 1343 int i, j; | 1343 int i, j; |
| 1344 tran_low_t temp_in[4], temp_out[4]; | 1344 tran_low_t temp_in[4], temp_out[4]; |
| 1345 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1345 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1346 | 1346 |
| 1347 // Rows | 1347 // Rows |
| 1348 for (i = 0; i < 4; ++i) { | 1348 for (i = 0; i < 4; ++i) { |
| 1349 vp9_highbd_idct4_c(input, outptr, bd); | 1349 vpx_highbd_idct4_c(input, outptr, bd); |
| 1350 input += 4; | 1350 input += 4; |
| 1351 outptr += 4; | 1351 outptr += 4; |
| 1352 } | 1352 } |
| 1353 | 1353 |
| 1354 // Columns | 1354 // Columns |
| 1355 for (i = 0; i < 4; ++i) { | 1355 for (i = 0; i < 4; ++i) { |
| 1356 for (j = 0; j < 4; ++j) | 1356 for (j = 0; j < 4; ++j) |
| 1357 temp_in[j] = out[j * 4 + i]; | 1357 temp_in[j] = out[j * 4 + i]; |
| 1358 vp9_highbd_idct4_c(temp_in, temp_out, bd); | 1358 vpx_highbd_idct4_c(temp_in, temp_out, bd); |
| 1359 for (j = 0; j < 4; ++j) { | 1359 for (j = 0; j < 4; ++j) { |
| 1360 dest[j * stride + i] = highbd_clip_pixel_add( | 1360 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1361 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); | 1361 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
| 1362 } | 1362 } |
| 1363 } | 1363 } |
| 1364 } | 1364 } |
| 1365 | 1365 |
| 1366 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1366 void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1367 int dest_stride, int bd) { | 1367 int dest_stride, int bd) { |
| 1368 int i; | 1368 int i; |
| 1369 tran_high_t a1; | 1369 tran_high_t a1; |
| 1370 tran_low_t out = WRAPLOW( | 1370 tran_low_t out = WRAPLOW( |
| 1371 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 1371 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 1372 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1372 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1373 | 1373 |
| 1374 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 1374 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 1375 a1 = ROUND_POWER_OF_TWO(out, 4); | 1375 a1 = ROUND_POWER_OF_TWO(out, 4); |
| 1376 | 1376 |
| 1377 for (i = 0; i < 4; i++) { | 1377 for (i = 0; i < 4; i++) { |
| 1378 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); | 1378 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); |
| 1379 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); | 1379 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); |
| 1380 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); | 1380 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); |
| 1381 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); | 1381 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); |
| 1382 dest += dest_stride; | 1382 dest += dest_stride; |
| 1383 } | 1383 } |
| 1384 } | 1384 } |
| 1385 | 1385 |
// High-bitdepth 1-D 8-point inverse DCT. Reads 8 coefficients from |input|
// and writes 8 reconstructed values to |output|; |bd| is the bit depth used
// for intermediate clamping. The even half reuses the 4-point transform; the
// odd half is computed with explicit butterfly stages.
void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t step1[8], step2[8];
  tran_high_t temp1, temp2;
  // stage 1: reorder the even coefficients for the idct4 call below and
  // rotate the odd coefficients (1,7) and (5,3).
  step1[0] = input[0];
  step1[2] = input[4];
  step1[1] = input[2];
  step1[3] = input[6];
  temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
  temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
  step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
  temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
  temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
  step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);

  // stage 2 & stage 3 - even half
  // In-place 4-point transform on step1[0..3].
  vpx_highbd_idct4_c(step1, step1, bd);

  // stage 2 - odd half: sum/difference butterflies on step1[4..7].
  step2[4] = WRAPLOW(step1[4] + step1[5], bd);
  step2[5] = WRAPLOW(step1[4] - step1[5], bd);
  step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
  step2[7] = WRAPLOW(step1[6] + step1[7], bd);

  // stage 3 - odd half: rotate the middle pair (5,6) by cospi_16_64.
  step1[4] = step2[4];
  temp1 = (step2[6] - step2[5]) * cospi_16_64;
  temp2 = (step2[5] + step2[6]) * cospi_16_64;
  step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
  step1[7] = step2[7];

  // stage 4: combine even (0..3) and odd (4..7) halves into the output.
  output[0] = WRAPLOW(step1[0] + step1[7], bd);
  output[1] = WRAPLOW(step1[1] + step1[6], bd);
  output[2] = WRAPLOW(step1[2] + step1[5], bd);
  output[3] = WRAPLOW(step1[3] + step1[4], bd);
  output[4] = WRAPLOW(step1[3] - step1[4], bd);
  output[5] = WRAPLOW(step1[2] - step1[5], bd);
  output[6] = WRAPLOW(step1[1] - step1[6], bd);
  output[7] = WRAPLOW(step1[0] - step1[7], bd);
}
| 1430 | 1430 |
| 1431 void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, | 1431 void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1432 int stride, int bd) { | 1432 int stride, int bd) { |
| 1433 tran_low_t out[8 * 8]; | 1433 tran_low_t out[8 * 8]; |
| 1434 tran_low_t *outptr = out; | 1434 tran_low_t *outptr = out; |
| 1435 int i, j; | 1435 int i, j; |
| 1436 tran_low_t temp_in[8], temp_out[8]; | 1436 tran_low_t temp_in[8], temp_out[8]; |
| 1437 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1437 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1438 | 1438 |
| 1439 // First transform rows. | 1439 // First transform rows. |
| 1440 for (i = 0; i < 8; ++i) { | 1440 for (i = 0; i < 8; ++i) { |
| 1441 vp9_highbd_idct8_c(input, outptr, bd); | 1441 vpx_highbd_idct8_c(input, outptr, bd); |
| 1442 input += 8; | 1442 input += 8; |
| 1443 outptr += 8; | 1443 outptr += 8; |
| 1444 } | 1444 } |
| 1445 | 1445 |
| 1446 // Then transform columns. | 1446 // Then transform columns. |
| 1447 for (i = 0; i < 8; ++i) { | 1447 for (i = 0; i < 8; ++i) { |
| 1448 for (j = 0; j < 8; ++j) | 1448 for (j = 0; j < 8; ++j) |
| 1449 temp_in[j] = out[j * 8 + i]; | 1449 temp_in[j] = out[j * 8 + i]; |
| 1450 vp9_highbd_idct8_c(temp_in, temp_out, bd); | 1450 vpx_highbd_idct8_c(temp_in, temp_out, bd); |
| 1451 for (j = 0; j < 8; ++j) { | 1451 for (j = 0; j < 8; ++j) { |
| 1452 dest[j * stride + i] = highbd_clip_pixel_add( | 1452 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1453 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1453 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
| 1454 } | 1454 } |
| 1455 } | 1455 } |
| 1456 } | 1456 } |
| 1457 | 1457 |
| 1458 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1458 void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1459 int stride, int bd) { | 1459 int stride, int bd) { |
| 1460 int i, j; | 1460 int i, j; |
| 1461 tran_high_t a1; | 1461 tran_high_t a1; |
| 1462 tran_low_t out = WRAPLOW( | 1462 tran_low_t out = WRAPLOW( |
| 1463 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 1463 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 1464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1465 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 1465 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 1466 a1 = ROUND_POWER_OF_TWO(out, 5); | 1466 a1 = ROUND_POWER_OF_TWO(out, 5); |
| 1467 for (j = 0; j < 8; ++j) { | 1467 for (j = 0; j < 8; ++j) { |
| 1468 for (i = 0; i < 8; ++i) | 1468 for (i = 0; i < 8; ++i) |
| 1469 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 1469 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 1470 dest += stride; | 1470 dest += stride; |
| 1471 } | 1471 } |
| 1472 } | 1472 } |
| 1473 | 1473 |
| 1474 void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1474 void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1475 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1475 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
| 1476 | 1476 |
| 1477 tran_low_t x0 = input[0]; | 1477 tran_low_t x0 = input[0]; |
| 1478 tran_low_t x1 = input[1]; | 1478 tran_low_t x1 = input[1]; |
| 1479 tran_low_t x2 = input[2]; | 1479 tran_low_t x2 = input[2]; |
| 1480 tran_low_t x3 = input[3]; | 1480 tran_low_t x3 = input[3]; |
| 1481 (void) bd; | 1481 (void) bd; |
| 1482 | 1482 |
| 1483 if (!(x0 | x1 | x2 | x3)) { | 1483 if (!(x0 | x1 | x2 | x3)) { |
| 1484 memset(output, 0, 4 * sizeof(*output)); | 1484 memset(output, 0, 4 * sizeof(*output)); |
| (...skipping 17 matching lines...) Expand all Loading... |
| 1502 // 1-D transform scaling factor is sqrt(2). | 1502 // 1-D transform scaling factor is sqrt(2). |
| 1503 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 1503 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
| 1504 // + 1b (addition) = 29b. | 1504 // + 1b (addition) = 29b. |
| 1505 // Hence the output bit depth is 15b. | 1505 // Hence the output bit depth is 15b. |
| 1506 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); | 1506 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); |
| 1507 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); | 1507 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); |
| 1508 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); | 1508 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); |
| 1509 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); | 1509 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); |
| 1510 } | 1510 } |
| 1511 | 1511 |
| 1512 void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1512 void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1513 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1513 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
| 1514 | 1514 |
| 1515 tran_low_t x0 = input[7]; | 1515 tran_low_t x0 = input[7]; |
| 1516 tran_low_t x1 = input[0]; | 1516 tran_low_t x1 = input[0]; |
| 1517 tran_low_t x2 = input[5]; | 1517 tran_low_t x2 = input[5]; |
| 1518 tran_low_t x3 = input[2]; | 1518 tran_low_t x3 = input[2]; |
| 1519 tran_low_t x4 = input[3]; | 1519 tran_low_t x4 = input[3]; |
| 1520 tran_low_t x5 = input[4]; | 1520 tran_low_t x5 = input[4]; |
| 1521 tran_low_t x6 = input[1]; | 1521 tran_low_t x6 = input[1]; |
| 1522 tran_low_t x7 = input[6]; | 1522 tran_low_t x7 = input[6]; |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1579 output[0] = WRAPLOW(x0, bd); | 1579 output[0] = WRAPLOW(x0, bd); |
| 1580 output[1] = WRAPLOW(-x4, bd); | 1580 output[1] = WRAPLOW(-x4, bd); |
| 1581 output[2] = WRAPLOW(x6, bd); | 1581 output[2] = WRAPLOW(x6, bd); |
| 1582 output[3] = WRAPLOW(-x2, bd); | 1582 output[3] = WRAPLOW(-x2, bd); |
| 1583 output[4] = WRAPLOW(x3, bd); | 1583 output[4] = WRAPLOW(x3, bd); |
| 1584 output[5] = WRAPLOW(-x7, bd); | 1584 output[5] = WRAPLOW(-x7, bd); |
| 1585 output[6] = WRAPLOW(x5, bd); | 1585 output[6] = WRAPLOW(x5, bd); |
| 1586 output[7] = WRAPLOW(-x1, bd); | 1586 output[7] = WRAPLOW(-x1, bd); |
| 1587 } | 1587 } |
| 1588 | 1588 |
| 1589 void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, | 1589 void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1590 int stride, int bd) { | 1590 int stride, int bd) { |
| 1591 tran_low_t out[8 * 8] = { 0 }; | 1591 tran_low_t out[8 * 8] = { 0 }; |
| 1592 tran_low_t *outptr = out; | 1592 tran_low_t *outptr = out; |
| 1593 int i, j; | 1593 int i, j; |
| 1594 tran_low_t temp_in[8], temp_out[8]; | 1594 tran_low_t temp_in[8], temp_out[8]; |
| 1595 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1595 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1596 | 1596 |
| 1597 // First transform rows. | 1597 // First transform rows. |
| 1598 // Only first 4 row has non-zero coefs. | 1598 // Only first 4 row has non-zero coefs. |
| 1599 for (i = 0; i < 4; ++i) { | 1599 for (i = 0; i < 4; ++i) { |
| 1600 vp9_highbd_idct8_c(input, outptr, bd); | 1600 vpx_highbd_idct8_c(input, outptr, bd); |
| 1601 input += 8; | 1601 input += 8; |
| 1602 outptr += 8; | 1602 outptr += 8; |
| 1603 } | 1603 } |
| 1604 // Then transform columns. | 1604 // Then transform columns. |
| 1605 for (i = 0; i < 8; ++i) { | 1605 for (i = 0; i < 8; ++i) { |
| 1606 for (j = 0; j < 8; ++j) | 1606 for (j = 0; j < 8; ++j) |
| 1607 temp_in[j] = out[j * 8 + i]; | 1607 temp_in[j] = out[j * 8 + i]; |
| 1608 vp9_highbd_idct8_c(temp_in, temp_out, bd); | 1608 vpx_highbd_idct8_c(temp_in, temp_out, bd); |
| 1609 for (j = 0; j < 8; ++j) { | 1609 for (j = 0; j < 8; ++j) { |
| 1610 dest[j * stride + i] = highbd_clip_pixel_add( | 1610 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1611 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1611 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
| 1612 } | 1612 } |
| 1613 } | 1613 } |
| 1614 } | 1614 } |
| 1615 | 1615 |
| 1616 void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1616 void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1617 tran_low_t step1[16], step2[16]; | 1617 tran_low_t step1[16], step2[16]; |
| 1618 tran_high_t temp1, temp2; | 1618 tran_high_t temp1, temp2; |
| 1619 (void) bd; | 1619 (void) bd; |
| 1620 | 1620 |
| 1621 // stage 1 | 1621 // stage 1 |
| 1622 step1[0] = input[0/2]; | 1622 step1[0] = input[0/2]; |
| 1623 step1[1] = input[16/2]; | 1623 step1[1] = input[16/2]; |
| 1624 step1[2] = input[8/2]; | 1624 step1[2] = input[8/2]; |
| 1625 step1[3] = input[24/2]; | 1625 step1[3] = input[24/2]; |
| 1626 step1[4] = input[4/2]; | 1626 step1[4] = input[4/2]; |
| (...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1772 output[8] = WRAPLOW(step2[7] - step2[8], bd); | 1772 output[8] = WRAPLOW(step2[7] - step2[8], bd); |
| 1773 output[9] = WRAPLOW(step2[6] - step2[9], bd); | 1773 output[9] = WRAPLOW(step2[6] - step2[9], bd); |
| 1774 output[10] = WRAPLOW(step2[5] - step2[10], bd); | 1774 output[10] = WRAPLOW(step2[5] - step2[10], bd); |
| 1775 output[11] = WRAPLOW(step2[4] - step2[11], bd); | 1775 output[11] = WRAPLOW(step2[4] - step2[11], bd); |
| 1776 output[12] = WRAPLOW(step2[3] - step2[12], bd); | 1776 output[12] = WRAPLOW(step2[3] - step2[12], bd); |
| 1777 output[13] = WRAPLOW(step2[2] - step2[13], bd); | 1777 output[13] = WRAPLOW(step2[2] - step2[13], bd); |
| 1778 output[14] = WRAPLOW(step2[1] - step2[14], bd); | 1778 output[14] = WRAPLOW(step2[1] - step2[14], bd); |
| 1779 output[15] = WRAPLOW(step2[0] - step2[15], bd); | 1779 output[15] = WRAPLOW(step2[0] - step2[15], bd); |
| 1780 } | 1780 } |
| 1781 | 1781 |
| 1782 void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, | 1782 void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1783 int stride, int bd) { | 1783 int stride, int bd) { |
| 1784 tran_low_t out[16 * 16]; | 1784 tran_low_t out[16 * 16]; |
| 1785 tran_low_t *outptr = out; | 1785 tran_low_t *outptr = out; |
| 1786 int i, j; | 1786 int i, j; |
| 1787 tran_low_t temp_in[16], temp_out[16]; | 1787 tran_low_t temp_in[16], temp_out[16]; |
| 1788 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1788 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1789 | 1789 |
| 1790 // First transform rows. | 1790 // First transform rows. |
| 1791 for (i = 0; i < 16; ++i) { | 1791 for (i = 0; i < 16; ++i) { |
| 1792 vp9_highbd_idct16_c(input, outptr, bd); | 1792 vpx_highbd_idct16_c(input, outptr, bd); |
| 1793 input += 16; | 1793 input += 16; |
| 1794 outptr += 16; | 1794 outptr += 16; |
| 1795 } | 1795 } |
| 1796 | 1796 |
| 1797 // Then transform columns. | 1797 // Then transform columns. |
| 1798 for (i = 0; i < 16; ++i) { | 1798 for (i = 0; i < 16; ++i) { |
| 1799 for (j = 0; j < 16; ++j) | 1799 for (j = 0; j < 16; ++j) |
| 1800 temp_in[j] = out[j * 16 + i]; | 1800 temp_in[j] = out[j * 16 + i]; |
| 1801 vp9_highbd_idct16_c(temp_in, temp_out, bd); | 1801 vpx_highbd_idct16_c(temp_in, temp_out, bd); |
| 1802 for (j = 0; j < 16; ++j) { | 1802 for (j = 0; j < 16; ++j) { |
| 1803 dest[j * stride + i] = highbd_clip_pixel_add( | 1803 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1804 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 1804 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 1805 } | 1805 } |
| 1806 } | 1806 } |
| 1807 } | 1807 } |
| 1808 | 1808 |
| 1809 void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1809 void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1810 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; | 1810 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
| 1811 tran_high_t s9, s10, s11, s12, s13, s14, s15; | 1811 tran_high_t s9, s10, s11, s12, s13, s14, s15; |
| 1812 | 1812 |
| 1813 tran_low_t x0 = input[15]; | 1813 tran_low_t x0 = input[15]; |
| 1814 tran_low_t x1 = input[0]; | 1814 tran_low_t x1 = input[0]; |
| 1815 tran_low_t x2 = input[13]; | 1815 tran_low_t x2 = input[13]; |
| 1816 tran_low_t x3 = input[2]; | 1816 tran_low_t x3 = input[2]; |
| 1817 tran_low_t x4 = input[11]; | 1817 tran_low_t x4 = input[11]; |
| 1818 tran_low_t x5 = input[4]; | 1818 tran_low_t x5 = input[4]; |
| 1819 tran_low_t x6 = input[9]; | 1819 tran_low_t x6 = input[9]; |
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1969 output[8] = WRAPLOW(x3, bd); | 1969 output[8] = WRAPLOW(x3, bd); |
| 1970 output[9] = WRAPLOW(x11, bd); | 1970 output[9] = WRAPLOW(x11, bd); |
| 1971 output[10] = WRAPLOW(x15, bd); | 1971 output[10] = WRAPLOW(x15, bd); |
| 1972 output[11] = WRAPLOW(x7, bd); | 1972 output[11] = WRAPLOW(x7, bd); |
| 1973 output[12] = WRAPLOW(x5, bd); | 1973 output[12] = WRAPLOW(x5, bd); |
| 1974 output[13] = WRAPLOW(-x13, bd); | 1974 output[13] = WRAPLOW(-x13, bd); |
| 1975 output[14] = WRAPLOW(x9, bd); | 1975 output[14] = WRAPLOW(x9, bd); |
| 1976 output[15] = WRAPLOW(-x1, bd); | 1976 output[15] = WRAPLOW(-x1, bd); |
| 1977 } | 1977 } |
| 1978 | 1978 |
| 1979 void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, | 1979 void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1980 int stride, int bd) { | 1980 int stride, int bd) { |
| 1981 tran_low_t out[16 * 16] = { 0 }; | 1981 tran_low_t out[16 * 16] = { 0 }; |
| 1982 tran_low_t *outptr = out; | 1982 tran_low_t *outptr = out; |
| 1983 int i, j; | 1983 int i, j; |
| 1984 tran_low_t temp_in[16], temp_out[16]; | 1984 tran_low_t temp_in[16], temp_out[16]; |
| 1985 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1985 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1986 | 1986 |
| 1987 // First transform rows. Since all non-zero dct coefficients are in | 1987 // First transform rows. Since all non-zero dct coefficients are in |
| 1988 // upper-left 4x4 area, we only need to calculate first 4 rows here. | 1988 // upper-left 4x4 area, we only need to calculate first 4 rows here. |
| 1989 for (i = 0; i < 4; ++i) { | 1989 for (i = 0; i < 4; ++i) { |
| 1990 vp9_highbd_idct16_c(input, outptr, bd); | 1990 vpx_highbd_idct16_c(input, outptr, bd); |
| 1991 input += 16; | 1991 input += 16; |
| 1992 outptr += 16; | 1992 outptr += 16; |
| 1993 } | 1993 } |
| 1994 | 1994 |
| 1995 // Then transform columns. | 1995 // Then transform columns. |
| 1996 for (i = 0; i < 16; ++i) { | 1996 for (i = 0; i < 16; ++i) { |
| 1997 for (j = 0; j < 16; ++j) | 1997 for (j = 0; j < 16; ++j) |
| 1998 temp_in[j] = out[j*16 + i]; | 1998 temp_in[j] = out[j*16 + i]; |
| 1999 vp9_highbd_idct16_c(temp_in, temp_out, bd); | 1999 vpx_highbd_idct16_c(temp_in, temp_out, bd); |
| 2000 for (j = 0; j < 16; ++j) { | 2000 for (j = 0; j < 16; ++j) { |
| 2001 dest[j * stride + i] = highbd_clip_pixel_add( | 2001 dest[j * stride + i] = highbd_clip_pixel_add( |
| 2002 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2002 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2003 } | 2003 } |
| 2004 } | 2004 } |
| 2005 } | 2005 } |
| 2006 | 2006 |
| 2007 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2007 void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2008 int stride, int bd) { | 2008 int stride, int bd) { |
| 2009 int i, j; | 2009 int i, j; |
| 2010 tran_high_t a1; | 2010 tran_high_t a1; |
| 2011 tran_low_t out = WRAPLOW( | 2011 tran_low_t out = WRAPLOW( |
| 2012 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 2012 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 2013 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2013 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2014 | 2014 |
| 2015 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 2015 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 2016 a1 = ROUND_POWER_OF_TWO(out, 6); | 2016 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 2017 for (j = 0; j < 16; ++j) { | 2017 for (j = 0; j < 16; ++j) { |
| 2018 for (i = 0; i < 16; ++i) | 2018 for (i = 0; i < 16; ++i) |
| 2019 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2019 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 2020 dest += stride; | 2020 dest += stride; |
| 2021 } | 2021 } |
| 2022 } | 2022 } |
| 2023 | 2023 |
| 2024 void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) { | 2024 static void highbd_idct32_c(const tran_low_t *input, |
| 2025 tran_low_t *output, int bd) { |
| 2025 tran_low_t step1[32], step2[32]; | 2026 tran_low_t step1[32], step2[32]; |
| 2026 tran_high_t temp1, temp2; | 2027 tran_high_t temp1, temp2; |
| 2027 (void) bd; | 2028 (void) bd; |
| 2028 | 2029 |
| 2029 // stage 1 | 2030 // stage 1 |
| 2030 step1[0] = input[0]; | 2031 step1[0] = input[0]; |
| 2031 step1[1] = input[16]; | 2032 step1[1] = input[16]; |
| 2032 step1[2] = input[8]; | 2033 step1[2] = input[8]; |
| 2033 step1[3] = input[24]; | 2034 step1[3] = input[24]; |
| 2034 step1[4] = input[4]; | 2035 step1[4] = input[4]; |
| (...skipping 347 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2382 output[24] = WRAPLOW(step1[7] - step1[24], bd); | 2383 output[24] = WRAPLOW(step1[7] - step1[24], bd); |
| 2383 output[25] = WRAPLOW(step1[6] - step1[25], bd); | 2384 output[25] = WRAPLOW(step1[6] - step1[25], bd); |
| 2384 output[26] = WRAPLOW(step1[5] - step1[26], bd); | 2385 output[26] = WRAPLOW(step1[5] - step1[26], bd); |
| 2385 output[27] = WRAPLOW(step1[4] - step1[27], bd); | 2386 output[27] = WRAPLOW(step1[4] - step1[27], bd); |
| 2386 output[28] = WRAPLOW(step1[3] - step1[28], bd); | 2387 output[28] = WRAPLOW(step1[3] - step1[28], bd); |
| 2387 output[29] = WRAPLOW(step1[2] - step1[29], bd); | 2388 output[29] = WRAPLOW(step1[2] - step1[29], bd); |
| 2388 output[30] = WRAPLOW(step1[1] - step1[30], bd); | 2389 output[30] = WRAPLOW(step1[1] - step1[30], bd); |
| 2389 output[31] = WRAPLOW(step1[0] - step1[31], bd); | 2390 output[31] = WRAPLOW(step1[0] - step1[31], bd); |
| 2390 } | 2391 } |
| 2391 | 2392 |
| 2392 void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, | 2393 void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2393 int stride, int bd) { | 2394 int stride, int bd) { |
| 2394 tran_low_t out[32 * 32]; | 2395 tran_low_t out[32 * 32]; |
| 2395 tran_low_t *outptr = out; | 2396 tran_low_t *outptr = out; |
| 2396 int i, j; | 2397 int i, j; |
| 2397 tran_low_t temp_in[32], temp_out[32]; | 2398 tran_low_t temp_in[32], temp_out[32]; |
| 2398 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2399 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2399 | 2400 |
| 2400 // Rows | 2401 // Rows |
| 2401 for (i = 0; i < 32; ++i) { | 2402 for (i = 0; i < 32; ++i) { |
| 2402 tran_low_t zero_coeff[16]; | 2403 tran_low_t zero_coeff[16]; |
| (...skipping 19 matching lines...) Expand all Loading... |
| 2422 for (j = 0; j < 32; ++j) | 2423 for (j = 0; j < 32; ++j) |
| 2423 temp_in[j] = out[j * 32 + i]; | 2424 temp_in[j] = out[j * 32 + i]; |
| 2424 highbd_idct32_c(temp_in, temp_out, bd); | 2425 highbd_idct32_c(temp_in, temp_out, bd); |
| 2425 for (j = 0; j < 32; ++j) { | 2426 for (j = 0; j < 32; ++j) { |
| 2426 dest[j * stride + i] = highbd_clip_pixel_add( | 2427 dest[j * stride + i] = highbd_clip_pixel_add( |
| 2427 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2428 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2428 } | 2429 } |
| 2429 } | 2430 } |
| 2430 } | 2431 } |
| 2431 | 2432 |
| 2432 void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, | 2433 void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2433 int stride, int bd) { | 2434 int stride, int bd) { |
| 2434 tran_low_t out[32 * 32] = {0}; | 2435 tran_low_t out[32 * 32] = {0}; |
| 2435 tran_low_t *outptr = out; | 2436 tran_low_t *outptr = out; |
| 2436 int i, j; | 2437 int i, j; |
| 2437 tran_low_t temp_in[32], temp_out[32]; | 2438 tran_low_t temp_in[32], temp_out[32]; |
| 2438 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2439 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2439 | 2440 |
| 2440 // Rows | 2441 // Rows |
| 2441 // Only upper-left 8x8 has non-zero coeff. | 2442 // Only upper-left 8x8 has non-zero coeff. |
| 2442 for (i = 0; i < 8; ++i) { | 2443 for (i = 0; i < 8; ++i) { |
| 2443 highbd_idct32_c(input, outptr, bd); | 2444 highbd_idct32_c(input, outptr, bd); |
| 2444 input += 32; | 2445 input += 32; |
| 2445 outptr += 32; | 2446 outptr += 32; |
| 2446 } | 2447 } |
| 2447 // Columns | 2448 // Columns |
| 2448 for (i = 0; i < 32; ++i) { | 2449 for (i = 0; i < 32; ++i) { |
| 2449 for (j = 0; j < 32; ++j) | 2450 for (j = 0; j < 32; ++j) |
| 2450 temp_in[j] = out[j * 32 + i]; | 2451 temp_in[j] = out[j * 32 + i]; |
| 2451 highbd_idct32_c(temp_in, temp_out, bd); | 2452 highbd_idct32_c(temp_in, temp_out, bd); |
| 2452 for (j = 0; j < 32; ++j) { | 2453 for (j = 0; j < 32; ++j) { |
| 2453 dest[j * stride + i] = highbd_clip_pixel_add( | 2454 dest[j * stride + i] = highbd_clip_pixel_add( |
| 2454 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2455 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2455 } | 2456 } |
| 2456 } | 2457 } |
| 2457 } | 2458 } |
| 2458 | 2459 |
| 2459 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2460 void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2460 int stride, int bd) { | 2461 int stride, int bd) { |
| 2461 int i, j; | 2462 int i, j; |
| 2462 int a1; | 2463 int a1; |
| 2463 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2464 | 2465 |
| 2465 tran_low_t out = WRAPLOW( | 2466 tran_low_t out = WRAPLOW( |
| 2466 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 2467 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 2467 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 2468 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 2468 a1 = ROUND_POWER_OF_TWO(out, 6); | 2469 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 2469 | 2470 |
| 2470 for (j = 0; j < 32; ++j) { | 2471 for (j = 0; j < 32; ++j) { |
| 2471 for (i = 0; i < 32; ++i) | 2472 for (i = 0; i < 32; ++i) |
| 2472 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2473 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 2473 dest += stride; | 2474 dest += stride; |
| 2474 } | 2475 } |
| 2475 } | 2476 } |
| 2476 #endif // CONFIG_VP9_HIGHBITDEPTH | 2477 #endif // CONFIG_VP9_HIGHBITDEPTH |
| OLD | NEW |