source/libvpx/vp9/common/vp9_idct.c - Issue 54923004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/vp9_idct.c

Issue 54923004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include <assert.h>	11 #include <assert.h>

12 #include <math.h>	12 #include <math.h>

13	13

14 #include "./vpx_config.h"	14 #include "./vpx_config.h"

15 #include "./vp9_rtcd.h"	15 #include "./vp9_rtcd.h"

16 #include "vp9/common/vp9_systemdependent.h"	16 #include "vp9/common/vp9_systemdependent.h"

17 #include "vp9/common/vp9_blockd.h"	17 #include "vp9/common/vp9_blockd.h"

18 #include "vp9/common/vp9_common.h"	18 #include "vp9/common/vp9_common.h"

19 #include "vp9/common/vp9_idct.h"	19 #include "vp9/common/vp9_idct.h"

20	20

21 void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {	21 void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {

22 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,	22 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,

23 0.5 shifts per pixel. */	23 0.5 shifts per pixel. */

24 int i;	24 int i;

25 int16_t output[16];	25 int16_t output[16];

26 int a1, b1, c1, d1, e1;	26 int a1, b1, c1, d1, e1;

27 int16_t *ip = input;	27 const int16_t *ip = input;

28 int16_t *op = output;	28 int16_t *op = output;

29	29

30 for (i = 0; i < 4; i++) {	30 for (i = 0; i < 4; i++) {

31 a1 = ip[0] >> WHT_UPSCALE_FACTOR;	31 a1 = ip[0] >> UNIT_QUANT_SHIFT;

32 c1 = ip[1] >> WHT_UPSCALE_FACTOR;	32 c1 = ip[1] >> UNIT_QUANT_SHIFT;

33 d1 = ip[2] >> WHT_UPSCALE_FACTOR;	33 d1 = ip[2] >> UNIT_QUANT_SHIFT;

34 b1 = ip[3] >> WHT_UPSCALE_FACTOR;	34 b1 = ip[3] >> UNIT_QUANT_SHIFT;

35 a1 += c1;	35 a1 += c1;

36 d1 -= b1;	36 d1 -= b1;

37 e1 = (a1 - d1) >> 1;	37 e1 = (a1 - d1) >> 1;

38 b1 = e1 - b1;	38 b1 = e1 - b1;

39 c1 = e1 - c1;	39 c1 = e1 - c1;

40 a1 -= b1;	40 a1 -= b1;

41 d1 += c1;	41 d1 += c1;

42 op[0] = a1;	42 op[0] = a1;

43 op[1] = b1;	43 op[1] = b1;

44 op[2] = c1;	44 op[2] = c1;

45 op[3] = d1;	45 op[3] = d1;

46 ip += 4;	46 ip += 4;

47 op += 4;	47 op += 4;

48 }	48 }

49	49

50 ip = output;	50 ip = output;

51 for (i = 0; i < 4; i++) {	51 for (i = 0; i < 4; i++) {

52 a1 = ip[4 * 0];	52 a1 = ip[4 * 0];

53 c1 = ip[4 * 1];	53 c1 = ip[4 * 1];

54 d1 = ip[4 * 2];	54 d1 = ip[4 * 2];

55 b1 = ip[4 * 3];	55 b1 = ip[4 * 3];

56 a1 += c1;	56 a1 += c1;

57 d1 -= b1;	57 d1 -= b1;

58 e1 = (a1 - d1) >> 1;	58 e1 = (a1 - d1) >> 1;

59 b1 = e1 - b1;	59 b1 = e1 - b1;

60 c1 = e1 - c1;	60 c1 = e1 - c1;

61 a1 -= b1;	61 a1 -= b1;

62 d1 += c1;	62 d1 += c1;

63 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1);	63 dest[stride * 0] = clip_pixel(dest[stride * 0] + a1);

64 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + b1);	64 dest[stride * 1] = clip_pixel(dest[stride * 1] + b1);

65 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + c1);	65 dest[stride * 2] = clip_pixel(dest[stride * 2] + c1);

66 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + d1);	66 dest[stride * 3] = clip_pixel(dest[stride * 3] + d1);

67	67

68 ip++;	68 ip++;

69 dest++;	69 dest++;

70 }	70 }

71 }	71 }

72	72

73 void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {	73 void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) {

74 int i;	74 int i;

75 int a1, e1;	75 int a1, e1;

76 int16_t tmp[4];	76 int16_t tmp[4];

77 int16_t *ip = in;	77 const int16_t *ip = in;

78 int16_t *op = tmp;	78 int16_t *op = tmp;

79	79

80 a1 = ip[0] >> WHT_UPSCALE_FACTOR;	80 a1 = ip[0] >> UNIT_QUANT_SHIFT;

81 e1 = a1 >> 1;	81 e1 = a1 >> 1;

82 a1 -= e1;	82 a1 -= e1;

83 op[0] = a1;	83 op[0] = a1;

84 op[1] = op[2] = op[3] = e1;	84 op[1] = op[2] = op[3] = e1;

85	85

86 ip = tmp;	86 ip = tmp;

87 for (i = 0; i < 4; i++) {	87 for (i = 0; i < 4; i++) {

88 e1 = ip[0] >> 1;	88 e1 = ip[0] >> 1;

89 a1 = ip[0] - e1;	89 a1 = ip[0] - e1;

90 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1);	90 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1);

91 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1);	91 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1);

92 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1);	92 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1);

93 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1);	93 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1);

94 ip++;	94 ip++;

95 dest++;	95 dest++;

96 }	96 }

97 }	97 }

98	98

99 void vp9_idct4_1d_c(int16_t *input, int16_t *output) {	99 static void idct4_1d(const int16_t *input, int16_t *output) {

100 int16_t step[4];	100 int16_t step[4];

101 int temp1, temp2;	101 int temp1, temp2;

102 // stage 1	102 // stage 1

103 temp1 = (input[0] + input[2]) * cospi_16_64;	103 temp1 = (input[0] + input[2]) * cospi_16_64;

104 temp2 = (input[0] - input[2]) * cospi_16_64;	104 temp2 = (input[0] - input[2]) * cospi_16_64;

105 step[0] = dct_const_round_shift(temp1);	105 step[0] = dct_const_round_shift(temp1);

106 step[1] = dct_const_round_shift(temp2);	106 step[1] = dct_const_round_shift(temp2);

107 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;	107 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;

108 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;	108 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;

109 step[2] = dct_const_round_shift(temp1);	109 step[2] = dct_const_round_shift(temp1);

110 step[3] = dct_const_round_shift(temp2);	110 step[3] = dct_const_round_shift(temp2);

111	111

112 // stage 2	112 // stage 2

113 output[0] = step[0] + step[3];	113 output[0] = step[0] + step[3];

114 output[1] = step[1] + step[2];	114 output[1] = step[1] + step[2];

115 output[2] = step[1] - step[2];	115 output[2] = step[1] - step[2];

116 output[3] = step[0] - step[3];	116 output[3] = step[0] - step[3];

117 }	117 }

118	118

119 void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {	119 void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {

120 int16_t out[4 * 4];	120 int16_t out[4 * 4];

121 int16_t *outptr = out;	121 int16_t *outptr = out;

122 int i, j;	122 int i, j;

123 int16_t temp_in[4], temp_out[4];	123 int16_t temp_in[4], temp_out[4];

124	124

125 // Rows	125 // Rows

126 for (i = 0; i < 4; ++i) {	126 for (i = 0; i < 4; ++i) {

127 vp9_idct4_1d(input, outptr);	127 idct4_1d(input, outptr);

128 input += 4;	128 input += 4;

129 outptr += 4;	129 outptr += 4;

130 }	130 }

131	131

132 // Columns	132 // Columns

133 for (i = 0; i < 4; ++i) {	133 for (i = 0; i < 4; ++i) {

134 for (j = 0; j < 4; ++j)	134 for (j = 0; j < 4; ++j)

135 temp_in[j] = out[j * 4 + i];	135 temp_in[j] = out[j * 4 + i];

136 vp9_idct4_1d(temp_in, temp_out);	136 idct4_1d(temp_in, temp_out);

137 for (j = 0; j < 4; ++j)	137 for (j = 0; j < 4; ++j)

138 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)	138 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)

139 + dest[j * dest_stride + i]);	139 + dest[j * stride + i]);

140 }	140 }

141 }	141 }

142	142

143 void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {	143 void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) {

144 int i;	144 int i;

145 int a1;	145 int a1;

146 int16_t out = dct_const_round_shift(input[0] * cospi_16_64);	146 int16_t out = dct_const_round_shift(input[0] * cospi_16_64);

147 out = dct_const_round_shift(out * cospi_16_64);	147 out = dct_const_round_shift(out * cospi_16_64);

148 a1 = ROUND_POWER_OF_TWO(out, 4);	148 a1 = ROUND_POWER_OF_TWO(out, 4);

149	149

150 for (i = 0; i < 4; i++) {	150 for (i = 0; i < 4; i++) {

151 dest[0] = clip_pixel(dest[0] + a1);	151 dest[0] = clip_pixel(dest[0] + a1);

152 dest[1] = clip_pixel(dest[1] + a1);	152 dest[1] = clip_pixel(dest[1] + a1);

153 dest[2] = clip_pixel(dest[2] + a1);	153 dest[2] = clip_pixel(dest[2] + a1);

154 dest[3] = clip_pixel(dest[3] + a1);	154 dest[3] = clip_pixel(dest[3] + a1);

155 dest += dest_stride;	155 dest += dest_stride;

156 }	156 }

157 }	157 }

158	158

159 static void idct8_1d(int16_t *input, int16_t *output) {	159 static void idct8_1d(const int16_t *input, int16_t *output) {

160 int16_t step1[8], step2[8];	160 int16_t step1[8], step2[8];

161 int temp1, temp2;	161 int temp1, temp2;

162 // stage 1	162 // stage 1

163 step1[0] = input[0];	163 step1[0] = input[0];

164 step1[2] = input[4];	164 step1[2] = input[4];

165 step1[1] = input[2];	165 step1[1] = input[2];

166 step1[3] = input[6];	166 step1[3] = input[6];

167 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;	167 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;

168 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;	168 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;

169 step1[4] = dct_const_round_shift(temp1);	169 step1[4] = dct_const_round_shift(temp1);

170 step1[7] = dct_const_round_shift(temp2);	170 step1[7] = dct_const_round_shift(temp2);

171 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;	171 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;

172 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;	172 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;

173 step1[5] = dct_const_round_shift(temp1);	173 step1[5] = dct_const_round_shift(temp1);

174 step1[6] = dct_const_round_shift(temp2);	174 step1[6] = dct_const_round_shift(temp2);

175	175

176 // stage 2 & stage 3 - even half	176 // stage 2 & stage 3 - even half

177 vp9_idct4_1d(step1, step1);	177 idct4_1d(step1, step1);

178	178

179 // stage 2 - odd half	179 // stage 2 - odd half

180 step2[4] = step1[4] + step1[5];	180 step2[4] = step1[4] + step1[5];

181 step2[5] = step1[4] - step1[5];	181 step2[5] = step1[4] - step1[5];

182 step2[6] = -step1[6] + step1[7];	182 step2[6] = -step1[6] + step1[7];

183 step2[7] = step1[6] + step1[7];	183 step2[7] = step1[6] + step1[7];

184	184

185 // stage 3 -odd half	185 // stage 3 -odd half

186 step1[4] = step2[4];	186 step1[4] = step2[4];

187 temp1 = (step2[6] - step2[5]) * cospi_16_64;	187 temp1 = (step2[6] - step2[5]) * cospi_16_64;

188 temp2 = (step2[5] + step2[6]) * cospi_16_64;	188 temp2 = (step2[5] + step2[6]) * cospi_16_64;

189 step1[5] = dct_const_round_shift(temp1);	189 step1[5] = dct_const_round_shift(temp1);

190 step1[6] = dct_const_round_shift(temp2);	190 step1[6] = dct_const_round_shift(temp2);

191 step1[7] = step2[7];	191 step1[7] = step2[7];

192	192

193 // stage 4	193 // stage 4

194 output[0] = step1[0] + step1[7];	194 output[0] = step1[0] + step1[7];

195 output[1] = step1[1] + step1[6];	195 output[1] = step1[1] + step1[6];

196 output[2] = step1[2] + step1[5];	196 output[2] = step1[2] + step1[5];

197 output[3] = step1[3] + step1[4];	197 output[3] = step1[3] + step1[4];

198 output[4] = step1[3] - step1[4];	198 output[4] = step1[3] - step1[4];

199 output[5] = step1[2] - step1[5];	199 output[5] = step1[2] - step1[5];

200 output[6] = step1[1] - step1[6];	200 output[6] = step1[1] - step1[6];

201 output[7] = step1[0] - step1[7];	201 output[7] = step1[0] - step1[7];

202 }	202 }

203	203

204 void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) {	204 void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {

205 int16_t out[8 * 8];	205 int16_t out[8 * 8];

206 int16_t *outptr = out;	206 int16_t *outptr = out;

207 int i, j;	207 int i, j;

208 int16_t temp_in[8], temp_out[8];	208 int16_t temp_in[8], temp_out[8];

209	209

210 // First transform rows	210 // First transform rows

211 for (i = 0; i < 8; ++i) {	211 for (i = 0; i < 8; ++i) {

212 idct8_1d(input, outptr);	212 idct8_1d(input, outptr);

213 input += 8;	213 input += 8;

214 outptr += 8;	214 outptr += 8;

215 }	215 }

216	216

217 // Then transform columns	217 // Then transform columns

218 for (i = 0; i < 8; ++i) {	218 for (i = 0; i < 8; ++i) {

219 for (j = 0; j < 8; ++j)	219 for (j = 0; j < 8; ++j)

220 temp_in[j] = out[j * 8 + i];	220 temp_in[j] = out[j * 8 + i];

221 idct8_1d(temp_in, temp_out);	221 idct8_1d(temp_in, temp_out);

222 for (j = 0; j < 8; ++j)	222 for (j = 0; j < 8; ++j)

223 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)	223 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)

224 + dest[j * dest_stride + i]);	224 + dest[j * stride + i]);

225 }	225 }

226 }	226 }

227	227

228 void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {	228 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) {

229 int i, j;	229 int i, j;

230 int a1;	230 int a1;

231 int16_t out = dct_const_round_shift(input[0] * cospi_16_64);	231 int16_t out = dct_const_round_shift(input[0] * cospi_16_64);

232 out = dct_const_round_shift(out * cospi_16_64);	232 out = dct_const_round_shift(out * cospi_16_64);

233 a1 = ROUND_POWER_OF_TWO(out, 5);	233 a1 = ROUND_POWER_OF_TWO(out, 5);

234 for (j = 0; j < 8; ++j) {	234 for (j = 0; j < 8; ++j) {

235 for (i = 0; i < 8; ++i)	235 for (i = 0; i < 8; ++i)

236 dest[i] = clip_pixel(dest[i] + a1);	236 dest[i] = clip_pixel(dest[i] + a1);

237 dest += dest_stride;	237 dest += stride;

238 }	238 }

239 }	239 }

240	240

241 static void iadst4_1d(int16_t *input, int16_t *output) {	241 static void iadst4_1d(const int16_t *input, int16_t *output) {

242 int s0, s1, s2, s3, s4, s5, s6, s7;	242 int s0, s1, s2, s3, s4, s5, s6, s7;

243	243

244 int x0 = input[0];	244 int x0 = input[0];

245 int x1 = input[1];	245 int x1 = input[1];

246 int x2 = input[2];	246 int x2 = input[2];

247 int x3 = input[3];	247 int x3 = input[3];

248	248

249 if (!(x0 | x1 | x2 | x3)) {	249 if (!(x0 | x1 | x2 | x3)) {

250 output[0] = output[1] = output[2] = output[3] = 0;	250 output[0] = output[1] = output[2] = output[3] = 0;

251 return;	251 return;

(...skipping 21 matching lines...) Expand all Loading...
273 // 1-D transform scaling factor is sqrt(2).	273 // 1-D transform scaling factor is sqrt(2).

274 // The overall dynamic range is 14b (input) + 14b (multiplication scaling)	274 // The overall dynamic range is 14b (input) + 14b (multiplication scaling)

275 // + 1b (addition) = 29b.	275 // + 1b (addition) = 29b.

276 // Hence the output bit depth is 15b.	276 // Hence the output bit depth is 15b.

277 output[0] = dct_const_round_shift(s0);	277 output[0] = dct_const_round_shift(s0);

278 output[1] = dct_const_round_shift(s1);	278 output[1] = dct_const_round_shift(s1);

279 output[2] = dct_const_round_shift(s2);	279 output[2] = dct_const_round_shift(s2);

280 output[3] = dct_const_round_shift(s3);	280 output[3] = dct_const_round_shift(s3);

281 }	281 }

282	282

283 void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride,	283 void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,

284 int tx_type) {	284 int tx_type) {

285 const transform_2d IHT_4[] = {	285 const transform_2d IHT_4[] = {

286 { vp9_idct4_1d, vp9_idct4_1d }, // DCT_DCT = 0	286 { idct4_1d, idct4_1d }, // DCT_DCT = 0

287 { iadst4_1d, vp9_idct4_1d }, // ADST_DCT = 1	287 { iadst4_1d, idct4_1d }, // ADST_DCT = 1

288 { vp9_idct4_1d, iadst4_1d }, // DCT_ADST = 2	288 { idct4_1d, iadst4_1d }, // DCT_ADST = 2

289 { iadst4_1d, iadst4_1d } // ADST_ADST = 3	289 { iadst4_1d, iadst4_1d } // ADST_ADST = 3

290 };	290 };

291	291

292 int i, j;	292 int i, j;

293 int16_t out[4 * 4];	293 int16_t out[4 * 4];

294 int16_t *outptr = out;	294 int16_t *outptr = out;

295 int16_t temp_in[4], temp_out[4];	295 int16_t temp_in[4], temp_out[4];

296	296

297 // inverse transform row vectors	297 // inverse transform row vectors

298 for (i = 0; i < 4; ++i) {	298 for (i = 0; i < 4; ++i) {

299 IHT_4[tx_type].rows(input, outptr);	299 IHT_4[tx_type].rows(input, outptr);

300 input += 4;	300 input += 4;

301 outptr += 4;	301 outptr += 4;

302 }	302 }

303	303

304 // inverse transform column vectors	304 // inverse transform column vectors

305 for (i = 0; i < 4; ++i) {	305 for (i = 0; i < 4; ++i) {

306 for (j = 0; j < 4; ++j)	306 for (j = 0; j < 4; ++j)

307 temp_in[j] = out[j * 4 + i];	307 temp_in[j] = out[j * 4 + i];

308 IHT_4[tx_type].cols(temp_in, temp_out);	308 IHT_4[tx_type].cols(temp_in, temp_out);

309 for (j = 0; j < 4; ++j)	309 for (j = 0; j < 4; ++j)

310 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)	310 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)

311 + dest[j * dest_stride + i]);	311 + dest[j * stride + i]);

312 }	312 }

313 }	313 }

314 static void iadst8_1d(int16_t *input, int16_t *output) {	314 static void iadst8_1d(const int16_t *input, int16_t *output) {

315 int s0, s1, s2, s3, s4, s5, s6, s7;	315 int s0, s1, s2, s3, s4, s5, s6, s7;

316	316

317 int x0 = input[7];	317 int x0 = input[7];

318 int x1 = input[0];	318 int x1 = input[0];

319 int x2 = input[5];	319 int x2 = input[5];

320 int x3 = input[2];	320 int x3 = input[2];

321 int x4 = input[3];	321 int x4 = input[3];

322 int x5 = input[4];	322 int x5 = input[4];

323 int x6 = input[1];	323 int x6 = input[1];

324 int x7 = input[6];	324 int x7 = input[6];

(...skipping 63 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
388 output[7] = -x1;	388 output[7] = -x1;

389 }	389 }

390	390

391 static const transform_2d IHT_8[] = {	391 static const transform_2d IHT_8[] = {

392 { idct8_1d, idct8_1d }, // DCT_DCT = 0	392 { idct8_1d, idct8_1d }, // DCT_DCT = 0

393 { iadst8_1d, idct8_1d }, // ADST_DCT = 1	393 { iadst8_1d, idct8_1d }, // ADST_DCT = 1

394 { idct8_1d, iadst8_1d }, // DCT_ADST = 2	394 { idct8_1d, iadst8_1d }, // DCT_ADST = 2

395 { iadst8_1d, iadst8_1d } // ADST_ADST = 3	395 { iadst8_1d, iadst8_1d } // ADST_ADST = 3

396 };	396 };

397	397

398 void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride,	398 void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride,

399 int tx_type) {	399 int tx_type) {

400 int i, j;	400 int i, j;

401 int16_t out[8 * 8];	401 int16_t out[8 * 8];

402 int16_t *outptr = out;	402 int16_t *outptr = out;

403 int16_t temp_in[8], temp_out[8];	403 int16_t temp_in[8], temp_out[8];

404 const transform_2d ht = IHT_8[tx_type];	404 const transform_2d ht = IHT_8[tx_type];

405	405

406 // inverse transform row vectors	406 // inverse transform row vectors

407 for (i = 0; i < 8; ++i) {	407 for (i = 0; i < 8; ++i) {

408 ht.rows(input, outptr);	408 ht.rows(input, outptr);

409 input += 8;	409 input += 8;

410 outptr += 8;	410 outptr += 8;

411 }	411 }

412	412

413 // inverse transform column vectors	413 // inverse transform column vectors

414 for (i = 0; i < 8; ++i) {	414 for (i = 0; i < 8; ++i) {

415 for (j = 0; j < 8; ++j)	415 for (j = 0; j < 8; ++j)

416 temp_in[j] = out[j * 8 + i];	416 temp_in[j] = out[j * 8 + i];

417 ht.cols(temp_in, temp_out);	417 ht.cols(temp_in, temp_out);

418 for (j = 0; j < 8; ++j)	418 for (j = 0; j < 8; ++j)

419 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)	419 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)

420 + dest[j * dest_stride + i]); }	420 + dest[j * stride + i]);

	421 }

421 }	422 }

422	423

423 void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,	424 void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {

424 int dest_stride) {

425 int16_t out[8 * 8] = { 0 };	425 int16_t out[8 * 8] = { 0 };

426 int16_t *outptr = out;	426 int16_t *outptr = out;

427 int i, j;	427 int i, j;

428 int16_t temp_in[8], temp_out[8];	428 int16_t temp_in[8], temp_out[8];

429	429

430 // First transform rows	430 // First transform rows

431 // only first 4 row has non-zero coefs	431 // only first 4 row has non-zero coefs

432 for (i = 0; i < 4; ++i) {	432 for (i = 0; i < 4; ++i) {

433 idct8_1d(input, outptr);	433 idct8_1d(input, outptr);

434 input += 8;	434 input += 8;

435 outptr += 8;	435 outptr += 8;

436 }	436 }

437	437

438 // Then transform columns	438 // Then transform columns

439 for (i = 0; i < 8; ++i) {	439 for (i = 0; i < 8; ++i) {

440 for (j = 0; j < 8; ++j)	440 for (j = 0; j < 8; ++j)

441 temp_in[j] = out[j * 8 + i];	441 temp_in[j] = out[j * 8 + i];

442 idct8_1d(temp_in, temp_out);	442 idct8_1d(temp_in, temp_out);

443 for (j = 0; j < 8; ++j)	443 for (j = 0; j < 8; ++j)

444 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)	444 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)

445 + dest[j * dest_stride + i]);	445 + dest[j * stride + i]);

446 }	446 }

447 }	447 }

448	448

449 static void idct16_1d(int16_t *input, int16_t *output) {	449 static void idct16_1d(const int16_t *input, int16_t *output) {

450 int16_t step1[16], step2[16];	450 int16_t step1[16], step2[16];

451 int temp1, temp2;	451 int temp1, temp2;

452	452

453 // stage 1	453 // stage 1

454 step1[0] = input[0/2];	454 step1[0] = input[0/2];

455 step1[1] = input[16/2];	455 step1[1] = input[16/2];

456 step1[2] = input[8/2];	456 step1[2] = input[8/2];

457 step1[3] = input[24/2];	457 step1[3] = input[24/2];

458 step1[4] = input[4/2];	458 step1[4] = input[4/2];

459 step1[5] = input[20/2];	459 step1[5] = input[20/2];

(...skipping 144 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
604 output[8] = step2[7] - step2[8];	604 output[8] = step2[7] - step2[8];

605 output[9] = step2[6] - step2[9];	605 output[9] = step2[6] - step2[9];

606 output[10] = step2[5] - step2[10];	606 output[10] = step2[5] - step2[10];

607 output[11] = step2[4] - step2[11];	607 output[11] = step2[4] - step2[11];

608 output[12] = step2[3] - step2[12];	608 output[12] = step2[3] - step2[12];

609 output[13] = step2[2] - step2[13];	609 output[13] = step2[2] - step2[13];

610 output[14] = step2[1] - step2[14];	610 output[14] = step2[1] - step2[14];

611 output[15] = step2[0] - step2[15];	611 output[15] = step2[0] - step2[15];

612 }	612 }

613	613

614 void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {	614 void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {

615 int16_t out[16 * 16];	615 int16_t out[16 * 16];

616 int16_t *outptr = out;	616 int16_t *outptr = out;

617 int i, j;	617 int i, j;

618 int16_t temp_in[16], temp_out[16];	618 int16_t temp_in[16], temp_out[16];

619	619

620 // First transform rows	620 // First transform rows

621 for (i = 0; i < 16; ++i) {	621 for (i = 0; i < 16; ++i) {

622 idct16_1d(input, outptr);	622 idct16_1d(input, outptr);

623 input += 16;	623 input += 16;

624 outptr += 16;	624 outptr += 16;

625 }	625 }

626	626

627 // Then transform columns	627 // Then transform columns

628 for (i = 0; i < 16; ++i) {	628 for (i = 0; i < 16; ++i) {

629 for (j = 0; j < 16; ++j)	629 for (j = 0; j < 16; ++j)

630 temp_in[j] = out[j * 16 + i];	630 temp_in[j] = out[j * 16 + i];

631 idct16_1d(temp_in, temp_out);	631 idct16_1d(temp_in, temp_out);

632 for (j = 0; j < 16; ++j)	632 for (j = 0; j < 16; ++j)

633 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)	633 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)

634 + dest[j * dest_stride + i]);	634 + dest[j * stride + i]);

635 }	635 }

636 }	636 }

637	637

638 void iadst16_1d(int16_t *input, int16_t *output) {	638 static void iadst16_1d(const int16_t *input, int16_t *output) {

639 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;	639 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;

640	640

641 int x0 = input[15];	641 int x0 = input[15];

642 int x1 = input[0];	642 int x1 = input[0];

643 int x2 = input[13];	643 int x2 = input[13];

644 int x3 = input[2];	644 int x3 = input[2];

645 int x4 = input[11];	645 int x4 = input[11];

646 int x5 = input[4];	646 int x5 = input[4];

647 int x6 = input[9];	647 int x6 = input[9];

648 int x7 = input[6];	648 int x7 = input[6];

(...skipping 157 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
806 output[15] = -x1;	806 output[15] = -x1;

807 }	807 }

808	808

809 static const transform_2d IHT_16[] = {	809 static const transform_2d IHT_16[] = {

810 { idct16_1d, idct16_1d }, // DCT_DCT = 0	810 { idct16_1d, idct16_1d }, // DCT_DCT = 0

811 { iadst16_1d, idct16_1d }, // ADST_DCT = 1	811 { iadst16_1d, idct16_1d }, // ADST_DCT = 1

812 { idct16_1d, iadst16_1d }, // DCT_ADST = 2	812 { idct16_1d, iadst16_1d }, // DCT_ADST = 2

813 { iadst16_1d, iadst16_1d } // ADST_ADST = 3	813 { iadst16_1d, iadst16_1d } // ADST_ADST = 3

814 };	814 };

815	815

816 void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride,	816 void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride,

817 int tx_type) {	817 int tx_type) {

818 int i, j;	818 int i, j;

819 int16_t out[16 * 16];	819 int16_t out[16 * 16];

820 int16_t *outptr = out;	820 int16_t *outptr = out;

821 int16_t temp_in[16], temp_out[16];	821 int16_t temp_in[16], temp_out[16];

822 const transform_2d ht = IHT_16[tx_type];	822 const transform_2d ht = IHT_16[tx_type];

823	823

824 // Rows	824 // Rows

825 for (i = 0; i < 16; ++i) {	825 for (i = 0; i < 16; ++i) {

826 ht.rows(input, outptr);	826 ht.rows(input, outptr);

827 input += 16;	827 input += 16;

828 outptr += 16;	828 outptr += 16;

829 }	829 }

830	830

831 // Columns	831 // Columns

832 for (i = 0; i < 16; ++i) {	832 for (i = 0; i < 16; ++i) {

833 for (j = 0; j < 16; ++j)	833 for (j = 0; j < 16; ++j)

834 temp_in[j] = out[j * 16 + i];	834 temp_in[j] = out[j * 16 + i];

835 ht.cols(temp_in, temp_out);	835 ht.cols(temp_in, temp_out);

836 for (j = 0; j < 16; ++j)	836 for (j = 0; j < 16; ++j)

837 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)	837 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)

838 + dest[j * dest_stride + i]); }	838 + dest[j * stride + i]); }

839 }	839 }

840	840

841 void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,	841 void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {

842 int dest_stride) {

843 int16_t out[16 * 16] = { 0 };	842 int16_t out[16 * 16] = { 0 };

844 int16_t *outptr = out;	843 int16_t *outptr = out;

845 int i, j;	844 int i, j;

846 int16_t temp_in[16], temp_out[16];	845 int16_t temp_in[16], temp_out[16];

847	846

848 // First transform rows. Since all non-zero dct coefficients are in	847 // First transform rows. Since all non-zero dct coefficients are in

849 // upper-left 4x4 area, we only need to calculate first 4 rows here.	848 // upper-left 4x4 area, we only need to calculate first 4 rows here.

850 for (i = 0; i < 4; ++i) {	849 for (i = 0; i < 4; ++i) {

851 idct16_1d(input, outptr);	850 idct16_1d(input, outptr);

852 input += 16;	851 input += 16;

853 outptr += 16;	852 outptr += 16;

854 }	853 }

855	854

856 // Then transform columns	855 // Then transform columns

857 for (i = 0; i < 16; ++i) {	856 for (i = 0; i < 16; ++i) {

858 for (j = 0; j < 16; ++j)	857 for (j = 0; j < 16; ++j)

859 temp_in[j] = out[j*16 + i];	858 temp_in[j] = out[j*16 + i];

860 idct16_1d(temp_in, temp_out);	859 idct16_1d(temp_in, temp_out);

861 for (j = 0; j < 16; ++j)	860 for (j = 0; j < 16; ++j)

862 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)	861 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)

863 + dest[j * dest_stride + i]);	862 + dest[j * stride + i]);

864 }	863 }

865 }	864 }

866	865

867 void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest,	866 void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) {

868 int dest_stride) {

869 int i, j;	867 int i, j;

870 int a1;	868 int a1;

871 int16_t out = dct_const_round_shift(input[0] * cospi_16_64);	869 int16_t out = dct_const_round_shift(input[0] * cospi_16_64);

872 out = dct_const_round_shift(out * cospi_16_64);	870 out = dct_const_round_shift(out * cospi_16_64);

873 a1 = ROUND_POWER_OF_TWO(out, 6);	871 a1 = ROUND_POWER_OF_TWO(out, 6);

874 for (j = 0; j < 16; ++j) {	872 for (j = 0; j < 16; ++j) {

875 for (i = 0; i < 16; ++i)	873 for (i = 0; i < 16; ++i)

876 dest[i] = clip_pixel(dest[i] + a1);	874 dest[i] = clip_pixel(dest[i] + a1);

877 dest += dest_stride;	875 dest += stride;

878 }	876 }

879 }	877 }

880	878

881 static void idct32_1d(int16_t *input, int16_t *output) {	879 static void idct32_1d(const int16_t *input, int16_t *output) {

882 int16_t step1[32], step2[32];	880 int16_t step1[32], step2[32];

883 int temp1, temp2;	881 int temp1, temp2;

884	882

885 // stage 1	883 // stage 1

886 step1[0] = input[0];	884 step1[0] = input[0];

887 step1[1] = input[16];	885 step1[1] = input[16];

888 step1[2] = input[8];	886 step1[2] = input[8];

889 step1[3] = input[24];	887 step1[3] = input[24];

890 step1[4] = input[4];	888 step1[4] = input[4];

891 step1[5] = input[20];	889 step1[5] = input[20];

(...skipping 346 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1238 output[24] = step1[7] - step1[24];	1236 output[24] = step1[7] - step1[24];

1239 output[25] = step1[6] - step1[25];	1237 output[25] = step1[6] - step1[25];

1240 output[26] = step1[5] - step1[26];	1238 output[26] = step1[5] - step1[26];

1241 output[27] = step1[4] - step1[27];	1239 output[27] = step1[4] - step1[27];

1242 output[28] = step1[3] - step1[28];	1240 output[28] = step1[3] - step1[28];

1243 output[29] = step1[2] - step1[29];	1241 output[29] = step1[2] - step1[29];

1244 output[30] = step1[1] - step1[30];	1242 output[30] = step1[1] - step1[30];

1245 output[31] = step1[0] - step1[31];	1243 output[31] = step1[0] - step1[31];

1246 }	1244 }

1247	1245

1248 void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride) {	1246 void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {

1249 int16_t out[32 * 32];	1247 int16_t out[32 * 32];

1250 int16_t *outptr = out;	1248 int16_t *outptr = out;

1251 int i, j;	1249 int i, j;

1252 int16_t temp_in[32], temp_out[32];	1250 int16_t temp_in[32], temp_out[32];

1253	1251

1254 // Rows	1252 // Rows

1255 for (i = 0; i < 32; ++i) {	1253 for (i = 0; i < 32; ++i) {

	1254 int16_t zero_coeff[16];

	1255 for (j = 0; j < 16; ++j)

	1256 zero_coeff[j] = input[2 * j] | input[2 * j + 1];

	1257 for (j = 0; j < 8; ++j)

	1258 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];

	1259 for (j = 0; j < 4; ++j)

	1260 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];

	1261 for (j = 0; j < 2; ++j)

	1262 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];

	1263

	1264 if (zero_coeff[0] | zero_coeff[1])

	1265 idct32_1d(input, outptr);

	1266 else

	1267 vpx_memset(outptr, 0, sizeof(int16_t) * 32);

	1268 input += 32;

	1269 outptr += 32;

	1270 }

	1271

	1272 // Columns

	1273 for (i = 0; i < 32; ++i) {

	1274 for (j = 0; j < 32; ++j)

	1275 temp_in[j] = out[j * 32 + i];

	1276 idct32_1d(temp_in, temp_out);

	1277 for (j = 0; j < 32; ++j)

	1278 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)

	1279 + dest[j * stride + i]);

	1280 }

	1281 }

	1282

	1283 void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {

	1284 int16_t out[32 * 32] = {0};

	1285 int16_t *outptr = out;

	1286 int i, j;

	1287 int16_t temp_in[32], temp_out[32];

	1288

	1289 // Rows

	1290 // only upper-left 8x8 has non-zero coeff

	1291 for (i = 0; i < 8; ++i) {

1256 idct32_1d(input, outptr);	1292 idct32_1d(input, outptr);

1257 input += 32;	1293 input += 32;

1258 outptr += 32;	1294 outptr += 32;

1259 }	1295 }

1260	1296

1261 // Columns	1297 // Columns

1262 for (i = 0; i < 32; ++i) {	1298 for (i = 0; i < 32; ++i) {

1263 for (j = 0; j < 32; ++j)	1299 for (j = 0; j < 32; ++j)

1264 temp_in[j] = out[j * 32 + i];	1300 temp_in[j] = out[j * 32 + i];

1265 idct32_1d(temp_in, temp_out);	1301 idct32_1d(temp_in, temp_out);

1266 for (j = 0; j < 32; ++j)	1302 for (j = 0; j < 32; ++j)

1267 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)	1303 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)

1268 + dest[j * dest_stride + i]);	1304 + dest[j * stride + i]);

1269 }	1305 }

1270 }	1306 }

1271	1307

1272 void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {	1308 void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) {

	1309 int i, j;

	1310 int a1;

	1311

1273 int16_t out = dct_const_round_shift(input[0] * cospi_16_64);	1312 int16_t out = dct_const_round_shift(input[0] * cospi_16_64);

1274 out = dct_const_round_shift(out * cospi_16_64);	1313 out = dct_const_round_shift(out * cospi_16_64);

1275 output[0] = ROUND_POWER_OF_TWO(out, 6);	1314 a1 = ROUND_POWER_OF_TWO(out, 6);

	1315

	1316 for (j = 0; j < 32; ++j) {

	1317 for (i = 0; i < 32; ++i)

	1318 dest[i] = clip_pixel(dest[i] + a1);

	1319 dest += stride;

	1320 }

1276 }	1321 }

	1322

	1323 // idct

	1324 void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {

	1325 if (eob > 1)

	1326 vp9_idct4x4_16_add(input, dest, stride);

	1327 else

	1328 vp9_idct4x4_1_add(input, dest, stride);

	1329 }

	1330

	1331

	1332 void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {

	1333 if (eob > 1)

	1334 vp9_iwht4x4_16_add(input, dest, stride);

	1335 else

	1336 vp9_iwht4x4_1_add(input, dest, stride);

	1337 }

	1338

	1339 void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) {

	1340 // If dc is 1, then input[0] is the reconstructed value, do not need

	1341 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.

	1342

	1343 // The calculation can be simplified if there are not many non-zero dct

	1344 // coefficients. Use eobs to decide what to do.

	1345 // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.

	1346 // Combine that with code here.

	1347 if (eob) {

	1348 if (eob == 1)

	1349 // DC only DCT coefficient

	1350 vp9_idct8x8_1_add(input, dest, stride);

	1351 else if (eob <= 10)

	1352 vp9_idct8x8_10_add(input, dest, stride);

	1353 else

	1354 vp9_idct8x8_64_add(input, dest, stride);

	1355 }

	1356 }

	1357

	1358 void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride,

	1359 int eob) {

	1360 /* The calculation can be simplified if there are not many non-zero dct

	1361 * coefficients. Use eobs to separate different cases. */

	1362 if (eob) {

	1363 if (eob == 1)

	1364 /* DC only DCT coefficient. */

	1365 vp9_idct16x16_1_add(input, dest, stride);

	1366 else if (eob <= 10)

	1367 vp9_idct16x16_10_add(input, dest, stride);

	1368 else

	1369 vp9_idct16x16_256_add(input, dest, stride);

	1370 }

	1371 }

	1372

	1373 void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,

	1374 int eob) {

	1375 if (eob) {

	1376 if (eob == 1)

	1377 vp9_idct32x32_1_add(input, dest, stride);

	1378 else if (eob <= 34)

	1379 // non-zero coeff only in upper-left 8x8

	1380 vp9_idct32x32_34_add(input, dest, stride);

	1381 else

	1382 vp9_idct32x32_1024_add(input, dest, stride);

	1383 }

	1384 }

	1385

	1386 // iht

	1387 void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,

	1388 int stride, int eob) {

	1389 if (tx_type == DCT_DCT)

	1390 vp9_idct4x4_add(input, dest, stride, eob);

	1391 else

	1392 vp9_iht4x4_16_add(input, dest, stride, tx_type);

	1393 }

	1394

	1395 void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,

	1396 int stride, int eob) {

	1397 if (tx_type == DCT_DCT) {

	1398 vp9_idct8x8_add(input, dest, stride, eob);

	1399 } else {

	1400 if (eob > 0) {

	1401 vp9_iht8x8_64_add(input, dest, stride, tx_type);

	1402 }

	1403 }

	1404 }

	1405

	1406 void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,

	1407 int stride, int eob) {

	1408 if (tx_type == DCT_DCT) {

	1409 vp9_idct16x16_add(input, dest, stride, eob);

	1410 } else {

	1411 if (eob > 0) {

	1412 vp9_iht16x16_256_add(input, dest, stride, tx_type);

	1413 }

	1414 }

	1415 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/vp9_idct.h ('k') | source/libvpx/vp9/common/vp9_loopfilter.h » ('j') | no next file with comments »