OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <assert.h> | 11 #include <assert.h> |
12 #include <math.h> | 12 #include <math.h> |
13 | 13 |
14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
16 #include "vp9/common/vp9_systemdependent.h" | 16 #include "vp9/common/vp9_systemdependent.h" |
17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
18 #include "vp9/common/vp9_common.h" | 18 #include "vp9/common/vp9_common.h" |
19 #include "vp9/common/vp9_idct.h" | 19 #include "vp9/common/vp9_idct.h" |
20 | 20 |
21 void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 21 void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { |
22 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 22 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
23 0.5 shifts per pixel. */ | 23 0.5 shifts per pixel. */ |
24 int i; | 24 int i; |
25 int16_t output[16]; | 25 int16_t output[16]; |
26 int a1, b1, c1, d1, e1; | 26 int a1, b1, c1, d1, e1; |
27 int16_t *ip = input; | 27 const int16_t *ip = input; |
28 int16_t *op = output; | 28 int16_t *op = output; |
29 | 29 |
30 for (i = 0; i < 4; i++) { | 30 for (i = 0; i < 4; i++) { |
31 a1 = ip[0] >> WHT_UPSCALE_FACTOR; | 31 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
32 c1 = ip[1] >> WHT_UPSCALE_FACTOR; | 32 c1 = ip[1] >> UNIT_QUANT_SHIFT; |
33 d1 = ip[2] >> WHT_UPSCALE_FACTOR; | 33 d1 = ip[2] >> UNIT_QUANT_SHIFT; |
34 b1 = ip[3] >> WHT_UPSCALE_FACTOR; | 34 b1 = ip[3] >> UNIT_QUANT_SHIFT; |
35 a1 += c1; | 35 a1 += c1; |
36 d1 -= b1; | 36 d1 -= b1; |
37 e1 = (a1 - d1) >> 1; | 37 e1 = (a1 - d1) >> 1; |
38 b1 = e1 - b1; | 38 b1 = e1 - b1; |
39 c1 = e1 - c1; | 39 c1 = e1 - c1; |
40 a1 -= b1; | 40 a1 -= b1; |
41 d1 += c1; | 41 d1 += c1; |
42 op[0] = a1; | 42 op[0] = a1; |
43 op[1] = b1; | 43 op[1] = b1; |
44 op[2] = c1; | 44 op[2] = c1; |
45 op[3] = d1; | 45 op[3] = d1; |
46 ip += 4; | 46 ip += 4; |
47 op += 4; | 47 op += 4; |
48 } | 48 } |
49 | 49 |
50 ip = output; | 50 ip = output; |
51 for (i = 0; i < 4; i++) { | 51 for (i = 0; i < 4; i++) { |
52 a1 = ip[4 * 0]; | 52 a1 = ip[4 * 0]; |
53 c1 = ip[4 * 1]; | 53 c1 = ip[4 * 1]; |
54 d1 = ip[4 * 2]; | 54 d1 = ip[4 * 2]; |
55 b1 = ip[4 * 3]; | 55 b1 = ip[4 * 3]; |
56 a1 += c1; | 56 a1 += c1; |
57 d1 -= b1; | 57 d1 -= b1; |
58 e1 = (a1 - d1) >> 1; | 58 e1 = (a1 - d1) >> 1; |
59 b1 = e1 - b1; | 59 b1 = e1 - b1; |
60 c1 = e1 - c1; | 60 c1 = e1 - c1; |
61 a1 -= b1; | 61 a1 -= b1; |
62 d1 += c1; | 62 d1 += c1; |
63 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); | 63 dest[stride * 0] = clip_pixel(dest[stride * 0] + a1); |
64 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + b1); | 64 dest[stride * 1] = clip_pixel(dest[stride * 1] + b1); |
65 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + c1); | 65 dest[stride * 2] = clip_pixel(dest[stride * 2] + c1); |
66 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + d1); | 66 dest[stride * 3] = clip_pixel(dest[stride * 3] + d1); |
67 | 67 |
68 ip++; | 68 ip++; |
69 dest++; | 69 dest++; |
70 } | 70 } |
71 } | 71 } |
72 | 72 |
73 void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) { | 73 void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) { |
74 int i; | 74 int i; |
75 int a1, e1; | 75 int a1, e1; |
76 int16_t tmp[4]; | 76 int16_t tmp[4]; |
77 int16_t *ip = in; | 77 const int16_t *ip = in; |
78 int16_t *op = tmp; | 78 int16_t *op = tmp; |
79 | 79 |
80 a1 = ip[0] >> WHT_UPSCALE_FACTOR; | 80 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
81 e1 = a1 >> 1; | 81 e1 = a1 >> 1; |
82 a1 -= e1; | 82 a1 -= e1; |
83 op[0] = a1; | 83 op[0] = a1; |
84 op[1] = op[2] = op[3] = e1; | 84 op[1] = op[2] = op[3] = e1; |
85 | 85 |
86 ip = tmp; | 86 ip = tmp; |
87 for (i = 0; i < 4; i++) { | 87 for (i = 0; i < 4; i++) { |
88 e1 = ip[0] >> 1; | 88 e1 = ip[0] >> 1; |
89 a1 = ip[0] - e1; | 89 a1 = ip[0] - e1; |
90 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); | 90 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); |
91 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1); | 91 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1); |
92 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1); | 92 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1); |
93 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1); | 93 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1); |
94 ip++; | 94 ip++; |
95 dest++; | 95 dest++; |
96 } | 96 } |
97 } | 97 } |
98 | 98 |
99 void vp9_idct4_1d_c(int16_t *input, int16_t *output) { | 99 static void idct4_1d(const int16_t *input, int16_t *output) { |
100 int16_t step[4]; | 100 int16_t step[4]; |
101 int temp1, temp2; | 101 int temp1, temp2; |
102 // stage 1 | 102 // stage 1 |
103 temp1 = (input[0] + input[2]) * cospi_16_64; | 103 temp1 = (input[0] + input[2]) * cospi_16_64; |
104 temp2 = (input[0] - input[2]) * cospi_16_64; | 104 temp2 = (input[0] - input[2]) * cospi_16_64; |
105 step[0] = dct_const_round_shift(temp1); | 105 step[0] = dct_const_round_shift(temp1); |
106 step[1] = dct_const_round_shift(temp2); | 106 step[1] = dct_const_round_shift(temp2); |
107 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; | 107 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; |
108 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; | 108 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; |
109 step[2] = dct_const_round_shift(temp1); | 109 step[2] = dct_const_round_shift(temp1); |
110 step[3] = dct_const_round_shift(temp2); | 110 step[3] = dct_const_round_shift(temp2); |
111 | 111 |
112 // stage 2 | 112 // stage 2 |
113 output[0] = step[0] + step[3]; | 113 output[0] = step[0] + step[3]; |
114 output[1] = step[1] + step[2]; | 114 output[1] = step[1] + step[2]; |
115 output[2] = step[1] - step[2]; | 115 output[2] = step[1] - step[2]; |
116 output[3] = step[0] - step[3]; | 116 output[3] = step[0] - step[3]; |
117 } | 117 } |
118 | 118 |
119 void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 119 void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { |
120 int16_t out[4 * 4]; | 120 int16_t out[4 * 4]; |
121 int16_t *outptr = out; | 121 int16_t *outptr = out; |
122 int i, j; | 122 int i, j; |
123 int16_t temp_in[4], temp_out[4]; | 123 int16_t temp_in[4], temp_out[4]; |
124 | 124 |
125 // Rows | 125 // Rows |
126 for (i = 0; i < 4; ++i) { | 126 for (i = 0; i < 4; ++i) { |
127 vp9_idct4_1d(input, outptr); | 127 idct4_1d(input, outptr); |
128 input += 4; | 128 input += 4; |
129 outptr += 4; | 129 outptr += 4; |
130 } | 130 } |
131 | 131 |
132 // Columns | 132 // Columns |
133 for (i = 0; i < 4; ++i) { | 133 for (i = 0; i < 4; ++i) { |
134 for (j = 0; j < 4; ++j) | 134 for (j = 0; j < 4; ++j) |
135 temp_in[j] = out[j * 4 + i]; | 135 temp_in[j] = out[j * 4 + i]; |
136 vp9_idct4_1d(temp_in, temp_out); | 136 idct4_1d(temp_in, temp_out); |
137 for (j = 0; j < 4; ++j) | 137 for (j = 0; j < 4; ++j) |
138 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 138 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) |
139 + dest[j * dest_stride + i]); | 139 + dest[j * stride + i]); |
140 } | 140 } |
141 } | 141 } |
142 | 142 |
143 void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 143 void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) { |
144 int i; | 144 int i; |
145 int a1; | 145 int a1; |
146 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); | 146 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
147 out = dct_const_round_shift(out * cospi_16_64); | 147 out = dct_const_round_shift(out * cospi_16_64); |
148 a1 = ROUND_POWER_OF_TWO(out, 4); | 148 a1 = ROUND_POWER_OF_TWO(out, 4); |
149 | 149 |
150 for (i = 0; i < 4; i++) { | 150 for (i = 0; i < 4; i++) { |
151 dest[0] = clip_pixel(dest[0] + a1); | 151 dest[0] = clip_pixel(dest[0] + a1); |
152 dest[1] = clip_pixel(dest[1] + a1); | 152 dest[1] = clip_pixel(dest[1] + a1); |
153 dest[2] = clip_pixel(dest[2] + a1); | 153 dest[2] = clip_pixel(dest[2] + a1); |
154 dest[3] = clip_pixel(dest[3] + a1); | 154 dest[3] = clip_pixel(dest[3] + a1); |
155 dest += dest_stride; | 155 dest += dest_stride; |
156 } | 156 } |
157 } | 157 } |
158 | 158 |
159 static void idct8_1d(int16_t *input, int16_t *output) { | 159 static void idct8_1d(const int16_t *input, int16_t *output) { |
160 int16_t step1[8], step2[8]; | 160 int16_t step1[8], step2[8]; |
161 int temp1, temp2; | 161 int temp1, temp2; |
162 // stage 1 | 162 // stage 1 |
163 step1[0] = input[0]; | 163 step1[0] = input[0]; |
164 step1[2] = input[4]; | 164 step1[2] = input[4]; |
165 step1[1] = input[2]; | 165 step1[1] = input[2]; |
166 step1[3] = input[6]; | 166 step1[3] = input[6]; |
167 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; | 167 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; |
168 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; | 168 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; |
169 step1[4] = dct_const_round_shift(temp1); | 169 step1[4] = dct_const_round_shift(temp1); |
170 step1[7] = dct_const_round_shift(temp2); | 170 step1[7] = dct_const_round_shift(temp2); |
171 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; | 171 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; |
172 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; | 172 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; |
173 step1[5] = dct_const_round_shift(temp1); | 173 step1[5] = dct_const_round_shift(temp1); |
174 step1[6] = dct_const_round_shift(temp2); | 174 step1[6] = dct_const_round_shift(temp2); |
175 | 175 |
176 // stage 2 & stage 3 - even half | 176 // stage 2 & stage 3 - even half |
177 vp9_idct4_1d(step1, step1); | 177 idct4_1d(step1, step1); |
178 | 178 |
179 // stage 2 - odd half | 179 // stage 2 - odd half |
180 step2[4] = step1[4] + step1[5]; | 180 step2[4] = step1[4] + step1[5]; |
181 step2[5] = step1[4] - step1[5]; | 181 step2[5] = step1[4] - step1[5]; |
182 step2[6] = -step1[6] + step1[7]; | 182 step2[6] = -step1[6] + step1[7]; |
183 step2[7] = step1[6] + step1[7]; | 183 step2[7] = step1[6] + step1[7]; |
184 | 184 |
185 // stage 3 - odd half | 185 // stage 3 - odd half |
186 step1[4] = step2[4]; | 186 step1[4] = step2[4]; |
187 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 187 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
188 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 188 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
189 step1[5] = dct_const_round_shift(temp1); | 189 step1[5] = dct_const_round_shift(temp1); |
190 step1[6] = dct_const_round_shift(temp2); | 190 step1[6] = dct_const_round_shift(temp2); |
191 step1[7] = step2[7]; | 191 step1[7] = step2[7]; |
192 | 192 |
193 // stage 4 | 193 // stage 4 |
194 output[0] = step1[0] + step1[7]; | 194 output[0] = step1[0] + step1[7]; |
195 output[1] = step1[1] + step1[6]; | 195 output[1] = step1[1] + step1[6]; |
196 output[2] = step1[2] + step1[5]; | 196 output[2] = step1[2] + step1[5]; |
197 output[3] = step1[3] + step1[4]; | 197 output[3] = step1[3] + step1[4]; |
198 output[4] = step1[3] - step1[4]; | 198 output[4] = step1[3] - step1[4]; |
199 output[5] = step1[2] - step1[5]; | 199 output[5] = step1[2] - step1[5]; |
200 output[6] = step1[1] - step1[6]; | 200 output[6] = step1[1] - step1[6]; |
201 output[7] = step1[0] - step1[7]; | 201 output[7] = step1[0] - step1[7]; |
202 } | 202 } |
203 | 203 |
204 void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 204 void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) { |
205 int16_t out[8 * 8]; | 205 int16_t out[8 * 8]; |
206 int16_t *outptr = out; | 206 int16_t *outptr = out; |
207 int i, j; | 207 int i, j; |
208 int16_t temp_in[8], temp_out[8]; | 208 int16_t temp_in[8], temp_out[8]; |
209 | 209 |
210 // First transform rows | 210 // First transform rows |
211 for (i = 0; i < 8; ++i) { | 211 for (i = 0; i < 8; ++i) { |
212 idct8_1d(input, outptr); | 212 idct8_1d(input, outptr); |
213 input += 8; | 213 input += 8; |
214 outptr += 8; | 214 outptr += 8; |
215 } | 215 } |
216 | 216 |
217 // Then transform columns | 217 // Then transform columns |
218 for (i = 0; i < 8; ++i) { | 218 for (i = 0; i < 8; ++i) { |
219 for (j = 0; j < 8; ++j) | 219 for (j = 0; j < 8; ++j) |
220 temp_in[j] = out[j * 8 + i]; | 220 temp_in[j] = out[j * 8 + i]; |
221 idct8_1d(temp_in, temp_out); | 221 idct8_1d(temp_in, temp_out); |
222 for (j = 0; j < 8; ++j) | 222 for (j = 0; j < 8; ++j) |
223 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 223 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) |
224 + dest[j * dest_stride + i]); | 224 + dest[j * stride + i]); |
225 } | 225 } |
226 } | 226 } |
227 | 227 |
228 void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 228 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) { |
229 int i, j; | 229 int i, j; |
230 int a1; | 230 int a1; |
231 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); | 231 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
232 out = dct_const_round_shift(out * cospi_16_64); | 232 out = dct_const_round_shift(out * cospi_16_64); |
233 a1 = ROUND_POWER_OF_TWO(out, 5); | 233 a1 = ROUND_POWER_OF_TWO(out, 5); |
234 for (j = 0; j < 8; ++j) { | 234 for (j = 0; j < 8; ++j) { |
235 for (i = 0; i < 8; ++i) | 235 for (i = 0; i < 8; ++i) |
236 dest[i] = clip_pixel(dest[i] + a1); | 236 dest[i] = clip_pixel(dest[i] + a1); |
237 dest += dest_stride; | 237 dest += stride; |
238 } | 238 } |
239 } | 239 } |
240 | 240 |
241 static void iadst4_1d(int16_t *input, int16_t *output) { | 241 static void iadst4_1d(const int16_t *input, int16_t *output) { |
242 int s0, s1, s2, s3, s4, s5, s6, s7; | 242 int s0, s1, s2, s3, s4, s5, s6, s7; |
243 | 243 |
244 int x0 = input[0]; | 244 int x0 = input[0]; |
245 int x1 = input[1]; | 245 int x1 = input[1]; |
246 int x2 = input[2]; | 246 int x2 = input[2]; |
247 int x3 = input[3]; | 247 int x3 = input[3]; |
248 | 248 |
249 if (!(x0 | x1 | x2 | x3)) { | 249 if (!(x0 | x1 | x2 | x3)) { |
250 output[0] = output[1] = output[2] = output[3] = 0; | 250 output[0] = output[1] = output[2] = output[3] = 0; |
251 return; | 251 return; |
(...skipping 21 matching lines...) |
273 // 1-D transform scaling factor is sqrt(2). | 273 // 1-D transform scaling factor is sqrt(2). |
274 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 274 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
275 // + 1b (addition) = 29b. | 275 // + 1b (addition) = 29b. |
276 // Hence the output bit depth is 15b. | 276 // Hence the output bit depth is 15b. |
277 output[0] = dct_const_round_shift(s0); | 277 output[0] = dct_const_round_shift(s0); |
278 output[1] = dct_const_round_shift(s1); | 278 output[1] = dct_const_round_shift(s1); |
279 output[2] = dct_const_round_shift(s2); | 279 output[2] = dct_const_round_shift(s2); |
280 output[3] = dct_const_round_shift(s3); | 280 output[3] = dct_const_round_shift(s3); |
281 } | 281 } |
282 | 282 |
283 void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, | 283 void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride, |
284 int tx_type) { | 284 int tx_type) { |
285 const transform_2d IHT_4[] = { | 285 const transform_2d IHT_4[] = { |
286 { vp9_idct4_1d, vp9_idct4_1d }, // DCT_DCT = 0 | 286 { idct4_1d, idct4_1d }, // DCT_DCT = 0 |
287 { iadst4_1d, vp9_idct4_1d }, // ADST_DCT = 1 | 287 { iadst4_1d, idct4_1d }, // ADST_DCT = 1 |
288 { vp9_idct4_1d, iadst4_1d }, // DCT_ADST = 2 | 288 { idct4_1d, iadst4_1d }, // DCT_ADST = 2 |
289 { iadst4_1d, iadst4_1d } // ADST_ADST = 3 | 289 { iadst4_1d, iadst4_1d } // ADST_ADST = 3 |
290 }; | 290 }; |
291 | 291 |
292 int i, j; | 292 int i, j; |
293 int16_t out[4 * 4]; | 293 int16_t out[4 * 4]; |
294 int16_t *outptr = out; | 294 int16_t *outptr = out; |
295 int16_t temp_in[4], temp_out[4]; | 295 int16_t temp_in[4], temp_out[4]; |
296 | 296 |
297 // inverse transform row vectors | 297 // inverse transform row vectors |
298 for (i = 0; i < 4; ++i) { | 298 for (i = 0; i < 4; ++i) { |
299 IHT_4[tx_type].rows(input, outptr); | 299 IHT_4[tx_type].rows(input, outptr); |
300 input += 4; | 300 input += 4; |
301 outptr += 4; | 301 outptr += 4; |
302 } | 302 } |
303 | 303 |
304 // inverse transform column vectors | 304 // inverse transform column vectors |
305 for (i = 0; i < 4; ++i) { | 305 for (i = 0; i < 4; ++i) { |
306 for (j = 0; j < 4; ++j) | 306 for (j = 0; j < 4; ++j) |
307 temp_in[j] = out[j * 4 + i]; | 307 temp_in[j] = out[j * 4 + i]; |
308 IHT_4[tx_type].cols(temp_in, temp_out); | 308 IHT_4[tx_type].cols(temp_in, temp_out); |
309 for (j = 0; j < 4; ++j) | 309 for (j = 0; j < 4; ++j) |
310 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 310 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) |
311 + dest[j * dest_stride + i]); | 311 + dest[j * stride + i]); |
312 } | 312 } |
313 } | 313 } |
314 static void iadst8_1d(int16_t *input, int16_t *output) { | 314 static void iadst8_1d(const int16_t *input, int16_t *output) { |
315 int s0, s1, s2, s3, s4, s5, s6, s7; | 315 int s0, s1, s2, s3, s4, s5, s6, s7; |
316 | 316 |
317 int x0 = input[7]; | 317 int x0 = input[7]; |
318 int x1 = input[0]; | 318 int x1 = input[0]; |
319 int x2 = input[5]; | 319 int x2 = input[5]; |
320 int x3 = input[2]; | 320 int x3 = input[2]; |
321 int x4 = input[3]; | 321 int x4 = input[3]; |
322 int x5 = input[4]; | 322 int x5 = input[4]; |
323 int x6 = input[1]; | 323 int x6 = input[1]; |
324 int x7 = input[6]; | 324 int x7 = input[6]; |
(...skipping 63 matching lines...) |
388 output[7] = -x1; | 388 output[7] = -x1; |
389 } | 389 } |
390 | 390 |
391 static const transform_2d IHT_8[] = { | 391 static const transform_2d IHT_8[] = { |
392 { idct8_1d, idct8_1d }, // DCT_DCT = 0 | 392 { idct8_1d, idct8_1d }, // DCT_DCT = 0 |
393 { iadst8_1d, idct8_1d }, // ADST_DCT = 1 | 393 { iadst8_1d, idct8_1d }, // ADST_DCT = 1 |
394 { idct8_1d, iadst8_1d }, // DCT_ADST = 2 | 394 { idct8_1d, iadst8_1d }, // DCT_ADST = 2 |
395 { iadst8_1d, iadst8_1d } // ADST_ADST = 3 | 395 { iadst8_1d, iadst8_1d } // ADST_ADST = 3 |
396 }; | 396 }; |
397 | 397 |
398 void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride, | 398 void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride, |
399 int tx_type) { | 399 int tx_type) { |
400 int i, j; | 400 int i, j; |
401 int16_t out[8 * 8]; | 401 int16_t out[8 * 8]; |
402 int16_t *outptr = out; | 402 int16_t *outptr = out; |
403 int16_t temp_in[8], temp_out[8]; | 403 int16_t temp_in[8], temp_out[8]; |
404 const transform_2d ht = IHT_8[tx_type]; | 404 const transform_2d ht = IHT_8[tx_type]; |
405 | 405 |
406 // inverse transform row vectors | 406 // inverse transform row vectors |
407 for (i = 0; i < 8; ++i) { | 407 for (i = 0; i < 8; ++i) { |
408 ht.rows(input, outptr); | 408 ht.rows(input, outptr); |
409 input += 8; | 409 input += 8; |
410 outptr += 8; | 410 outptr += 8; |
411 } | 411 } |
412 | 412 |
413 // inverse transform column vectors | 413 // inverse transform column vectors |
414 for (i = 0; i < 8; ++i) { | 414 for (i = 0; i < 8; ++i) { |
415 for (j = 0; j < 8; ++j) | 415 for (j = 0; j < 8; ++j) |
416 temp_in[j] = out[j * 8 + i]; | 416 temp_in[j] = out[j * 8 + i]; |
417 ht.cols(temp_in, temp_out); | 417 ht.cols(temp_in, temp_out); |
418 for (j = 0; j < 8; ++j) | 418 for (j = 0; j < 8; ++j) |
419 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 419 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) |
420 + dest[j * dest_stride + i]); } | 420 + dest[j * stride + i]); |
| 421 } |
421 } | 422 } |
422 | 423 |
423 void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, | 424 void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) { |
424 int dest_stride) { | |
425 int16_t out[8 * 8] = { 0 }; | 425 int16_t out[8 * 8] = { 0 }; |
426 int16_t *outptr = out; | 426 int16_t *outptr = out; |
427 int i, j; | 427 int i, j; |
428 int16_t temp_in[8], temp_out[8]; | 428 int16_t temp_in[8], temp_out[8]; |
429 | 429 |
430 // First transform rows | 430 // First transform rows |
431 // only the first 4 rows have non-zero coefs | 431 // only the first 4 rows have non-zero coefs |
432 for (i = 0; i < 4; ++i) { | 432 for (i = 0; i < 4; ++i) { |
433 idct8_1d(input, outptr); | 433 idct8_1d(input, outptr); |
434 input += 8; | 434 input += 8; |
435 outptr += 8; | 435 outptr += 8; |
436 } | 436 } |
437 | 437 |
438 // Then transform columns | 438 // Then transform columns |
439 for (i = 0; i < 8; ++i) { | 439 for (i = 0; i < 8; ++i) { |
440 for (j = 0; j < 8; ++j) | 440 for (j = 0; j < 8; ++j) |
441 temp_in[j] = out[j * 8 + i]; | 441 temp_in[j] = out[j * 8 + i]; |
442 idct8_1d(temp_in, temp_out); | 442 idct8_1d(temp_in, temp_out); |
443 for (j = 0; j < 8; ++j) | 443 for (j = 0; j < 8; ++j) |
444 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 444 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) |
445 + dest[j * dest_stride + i]); | 445 + dest[j * stride + i]); |
446 } | 446 } |
447 } | 447 } |
448 | 448 |
449 static void idct16_1d(int16_t *input, int16_t *output) { | 449 static void idct16_1d(const int16_t *input, int16_t *output) { |
450 int16_t step1[16], step2[16]; | 450 int16_t step1[16], step2[16]; |
451 int temp1, temp2; | 451 int temp1, temp2; |
452 | 452 |
453 // stage 1 | 453 // stage 1 |
454 step1[0] = input[0/2]; | 454 step1[0] = input[0/2]; |
455 step1[1] = input[16/2]; | 455 step1[1] = input[16/2]; |
456 step1[2] = input[8/2]; | 456 step1[2] = input[8/2]; |
457 step1[3] = input[24/2]; | 457 step1[3] = input[24/2]; |
458 step1[4] = input[4/2]; | 458 step1[4] = input[4/2]; |
459 step1[5] = input[20/2]; | 459 step1[5] = input[20/2]; |
(...skipping 144 matching lines...) |
604 output[8] = step2[7] - step2[8]; | 604 output[8] = step2[7] - step2[8]; |
605 output[9] = step2[6] - step2[9]; | 605 output[9] = step2[6] - step2[9]; |
606 output[10] = step2[5] - step2[10]; | 606 output[10] = step2[5] - step2[10]; |
607 output[11] = step2[4] - step2[11]; | 607 output[11] = step2[4] - step2[11]; |
608 output[12] = step2[3] - step2[12]; | 608 output[12] = step2[3] - step2[12]; |
609 output[13] = step2[2] - step2[13]; | 609 output[13] = step2[2] - step2[13]; |
610 output[14] = step2[1] - step2[14]; | 610 output[14] = step2[1] - step2[14]; |
611 output[15] = step2[0] - step2[15]; | 611 output[15] = step2[0] - step2[15]; |
612 } | 612 } |
613 | 613 |
614 void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 614 void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) { |
615 int16_t out[16 * 16]; | 615 int16_t out[16 * 16]; |
616 int16_t *outptr = out; | 616 int16_t *outptr = out; |
617 int i, j; | 617 int i, j; |
618 int16_t temp_in[16], temp_out[16]; | 618 int16_t temp_in[16], temp_out[16]; |
619 | 619 |
620 // First transform rows | 620 // First transform rows |
621 for (i = 0; i < 16; ++i) { | 621 for (i = 0; i < 16; ++i) { |
622 idct16_1d(input, outptr); | 622 idct16_1d(input, outptr); |
623 input += 16; | 623 input += 16; |
624 outptr += 16; | 624 outptr += 16; |
625 } | 625 } |
626 | 626 |
627 // Then transform columns | 627 // Then transform columns |
628 for (i = 0; i < 16; ++i) { | 628 for (i = 0; i < 16; ++i) { |
629 for (j = 0; j < 16; ++j) | 629 for (j = 0; j < 16; ++j) |
630 temp_in[j] = out[j * 16 + i]; | 630 temp_in[j] = out[j * 16 + i]; |
631 idct16_1d(temp_in, temp_out); | 631 idct16_1d(temp_in, temp_out); |
632 for (j = 0; j < 16; ++j) | 632 for (j = 0; j < 16; ++j) |
633 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 633 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
634 + dest[j * dest_stride + i]); | 634 + dest[j * stride + i]); |
635 } | 635 } |
636 } | 636 } |
637 | 637 |
638 void iadst16_1d(int16_t *input, int16_t *output) { | 638 static void iadst16_1d(const int16_t *input, int16_t *output) { |
639 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; | 639 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; |
640 | 640 |
641 int x0 = input[15]; | 641 int x0 = input[15]; |
642 int x1 = input[0]; | 642 int x1 = input[0]; |
643 int x2 = input[13]; | 643 int x2 = input[13]; |
644 int x3 = input[2]; | 644 int x3 = input[2]; |
645 int x4 = input[11]; | 645 int x4 = input[11]; |
646 int x5 = input[4]; | 646 int x5 = input[4]; |
647 int x6 = input[9]; | 647 int x6 = input[9]; |
648 int x7 = input[6]; | 648 int x7 = input[6]; |
(...skipping 157 matching lines...) |
806 output[15] = -x1; | 806 output[15] = -x1; |
807 } | 807 } |
808 | 808 |
809 static const transform_2d IHT_16[] = { | 809 static const transform_2d IHT_16[] = { |
810 { idct16_1d, idct16_1d }, // DCT_DCT = 0 | 810 { idct16_1d, idct16_1d }, // DCT_DCT = 0 |
811 { iadst16_1d, idct16_1d }, // ADST_DCT = 1 | 811 { iadst16_1d, idct16_1d }, // ADST_DCT = 1 |
812 { idct16_1d, iadst16_1d }, // DCT_ADST = 2 | 812 { idct16_1d, iadst16_1d }, // DCT_ADST = 2 |
813 { iadst16_1d, iadst16_1d } // ADST_ADST = 3 | 813 { iadst16_1d, iadst16_1d } // ADST_ADST = 3 |
814 }; | 814 }; |
815 | 815 |
816 void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride, | 816 void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride, |
817 int tx_type) { | 817 int tx_type) { |
818 int i, j; | 818 int i, j; |
819 int16_t out[16 * 16]; | 819 int16_t out[16 * 16]; |
820 int16_t *outptr = out; | 820 int16_t *outptr = out; |
821 int16_t temp_in[16], temp_out[16]; | 821 int16_t temp_in[16], temp_out[16]; |
822 const transform_2d ht = IHT_16[tx_type]; | 822 const transform_2d ht = IHT_16[tx_type]; |
823 | 823 |
824 // Rows | 824 // Rows |
825 for (i = 0; i < 16; ++i) { | 825 for (i = 0; i < 16; ++i) { |
826 ht.rows(input, outptr); | 826 ht.rows(input, outptr); |
827 input += 16; | 827 input += 16; |
828 outptr += 16; | 828 outptr += 16; |
829 } | 829 } |
830 | 830 |
831 // Columns | 831 // Columns |
832 for (i = 0; i < 16; ++i) { | 832 for (i = 0; i < 16; ++i) { |
833 for (j = 0; j < 16; ++j) | 833 for (j = 0; j < 16; ++j) |
834 temp_in[j] = out[j * 16 + i]; | 834 temp_in[j] = out[j * 16 + i]; |
835 ht.cols(temp_in, temp_out); | 835 ht.cols(temp_in, temp_out); |
836 for (j = 0; j < 16; ++j) | 836 for (j = 0; j < 16; ++j) |
837 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 837 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
838 + dest[j * dest_stride + i]); } | 838 + dest[j * stride + i]); } |
839 } | 839 } |
840 | 840 |
841 void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, | 841 void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) { |
842 int dest_stride) { | |
843 int16_t out[16 * 16] = { 0 }; | 842 int16_t out[16 * 16] = { 0 }; |
844 int16_t *outptr = out; | 843 int16_t *outptr = out; |
845 int i, j; | 844 int i, j; |
846 int16_t temp_in[16], temp_out[16]; | 845 int16_t temp_in[16], temp_out[16]; |
847 | 846 |
848 // First transform rows. Since all non-zero dct coefficients are in | 847 // First transform rows. Since all non-zero dct coefficients are in |
849 // the upper-left 4x4 area, we only need to calculate the first 4 rows here. | 848 // the upper-left 4x4 area, we only need to calculate the first 4 rows here. |
850 for (i = 0; i < 4; ++i) { | 849 for (i = 0; i < 4; ++i) { |
851 idct16_1d(input, outptr); | 850 idct16_1d(input, outptr); |
852 input += 16; | 851 input += 16; |
853 outptr += 16; | 852 outptr += 16; |
854 } | 853 } |
855 | 854 |
856 // Then transform columns | 855 // Then transform columns |
857 for (i = 0; i < 16; ++i) { | 856 for (i = 0; i < 16; ++i) { |
858 for (j = 0; j < 16; ++j) | 857 for (j = 0; j < 16; ++j) |
859 temp_in[j] = out[j*16 + i]; | 858 temp_in[j] = out[j*16 + i]; |
860 idct16_1d(temp_in, temp_out); | 859 idct16_1d(temp_in, temp_out); |
861 for (j = 0; j < 16; ++j) | 860 for (j = 0; j < 16; ++j) |
862 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 861 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
863 + dest[j * dest_stride + i]); | 862 + dest[j * stride + i]); |
864 } | 863 } |
865 } | 864 } |
866 | 865 |
867 void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, | 866 void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) { |
868 int dest_stride) { | |
869 int i, j; | 867 int i, j; |
870 int a1; | 868 int a1; |
871 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); | 869 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
872 out = dct_const_round_shift(out * cospi_16_64); | 870 out = dct_const_round_shift(out * cospi_16_64); |
873 a1 = ROUND_POWER_OF_TWO(out, 6); | 871 a1 = ROUND_POWER_OF_TWO(out, 6); |
874 for (j = 0; j < 16; ++j) { | 872 for (j = 0; j < 16; ++j) { |
875 for (i = 0; i < 16; ++i) | 873 for (i = 0; i < 16; ++i) |
876 dest[i] = clip_pixel(dest[i] + a1); | 874 dest[i] = clip_pixel(dest[i] + a1); |
877 dest += dest_stride; | 875 dest += stride; |
878 } | 876 } |
879 } | 877 } |
880 | 878 |
881 static void idct32_1d(int16_t *input, int16_t *output) { | 879 static void idct32_1d(const int16_t *input, int16_t *output) { |
882 int16_t step1[32], step2[32]; | 880 int16_t step1[32], step2[32]; |
883 int temp1, temp2; | 881 int temp1, temp2; |
884 | 882 |
885 // stage 1 | 883 // stage 1 |
886 step1[0] = input[0]; | 884 step1[0] = input[0]; |
887 step1[1] = input[16]; | 885 step1[1] = input[16]; |
888 step1[2] = input[8]; | 886 step1[2] = input[8]; |
889 step1[3] = input[24]; | 887 step1[3] = input[24]; |
890 step1[4] = input[4]; | 888 step1[4] = input[4]; |
891 step1[5] = input[20]; | 889 step1[5] = input[20]; |
(...skipping 346 matching lines...) |
1238 output[24] = step1[7] - step1[24]; | 1236 output[24] = step1[7] - step1[24]; |
1239 output[25] = step1[6] - step1[25]; | 1237 output[25] = step1[6] - step1[25]; |
1240 output[26] = step1[5] - step1[26]; | 1238 output[26] = step1[5] - step1[26]; |
1241 output[27] = step1[4] - step1[27]; | 1239 output[27] = step1[4] - step1[27]; |
1242 output[28] = step1[3] - step1[28]; | 1240 output[28] = step1[3] - step1[28]; |
1243 output[29] = step1[2] - step1[29]; | 1241 output[29] = step1[2] - step1[29]; |
1244 output[30] = step1[1] - step1[30]; | 1242 output[30] = step1[1] - step1[30]; |
1245 output[31] = step1[0] - step1[31]; | 1243 output[31] = step1[0] - step1[31]; |
1246 } | 1244 } |
1247 | 1245 |
1248 void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 1246 void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) { |
1249 int16_t out[32 * 32]; | 1247 int16_t out[32 * 32]; |
1250 int16_t *outptr = out; | 1248 int16_t *outptr = out; |
1251 int i, j; | 1249 int i, j; |
1252 int16_t temp_in[32], temp_out[32]; | 1250 int16_t temp_in[32], temp_out[32]; |
1253 | 1251 |
1254 // Rows | 1252 // Rows |
1255 for (i = 0; i < 32; ++i) { | 1253 for (i = 0; i < 32; ++i) { |
| 1254 int16_t zero_coeff[16]; |
| 1255 for (j = 0; j < 16; ++j) |
| 1256 zero_coeff[j] = input[2 * j] | input[2 * j + 1]; |
| 1257 for (j = 0; j < 8; ++j) |
| 1258 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; |
| 1259 for (j = 0; j < 4; ++j) |
| 1260 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; |
| 1261 for (j = 0; j < 2; ++j) |
| 1262 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; |
| 1263 |
| 1264 if (zero_coeff[0] | zero_coeff[1]) |
| 1265 idct32_1d(input, outptr); |
| 1266 else |
| 1267 vpx_memset(outptr, 0, sizeof(int16_t) * 32); |
| 1268 input += 32; |
| 1269 outptr += 32; |
| 1270 } |
| 1271 |
| 1272 // Columns |
| 1273 for (i = 0; i < 32; ++i) { |
| 1274 for (j = 0; j < 32; ++j) |
| 1275 temp_in[j] = out[j * 32 + i]; |
| 1276 idct32_1d(temp_in, temp_out); |
| 1277 for (j = 0; j < 32; ++j) |
| 1278 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
| 1279 + dest[j * stride + i]); |
| 1280 } |
| 1281 } |
| 1282 |
| 1283 void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 1284 int16_t out[32 * 32] = {0}; |
| 1285 int16_t *outptr = out; |
| 1286 int i, j; |
| 1287 int16_t temp_in[32], temp_out[32]; |
| 1288 |
| 1289 // Rows |
| 1290 // only the upper-left 8x8 has non-zero coeffs |
| 1291 for (i = 0; i < 8; ++i) { |
1256 idct32_1d(input, outptr); | 1292 idct32_1d(input, outptr); |
1257 input += 32; | 1293 input += 32; |
1258 outptr += 32; | 1294 outptr += 32; |
1259 } | 1295 } |
1260 | 1296 |
1261 // Columns | 1297 // Columns |
1262 for (i = 0; i < 32; ++i) { | 1298 for (i = 0; i < 32; ++i) { |
1263 for (j = 0; j < 32; ++j) | 1299 for (j = 0; j < 32; ++j) |
1264 temp_in[j] = out[j * 32 + i]; | 1300 temp_in[j] = out[j * 32 + i]; |
1265 idct32_1d(temp_in, temp_out); | 1301 idct32_1d(temp_in, temp_out); |
1266 for (j = 0; j < 32; ++j) | 1302 for (j = 0; j < 32; ++j) |
1267 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 1303 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
1268 + dest[j * dest_stride + i]); | 1304 + dest[j * stride + i]); |
1269 } | 1305 } |
1270 } | 1306 } |
1271 | 1307 |
1272 void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) { | 1308 void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 1309 int i, j; |
| 1310 int a1; |
| 1311 |
1273 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); | 1312 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
1274 out = dct_const_round_shift(out * cospi_16_64); | 1313 out = dct_const_round_shift(out * cospi_16_64); |
1275 output[0] = ROUND_POWER_OF_TWO(out, 6); | 1314 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 1315 |
| 1316 for (j = 0; j < 32; ++j) { |
| 1317 for (i = 0; i < 32; ++i) |
| 1318 dest[i] = clip_pixel(dest[i] + a1); |
| 1319 dest += stride; |
| 1320 } |
1276 } | 1321 } |
| 1322 |
| 1323 // idct |
| 1324 void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) { |
| 1325 if (eob > 1) |
| 1326 vp9_idct4x4_16_add(input, dest, stride); |
| 1327 else |
| 1328 vp9_idct4x4_1_add(input, dest, stride); |
| 1329 } |
| 1330 |
| 1331 |
| 1332 void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) { |
| 1333 if (eob > 1) |
| 1334 vp9_iwht4x4_16_add(input, dest, stride); |
| 1335 else |
| 1336 vp9_iwht4x4_1_add(input, dest, stride); |
| 1337 } |
| 1338 |
| 1339 void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) { |
| 1340 // If dc is 1, then input[0] is the reconstructed value and does not need |
| 1341 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >= 1. |
| 1342 |
| 1343 // The calculation can be simplified if there are not many non-zero dct |
| 1344 // coefficients. Use eobs to decide what to do. |
| 1345 // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. |
| 1346 // Combine that with code here. |
| 1347 if (eob) { |
| 1348 if (eob == 1) |
| 1349 // DC only DCT coefficient |
| 1350 vp9_idct8x8_1_add(input, dest, stride); |
| 1351 else if (eob <= 10) |
| 1352 vp9_idct8x8_10_add(input, dest, stride); |
| 1353 else |
| 1354 vp9_idct8x8_64_add(input, dest, stride); |
| 1355 } |
| 1356 } |
| 1357 |
| 1358 void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, |
| 1359 int eob) { |
| 1360 /* The calculation can be simplified if there are not many non-zero dct |
| 1361 * coefficients. Use eobs to separate different cases. */ |
| 1362 if (eob) { |
| 1363 if (eob == 1) |
| 1364 /* DC only DCT coefficient. */ |
| 1365 vp9_idct16x16_1_add(input, dest, stride); |
| 1366 else if (eob <= 10) |
| 1367 vp9_idct16x16_10_add(input, dest, stride); |
| 1368 else |
| 1369 vp9_idct16x16_256_add(input, dest, stride); |
| 1370 } |
| 1371 } |
| 1372 |
| 1373 void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride, |
| 1374 int eob) { |
| 1375 if (eob) { |
| 1376 if (eob == 1) |
| 1377 vp9_idct32x32_1_add(input, dest, stride); |
| 1378 else if (eob <= 34) |
| 1379 // non-zero coeff only in upper-left 8x8 |
| 1380 vp9_idct32x32_34_add(input, dest, stride); |
| 1381 else |
| 1382 vp9_idct32x32_1024_add(input, dest, stride); |
| 1383 } |
| 1384 } |
| 1385 |
| 1386 // iht |
| 1387 void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
| 1388 int stride, int eob) { |
| 1389 if (tx_type == DCT_DCT) |
| 1390 vp9_idct4x4_add(input, dest, stride, eob); |
| 1391 else |
| 1392 vp9_iht4x4_16_add(input, dest, stride, tx_type); |
| 1393 } |
| 1394 |
| 1395 void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
| 1396 int stride, int eob) { |
| 1397 if (tx_type == DCT_DCT) { |
| 1398 vp9_idct8x8_add(input, dest, stride, eob); |
| 1399 } else { |
| 1400 if (eob > 0) { |
| 1401 vp9_iht8x8_64_add(input, dest, stride, tx_type); |
| 1402 } |
| 1403 } |
| 1404 } |
| 1405 |
| 1406 void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
| 1407 int stride, int eob) { |
| 1408 if (tx_type == DCT_DCT) { |
| 1409 vp9_idct16x16_add(input, dest, stride, eob); |
| 1410 } else { |
| 1411 if (eob > 0) { |
| 1412 vp9_iht16x16_256_add(input, dest, stride, tx_type); |
| 1413 } |
| 1414 } |
| 1415 } |
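
For context (not part of the patch): the eob-aware wrappers added at the end of the file let the decoder pick the cheapest inverse transform per block from the end-of-block count. A minimal caller sketch in C follows; the helper name and the header locations are illustrative assumptions, not taken from this change.

#include "vp9/common/vp9_idct.h"    // assumed home of the vp9_*_add wrapper prototypes
#include "vp9/common/vp9_blockd.h"  // assumed home of TX_TYPE

// Hypothetical helper: add the inverse transform of one 8x8 block of
// dequantized coefficients into the prediction at `dst`.
static void reconstruct_8x8(TX_TYPE tx_type, const int16_t *dqcoeff,
                            uint8_t *dst, int stride, int eob) {
  // vp9_iht8x8_add() falls back to vp9_idct8x8_add() for DCT_DCT, which in
  // turn selects the 1-, 10-, or 64-coefficient path based on eob.
  vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
}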