Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Side by Side Diff: source/libvpx/vp9/encoder/vp9_dct.c

Issue 54923004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_dct.h ('k') | source/libvpx/vp9/encoder/vp9_encodeframe.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11
12 #include <assert.h> 11 #include <assert.h>
13 #include <math.h> 12 #include <math.h>
13
14 #include "./vpx_config.h" 14 #include "./vpx_config.h"
15 #include "vp9/common/vp9_systemdependent.h" 15 #include "./vp9_rtcd.h"
16 16
17 #include "vp9/common/vp9_blockd.h" 17 #include "vp9/common/vp9_blockd.h"
18 #include "vp9/common/vp9_idct.h" 18 #include "vp9/common/vp9_idct.h"
19 #include "vp9/common/vp9_systemdependent.h"
19 20
20 static void fdct4_1d(int16_t *input, int16_t *output) { 21 #include "vp9/encoder/vp9_dct.h"
22
23 static void fdct4(const int16_t *input, int16_t *output) {
21 int16_t step[4]; 24 int16_t step[4];
22 int temp1, temp2; 25 int temp1, temp2;
23 26
24 step[0] = input[0] + input[3]; 27 step[0] = input[0] + input[3];
25 step[1] = input[1] + input[2]; 28 step[1] = input[1] + input[2];
26 step[2] = input[1] - input[2]; 29 step[2] = input[1] - input[2];
27 step[3] = input[0] - input[3]; 30 step[3] = input[0] - input[3];
28 31
29 temp1 = (step[0] + step[1]) * cospi_16_64; 32 temp1 = (step[0] + step[1]) * cospi_16_64;
30 temp2 = (step[0] - step[1]) * cospi_16_64; 33 temp2 = (step[0] - step[1]) * cospi_16_64;
31 output[0] = dct_const_round_shift(temp1); 34 output[0] = dct_const_round_shift(temp1);
32 output[2] = dct_const_round_shift(temp2); 35 output[2] = dct_const_round_shift(temp2);
33 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; 36 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
34 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; 37 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
35 output[1] = dct_const_round_shift(temp1); 38 output[1] = dct_const_round_shift(temp1);
36 output[3] = dct_const_round_shift(temp2); 39 output[3] = dct_const_round_shift(temp2);
37 } 40 }
38 41
39 void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) { 42 void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
40 // The 2D transform is done with two passes which are actually pretty 43 // The 2D transform is done with two passes which are actually pretty
41 // similar. In the first one, we transform the columns and transpose 44 // similar. In the first one, we transform the columns and transpose
42 // the results. In the second one, we transform the rows. To achieve that, 45 // the results. In the second one, we transform the rows. To achieve that,
43 // as the first pass results are transposed, we transpose the columns (that 46 // as the first pass results are transposed, we transpose the columns (that
44 // is the transposed rows) and transpose the results (so that it goes back 47 // is the transposed rows) and transpose the results (so that it goes back
45 // in normal/row positions). 48 // in normal/row positions).
46 const int stride = pitch >> 1;
47 int pass; 49 int pass;
48 // We need an intermediate buffer between passes. 50 // We need an intermediate buffer between passes.
49 int16_t intermediate[4 * 4]; 51 int16_t intermediate[4 * 4];
50 int16_t *in = input; 52 const int16_t *in = input;
51 int16_t *out = intermediate; 53 int16_t *out = intermediate;
52 // Do the two transform/transpose passes 54 // Do the two transform/transpose passes
53 for (pass = 0; pass < 2; ++pass) { 55 for (pass = 0; pass < 2; ++pass) {
54 /*canbe16*/ int input[4]; 56 /*canbe16*/ int input[4];
55 /*canbe16*/ int step[4]; 57 /*canbe16*/ int step[4];
56 /*needs32*/ int temp1, temp2; 58 /*needs32*/ int temp1, temp2;
57 int i; 59 int i;
58 for (i = 0; i < 4; ++i) { 60 for (i = 0; i < 4; ++i) {
59 // Load inputs. 61 // Load inputs.
60 if (0 == pass) { 62 if (0 == pass) {
61 input[0] = in[0 * stride] << 4; 63 input[0] = in[0 * stride] * 16;
62 input[1] = in[1 * stride] << 4; 64 input[1] = in[1 * stride] * 16;
63 input[2] = in[2 * stride] << 4; 65 input[2] = in[2 * stride] * 16;
64 input[3] = in[3 * stride] << 4; 66 input[3] = in[3 * stride] * 16;
65 if (i == 0 && input[0]) { 67 if (i == 0 && input[0]) {
66 input[0] += 1; 68 input[0] += 1;
67 } 69 }
68 } else { 70 } else {
69 input[0] = in[0 * 4]; 71 input[0] = in[0 * 4];
70 input[1] = in[1 * 4]; 72 input[1] = in[1 * 4];
71 input[2] = in[2 * 4]; 73 input[2] = in[2 * 4];
72 input[3] = in[3 * 4]; 74 input[3] = in[3 * 4];
73 } 75 }
74 // Transform. 76 // Transform.
(...skipping 20 matching lines...) Expand all
95 97
96 { 98 {
97 int i, j; 99 int i, j;
98 for (i = 0; i < 4; ++i) { 100 for (i = 0; i < 4; ++i) {
99 for (j = 0; j < 4; ++j) 101 for (j = 0; j < 4; ++j)
100 output[j + i * 4] = (output[j + i * 4] + 1) >> 2; 102 output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
101 } 103 }
102 } 104 }
103 } 105 }
104 106
105 static void fadst4_1d(int16_t *input, int16_t *output) { 107 static void fadst4(const int16_t *input, int16_t *output) {
106 int x0, x1, x2, x3; 108 int x0, x1, x2, x3;
107 int s0, s1, s2, s3, s4, s5, s6, s7; 109 int s0, s1, s2, s3, s4, s5, s6, s7;
108 110
109 x0 = input[0]; 111 x0 = input[0];
110 x1 = input[1]; 112 x1 = input[1];
111 x2 = input[2]; 113 x2 = input[2];
112 x3 = input[3]; 114 x3 = input[3];
113 115
114 if (!(x0 | x1 | x2 | x3)) { 116 if (!(x0 | x1 | x2 | x3)) {
115 output[0] = output[1] = output[2] = output[3] = 0; 117 output[0] = output[1] = output[2] = output[3] = 0;
(...skipping 20 matching lines...) Expand all
136 s3 = x2 - x0 + x3; 138 s3 = x2 - x0 + x3;
137 139
138 // 1-D transform scaling factor is sqrt(2). 140 // 1-D transform scaling factor is sqrt(2).
139 output[0] = dct_const_round_shift(s0); 141 output[0] = dct_const_round_shift(s0);
140 output[1] = dct_const_round_shift(s1); 142 output[1] = dct_const_round_shift(s1);
141 output[2] = dct_const_round_shift(s2); 143 output[2] = dct_const_round_shift(s2);
142 output[3] = dct_const_round_shift(s3); 144 output[3] = dct_const_round_shift(s3);
143 } 145 }
144 146
145 static const transform_2d FHT_4[] = { 147 static const transform_2d FHT_4[] = {
146 { fdct4_1d, fdct4_1d }, // DCT_DCT = 0 148 { fdct4, fdct4 }, // DCT_DCT = 0
147 { fadst4_1d, fdct4_1d }, // ADST_DCT = 1 149 { fadst4, fdct4 }, // ADST_DCT = 1
148 { fdct4_1d, fadst4_1d }, // DCT_ADST = 2 150 { fdct4, fadst4 }, // DCT_ADST = 2
149 { fadst4_1d, fadst4_1d } // ADST_ADST = 3 151 { fadst4, fadst4 } // ADST_ADST = 3
150 }; 152 };
151 153
152 void vp9_short_fht4x4_c(int16_t *input, int16_t *output, 154 void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,
153 int pitch, TX_TYPE tx_type) { 155 int stride, int tx_type) {
154 int16_t out[4 * 4]; 156 int16_t out[4 * 4];
155 int16_t *outptr = &out[0]; 157 int16_t *outptr = &out[0];
156 int i, j; 158 int i, j;
157 int16_t temp_in[4], temp_out[4]; 159 int16_t temp_in[4], temp_out[4];
158 const transform_2d ht = FHT_4[tx_type]; 160 const transform_2d ht = FHT_4[tx_type];
159 161
160 // Columns 162 // Columns
161 for (i = 0; i < 4; ++i) { 163 for (i = 0; i < 4; ++i) {
162 for (j = 0; j < 4; ++j) 164 for (j = 0; j < 4; ++j)
163 temp_in[j] = input[j * pitch + i] << 4; 165 temp_in[j] = input[j * stride + i] * 16;
164 if (i == 0 && temp_in[0]) 166 if (i == 0 && temp_in[0])
165 temp_in[0] += 1; 167 temp_in[0] += 1;
166 ht.cols(temp_in, temp_out); 168 ht.cols(temp_in, temp_out);
167 for (j = 0; j < 4; ++j) 169 for (j = 0; j < 4; ++j)
168 outptr[j * 4 + i] = temp_out[j]; 170 outptr[j * 4 + i] = temp_out[j];
169 } 171 }
170 172
171 // Rows 173 // Rows
172 for (i = 0; i < 4; ++i) { 174 for (i = 0; i < 4; ++i) {
173 for (j = 0; j < 4; ++j) 175 for (j = 0; j < 4; ++j)
174 temp_in[j] = out[j + i * 4]; 176 temp_in[j] = out[j + i * 4];
175 ht.rows(temp_in, temp_out); 177 ht.rows(temp_in, temp_out);
176 for (j = 0; j < 4; ++j) 178 for (j = 0; j < 4; ++j)
177 output[j + i * 4] = (temp_out[j] + 1) >> 2; 179 output[j + i * 4] = (temp_out[j] + 1) >> 2;
178 } 180 }
179 } 181 }
180 182
181 void vp9_short_fdct8x4_c(int16_t *input, int16_t *output, int pitch) { 183 static void fdct8(const int16_t *input, int16_t *output) {
182 vp9_short_fdct4x4_c(input, output, pitch);
183 vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
184 }
185
186 static void fdct8_1d(int16_t *input, int16_t *output) {
187 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; 184 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
188 /*needs32*/ int t0, t1, t2, t3; 185 /*needs32*/ int t0, t1, t2, t3;
189 /*canbe16*/ int x0, x1, x2, x3; 186 /*canbe16*/ int x0, x1, x2, x3;
190 187
191 // stage 1 188 // stage 1
192 s0 = input[0] + input[7]; 189 s0 = input[0] + input[7];
193 s1 = input[1] + input[6]; 190 s1 = input[1] + input[6];
194 s2 = input[2] + input[5]; 191 s2 = input[2] + input[5];
195 s3 = input[3] + input[4]; 192 s3 = input[3] + input[4];
196 s4 = input[3] - input[4]; 193 s4 = input[3] - input[4];
197 s5 = input[2] - input[5]; 194 s5 = input[2] - input[5];
198 s6 = input[1] - input[6]; 195 s6 = input[1] - input[6];
199 s7 = input[0] - input[7]; 196 s7 = input[0] - input[7];
200 197
201 // fdct4_1d(step, step); 198 // fdct4(step, step);
202 x0 = s0 + s3; 199 x0 = s0 + s3;
203 x1 = s1 + s2; 200 x1 = s1 + s2;
204 x2 = s1 - s2; 201 x2 = s1 - s2;
205 x3 = s0 - s3; 202 x3 = s0 - s3;
206 t0 = (x0 + x1) * cospi_16_64; 203 t0 = (x0 + x1) * cospi_16_64;
207 t1 = (x0 - x1) * cospi_16_64; 204 t1 = (x0 - x1) * cospi_16_64;
208 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; 205 t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
209 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; 206 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
210 output[0] = dct_const_round_shift(t0); 207 output[0] = dct_const_round_shift(t0);
211 output[2] = dct_const_round_shift(t2); 208 output[2] = dct_const_round_shift(t2);
(...skipping 16 matching lines...) Expand all
228 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; 225 t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
229 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; 226 t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
230 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; 227 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
231 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; 228 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
232 output[1] = dct_const_round_shift(t0); 229 output[1] = dct_const_round_shift(t0);
233 output[3] = dct_const_round_shift(t2); 230 output[3] = dct_const_round_shift(t2);
234 output[5] = dct_const_round_shift(t1); 231 output[5] = dct_const_round_shift(t1);
235 output[7] = dct_const_round_shift(t3); 232 output[7] = dct_const_round_shift(t3);
236 } 233 }
237 234
238 void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int pitch) { 235 void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
239 const int stride = pitch >> 1;
240 int i, j; 236 int i, j;
241 int16_t intermediate[64]; 237 int16_t intermediate[64];
242 238
243 // Transform columns 239 // Transform columns
244 { 240 {
245 int16_t *output = intermediate; 241 int16_t *output = intermediate;
246 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; 242 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
247 /*needs32*/ int t0, t1, t2, t3; 243 /*needs32*/ int t0, t1, t2, t3;
248 /*canbe16*/ int x0, x1, x2, x3; 244 /*canbe16*/ int x0, x1, x2, x3;
249 245
250 int i; 246 int i;
251 for (i = 0; i < 8; i++) { 247 for (i = 0; i < 8; i++) {
252 // stage 1 248 // stage 1
253 s0 = (input[0 * stride] + input[7 * stride]) << 2; 249 s0 = (input[0 * stride] + input[7 * stride]) * 4;
254 s1 = (input[1 * stride] + input[6 * stride]) << 2; 250 s1 = (input[1 * stride] + input[6 * stride]) * 4;
255 s2 = (input[2 * stride] + input[5 * stride]) << 2; 251 s2 = (input[2 * stride] + input[5 * stride]) * 4;
256 s3 = (input[3 * stride] + input[4 * stride]) << 2; 252 s3 = (input[3 * stride] + input[4 * stride]) * 4;
257 s4 = (input[3 * stride] - input[4 * stride]) << 2; 253 s4 = (input[3 * stride] - input[4 * stride]) * 4;
258 s5 = (input[2 * stride] - input[5 * stride]) << 2; 254 s5 = (input[2 * stride] - input[5 * stride]) * 4;
259 s6 = (input[1 * stride] - input[6 * stride]) << 2; 255 s6 = (input[1 * stride] - input[6 * stride]) * 4;
260 s7 = (input[0 * stride] - input[7 * stride]) << 2; 256 s7 = (input[0 * stride] - input[7 * stride]) * 4;
261 257
262 // fdct4_1d(step, step); 258 // fdct4(step, step);
263 x0 = s0 + s3; 259 x0 = s0 + s3;
264 x1 = s1 + s2; 260 x1 = s1 + s2;
265 x2 = s1 - s2; 261 x2 = s1 - s2;
266 x3 = s0 - s3; 262 x3 = s0 - s3;
267 t0 = (x0 + x1) * cospi_16_64; 263 t0 = (x0 + x1) * cospi_16_64;
268 t1 = (x0 - x1) * cospi_16_64; 264 t1 = (x0 - x1) * cospi_16_64;
269 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; 265 t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
270 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; 266 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
271 output[0 * 8] = dct_const_round_shift(t0); 267 output[0 * 8] = dct_const_round_shift(t0);
272 output[2 * 8] = dct_const_round_shift(t2); 268 output[2 * 8] = dct_const_round_shift(t2);
(...skipping 21 matching lines...) Expand all
294 output[3 * 8] = dct_const_round_shift(t2); 290 output[3 * 8] = dct_const_round_shift(t2);
295 output[5 * 8] = dct_const_round_shift(t1); 291 output[5 * 8] = dct_const_round_shift(t1);
296 output[7 * 8] = dct_const_round_shift(t3); 292 output[7 * 8] = dct_const_round_shift(t3);
297 input++; 293 input++;
298 output++; 294 output++;
299 } 295 }
300 } 296 }
301 297
302 // Rows 298 // Rows
303 for (i = 0; i < 8; ++i) { 299 for (i = 0; i < 8; ++i) {
304 fdct8_1d(&intermediate[i * 8], &final_output[i * 8]); 300 fdct8(&intermediate[i * 8], &final_output[i * 8]);
305 for (j = 0; j < 8; ++j) 301 for (j = 0; j < 8; ++j)
306 final_output[j + i * 8] /= 2; 302 final_output[j + i * 8] /= 2;
307 } 303 }
308 } 304 }
309 305
310 void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) { 306 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
311 // The 2D transform is done with two passes which are actually pretty 307 // The 2D transform is done with two passes which are actually pretty
312 // similar. In the first one, we transform the columns and transpose 308 // similar. In the first one, we transform the columns and transpose
313 // the results. In the second one, we transform the rows. To achieve that, 309 // the results. In the second one, we transform the rows. To achieve that,
314 // as the first pass results are transposed, we transpose the columns (that 310 // as the first pass results are transposed, we transpose the columns (that
315 // is the transposed rows) and transpose the results (so that it goes back 311 // is the transposed rows) and transpose the results (so that it goes back
316 // in normal/row positions). 312 // in normal/row positions).
317 const int stride = pitch >> 1;
318 int pass; 313 int pass;
319 // We need an intermediate buffer between passes. 314 // We need an intermediate buffer between passes.
320 int16_t intermediate[256]; 315 int16_t intermediate[256];
321 int16_t *in = input; 316 const int16_t *in = input;
322 int16_t *out = intermediate; 317 int16_t *out = intermediate;
323 // Do the two transform/transpose passes 318 // Do the two transform/transpose passes
324 for (pass = 0; pass < 2; ++pass) { 319 for (pass = 0; pass < 2; ++pass) {
325 /*canbe16*/ int step1[8]; 320 /*canbe16*/ int step1[8];
326 /*canbe16*/ int step2[8]; 321 /*canbe16*/ int step2[8];
327 /*canbe16*/ int step3[8]; 322 /*canbe16*/ int step3[8];
328 /*canbe16*/ int input[8]; 323 /*canbe16*/ int input[8];
329 /*needs32*/ int temp1, temp2; 324 /*needs32*/ int temp1, temp2;
330 int i; 325 int i;
331 for (i = 0; i < 16; i++) { 326 for (i = 0; i < 16; i++) {
332 if (0 == pass) { 327 if (0 == pass) {
333 // Calculate input for the first 8 results. 328 // Calculate input for the first 8 results.
334 input[0] = (in[0 * stride] + in[15 * stride]) << 2; 329 input[0] = (in[0 * stride] + in[15 * stride]) * 4;
335 input[1] = (in[1 * stride] + in[14 * stride]) << 2; 330 input[1] = (in[1 * stride] + in[14 * stride]) * 4;
336 input[2] = (in[2 * stride] + in[13 * stride]) << 2; 331 input[2] = (in[2 * stride] + in[13 * stride]) * 4;
337 input[3] = (in[3 * stride] + in[12 * stride]) << 2; 332 input[3] = (in[3 * stride] + in[12 * stride]) * 4;
338 input[4] = (in[4 * stride] + in[11 * stride]) << 2; 333 input[4] = (in[4 * stride] + in[11 * stride]) * 4;
339 input[5] = (in[5 * stride] + in[10 * stride]) << 2; 334 input[5] = (in[5 * stride] + in[10 * stride]) * 4;
340 input[6] = (in[6 * stride] + in[ 9 * stride]) << 2; 335 input[6] = (in[6 * stride] + in[ 9 * stride]) * 4;
341 input[7] = (in[7 * stride] + in[ 8 * stride]) << 2; 336 input[7] = (in[7 * stride] + in[ 8 * stride]) * 4;
342 // Calculate input for the next 8 results. 337 // Calculate input for the next 8 results.
343 step1[0] = (in[7 * stride] - in[ 8 * stride]) << 2; 338 step1[0] = (in[7 * stride] - in[ 8 * stride]) * 4;
344 step1[1] = (in[6 * stride] - in[ 9 * stride]) << 2; 339 step1[1] = (in[6 * stride] - in[ 9 * stride]) * 4;
345 step1[2] = (in[5 * stride] - in[10 * stride]) << 2; 340 step1[2] = (in[5 * stride] - in[10 * stride]) * 4;
346 step1[3] = (in[4 * stride] - in[11 * stride]) << 2; 341 step1[3] = (in[4 * stride] - in[11 * stride]) * 4;
347 step1[4] = (in[3 * stride] - in[12 * stride]) << 2; 342 step1[4] = (in[3 * stride] - in[12 * stride]) * 4;
348 step1[5] = (in[2 * stride] - in[13 * stride]) << 2; 343 step1[5] = (in[2 * stride] - in[13 * stride]) * 4;
349 step1[6] = (in[1 * stride] - in[14 * stride]) << 2; 344 step1[6] = (in[1 * stride] - in[14 * stride]) * 4;
350 step1[7] = (in[0 * stride] - in[15 * stride]) << 2; 345 step1[7] = (in[0 * stride] - in[15 * stride]) * 4;
351 } else { 346 } else {
352 // Calculate input for the first 8 results. 347 // Calculate input for the first 8 results.
353 input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2); 348 input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
354 input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2); 349 input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
355 input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2); 350 input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
356 input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2); 351 input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
357 input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2); 352 input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
358 input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2); 353 input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
359 input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2); 354 input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);
360 input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2); 355 input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);
361 // Calculate input for the next 8 results. 356 // Calculate input for the next 8 results.
362 step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2); 357 step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);
363 step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2); 358 step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);
364 step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2); 359 step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
365 step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2); 360 step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
366 step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2); 361 step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
367 step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2); 362 step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
368 step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2); 363 step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
369 step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2); 364 step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
370 } 365 }
371 // Work on the first eight values; fdct8_1d(input, even_results); 366 // Work on the first eight values; fdct8(input, even_results);
372 { 367 {
373 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; 368 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
374 /*needs32*/ int t0, t1, t2, t3; 369 /*needs32*/ int t0, t1, t2, t3;
375 /*canbe16*/ int x0, x1, x2, x3; 370 /*canbe16*/ int x0, x1, x2, x3;
376 371
377 // stage 1 372 // stage 1
378 s0 = input[0] + input[7]; 373 s0 = input[0] + input[7];
379 s1 = input[1] + input[6]; 374 s1 = input[1] + input[6];
380 s2 = input[2] + input[5]; 375 s2 = input[2] + input[5];
381 s3 = input[3] + input[4]; 376 s3 = input[3] + input[4];
382 s4 = input[3] - input[4]; 377 s4 = input[3] - input[4];
383 s5 = input[2] - input[5]; 378 s5 = input[2] - input[5];
384 s6 = input[1] - input[6]; 379 s6 = input[1] - input[6];
385 s7 = input[0] - input[7]; 380 s7 = input[0] - input[7];
386 381
387 // fdct4_1d(step, step); 382 // fdct4(step, step);
388 x0 = s0 + s3; 383 x0 = s0 + s3;
389 x1 = s1 + s2; 384 x1 = s1 + s2;
390 x2 = s1 - s2; 385 x2 = s1 - s2;
391 x3 = s0 - s3; 386 x3 = s0 - s3;
392 t0 = (x0 + x1) * cospi_16_64; 387 t0 = (x0 + x1) * cospi_16_64;
393 t1 = (x0 - x1) * cospi_16_64; 388 t1 = (x0 - x1) * cospi_16_64;
394 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; 389 t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
395 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; 390 t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
396 out[0] = dct_const_round_shift(t0); 391 out[0] = dct_const_round_shift(t0);
397 out[4] = dct_const_round_shift(t2); 392 out[4] = dct_const_round_shift(t2);
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
479 // Do next column (which is a transposed row in second/horizontal pass) 474 // Do next column (which is a transposed row in second/horizontal pass)
480 in++; 475 in++;
481 out += 16; 476 out += 16;
482 } 477 }
483 // Setup in/out for next pass. 478 // Setup in/out for next pass.
484 in = intermediate; 479 in = intermediate;
485 out = output; 480 out = output;
486 } 481 }
487 } 482 }
488 483
489 static void fadst8_1d(int16_t *input, int16_t *output) { 484 static void fadst8(const int16_t *input, int16_t *output) {
490 int s0, s1, s2, s3, s4, s5, s6, s7; 485 int s0, s1, s2, s3, s4, s5, s6, s7;
491 486
492 int x0 = input[7]; 487 int x0 = input[7];
493 int x1 = input[0]; 488 int x1 = input[0];
494 int x2 = input[5]; 489 int x2 = input[5];
495 int x3 = input[2]; 490 int x3 = input[2];
496 int x4 = input[3]; 491 int x4 = input[3];
497 int x5 = input[4]; 492 int x5 = input[4];
498 int x6 = input[1]; 493 int x6 = input[1];
499 int x7 = input[6]; 494 int x7 = input[6];
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
551 output[1] = - x4; 546 output[1] = - x4;
552 output[2] = x6; 547 output[2] = x6;
553 output[3] = - x2; 548 output[3] = - x2;
554 output[4] = x3; 549 output[4] = x3;
555 output[5] = - x7; 550 output[5] = - x7;
556 output[6] = x5; 551 output[6] = x5;
557 output[7] = - x1; 552 output[7] = - x1;
558 } 553 }
559 554
560 static const transform_2d FHT_8[] = { 555 static const transform_2d FHT_8[] = {
561 { fdct8_1d, fdct8_1d }, // DCT_DCT = 0 556 { fdct8, fdct8 }, // DCT_DCT = 0
562 { fadst8_1d, fdct8_1d }, // ADST_DCT = 1 557 { fadst8, fdct8 }, // ADST_DCT = 1
563 { fdct8_1d, fadst8_1d }, // DCT_ADST = 2 558 { fdct8, fadst8 }, // DCT_ADST = 2
564 { fadst8_1d, fadst8_1d } // ADST_ADST = 3 559 { fadst8, fadst8 } // ADST_ADST = 3
565 }; 560 };
566 561
567 void vp9_short_fht8x8_c(int16_t *input, int16_t *output, 562 void vp9_short_fht8x8_c(const int16_t *input, int16_t *output,
568 int pitch, TX_TYPE tx_type) { 563 int stride, int tx_type) {
569 int16_t out[64]; 564 int16_t out[64];
570 int16_t *outptr = &out[0]; 565 int16_t *outptr = &out[0];
571 int i, j; 566 int i, j;
572 int16_t temp_in[8], temp_out[8]; 567 int16_t temp_in[8], temp_out[8];
573 const transform_2d ht = FHT_8[tx_type]; 568 const transform_2d ht = FHT_8[tx_type];
574 569
575 // Columns 570 // Columns
576 for (i = 0; i < 8; ++i) { 571 for (i = 0; i < 8; ++i) {
577 for (j = 0; j < 8; ++j) 572 for (j = 0; j < 8; ++j)
578 temp_in[j] = input[j * pitch + i] << 2; 573 temp_in[j] = input[j * stride + i] * 4;
579 ht.cols(temp_in, temp_out); 574 ht.cols(temp_in, temp_out);
580 for (j = 0; j < 8; ++j) 575 for (j = 0; j < 8; ++j)
581 outptr[j * 8 + i] = temp_out[j]; 576 outptr[j * 8 + i] = temp_out[j];
582 } 577 }
583 578
584 // Rows 579 // Rows
585 for (i = 0; i < 8; ++i) { 580 for (i = 0; i < 8; ++i) {
586 for (j = 0; j < 8; ++j) 581 for (j = 0; j < 8; ++j)
587 temp_in[j] = out[j + i * 8]; 582 temp_in[j] = out[j + i * 8];
588 ht.rows(temp_in, temp_out); 583 ht.rows(temp_in, temp_out);
589 for (j = 0; j < 8; ++j) 584 for (j = 0; j < 8; ++j)
590 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; 585 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
591 } 586 }
592 } 587 }
593 588
594 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per 589 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
595 pixel. */ 590 pixel. */
596 void vp9_short_walsh4x4_c(short *input, short *output, int pitch) { 591 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) {
597 int i; 592 int i;
598 int a1, b1, c1, d1, e1; 593 int a1, b1, c1, d1, e1;
599 short *ip = input; 594 const int16_t *ip = input;
600 short *op = output; 595 int16_t *op = output;
601 int pitch_short = pitch >> 1;
602 596
603 for (i = 0; i < 4; i++) { 597 for (i = 0; i < 4; i++) {
604 a1 = ip[0 * pitch_short]; 598 a1 = ip[0 * stride];
605 b1 = ip[1 * pitch_short]; 599 b1 = ip[1 * stride];
606 c1 = ip[2 * pitch_short]; 600 c1 = ip[2 * stride];
607 d1 = ip[3 * pitch_short]; 601 d1 = ip[3 * stride];
608 602
609 a1 += b1; 603 a1 += b1;
610 d1 = d1 - c1; 604 d1 = d1 - c1;
611 e1 = (a1 - d1) >> 1; 605 e1 = (a1 - d1) >> 1;
612 b1 = e1 - b1; 606 b1 = e1 - b1;
613 c1 = e1 - c1; 607 c1 = e1 - c1;
614 a1 -= c1; 608 a1 -= c1;
615 d1 += b1; 609 d1 += b1;
616 op[0] = a1; 610 op[0] = a1;
617 op[4] = c1; 611 op[4] = c1;
(...skipping 12 matching lines...) Expand all
630 c1 = ip[2]; 624 c1 = ip[2];
631 d1 = ip[3]; 625 d1 = ip[3];
632 626
633 a1 += b1; 627 a1 += b1;
634 d1 -= c1; 628 d1 -= c1;
635 e1 = (a1 - d1) >> 1; 629 e1 = (a1 - d1) >> 1;
636 b1 = e1 - b1; 630 b1 = e1 - b1;
637 c1 = e1 - c1; 631 c1 = e1 - c1;
638 a1 -= c1; 632 a1 -= c1;
639 d1 += b1; 633 d1 += b1;
640 op[0] = a1 << WHT_UPSCALE_FACTOR; 634 op[0] = a1 * UNIT_QUANT_FACTOR;
641 op[1] = c1 << WHT_UPSCALE_FACTOR; 635 op[1] = c1 * UNIT_QUANT_FACTOR;
642 op[2] = d1 << WHT_UPSCALE_FACTOR; 636 op[2] = d1 * UNIT_QUANT_FACTOR;
643 op[3] = b1 << WHT_UPSCALE_FACTOR; 637 op[3] = b1 * UNIT_QUANT_FACTOR;
644 638
645 ip += 4; 639 ip += 4;
646 op += 4; 640 op += 4;
647 } 641 }
648 } 642 }
649 643
650 void vp9_short_walsh8x4_c(short *input, short *output, int pitch) {
651 vp9_short_walsh4x4_c(input, output, pitch);
652 vp9_short_walsh4x4_c(input + 4, output + 16, pitch);
653 }
654
655
656 // Rewrote to use same algorithm as others. 644 // Rewrote to use same algorithm as others.
657 static void fdct16_1d(int16_t in[16], int16_t out[16]) { 645 static void fdct16(const int16_t in[16], int16_t out[16]) {
658 /*canbe16*/ int step1[8]; 646 /*canbe16*/ int step1[8];
659 /*canbe16*/ int step2[8]; 647 /*canbe16*/ int step2[8];
660 /*canbe16*/ int step3[8]; 648 /*canbe16*/ int step3[8];
661 /*canbe16*/ int input[8]; 649 /*canbe16*/ int input[8];
662 /*needs32*/ int temp1, temp2; 650 /*needs32*/ int temp1, temp2;
663 651
664 // step 1 652 // step 1
665 input[0] = in[0] + in[15]; 653 input[0] = in[0] + in[15];
666 input[1] = in[1] + in[14]; 654 input[1] = in[1] + in[14];
667 input[2] = in[2] + in[13]; 655 input[2] = in[2] + in[13];
668 input[3] = in[3] + in[12]; 656 input[3] = in[3] + in[12];
669 input[4] = in[4] + in[11]; 657 input[4] = in[4] + in[11];
670 input[5] = in[5] + in[10]; 658 input[5] = in[5] + in[10];
671 input[6] = in[6] + in[ 9]; 659 input[6] = in[6] + in[ 9];
672 input[7] = in[7] + in[ 8]; 660 input[7] = in[7] + in[ 8];
673 661
674 step1[0] = in[7] - in[ 8]; 662 step1[0] = in[7] - in[ 8];
675 step1[1] = in[6] - in[ 9]; 663 step1[1] = in[6] - in[ 9];
676 step1[2] = in[5] - in[10]; 664 step1[2] = in[5] - in[10];
677 step1[3] = in[4] - in[11]; 665 step1[3] = in[4] - in[11];
678 step1[4] = in[3] - in[12]; 666 step1[4] = in[3] - in[12];
679 step1[5] = in[2] - in[13]; 667 step1[5] = in[2] - in[13];
680 step1[6] = in[1] - in[14]; 668 step1[6] = in[1] - in[14];
681 step1[7] = in[0] - in[15]; 669 step1[7] = in[0] - in[15];
682 670
683 // fdct8_1d(step, step); 671 // fdct8(step, step);
684 { 672 {
685 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; 673 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
686 /*needs32*/ int t0, t1, t2, t3; 674 /*needs32*/ int t0, t1, t2, t3;
687 /*canbe16*/ int x0, x1, x2, x3; 675 /*canbe16*/ int x0, x1, x2, x3;
688 676
689 // stage 1 677 // stage 1
690 s0 = input[0] + input[7]; 678 s0 = input[0] + input[7];
691 s1 = input[1] + input[6]; 679 s1 = input[1] + input[6];
692 s2 = input[2] + input[5]; 680 s2 = input[2] + input[5];
693 s3 = input[3] + input[4]; 681 s3 = input[3] + input[4];
694 s4 = input[3] - input[4]; 682 s4 = input[3] - input[4];
695 s5 = input[2] - input[5]; 683 s5 = input[2] - input[5];
696 s6 = input[1] - input[6]; 684 s6 = input[1] - input[6];
697 s7 = input[0] - input[7]; 685 s7 = input[0] - input[7];
698 686
699 // fdct4_1d(step, step); 687 // fdct4(step, step);
700 x0 = s0 + s3; 688 x0 = s0 + s3;
701 x1 = s1 + s2; 689 x1 = s1 + s2;
702 x2 = s1 - s2; 690 x2 = s1 - s2;
703 x3 = s0 - s3; 691 x3 = s0 - s3;
704 t0 = (x0 + x1) * cospi_16_64; 692 t0 = (x0 + x1) * cospi_16_64;
705 t1 = (x0 - x1) * cospi_16_64; 693 t1 = (x0 - x1) * cospi_16_64;
706 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; 694 t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
707 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; 695 t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
708 out[0] = dct_const_round_shift(t0); 696 out[0] = dct_const_round_shift(t0);
709 out[4] = dct_const_round_shift(t2); 697 out[4] = dct_const_round_shift(t2);
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
788 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; 776 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
789 out[3] = dct_const_round_shift(temp1); 777 out[3] = dct_const_round_shift(temp1);
790 out[11] = dct_const_round_shift(temp2); 778 out[11] = dct_const_round_shift(temp2);
791 779
792 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; 780 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
793 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; 781 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
794 out[7] = dct_const_round_shift(temp1); 782 out[7] = dct_const_round_shift(temp1);
795 out[15] = dct_const_round_shift(temp2); 783 out[15] = dct_const_round_shift(temp2);
796 } 784 }
797 785
798 void fadst16_1d(int16_t *input, int16_t *output) { 786 static void fadst16(const int16_t *input, int16_t *output) {
799 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; 787 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
800 788
801 int x0 = input[15]; 789 int x0 = input[15];
802 int x1 = input[0]; 790 int x1 = input[0];
803 int x2 = input[13]; 791 int x2 = input[13];
804 int x3 = input[2]; 792 int x3 = input[2];
805 int x4 = input[11]; 793 int x4 = input[11];
806 int x5 = input[4]; 794 int x5 = input[4];
807 int x6 = input[9]; 795 int x6 = input[9];
808 int x7 = input[6]; 796 int x7 = input[6];
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after
951 output[9] = x11; 939 output[9] = x11;
952 output[10] = x15; 940 output[10] = x15;
953 output[11] = x7; 941 output[11] = x7;
954 output[12] = x5; 942 output[12] = x5;
955 output[13] = - x13; 943 output[13] = - x13;
956 output[14] = x9; 944 output[14] = x9;
957 output[15] = - x1; 945 output[15] = - x1;
958 } 946 }
959 947
960 static const transform_2d FHT_16[] = { 948 static const transform_2d FHT_16[] = {
961 { fdct16_1d, fdct16_1d }, // DCT_DCT = 0 949 { fdct16, fdct16 }, // DCT_DCT = 0
962 { fadst16_1d, fdct16_1d }, // ADST_DCT = 1 950 { fadst16, fdct16 }, // ADST_DCT = 1
963 { fdct16_1d, fadst16_1d }, // DCT_ADST = 2 951 { fdct16, fadst16 }, // DCT_ADST = 2
964 { fadst16_1d, fadst16_1d } // ADST_ADST = 3 952 { fadst16, fadst16 } // ADST_ADST = 3
965 }; 953 };
966 954
967 void vp9_short_fht16x16_c(int16_t *input, int16_t *output, 955 void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,
968 int pitch, TX_TYPE tx_type) { 956 int stride, int tx_type) {
969 int16_t out[256]; 957 int16_t out[256];
970 int16_t *outptr = &out[0]; 958 int16_t *outptr = &out[0];
971 int i, j; 959 int i, j;
972 int16_t temp_in[16], temp_out[16]; 960 int16_t temp_in[16], temp_out[16];
973 const transform_2d ht = FHT_16[tx_type]; 961 const transform_2d ht = FHT_16[tx_type];
974 962
975 // Columns 963 // Columns
976 for (i = 0; i < 16; ++i) { 964 for (i = 0; i < 16; ++i) {
977 for (j = 0; j < 16; ++j) 965 for (j = 0; j < 16; ++j)
978 temp_in[j] = input[j * pitch + i] << 2; 966 temp_in[j] = input[j * stride + i] * 4;
979 ht.cols(temp_in, temp_out); 967 ht.cols(temp_in, temp_out);
980 for (j = 0; j < 16; ++j) 968 for (j = 0; j < 16; ++j)
981 outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; 969 outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
982 // outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; 970 // outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
983 } 971 }
984 972
985 // Rows 973 // Rows
986 for (i = 0; i < 16; ++i) { 974 for (i = 0; i < 16; ++i) {
987 for (j = 0; j < 16; ++j) 975 for (j = 0; j < 16; ++j)
988 temp_in[j] = out[j + i * 16]; 976 temp_in[j] = out[j + i * 16];
989 ht.rows(temp_in, temp_out); 977 ht.rows(temp_in, temp_out);
990 for (j = 0; j < 16; ++j) 978 for (j = 0; j < 16; ++j)
991 output[j + i * 16] = temp_out[j]; 979 output[j + i * 16] = temp_out[j];
992 } 980 }
993 } 981 }
994 982
995 static INLINE int dct_32_round(int input) { 983 static INLINE int dct_32_round(int input) {
996 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); 984 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
997 assert(-131072 <= rv && rv <= 131071); 985 assert(-131072 <= rv && rv <= 131071);
998 return rv; 986 return rv;
999 } 987 }
1000 988
1001 static INLINE int half_round_shift(int input) { 989 static INLINE int half_round_shift(int input) {
1002 int rv = (input + 1 + (input < 0)) >> 2; 990 int rv = (input + 1 + (input < 0)) >> 2;
1003 return rv; 991 return rv;
1004 } 992 }
1005 993
1006 static void dct32_1d(int *input, int *output, int round) { 994 static void dct32_1d(const int *input, int *output, int round) {
1007 int step[32]; 995 int step[32];
1008 // Stage 1 996 // Stage 1
1009 step[0] = input[0] + input[(32 - 1)]; 997 step[0] = input[0] + input[(32 - 1)];
1010 step[1] = input[1] + input[(32 - 2)]; 998 step[1] = input[1] + input[(32 - 2)];
1011 step[2] = input[2] + input[(32 - 3)]; 999 step[2] = input[2] + input[(32 - 3)];
1012 step[3] = input[3] + input[(32 - 4)]; 1000 step[3] = input[3] + input[(32 - 4)];
1013 step[4] = input[4] + input[(32 - 5)]; 1001 step[4] = input[4] + input[(32 - 5)];
1014 step[5] = input[5] + input[(32 - 6)]; 1002 step[5] = input[5] + input[(32 - 6)];
1015 step[6] = input[6] + input[(32 - 7)]; 1003 step[6] = input[6] + input[(32 - 7)];
1016 step[7] = input[7] + input[(32 - 8)]; 1004 step[7] = input[7] + input[(32 - 8)];
(...skipping 302 matching lines...) Expand 10 before | Expand all | Expand 10 after
1319 output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64); 1307 output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64);
1320 output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64); 1308 output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64);
1321 output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64); 1309 output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64);
1322 output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64); 1310 output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64);
1323 output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64); 1311 output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64);
1324 output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64); 1312 output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64);
1325 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64); 1313 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);
1326 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); 1314 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
1327 } 1315 }
1328 1316
1329 void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { 1317 void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
1330 int shortpitch = pitch >> 1;
1331 int i, j; 1318 int i, j;
1332 int output[32 * 32]; 1319 int output[32 * 32];
1333 1320
1334 // Columns 1321 // Columns
1335 for (i = 0; i < 32; ++i) { 1322 for (i = 0; i < 32; ++i) {
1336 int temp_in[32], temp_out[32]; 1323 int temp_in[32], temp_out[32];
1337 for (j = 0; j < 32; ++j) 1324 for (j = 0; j < 32; ++j)
1338 temp_in[j] = input[j * shortpitch + i] << 2; 1325 temp_in[j] = input[j * stride + i] * 4;
1339 dct32_1d(temp_in, temp_out, 0); 1326 dct32_1d(temp_in, temp_out, 0);
1340 for (j = 0; j < 32; ++j) 1327 for (j = 0; j < 32; ++j)
1341 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; 1328 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
1342 } 1329 }
1343 1330
1344 // Rows 1331 // Rows
1345 for (i = 0; i < 32; ++i) { 1332 for (i = 0; i < 32; ++i) {
1346 int temp_in[32], temp_out[32]; 1333 int temp_in[32], temp_out[32];
1347 for (j = 0; j < 32; ++j) 1334 for (j = 0; j < 32; ++j)
1348 temp_in[j] = output[j + i * 32]; 1335 temp_in[j] = output[j + i * 32];
1349 dct32_1d(temp_in, temp_out, 0); 1336 dct32_1d(temp_in, temp_out, 0);
1350 for (j = 0; j < 32; ++j) 1337 for (j = 0; j < 32; ++j)
1351 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; 1338 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
1352 } 1339 }
1353 } 1340 }
1354 1341
1355 // Note that although we use dct_32_round in dct32_1d computation flow, 1342 // Note that although we use dct_32_round in dct32_1d computation flow,
1356 // this 2d fdct32x32 for rate-distortion optimization loop is operating 1343 // this 2d fdct32x32 for rate-distortion optimization loop is operating
1357 // within 16 bits precision. 1344 // within 16 bits precision.
1358 void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) { 1345 void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
1359 int shortpitch = pitch >> 1;
1360 int i, j; 1346 int i, j;
1361 int output[32 * 32]; 1347 int output[32 * 32];
1362 1348
1363 // Columns 1349 // Columns
1364 for (i = 0; i < 32; ++i) { 1350 for (i = 0; i < 32; ++i) {
1365 int temp_in[32], temp_out[32]; 1351 int temp_in[32], temp_out[32];
1366 for (j = 0; j < 32; ++j) 1352 for (j = 0; j < 32; ++j)
1367 temp_in[j] = input[j * shortpitch + i] << 2; 1353 temp_in[j] = input[j * stride + i] * 4;
1368 dct32_1d(temp_in, temp_out, 0); 1354 dct32_1d(temp_in, temp_out, 0);
1369 for (j = 0; j < 32; ++j) 1355 for (j = 0; j < 32; ++j)
1370 // TODO(cd): see quality impact of only doing 1356 // TODO(cd): see quality impact of only doing
1371 // output[j * 32 + i] = (temp_out[j] + 1) >> 2; 1357 // output[j * 32 + i] = (temp_out[j] + 1) >> 2;
1372 // PS: also change code in vp9/encoder/x86/vp9_dct_sse2.c 1358 // PS: also change code in vp9/encoder/x86/vp9_dct_sse2.c
1373 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; 1359 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
1374 } 1360 }
1375 1361
1376 // Rows 1362 // Rows
1377 for (i = 0; i < 32; ++i) { 1363 for (i = 0; i < 32; ++i) {
1378 int temp_in[32], temp_out[32]; 1364 int temp_in[32], temp_out[32];
1379 for (j = 0; j < 32; ++j) 1365 for (j = 0; j < 32; ++j)
1380 temp_in[j] = output[j + i * 32]; 1366 temp_in[j] = output[j + i * 32];
1381 dct32_1d(temp_in, temp_out, 1); 1367 dct32_1d(temp_in, temp_out, 1);
1382 for (j = 0; j < 32; ++j) 1368 for (j = 0; j < 32; ++j)
1383 out[j + i * 32] = temp_out[j]; 1369 out[j + i * 32] = temp_out[j];
1384 } 1370 }
1385 } 1371 }
1372
1373 void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
1374 int stride) {
1375 if (tx_type == DCT_DCT)
1376 vp9_fdct4x4(input, output, stride);
1377 else
1378 vp9_short_fht4x4(input, output, stride, tx_type);
1379 }
1380
1381 void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
1382 int stride) {
1383 if (tx_type == DCT_DCT)
1384 vp9_fdct8x8(input, output, stride);
1385 else
1386 vp9_short_fht8x8(input, output, stride, tx_type);
1387 }
1388
1389 void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
1390 int stride) {
1391 if (tx_type == DCT_DCT)
1392 vp9_fdct16x16(input, output, stride);
1393 else
1394 vp9_short_fht16x16(input, output, stride, tx_type);
1395 }
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_dct.h ('k') | source/libvpx/vp9/encoder/vp9_encodeframe.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698