OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <math.h> | 11 #include <math.h> |
12 #include <string.h> | 12 #include <string.h> |
13 | 13 |
14 #include "vpx_dsp/inv_txfm.h" | 14 #include "vpx_dsp/inv_txfm.h" |
15 | 15 |
16 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 16 void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
17 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 17 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
18 0.5 shifts per pixel. */ | 18 0.5 shifts per pixel. */ |
19 int i; | 19 int i; |
20 tran_low_t output[16]; | 20 tran_low_t output[16]; |
21 tran_high_t a1, b1, c1, d1, e1; | 21 tran_high_t a1, b1, c1, d1, e1; |
22 const tran_low_t *ip = input; | 22 const tran_low_t *ip = input; |
23 tran_low_t *op = output; | 23 tran_low_t *op = output; |
24 | 24 |
25 for (i = 0; i < 4; i++) { | 25 for (i = 0; i < 4; i++) { |
26 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 26 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
58 dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); | 58 dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); |
59 dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1); | 59 dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1); |
60 dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1); | 60 dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1); |
61 dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1); | 61 dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1); |
62 | 62 |
63 ip++; | 63 ip++; |
64 dest++; | 64 dest++; |
65 } | 65 } |
66 } | 66 } |
67 | 67 |
68 void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { | 68 void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { |
69 int i; | 69 int i; |
70 tran_high_t a1, e1; | 70 tran_high_t a1, e1; |
71 tran_low_t tmp[4]; | 71 tran_low_t tmp[4]; |
72 const tran_low_t *ip = in; | 72 const tran_low_t *ip = in; |
73 tran_low_t *op = tmp; | 73 tran_low_t *op = tmp; |
74 | 74 |
75 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 75 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
76 e1 = a1 >> 1; | 76 e1 = a1 >> 1; |
77 a1 -= e1; | 77 a1 -= e1; |
78 op[0] = WRAPLOW(a1, 8); | 78 op[0] = WRAPLOW(a1, 8); |
(...skipping 25 matching lines...) Expand all Loading... |
104 step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); | 104 step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); |
105 step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); | 105 step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); |
106 | 106 |
107 // stage 2 | 107 // stage 2 |
108 output[0] = WRAPLOW(step[0] + step[3], 8); | 108 output[0] = WRAPLOW(step[0] + step[3], 8); |
109 output[1] = WRAPLOW(step[1] + step[2], 8); | 109 output[1] = WRAPLOW(step[1] + step[2], 8); |
110 output[2] = WRAPLOW(step[1] - step[2], 8); | 110 output[2] = WRAPLOW(step[1] - step[2], 8); |
111 output[3] = WRAPLOW(step[0] - step[3], 8); | 111 output[3] = WRAPLOW(step[0] - step[3], 8); |
112 } | 112 } |
113 | 113 |
114 void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 114 void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
115 tran_low_t out[4 * 4]; | 115 tran_low_t out[4 * 4]; |
116 tran_low_t *outptr = out; | 116 tran_low_t *outptr = out; |
117 int i, j; | 117 int i, j; |
118 tran_low_t temp_in[4], temp_out[4]; | 118 tran_low_t temp_in[4], temp_out[4]; |
119 | 119 |
120 // Rows | 120 // Rows |
121 for (i = 0; i < 4; ++i) { | 121 for (i = 0; i < 4; ++i) { |
122 idct4_c(input, outptr); | 122 idct4_c(input, outptr); |
123 input += 4; | 123 input += 4; |
124 outptr += 4; | 124 outptr += 4; |
125 } | 125 } |
126 | 126 |
127 // Columns | 127 // Columns |
128 for (i = 0; i < 4; ++i) { | 128 for (i = 0; i < 4; ++i) { |
129 for (j = 0; j < 4; ++j) | 129 for (j = 0; j < 4; ++j) |
130 temp_in[j] = out[j * 4 + i]; | 130 temp_in[j] = out[j * 4 + i]; |
131 idct4_c(temp_in, temp_out); | 131 idct4_c(temp_in, temp_out); |
132 for (j = 0; j < 4; ++j) { | 132 for (j = 0; j < 4; ++j) { |
133 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 133 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
134 ROUND_POWER_OF_TWO(temp_out[j], 4)); | 134 ROUND_POWER_OF_TWO(temp_out[j], 4)); |
135 } | 135 } |
136 } | 136 } |
137 } | 137 } |
138 | 138 |
139 void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, | 139 void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, |
140 int dest_stride) { | 140 int dest_stride) { |
141 int i; | 141 int i; |
142 tran_high_t a1; | 142 tran_high_t a1; |
143 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 143 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
144 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 144 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
145 a1 = ROUND_POWER_OF_TWO(out, 4); | 145 a1 = ROUND_POWER_OF_TWO(out, 4); |
146 | 146 |
147 for (i = 0; i < 4; i++) { | 147 for (i = 0; i < 4; i++) { |
148 dest[0] = clip_pixel_add(dest[0], a1); | 148 dest[0] = clip_pixel_add(dest[0], a1); |
149 dest[1] = clip_pixel_add(dest[1], a1); | 149 dest[1] = clip_pixel_add(dest[1], a1); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
191 output[0] = WRAPLOW(step1[0] + step1[7], 8); | 191 output[0] = WRAPLOW(step1[0] + step1[7], 8); |
192 output[1] = WRAPLOW(step1[1] + step1[6], 8); | 192 output[1] = WRAPLOW(step1[1] + step1[6], 8); |
193 output[2] = WRAPLOW(step1[2] + step1[5], 8); | 193 output[2] = WRAPLOW(step1[2] + step1[5], 8); |
194 output[3] = WRAPLOW(step1[3] + step1[4], 8); | 194 output[3] = WRAPLOW(step1[3] + step1[4], 8); |
195 output[4] = WRAPLOW(step1[3] - step1[4], 8); | 195 output[4] = WRAPLOW(step1[3] - step1[4], 8); |
196 output[5] = WRAPLOW(step1[2] - step1[5], 8); | 196 output[5] = WRAPLOW(step1[2] - step1[5], 8); |
197 output[6] = WRAPLOW(step1[1] - step1[6], 8); | 197 output[6] = WRAPLOW(step1[1] - step1[6], 8); |
198 output[7] = WRAPLOW(step1[0] - step1[7], 8); | 198 output[7] = WRAPLOW(step1[0] - step1[7], 8); |
199 } | 199 } |
200 | 200 |
201 void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 201 void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
202 tran_low_t out[8 * 8]; | 202 tran_low_t out[8 * 8]; |
203 tran_low_t *outptr = out; | 203 tran_low_t *outptr = out; |
204 int i, j; | 204 int i, j; |
205 tran_low_t temp_in[8], temp_out[8]; | 205 tran_low_t temp_in[8], temp_out[8]; |
206 | 206 |
207 // First transform rows | 207 // First transform rows |
208 for (i = 0; i < 8; ++i) { | 208 for (i = 0; i < 8; ++i) { |
209 idct8_c(input, outptr); | 209 idct8_c(input, outptr); |
210 input += 8; | 210 input += 8; |
211 outptr += 8; | 211 outptr += 8; |
212 } | 212 } |
213 | 213 |
214 // Then transform columns | 214 // Then transform columns |
215 for (i = 0; i < 8; ++i) { | 215 for (i = 0; i < 8; ++i) { |
216 for (j = 0; j < 8; ++j) | 216 for (j = 0; j < 8; ++j) |
217 temp_in[j] = out[j * 8 + i]; | 217 temp_in[j] = out[j * 8 + i]; |
218 idct8_c(temp_in, temp_out); | 218 idct8_c(temp_in, temp_out); |
219 for (j = 0; j < 8; ++j) { | 219 for (j = 0; j < 8; ++j) { |
220 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 220 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
221 ROUND_POWER_OF_TWO(temp_out[j], 5)); | 221 ROUND_POWER_OF_TWO(temp_out[j], 5)); |
222 } | 222 } |
223 } | 223 } |
224 } | 224 } |
225 | 225 |
226 void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 226 void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
227 int i, j; | 227 int i, j; |
228 tran_high_t a1; | 228 tran_high_t a1; |
229 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 229 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
230 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 230 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
231 a1 = ROUND_POWER_OF_TWO(out, 5); | 231 a1 = ROUND_POWER_OF_TWO(out, 5); |
232 for (j = 0; j < 8; ++j) { | 232 for (j = 0; j < 8; ++j) { |
233 for (i = 0; i < 8; ++i) | 233 for (i = 0; i < 8; ++i) |
234 dest[i] = clip_pixel_add(dest[i], a1); | 234 dest[i] = clip_pixel_add(dest[i], a1); |
235 dest += stride; | 235 dest += stride; |
236 } | 236 } |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
343 output[0] = WRAPLOW(x0, 8); | 343 output[0] = WRAPLOW(x0, 8); |
344 output[1] = WRAPLOW(-x4, 8); | 344 output[1] = WRAPLOW(-x4, 8); |
345 output[2] = WRAPLOW(x6, 8); | 345 output[2] = WRAPLOW(x6, 8); |
346 output[3] = WRAPLOW(-x2, 8); | 346 output[3] = WRAPLOW(-x2, 8); |
347 output[4] = WRAPLOW(x3, 8); | 347 output[4] = WRAPLOW(x3, 8); |
348 output[5] = WRAPLOW(-x7, 8); | 348 output[5] = WRAPLOW(-x7, 8); |
349 output[6] = WRAPLOW(x5, 8); | 349 output[6] = WRAPLOW(x5, 8); |
350 output[7] = WRAPLOW(-x1, 8); | 350 output[7] = WRAPLOW(-x1, 8); |
351 } | 351 } |
352 | 352 |
353 void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 353 void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
354 tran_low_t out[8 * 8] = { 0 }; | 354 tran_low_t out[8 * 8] = { 0 }; |
355 tran_low_t *outptr = out; | 355 tran_low_t *outptr = out; |
356 int i, j; | 356 int i, j; |
357 tran_low_t temp_in[8], temp_out[8]; | 357 tran_low_t temp_in[8], temp_out[8]; |
358 | 358 |
359 // First transform rows | 359 // First transform rows |
360 // only first 4 row has non-zero coefs | 360 // only first 4 row has non-zero coefs |
361 for (i = 0; i < 4; ++i) { | 361 for (i = 0; i < 4; ++i) { |
362 idct8_c(input, outptr); | 362 idct8_c(input, outptr); |
363 input += 8; | 363 input += 8; |
(...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
534 output[8] = WRAPLOW(step2[7] - step2[8], 8); | 534 output[8] = WRAPLOW(step2[7] - step2[8], 8); |
535 output[9] = WRAPLOW(step2[6] - step2[9], 8); | 535 output[9] = WRAPLOW(step2[6] - step2[9], 8); |
536 output[10] = WRAPLOW(step2[5] - step2[10], 8); | 536 output[10] = WRAPLOW(step2[5] - step2[10], 8); |
537 output[11] = WRAPLOW(step2[4] - step2[11], 8); | 537 output[11] = WRAPLOW(step2[4] - step2[11], 8); |
538 output[12] = WRAPLOW(step2[3] - step2[12], 8); | 538 output[12] = WRAPLOW(step2[3] - step2[12], 8); |
539 output[13] = WRAPLOW(step2[2] - step2[13], 8); | 539 output[13] = WRAPLOW(step2[2] - step2[13], 8); |
540 output[14] = WRAPLOW(step2[1] - step2[14], 8); | 540 output[14] = WRAPLOW(step2[1] - step2[14], 8); |
541 output[15] = WRAPLOW(step2[0] - step2[15], 8); | 541 output[15] = WRAPLOW(step2[0] - step2[15], 8); |
542 } | 542 } |
543 | 543 |
544 void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, | 544 void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, |
545 int stride) { | 545 int stride) { |
546 tran_low_t out[16 * 16]; | 546 tran_low_t out[16 * 16]; |
547 tran_low_t *outptr = out; | 547 tran_low_t *outptr = out; |
548 int i, j; | 548 int i, j; |
549 tran_low_t temp_in[16], temp_out[16]; | 549 tran_low_t temp_in[16], temp_out[16]; |
550 | 550 |
551 // First transform rows | 551 // First transform rows |
552 for (i = 0; i < 16; ++i) { | 552 for (i = 0; i < 16; ++i) { |
553 idct16_c(input, outptr); | 553 idct16_c(input, outptr); |
554 input += 16; | 554 input += 16; |
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
732 output[8] = WRAPLOW(x3, 8); | 732 output[8] = WRAPLOW(x3, 8); |
733 output[9] = WRAPLOW(x11, 8); | 733 output[9] = WRAPLOW(x11, 8); |
734 output[10] = WRAPLOW(x15, 8); | 734 output[10] = WRAPLOW(x15, 8); |
735 output[11] = WRAPLOW(x7, 8); | 735 output[11] = WRAPLOW(x7, 8); |
736 output[12] = WRAPLOW(x5, 8); | 736 output[12] = WRAPLOW(x5, 8); |
737 output[13] = WRAPLOW(-x13, 8); | 737 output[13] = WRAPLOW(-x13, 8); |
738 output[14] = WRAPLOW(x9, 8); | 738 output[14] = WRAPLOW(x9, 8); |
739 output[15] = WRAPLOW(-x1, 8); | 739 output[15] = WRAPLOW(-x1, 8); |
740 } | 740 } |
741 | 741 |
742 void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, | 742 void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, |
743 int stride) { | 743 int stride) { |
744 tran_low_t out[16 * 16] = { 0 }; | 744 tran_low_t out[16 * 16] = { 0 }; |
745 tran_low_t *outptr = out; | 745 tran_low_t *outptr = out; |
746 int i, j; | 746 int i, j; |
747 tran_low_t temp_in[16], temp_out[16]; | 747 tran_low_t temp_in[16], temp_out[16]; |
748 | 748 |
749 // First transform rows. Since all non-zero dct coefficients are in | 749 // First transform rows. Since all non-zero dct coefficients are in |
750 // upper-left 4x4 area, we only need to calculate first 4 rows here. | 750 // upper-left 4x4 area, we only need to calculate first 4 rows here. |
751 for (i = 0; i < 4; ++i) { | 751 for (i = 0; i < 4; ++i) { |
752 idct16_c(input, outptr); | 752 idct16_c(input, outptr); |
753 input += 16; | 753 input += 16; |
754 outptr += 16; | 754 outptr += 16; |
755 } | 755 } |
756 | 756 |
757 // Then transform columns | 757 // Then transform columns |
758 for (i = 0; i < 16; ++i) { | 758 for (i = 0; i < 16; ++i) { |
759 for (j = 0; j < 16; ++j) | 759 for (j = 0; j < 16; ++j) |
760 temp_in[j] = out[j*16 + i]; | 760 temp_in[j] = out[j*16 + i]; |
761 idct16_c(temp_in, temp_out); | 761 idct16_c(temp_in, temp_out); |
762 for (j = 0; j < 16; ++j) { | 762 for (j = 0; j < 16; ++j) { |
763 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 763 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
764 ROUND_POWER_OF_TWO(temp_out[j], 6)); | 764 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
765 } | 765 } |
766 } | 766 } |
767 } | 767 } |
768 | 768 |
769 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 769 void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
770 int i, j; | 770 int i, j; |
771 tran_high_t a1; | 771 tran_high_t a1; |
772 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 772 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
773 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 773 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
774 a1 = ROUND_POWER_OF_TWO(out, 6); | 774 a1 = ROUND_POWER_OF_TWO(out, 6); |
775 for (j = 0; j < 16; ++j) { | 775 for (j = 0; j < 16; ++j) { |
776 for (i = 0; i < 16; ++i) | 776 for (i = 0; i < 16; ++i) |
777 dest[i] = clip_pixel_add(dest[i], a1); | 777 dest[i] = clip_pixel_add(dest[i], a1); |
778 dest += stride; | 778 dest += stride; |
779 } | 779 } |
(...skipping 359 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1139 output[24] = WRAPLOW(step1[7] - step1[24], 8); | 1139 output[24] = WRAPLOW(step1[7] - step1[24], 8); |
1140 output[25] = WRAPLOW(step1[6] - step1[25], 8); | 1140 output[25] = WRAPLOW(step1[6] - step1[25], 8); |
1141 output[26] = WRAPLOW(step1[5] - step1[26], 8); | 1141 output[26] = WRAPLOW(step1[5] - step1[26], 8); |
1142 output[27] = WRAPLOW(step1[4] - step1[27], 8); | 1142 output[27] = WRAPLOW(step1[4] - step1[27], 8); |
1143 output[28] = WRAPLOW(step1[3] - step1[28], 8); | 1143 output[28] = WRAPLOW(step1[3] - step1[28], 8); |
1144 output[29] = WRAPLOW(step1[2] - step1[29], 8); | 1144 output[29] = WRAPLOW(step1[2] - step1[29], 8); |
1145 output[30] = WRAPLOW(step1[1] - step1[30], 8); | 1145 output[30] = WRAPLOW(step1[1] - step1[30], 8); |
1146 output[31] = WRAPLOW(step1[0] - step1[31], 8); | 1146 output[31] = WRAPLOW(step1[0] - step1[31], 8); |
1147 } | 1147 } |
1148 | 1148 |
1149 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, | 1149 void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, |
1150 int stride) { | 1150 int stride) { |
1151 tran_low_t out[32 * 32]; | 1151 tran_low_t out[32 * 32]; |
1152 tran_low_t *outptr = out; | 1152 tran_low_t *outptr = out; |
1153 int i, j; | 1153 int i, j; |
1154 tran_low_t temp_in[32], temp_out[32]; | 1154 tran_low_t temp_in[32], temp_out[32]; |
1155 | 1155 |
1156 // Rows | 1156 // Rows |
1157 for (i = 0; i < 32; ++i) { | 1157 for (i = 0; i < 32; ++i) { |
1158 int16_t zero_coeff[16]; | 1158 int16_t zero_coeff[16]; |
1159 for (j = 0; j < 16; ++j) | 1159 for (j = 0; j < 16; ++j) |
(...skipping 18 matching lines...) Expand all Loading... |
1178 for (j = 0; j < 32; ++j) | 1178 for (j = 0; j < 32; ++j) |
1179 temp_in[j] = out[j * 32 + i]; | 1179 temp_in[j] = out[j * 32 + i]; |
1180 idct32_c(temp_in, temp_out); | 1180 idct32_c(temp_in, temp_out); |
1181 for (j = 0; j < 32; ++j) { | 1181 for (j = 0; j < 32; ++j) { |
1182 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 1182 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
1183 ROUND_POWER_OF_TWO(temp_out[j], 6)); | 1183 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
1184 } | 1184 } |
1185 } | 1185 } |
1186 } | 1186 } |
1187 | 1187 |
1188 void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, | 1188 void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, |
1189 int stride) { | 1189 int stride) { |
1190 tran_low_t out[32 * 32] = {0}; | 1190 tran_low_t out[32 * 32] = {0}; |
1191 tran_low_t *outptr = out; | 1191 tran_low_t *outptr = out; |
1192 int i, j; | 1192 int i, j; |
1193 tran_low_t temp_in[32], temp_out[32]; | 1193 tran_low_t temp_in[32], temp_out[32]; |
1194 | 1194 |
1195 // Rows | 1195 // Rows |
1196 // only upper-left 8x8 has non-zero coeff | 1196 // only upper-left 8x8 has non-zero coeff |
1197 for (i = 0; i < 8; ++i) { | 1197 for (i = 0; i < 8; ++i) { |
1198 idct32_c(input, outptr); | 1198 idct32_c(input, outptr); |
1199 input += 32; | 1199 input += 32; |
1200 outptr += 32; | 1200 outptr += 32; |
1201 } | 1201 } |
1202 | 1202 |
1203 // Columns | 1203 // Columns |
1204 for (i = 0; i < 32; ++i) { | 1204 for (i = 0; i < 32; ++i) { |
1205 for (j = 0; j < 32; ++j) | 1205 for (j = 0; j < 32; ++j) |
1206 temp_in[j] = out[j * 32 + i]; | 1206 temp_in[j] = out[j * 32 + i]; |
1207 idct32_c(temp_in, temp_out); | 1207 idct32_c(temp_in, temp_out); |
1208 for (j = 0; j < 32; ++j) { | 1208 for (j = 0; j < 32; ++j) { |
1209 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 1209 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
1210 ROUND_POWER_OF_TWO(temp_out[j], 6)); | 1210 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
1211 } | 1211 } |
1212 } | 1212 } |
1213 } | 1213 } |
1214 | 1214 |
1215 void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 1215 void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
1216 int i, j; | 1216 int i, j; |
1217 tran_high_t a1; | 1217 tran_high_t a1; |
1218 | 1218 |
1219 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 1219 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
1220 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 1220 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
1221 a1 = ROUND_POWER_OF_TWO(out, 6); | 1221 a1 = ROUND_POWER_OF_TWO(out, 6); |
1222 | 1222 |
1223 for (j = 0; j < 32; ++j) { | 1223 for (j = 0; j < 32; ++j) { |
1224 for (i = 0; i < 32; ++i) | 1224 for (i = 0; i < 32; ++i) |
1225 dest[i] = clip_pixel_add(dest[i], a1); | 1225 dest[i] = clip_pixel_add(dest[i], a1); |
1226 dest += stride; | 1226 dest += stride; |
1227 } | 1227 } |
1228 } | 1228 } |
1229 | 1229 |
1230 #if CONFIG_VP9_HIGHBITDEPTH | 1230 #if CONFIG_VP9_HIGHBITDEPTH |
1231 void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1231 void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
1232 int stride, int bd) { | 1232 int stride, int bd) { |
1233 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 1233 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
1234 0.5 shifts per pixel. */ | 1234 0.5 shifts per pixel. */ |
1235 int i; | 1235 int i; |
1236 tran_low_t output[16]; | 1236 tran_low_t output[16]; |
1237 tran_high_t a1, b1, c1, d1, e1; | 1237 tran_high_t a1, b1, c1, d1, e1; |
1238 const tran_low_t *ip = input; | 1238 const tran_low_t *ip = input; |
1239 tran_low_t *op = output; | 1239 tran_low_t *op = output; |
1240 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1240 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1241 | 1241 |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1275 dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); | 1275 dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); |
1276 dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); | 1276 dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); |
1277 dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); | 1277 dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); |
1278 dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); | 1278 dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); |
1279 | 1279 |
1280 ip++; | 1280 ip++; |
1281 dest++; | 1281 dest++; |
1282 } | 1282 } |
1283 } | 1283 } |
1284 | 1284 |
1285 void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, | 1285 void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, |
1286 int dest_stride, int bd) { | 1286 int dest_stride, int bd) { |
1287 int i; | 1287 int i; |
1288 tran_high_t a1, e1; | 1288 tran_high_t a1, e1; |
1289 tran_low_t tmp[4]; | 1289 tran_low_t tmp[4]; |
1290 const tran_low_t *ip = in; | 1290 const tran_low_t *ip = in; |
1291 tran_low_t *op = tmp; | 1291 tran_low_t *op = tmp; |
1292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1293 (void) bd; | 1293 (void) bd; |
1294 | 1294 |
1295 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 1295 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
(...skipping 12 matching lines...) Expand all Loading... |
1308 dest[dest_stride * 1], e1, bd); | 1308 dest[dest_stride * 1], e1, bd); |
1309 dest[dest_stride * 2] = highbd_clip_pixel_add( | 1309 dest[dest_stride * 2] = highbd_clip_pixel_add( |
1310 dest[dest_stride * 2], e1, bd); | 1310 dest[dest_stride * 2], e1, bd); |
1311 dest[dest_stride * 3] = highbd_clip_pixel_add( | 1311 dest[dest_stride * 3] = highbd_clip_pixel_add( |
1312 dest[dest_stride * 3], e1, bd); | 1312 dest[dest_stride * 3], e1, bd); |
1313 ip++; | 1313 ip++; |
1314 dest++; | 1314 dest++; |
1315 } | 1315 } |
1316 } | 1316 } |
1317 | 1317 |
1318 void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1318 void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1319 tran_low_t step[4]; | 1319 tran_low_t step[4]; |
1320 tran_high_t temp1, temp2; | 1320 tran_high_t temp1, temp2; |
1321 (void) bd; | 1321 (void) bd; |
1322 // stage 1 | 1322 // stage 1 |
1323 temp1 = (input[0] + input[2]) * cospi_16_64; | 1323 temp1 = (input[0] + input[2]) * cospi_16_64; |
1324 temp2 = (input[0] - input[2]) * cospi_16_64; | 1324 temp2 = (input[0] - input[2]) * cospi_16_64; |
1325 step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); | 1325 step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1326 step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); | 1326 step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1327 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; | 1327 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; |
1328 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; | 1328 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; |
1329 step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); | 1329 step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1330 step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); | 1330 step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1331 | 1331 |
1332 // stage 2 | 1332 // stage 2 |
1333 output[0] = WRAPLOW(step[0] + step[3], bd); | 1333 output[0] = WRAPLOW(step[0] + step[3], bd); |
1334 output[1] = WRAPLOW(step[1] + step[2], bd); | 1334 output[1] = WRAPLOW(step[1] + step[2], bd); |
1335 output[2] = WRAPLOW(step[1] - step[2], bd); | 1335 output[2] = WRAPLOW(step[1] - step[2], bd); |
1336 output[3] = WRAPLOW(step[0] - step[3], bd); | 1336 output[3] = WRAPLOW(step[0] - step[3], bd); |
1337 } | 1337 } |
1338 | 1338 |
1339 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1339 void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
1340 int stride, int bd) { | 1340 int stride, int bd) { |
1341 tran_low_t out[4 * 4]; | 1341 tran_low_t out[4 * 4]; |
1342 tran_low_t *outptr = out; | 1342 tran_low_t *outptr = out; |
1343 int i, j; | 1343 int i, j; |
1344 tran_low_t temp_in[4], temp_out[4]; | 1344 tran_low_t temp_in[4], temp_out[4]; |
1345 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1345 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1346 | 1346 |
1347 // Rows | 1347 // Rows |
1348 for (i = 0; i < 4; ++i) { | 1348 for (i = 0; i < 4; ++i) { |
1349 vp9_highbd_idct4_c(input, outptr, bd); | 1349 vpx_highbd_idct4_c(input, outptr, bd); |
1350 input += 4; | 1350 input += 4; |
1351 outptr += 4; | 1351 outptr += 4; |
1352 } | 1352 } |
1353 | 1353 |
1354 // Columns | 1354 // Columns |
1355 for (i = 0; i < 4; ++i) { | 1355 for (i = 0; i < 4; ++i) { |
1356 for (j = 0; j < 4; ++j) | 1356 for (j = 0; j < 4; ++j) |
1357 temp_in[j] = out[j * 4 + i]; | 1357 temp_in[j] = out[j * 4 + i]; |
1358 vp9_highbd_idct4_c(temp_in, temp_out, bd); | 1358 vpx_highbd_idct4_c(temp_in, temp_out, bd); |
1359 for (j = 0; j < 4; ++j) { | 1359 for (j = 0; j < 4; ++j) { |
1360 dest[j * stride + i] = highbd_clip_pixel_add( | 1360 dest[j * stride + i] = highbd_clip_pixel_add( |
1361 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); | 1361 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
1362 } | 1362 } |
1363 } | 1363 } |
1364 } | 1364 } |
1365 | 1365 |
1366 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1366 void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, |
1367 int dest_stride, int bd) { | 1367 int dest_stride, int bd) { |
1368 int i; | 1368 int i; |
1369 tran_high_t a1; | 1369 tran_high_t a1; |
1370 tran_low_t out = WRAPLOW( | 1370 tran_low_t out = WRAPLOW( |
1371 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 1371 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
1372 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1372 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1373 | 1373 |
1374 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 1374 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
1375 a1 = ROUND_POWER_OF_TWO(out, 4); | 1375 a1 = ROUND_POWER_OF_TWO(out, 4); |
1376 | 1376 |
1377 for (i = 0; i < 4; i++) { | 1377 for (i = 0; i < 4; i++) { |
1378 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); | 1378 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); |
1379 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); | 1379 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); |
1380 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); | 1380 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); |
1381 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); | 1381 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); |
1382 dest += dest_stride; | 1382 dest += dest_stride; |
1383 } | 1383 } |
1384 } | 1384 } |
1385 | 1385 |
// 8-point 1-D high-bitdepth inverse DCT butterfly; bd selects the clamping
// range used by WRAPLOW/highbd_dct_const_round_shift. Reads 8 coefficients
// from input, writes 8 reconstructed values to output.
void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t step1[8], step2[8];
  tran_high_t temp1, temp2;
  // stage 1
  // Even coefficients (indices 0, 4, 2, 6) are gathered into step1[0..3]
  // in the order expected by the 4-point transform applied below.
  step1[0] = input[0];
  step1[2] = input[4];
  step1[1] = input[2];
  step1[3] = input[6];
  // Odd coefficients (1, 7) and (5, 3) are rotated into step1[4..7].
  temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
  temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
  step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
  temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
  temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
  step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);

  // stage 2 & stage 3 - even half
  // The even half is exactly a 4-point inverse DCT, computed in place.
  vpx_highbd_idct4_c(step1, step1, bd);

  // stage 2 - odd half
  step2[4] = WRAPLOW(step1[4] + step1[5], bd);
  step2[5] = WRAPLOW(step1[4] - step1[5], bd);
  step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
  step2[7] = WRAPLOW(step1[6] + step1[7], bd);

  // stage 3 - odd half
  step1[4] = step2[4];
  temp1 = (step2[6] - step2[5]) * cospi_16_64;
  temp2 = (step2[5] + step2[6]) * cospi_16_64;
  step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
  step1[7] = step2[7];

  // stage 4
  // Final butterfly: combine the even half (step1[0..3]) with the odd
  // half (step1[4..7]) to produce the 8 outputs.
  output[0] = WRAPLOW(step1[0] + step1[7], bd);
  output[1] = WRAPLOW(step1[1] + step1[6], bd);
  output[2] = WRAPLOW(step1[2] + step1[5], bd);
  output[3] = WRAPLOW(step1[3] + step1[4], bd);
  output[4] = WRAPLOW(step1[3] - step1[4], bd);
  output[5] = WRAPLOW(step1[2] - step1[5], bd);
  output[6] = WRAPLOW(step1[1] - step1[6], bd);
  output[7] = WRAPLOW(step1[0] - step1[7], bd);
}
1430 | 1430 |
1431 void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, | 1431 void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, |
1432 int stride, int bd) { | 1432 int stride, int bd) { |
1433 tran_low_t out[8 * 8]; | 1433 tran_low_t out[8 * 8]; |
1434 tran_low_t *outptr = out; | 1434 tran_low_t *outptr = out; |
1435 int i, j; | 1435 int i, j; |
1436 tran_low_t temp_in[8], temp_out[8]; | 1436 tran_low_t temp_in[8], temp_out[8]; |
1437 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1437 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1438 | 1438 |
1439 // First transform rows. | 1439 // First transform rows. |
1440 for (i = 0; i < 8; ++i) { | 1440 for (i = 0; i < 8; ++i) { |
1441 vp9_highbd_idct8_c(input, outptr, bd); | 1441 vpx_highbd_idct8_c(input, outptr, bd); |
1442 input += 8; | 1442 input += 8; |
1443 outptr += 8; | 1443 outptr += 8; |
1444 } | 1444 } |
1445 | 1445 |
1446 // Then transform columns. | 1446 // Then transform columns. |
1447 for (i = 0; i < 8; ++i) { | 1447 for (i = 0; i < 8; ++i) { |
1448 for (j = 0; j < 8; ++j) | 1448 for (j = 0; j < 8; ++j) |
1449 temp_in[j] = out[j * 8 + i]; | 1449 temp_in[j] = out[j * 8 + i]; |
1450 vp9_highbd_idct8_c(temp_in, temp_out, bd); | 1450 vpx_highbd_idct8_c(temp_in, temp_out, bd); |
1451 for (j = 0; j < 8; ++j) { | 1451 for (j = 0; j < 8; ++j) { |
1452 dest[j * stride + i] = highbd_clip_pixel_add( | 1452 dest[j * stride + i] = highbd_clip_pixel_add( |
1453 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1453 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
1454 } | 1454 } |
1455 } | 1455 } |
1456 } | 1456 } |
1457 | 1457 |
1458 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1458 void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, |
1459 int stride, int bd) { | 1459 int stride, int bd) { |
1460 int i, j; | 1460 int i, j; |
1461 tran_high_t a1; | 1461 tran_high_t a1; |
1462 tran_low_t out = WRAPLOW( | 1462 tran_low_t out = WRAPLOW( |
1463 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 1463 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
1464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1465 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 1465 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
1466 a1 = ROUND_POWER_OF_TWO(out, 5); | 1466 a1 = ROUND_POWER_OF_TWO(out, 5); |
1467 for (j = 0; j < 8; ++j) { | 1467 for (j = 0; j < 8; ++j) { |
1468 for (i = 0; i < 8; ++i) | 1468 for (i = 0; i < 8; ++i) |
1469 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 1469 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
1470 dest += stride; | 1470 dest += stride; |
1471 } | 1471 } |
1472 } | 1472 } |
1473 | 1473 |
1474 void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1474 void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1475 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1475 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
1476 | 1476 |
1477 tran_low_t x0 = input[0]; | 1477 tran_low_t x0 = input[0]; |
1478 tran_low_t x1 = input[1]; | 1478 tran_low_t x1 = input[1]; |
1479 tran_low_t x2 = input[2]; | 1479 tran_low_t x2 = input[2]; |
1480 tran_low_t x3 = input[3]; | 1480 tran_low_t x3 = input[3]; |
1481 (void) bd; | 1481 (void) bd; |
1482 | 1482 |
1483 if (!(x0 | x1 | x2 | x3)) { | 1483 if (!(x0 | x1 | x2 | x3)) { |
1484 memset(output, 0, 4 * sizeof(*output)); | 1484 memset(output, 0, 4 * sizeof(*output)); |
(...skipping 17 matching lines...) Expand all Loading... |
1502 // 1-D transform scaling factor is sqrt(2). | 1502 // 1-D transform scaling factor is sqrt(2). |
1503 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 1503 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
1504 // + 1b (addition) = 29b. | 1504 // + 1b (addition) = 29b. |
1505 // Hence the output bit depth is 15b. | 1505 // Hence the output bit depth is 15b. |
1506 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); | 1506 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); |
1507 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); | 1507 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); |
1508 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); | 1508 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); |
1509 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); | 1509 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); |
1510 } | 1510 } |
1511 | 1511 |
1512 void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1512 void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1513 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1513 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
1514 | 1514 |
1515 tran_low_t x0 = input[7]; | 1515 tran_low_t x0 = input[7]; |
1516 tran_low_t x1 = input[0]; | 1516 tran_low_t x1 = input[0]; |
1517 tran_low_t x2 = input[5]; | 1517 tran_low_t x2 = input[5]; |
1518 tran_low_t x3 = input[2]; | 1518 tran_low_t x3 = input[2]; |
1519 tran_low_t x4 = input[3]; | 1519 tran_low_t x4 = input[3]; |
1520 tran_low_t x5 = input[4]; | 1520 tran_low_t x5 = input[4]; |
1521 tran_low_t x6 = input[1]; | 1521 tran_low_t x6 = input[1]; |
1522 tran_low_t x7 = input[6]; | 1522 tran_low_t x7 = input[6]; |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1579 output[0] = WRAPLOW(x0, bd); | 1579 output[0] = WRAPLOW(x0, bd); |
1580 output[1] = WRAPLOW(-x4, bd); | 1580 output[1] = WRAPLOW(-x4, bd); |
1581 output[2] = WRAPLOW(x6, bd); | 1581 output[2] = WRAPLOW(x6, bd); |
1582 output[3] = WRAPLOW(-x2, bd); | 1582 output[3] = WRAPLOW(-x2, bd); |
1583 output[4] = WRAPLOW(x3, bd); | 1583 output[4] = WRAPLOW(x3, bd); |
1584 output[5] = WRAPLOW(-x7, bd); | 1584 output[5] = WRAPLOW(-x7, bd); |
1585 output[6] = WRAPLOW(x5, bd); | 1585 output[6] = WRAPLOW(x5, bd); |
1586 output[7] = WRAPLOW(-x1, bd); | 1586 output[7] = WRAPLOW(-x1, bd); |
1587 } | 1587 } |
1588 | 1588 |
1589 void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, | 1589 void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, |
1590 int stride, int bd) { | 1590 int stride, int bd) { |
1591 tran_low_t out[8 * 8] = { 0 }; | 1591 tran_low_t out[8 * 8] = { 0 }; |
1592 tran_low_t *outptr = out; | 1592 tran_low_t *outptr = out; |
1593 int i, j; | 1593 int i, j; |
1594 tran_low_t temp_in[8], temp_out[8]; | 1594 tran_low_t temp_in[8], temp_out[8]; |
1595 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1595 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1596 | 1596 |
1597 // First transform rows. | 1597 // First transform rows. |
1598 // Only first 4 row has non-zero coefs. | 1598 // Only first 4 row has non-zero coefs. |
1599 for (i = 0; i < 4; ++i) { | 1599 for (i = 0; i < 4; ++i) { |
1600 vp9_highbd_idct8_c(input, outptr, bd); | 1600 vpx_highbd_idct8_c(input, outptr, bd); |
1601 input += 8; | 1601 input += 8; |
1602 outptr += 8; | 1602 outptr += 8; |
1603 } | 1603 } |
1604 // Then transform columns. | 1604 // Then transform columns. |
1605 for (i = 0; i < 8; ++i) { | 1605 for (i = 0; i < 8; ++i) { |
1606 for (j = 0; j < 8; ++j) | 1606 for (j = 0; j < 8; ++j) |
1607 temp_in[j] = out[j * 8 + i]; | 1607 temp_in[j] = out[j * 8 + i]; |
1608 vp9_highbd_idct8_c(temp_in, temp_out, bd); | 1608 vpx_highbd_idct8_c(temp_in, temp_out, bd); |
1609 for (j = 0; j < 8; ++j) { | 1609 for (j = 0; j < 8; ++j) { |
1610 dest[j * stride + i] = highbd_clip_pixel_add( | 1610 dest[j * stride + i] = highbd_clip_pixel_add( |
1611 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1611 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
1612 } | 1612 } |
1613 } | 1613 } |
1614 } | 1614 } |
1615 | 1615 |
1616 void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1616 void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1617 tran_low_t step1[16], step2[16]; | 1617 tran_low_t step1[16], step2[16]; |
1618 tran_high_t temp1, temp2; | 1618 tran_high_t temp1, temp2; |
1619 (void) bd; | 1619 (void) bd; |
1620 | 1620 |
1621 // stage 1 | 1621 // stage 1 |
1622 step1[0] = input[0/2]; | 1622 step1[0] = input[0/2]; |
1623 step1[1] = input[16/2]; | 1623 step1[1] = input[16/2]; |
1624 step1[2] = input[8/2]; | 1624 step1[2] = input[8/2]; |
1625 step1[3] = input[24/2]; | 1625 step1[3] = input[24/2]; |
1626 step1[4] = input[4/2]; | 1626 step1[4] = input[4/2]; |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1772 output[8] = WRAPLOW(step2[7] - step2[8], bd); | 1772 output[8] = WRAPLOW(step2[7] - step2[8], bd); |
1773 output[9] = WRAPLOW(step2[6] - step2[9], bd); | 1773 output[9] = WRAPLOW(step2[6] - step2[9], bd); |
1774 output[10] = WRAPLOW(step2[5] - step2[10], bd); | 1774 output[10] = WRAPLOW(step2[5] - step2[10], bd); |
1775 output[11] = WRAPLOW(step2[4] - step2[11], bd); | 1775 output[11] = WRAPLOW(step2[4] - step2[11], bd); |
1776 output[12] = WRAPLOW(step2[3] - step2[12], bd); | 1776 output[12] = WRAPLOW(step2[3] - step2[12], bd); |
1777 output[13] = WRAPLOW(step2[2] - step2[13], bd); | 1777 output[13] = WRAPLOW(step2[2] - step2[13], bd); |
1778 output[14] = WRAPLOW(step2[1] - step2[14], bd); | 1778 output[14] = WRAPLOW(step2[1] - step2[14], bd); |
1779 output[15] = WRAPLOW(step2[0] - step2[15], bd); | 1779 output[15] = WRAPLOW(step2[0] - step2[15], bd); |
1780 } | 1780 } |
1781 | 1781 |
1782 void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, | 1782 void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, |
1783 int stride, int bd) { | 1783 int stride, int bd) { |
1784 tran_low_t out[16 * 16]; | 1784 tran_low_t out[16 * 16]; |
1785 tran_low_t *outptr = out; | 1785 tran_low_t *outptr = out; |
1786 int i, j; | 1786 int i, j; |
1787 tran_low_t temp_in[16], temp_out[16]; | 1787 tran_low_t temp_in[16], temp_out[16]; |
1788 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1788 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1789 | 1789 |
1790 // First transform rows. | 1790 // First transform rows. |
1791 for (i = 0; i < 16; ++i) { | 1791 for (i = 0; i < 16; ++i) { |
1792 vp9_highbd_idct16_c(input, outptr, bd); | 1792 vpx_highbd_idct16_c(input, outptr, bd); |
1793 input += 16; | 1793 input += 16; |
1794 outptr += 16; | 1794 outptr += 16; |
1795 } | 1795 } |
1796 | 1796 |
1797 // Then transform columns. | 1797 // Then transform columns. |
1798 for (i = 0; i < 16; ++i) { | 1798 for (i = 0; i < 16; ++i) { |
1799 for (j = 0; j < 16; ++j) | 1799 for (j = 0; j < 16; ++j) |
1800 temp_in[j] = out[j * 16 + i]; | 1800 temp_in[j] = out[j * 16 + i]; |
1801 vp9_highbd_idct16_c(temp_in, temp_out, bd); | 1801 vpx_highbd_idct16_c(temp_in, temp_out, bd); |
1802 for (j = 0; j < 16; ++j) { | 1802 for (j = 0; j < 16; ++j) { |
1803 dest[j * stride + i] = highbd_clip_pixel_add( | 1803 dest[j * stride + i] = highbd_clip_pixel_add( |
1804 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 1804 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
1805 } | 1805 } |
1806 } | 1806 } |
1807 } | 1807 } |
1808 | 1808 |
1809 void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1809 void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { |
1810 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; | 1810 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
1811 tran_high_t s9, s10, s11, s12, s13, s14, s15; | 1811 tran_high_t s9, s10, s11, s12, s13, s14, s15; |
1812 | 1812 |
1813 tran_low_t x0 = input[15]; | 1813 tran_low_t x0 = input[15]; |
1814 tran_low_t x1 = input[0]; | 1814 tran_low_t x1 = input[0]; |
1815 tran_low_t x2 = input[13]; | 1815 tran_low_t x2 = input[13]; |
1816 tran_low_t x3 = input[2]; | 1816 tran_low_t x3 = input[2]; |
1817 tran_low_t x4 = input[11]; | 1817 tran_low_t x4 = input[11]; |
1818 tran_low_t x5 = input[4]; | 1818 tran_low_t x5 = input[4]; |
1819 tran_low_t x6 = input[9]; | 1819 tran_low_t x6 = input[9]; |
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1969 output[8] = WRAPLOW(x3, bd); | 1969 output[8] = WRAPLOW(x3, bd); |
1970 output[9] = WRAPLOW(x11, bd); | 1970 output[9] = WRAPLOW(x11, bd); |
1971 output[10] = WRAPLOW(x15, bd); | 1971 output[10] = WRAPLOW(x15, bd); |
1972 output[11] = WRAPLOW(x7, bd); | 1972 output[11] = WRAPLOW(x7, bd); |
1973 output[12] = WRAPLOW(x5, bd); | 1973 output[12] = WRAPLOW(x5, bd); |
1974 output[13] = WRAPLOW(-x13, bd); | 1974 output[13] = WRAPLOW(-x13, bd); |
1975 output[14] = WRAPLOW(x9, bd); | 1975 output[14] = WRAPLOW(x9, bd); |
1976 output[15] = WRAPLOW(-x1, bd); | 1976 output[15] = WRAPLOW(-x1, bd); |
1977 } | 1977 } |
1978 | 1978 |
1979 void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, | 1979 void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, |
1980 int stride, int bd) { | 1980 int stride, int bd) { |
1981 tran_low_t out[16 * 16] = { 0 }; | 1981 tran_low_t out[16 * 16] = { 0 }; |
1982 tran_low_t *outptr = out; | 1982 tran_low_t *outptr = out; |
1983 int i, j; | 1983 int i, j; |
1984 tran_low_t temp_in[16], temp_out[16]; | 1984 tran_low_t temp_in[16], temp_out[16]; |
1985 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1985 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1986 | 1986 |
1987 // First transform rows. Since all non-zero dct coefficients are in | 1987 // First transform rows. Since all non-zero dct coefficients are in |
1988 // upper-left 4x4 area, we only need to calculate first 4 rows here. | 1988 // upper-left 4x4 area, we only need to calculate first 4 rows here. |
1989 for (i = 0; i < 4; ++i) { | 1989 for (i = 0; i < 4; ++i) { |
1990 vp9_highbd_idct16_c(input, outptr, bd); | 1990 vpx_highbd_idct16_c(input, outptr, bd); |
1991 input += 16; | 1991 input += 16; |
1992 outptr += 16; | 1992 outptr += 16; |
1993 } | 1993 } |
1994 | 1994 |
1995 // Then transform columns. | 1995 // Then transform columns. |
1996 for (i = 0; i < 16; ++i) { | 1996 for (i = 0; i < 16; ++i) { |
1997 for (j = 0; j < 16; ++j) | 1997 for (j = 0; j < 16; ++j) |
1998 temp_in[j] = out[j*16 + i]; | 1998 temp_in[j] = out[j*16 + i]; |
1999 vp9_highbd_idct16_c(temp_in, temp_out, bd); | 1999 vpx_highbd_idct16_c(temp_in, temp_out, bd); |
2000 for (j = 0; j < 16; ++j) { | 2000 for (j = 0; j < 16; ++j) { |
2001 dest[j * stride + i] = highbd_clip_pixel_add( | 2001 dest[j * stride + i] = highbd_clip_pixel_add( |
2002 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2002 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2003 } | 2003 } |
2004 } | 2004 } |
2005 } | 2005 } |
2006 | 2006 |
2007 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2007 void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, |
2008 int stride, int bd) { | 2008 int stride, int bd) { |
2009 int i, j; | 2009 int i, j; |
2010 tran_high_t a1; | 2010 tran_high_t a1; |
2011 tran_low_t out = WRAPLOW( | 2011 tran_low_t out = WRAPLOW( |
2012 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 2012 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
2013 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2013 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2014 | 2014 |
2015 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 2015 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
2016 a1 = ROUND_POWER_OF_TWO(out, 6); | 2016 a1 = ROUND_POWER_OF_TWO(out, 6); |
2017 for (j = 0; j < 16; ++j) { | 2017 for (j = 0; j < 16; ++j) { |
2018 for (i = 0; i < 16; ++i) | 2018 for (i = 0; i < 16; ++i) |
2019 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2019 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
2020 dest += stride; | 2020 dest += stride; |
2021 } | 2021 } |
2022 } | 2022 } |
2023 | 2023 |
2024 void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) { | 2024 static void highbd_idct32_c(const tran_low_t *input, |
| 2025 tran_low_t *output, int bd) { |
2025 tran_low_t step1[32], step2[32]; | 2026 tran_low_t step1[32], step2[32]; |
2026 tran_high_t temp1, temp2; | 2027 tran_high_t temp1, temp2; |
2027 (void) bd; | 2028 (void) bd; |
2028 | 2029 |
2029 // stage 1 | 2030 // stage 1 |
2030 step1[0] = input[0]; | 2031 step1[0] = input[0]; |
2031 step1[1] = input[16]; | 2032 step1[1] = input[16]; |
2032 step1[2] = input[8]; | 2033 step1[2] = input[8]; |
2033 step1[3] = input[24]; | 2034 step1[3] = input[24]; |
2034 step1[4] = input[4]; | 2035 step1[4] = input[4]; |
(...skipping 347 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2382 output[24] = WRAPLOW(step1[7] - step1[24], bd); | 2383 output[24] = WRAPLOW(step1[7] - step1[24], bd); |
2383 output[25] = WRAPLOW(step1[6] - step1[25], bd); | 2384 output[25] = WRAPLOW(step1[6] - step1[25], bd); |
2384 output[26] = WRAPLOW(step1[5] - step1[26], bd); | 2385 output[26] = WRAPLOW(step1[5] - step1[26], bd); |
2385 output[27] = WRAPLOW(step1[4] - step1[27], bd); | 2386 output[27] = WRAPLOW(step1[4] - step1[27], bd); |
2386 output[28] = WRAPLOW(step1[3] - step1[28], bd); | 2387 output[28] = WRAPLOW(step1[3] - step1[28], bd); |
2387 output[29] = WRAPLOW(step1[2] - step1[29], bd); | 2388 output[29] = WRAPLOW(step1[2] - step1[29], bd); |
2388 output[30] = WRAPLOW(step1[1] - step1[30], bd); | 2389 output[30] = WRAPLOW(step1[1] - step1[30], bd); |
2389 output[31] = WRAPLOW(step1[0] - step1[31], bd); | 2390 output[31] = WRAPLOW(step1[0] - step1[31], bd); |
2390 } | 2391 } |
2391 | 2392 |
2392 void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, | 2393 void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, |
2393 int stride, int bd) { | 2394 int stride, int bd) { |
2394 tran_low_t out[32 * 32]; | 2395 tran_low_t out[32 * 32]; |
2395 tran_low_t *outptr = out; | 2396 tran_low_t *outptr = out; |
2396 int i, j; | 2397 int i, j; |
2397 tran_low_t temp_in[32], temp_out[32]; | 2398 tran_low_t temp_in[32], temp_out[32]; |
2398 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2399 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2399 | 2400 |
2400 // Rows | 2401 // Rows |
2401 for (i = 0; i < 32; ++i) { | 2402 for (i = 0; i < 32; ++i) { |
2402 tran_low_t zero_coeff[16]; | 2403 tran_low_t zero_coeff[16]; |
(...skipping 19 matching lines...) Expand all Loading... |
2422 for (j = 0; j < 32; ++j) | 2423 for (j = 0; j < 32; ++j) |
2423 temp_in[j] = out[j * 32 + i]; | 2424 temp_in[j] = out[j * 32 + i]; |
2424 highbd_idct32_c(temp_in, temp_out, bd); | 2425 highbd_idct32_c(temp_in, temp_out, bd); |
2425 for (j = 0; j < 32; ++j) { | 2426 for (j = 0; j < 32; ++j) { |
2426 dest[j * stride + i] = highbd_clip_pixel_add( | 2427 dest[j * stride + i] = highbd_clip_pixel_add( |
2427 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2428 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2428 } | 2429 } |
2429 } | 2430 } |
2430 } | 2431 } |
2431 | 2432 |
2432 void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, | 2433 void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, |
2433 int stride, int bd) { | 2434 int stride, int bd) { |
2434 tran_low_t out[32 * 32] = {0}; | 2435 tran_low_t out[32 * 32] = {0}; |
2435 tran_low_t *outptr = out; | 2436 tran_low_t *outptr = out; |
2436 int i, j; | 2437 int i, j; |
2437 tran_low_t temp_in[32], temp_out[32]; | 2438 tran_low_t temp_in[32], temp_out[32]; |
2438 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2439 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2439 | 2440 |
2440 // Rows | 2441 // Rows |
2441 // Only upper-left 8x8 has non-zero coeff. | 2442 // Only upper-left 8x8 has non-zero coeff. |
2442 for (i = 0; i < 8; ++i) { | 2443 for (i = 0; i < 8; ++i) { |
2443 highbd_idct32_c(input, outptr, bd); | 2444 highbd_idct32_c(input, outptr, bd); |
2444 input += 32; | 2445 input += 32; |
2445 outptr += 32; | 2446 outptr += 32; |
2446 } | 2447 } |
2447 // Columns | 2448 // Columns |
2448 for (i = 0; i < 32; ++i) { | 2449 for (i = 0; i < 32; ++i) { |
2449 for (j = 0; j < 32; ++j) | 2450 for (j = 0; j < 32; ++j) |
2450 temp_in[j] = out[j * 32 + i]; | 2451 temp_in[j] = out[j * 32 + i]; |
2451 highbd_idct32_c(temp_in, temp_out, bd); | 2452 highbd_idct32_c(temp_in, temp_out, bd); |
2452 for (j = 0; j < 32; ++j) { | 2453 for (j = 0; j < 32; ++j) { |
2453 dest[j * stride + i] = highbd_clip_pixel_add( | 2454 dest[j * stride + i] = highbd_clip_pixel_add( |
2454 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2455 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2455 } | 2456 } |
2456 } | 2457 } |
2457 } | 2458 } |
2458 | 2459 |
2459 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2460 void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, |
2460 int stride, int bd) { | 2461 int stride, int bd) { |
2461 int i, j; | 2462 int i, j; |
2462 int a1; | 2463 int a1; |
2463 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2464 | 2465 |
2465 tran_low_t out = WRAPLOW( | 2466 tran_low_t out = WRAPLOW( |
2466 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 2467 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
2467 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 2468 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
2468 a1 = ROUND_POWER_OF_TWO(out, 6); | 2469 a1 = ROUND_POWER_OF_TWO(out, 6); |
2469 | 2470 |
2470 for (j = 0; j < 32; ++j) { | 2471 for (j = 0; j < 32; ++j) { |
2471 for (i = 0; i < 32; ++i) | 2472 for (i = 0; i < 32; ++i) |
2472 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2473 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
2473 dest += stride; | 2474 dest += stride; |
2474 } | 2475 } |
2475 } | 2476 } |
2476 #endif // CONFIG_VP9_HIGHBITDEPTH | 2477 #endif // CONFIG_VP9_HIGHBITDEPTH |
OLD | NEW |