Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(96)

Side by Side Diff: source/libvpx/vpx_dsp/inv_txfm.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vpx_dsp/inv_txfm.h ('k') | source/libvpx/vpx_dsp/mips/common_dspr2.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <math.h> 11 #include <math.h>
12 #include <string.h> 12 #include <string.h>
13 13
14 #include "vpx_dsp/inv_txfm.h" 14 #include "vpx_dsp/inv_txfm.h"
15 15
16 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { 16 void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
17 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, 17 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
18 0.5 shifts per pixel. */ 18 0.5 shifts per pixel. */
19 int i; 19 int i;
20 tran_low_t output[16]; 20 tran_low_t output[16];
21 tran_high_t a1, b1, c1, d1, e1; 21 tran_high_t a1, b1, c1, d1, e1;
22 const tran_low_t *ip = input; 22 const tran_low_t *ip = input;
23 tran_low_t *op = output; 23 tran_low_t *op = output;
24 24
25 for (i = 0; i < 4; i++) { 25 for (i = 0; i < 4; i++) {
26 a1 = ip[0] >> UNIT_QUANT_SHIFT; 26 a1 = ip[0] >> UNIT_QUANT_SHIFT;
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
58 dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); 58 dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1);
59 dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1); 59 dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1);
60 dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1); 60 dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1);
61 dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1); 61 dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1);
62 62
63 ip++; 63 ip++;
64 dest++; 64 dest++;
65 } 65 }
66 } 66 }
67 67
68 void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { 68 void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
69 int i; 69 int i;
70 tran_high_t a1, e1; 70 tran_high_t a1, e1;
71 tran_low_t tmp[4]; 71 tran_low_t tmp[4];
72 const tran_low_t *ip = in; 72 const tran_low_t *ip = in;
73 tran_low_t *op = tmp; 73 tran_low_t *op = tmp;
74 74
75 a1 = ip[0] >> UNIT_QUANT_SHIFT; 75 a1 = ip[0] >> UNIT_QUANT_SHIFT;
76 e1 = a1 >> 1; 76 e1 = a1 >> 1;
77 a1 -= e1; 77 a1 -= e1;
78 op[0] = WRAPLOW(a1, 8); 78 op[0] = WRAPLOW(a1, 8);
(...skipping 25 matching lines...) Expand all
104 step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); 104 step[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
105 step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); 105 step[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
106 106
107 // stage 2 107 // stage 2
108 output[0] = WRAPLOW(step[0] + step[3], 8); 108 output[0] = WRAPLOW(step[0] + step[3], 8);
109 output[1] = WRAPLOW(step[1] + step[2], 8); 109 output[1] = WRAPLOW(step[1] + step[2], 8);
110 output[2] = WRAPLOW(step[1] - step[2], 8); 110 output[2] = WRAPLOW(step[1] - step[2], 8);
111 output[3] = WRAPLOW(step[0] - step[3], 8); 111 output[3] = WRAPLOW(step[0] - step[3], 8);
112 } 112 }
113 113
114 void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { 114 void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
115 tran_low_t out[4 * 4]; 115 tran_low_t out[4 * 4];
116 tran_low_t *outptr = out; 116 tran_low_t *outptr = out;
117 int i, j; 117 int i, j;
118 tran_low_t temp_in[4], temp_out[4]; 118 tran_low_t temp_in[4], temp_out[4];
119 119
120 // Rows 120 // Rows
121 for (i = 0; i < 4; ++i) { 121 for (i = 0; i < 4; ++i) {
122 idct4_c(input, outptr); 122 idct4_c(input, outptr);
123 input += 4; 123 input += 4;
124 outptr += 4; 124 outptr += 4;
125 } 125 }
126 126
127 // Columns 127 // Columns
128 for (i = 0; i < 4; ++i) { 128 for (i = 0; i < 4; ++i) {
129 for (j = 0; j < 4; ++j) 129 for (j = 0; j < 4; ++j)
130 temp_in[j] = out[j * 4 + i]; 130 temp_in[j] = out[j * 4 + i];
131 idct4_c(temp_in, temp_out); 131 idct4_c(temp_in, temp_out);
132 for (j = 0; j < 4; ++j) { 132 for (j = 0; j < 4; ++j) {
133 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 133 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
134 ROUND_POWER_OF_TWO(temp_out[j], 4)); 134 ROUND_POWER_OF_TWO(temp_out[j], 4));
135 } 135 }
136 } 136 }
137 } 137 }
138 138
139 void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, 139 void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
140 int dest_stride) { 140 int dest_stride) {
141 int i; 141 int i;
142 tran_high_t a1; 142 tran_high_t a1;
143 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); 143 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
144 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); 144 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
145 a1 = ROUND_POWER_OF_TWO(out, 4); 145 a1 = ROUND_POWER_OF_TWO(out, 4);
146 146
147 for (i = 0; i < 4; i++) { 147 for (i = 0; i < 4; i++) {
148 dest[0] = clip_pixel_add(dest[0], a1); 148 dest[0] = clip_pixel_add(dest[0], a1);
149 dest[1] = clip_pixel_add(dest[1], a1); 149 dest[1] = clip_pixel_add(dest[1], a1);
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
191 output[0] = WRAPLOW(step1[0] + step1[7], 8); 191 output[0] = WRAPLOW(step1[0] + step1[7], 8);
192 output[1] = WRAPLOW(step1[1] + step1[6], 8); 192 output[1] = WRAPLOW(step1[1] + step1[6], 8);
193 output[2] = WRAPLOW(step1[2] + step1[5], 8); 193 output[2] = WRAPLOW(step1[2] + step1[5], 8);
194 output[3] = WRAPLOW(step1[3] + step1[4], 8); 194 output[3] = WRAPLOW(step1[3] + step1[4], 8);
195 output[4] = WRAPLOW(step1[3] - step1[4], 8); 195 output[4] = WRAPLOW(step1[3] - step1[4], 8);
196 output[5] = WRAPLOW(step1[2] - step1[5], 8); 196 output[5] = WRAPLOW(step1[2] - step1[5], 8);
197 output[6] = WRAPLOW(step1[1] - step1[6], 8); 197 output[6] = WRAPLOW(step1[1] - step1[6], 8);
198 output[7] = WRAPLOW(step1[0] - step1[7], 8); 198 output[7] = WRAPLOW(step1[0] - step1[7], 8);
199 } 199 }
200 200
201 void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { 201 void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
202 tran_low_t out[8 * 8]; 202 tran_low_t out[8 * 8];
203 tran_low_t *outptr = out; 203 tran_low_t *outptr = out;
204 int i, j; 204 int i, j;
205 tran_low_t temp_in[8], temp_out[8]; 205 tran_low_t temp_in[8], temp_out[8];
206 206
207 // First transform rows 207 // First transform rows
208 for (i = 0; i < 8; ++i) { 208 for (i = 0; i < 8; ++i) {
209 idct8_c(input, outptr); 209 idct8_c(input, outptr);
210 input += 8; 210 input += 8;
211 outptr += 8; 211 outptr += 8;
212 } 212 }
213 213
214 // Then transform columns 214 // Then transform columns
215 for (i = 0; i < 8; ++i) { 215 for (i = 0; i < 8; ++i) {
216 for (j = 0; j < 8; ++j) 216 for (j = 0; j < 8; ++j)
217 temp_in[j] = out[j * 8 + i]; 217 temp_in[j] = out[j * 8 + i];
218 idct8_c(temp_in, temp_out); 218 idct8_c(temp_in, temp_out);
219 for (j = 0; j < 8; ++j) { 219 for (j = 0; j < 8; ++j) {
220 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 220 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
221 ROUND_POWER_OF_TWO(temp_out[j], 5)); 221 ROUND_POWER_OF_TWO(temp_out[j], 5));
222 } 222 }
223 } 223 }
224 } 224 }
225 225
226 void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { 226 void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
227 int i, j; 227 int i, j;
228 tran_high_t a1; 228 tran_high_t a1;
229 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); 229 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
230 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); 230 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
231 a1 = ROUND_POWER_OF_TWO(out, 5); 231 a1 = ROUND_POWER_OF_TWO(out, 5);
232 for (j = 0; j < 8; ++j) { 232 for (j = 0; j < 8; ++j) {
233 for (i = 0; i < 8; ++i) 233 for (i = 0; i < 8; ++i)
234 dest[i] = clip_pixel_add(dest[i], a1); 234 dest[i] = clip_pixel_add(dest[i], a1);
235 dest += stride; 235 dest += stride;
236 } 236 }
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
343 output[0] = WRAPLOW(x0, 8); 343 output[0] = WRAPLOW(x0, 8);
344 output[1] = WRAPLOW(-x4, 8); 344 output[1] = WRAPLOW(-x4, 8);
345 output[2] = WRAPLOW(x6, 8); 345 output[2] = WRAPLOW(x6, 8);
346 output[3] = WRAPLOW(-x2, 8); 346 output[3] = WRAPLOW(-x2, 8);
347 output[4] = WRAPLOW(x3, 8); 347 output[4] = WRAPLOW(x3, 8);
348 output[5] = WRAPLOW(-x7, 8); 348 output[5] = WRAPLOW(-x7, 8);
349 output[6] = WRAPLOW(x5, 8); 349 output[6] = WRAPLOW(x5, 8);
350 output[7] = WRAPLOW(-x1, 8); 350 output[7] = WRAPLOW(-x1, 8);
351 } 351 }
352 352
353 void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { 353 void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
354 tran_low_t out[8 * 8] = { 0 }; 354 tran_low_t out[8 * 8] = { 0 };
355 tran_low_t *outptr = out; 355 tran_low_t *outptr = out;
356 int i, j; 356 int i, j;
357 tran_low_t temp_in[8], temp_out[8]; 357 tran_low_t temp_in[8], temp_out[8];
358 358
359 // First transform rows 359 // First transform rows
360 // only the first 4 rows have non-zero coefs 360 // only the first 4 rows have non-zero coefs
361 for (i = 0; i < 4; ++i) { 361 for (i = 0; i < 4; ++i) {
362 idct8_c(input, outptr); 362 idct8_c(input, outptr);
363 input += 8; 363 input += 8;
(...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after
534 output[8] = WRAPLOW(step2[7] - step2[8], 8); 534 output[8] = WRAPLOW(step2[7] - step2[8], 8);
535 output[9] = WRAPLOW(step2[6] - step2[9], 8); 535 output[9] = WRAPLOW(step2[6] - step2[9], 8);
536 output[10] = WRAPLOW(step2[5] - step2[10], 8); 536 output[10] = WRAPLOW(step2[5] - step2[10], 8);
537 output[11] = WRAPLOW(step2[4] - step2[11], 8); 537 output[11] = WRAPLOW(step2[4] - step2[11], 8);
538 output[12] = WRAPLOW(step2[3] - step2[12], 8); 538 output[12] = WRAPLOW(step2[3] - step2[12], 8);
539 output[13] = WRAPLOW(step2[2] - step2[13], 8); 539 output[13] = WRAPLOW(step2[2] - step2[13], 8);
540 output[14] = WRAPLOW(step2[1] - step2[14], 8); 540 output[14] = WRAPLOW(step2[1] - step2[14], 8);
541 output[15] = WRAPLOW(step2[0] - step2[15], 8); 541 output[15] = WRAPLOW(step2[0] - step2[15], 8);
542 } 542 }
543 543
544 void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, 544 void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
545 int stride) { 545 int stride) {
546 tran_low_t out[16 * 16]; 546 tran_low_t out[16 * 16];
547 tran_low_t *outptr = out; 547 tran_low_t *outptr = out;
548 int i, j; 548 int i, j;
549 tran_low_t temp_in[16], temp_out[16]; 549 tran_low_t temp_in[16], temp_out[16];
550 550
551 // First transform rows 551 // First transform rows
552 for (i = 0; i < 16; ++i) { 552 for (i = 0; i < 16; ++i) {
553 idct16_c(input, outptr); 553 idct16_c(input, outptr);
554 input += 16; 554 input += 16;
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after
732 output[8] = WRAPLOW(x3, 8); 732 output[8] = WRAPLOW(x3, 8);
733 output[9] = WRAPLOW(x11, 8); 733 output[9] = WRAPLOW(x11, 8);
734 output[10] = WRAPLOW(x15, 8); 734 output[10] = WRAPLOW(x15, 8);
735 output[11] = WRAPLOW(x7, 8); 735 output[11] = WRAPLOW(x7, 8);
736 output[12] = WRAPLOW(x5, 8); 736 output[12] = WRAPLOW(x5, 8);
737 output[13] = WRAPLOW(-x13, 8); 737 output[13] = WRAPLOW(-x13, 8);
738 output[14] = WRAPLOW(x9, 8); 738 output[14] = WRAPLOW(x9, 8);
739 output[15] = WRAPLOW(-x1, 8); 739 output[15] = WRAPLOW(-x1, 8);
740 } 740 }
741 741
742 void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, 742 void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,
743 int stride) { 743 int stride) {
744 tran_low_t out[16 * 16] = { 0 }; 744 tran_low_t out[16 * 16] = { 0 };
745 tran_low_t *outptr = out; 745 tran_low_t *outptr = out;
746 int i, j; 746 int i, j;
747 tran_low_t temp_in[16], temp_out[16]; 747 tran_low_t temp_in[16], temp_out[16];
748 748
749 // First transform rows. Since all non-zero dct coefficients are in 749 // First transform rows. Since all non-zero dct coefficients are in
750 // upper-left 4x4 area, we only need to calculate first 4 rows here. 750 // upper-left 4x4 area, we only need to calculate first 4 rows here.
751 for (i = 0; i < 4; ++i) { 751 for (i = 0; i < 4; ++i) {
752 idct16_c(input, outptr); 752 idct16_c(input, outptr);
753 input += 16; 753 input += 16;
754 outptr += 16; 754 outptr += 16;
755 } 755 }
756 756
757 // Then transform columns 757 // Then transform columns
758 for (i = 0; i < 16; ++i) { 758 for (i = 0; i < 16; ++i) {
759 for (j = 0; j < 16; ++j) 759 for (j = 0; j < 16; ++j)
760 temp_in[j] = out[j*16 + i]; 760 temp_in[j] = out[j*16 + i];
761 idct16_c(temp_in, temp_out); 761 idct16_c(temp_in, temp_out);
762 for (j = 0; j < 16; ++j) { 762 for (j = 0; j < 16; ++j) {
763 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 763 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
764 ROUND_POWER_OF_TWO(temp_out[j], 6)); 764 ROUND_POWER_OF_TWO(temp_out[j], 6));
765 } 765 }
766 } 766 }
767 } 767 }
768 768
769 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { 769 void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
770 int i, j; 770 int i, j;
771 tran_high_t a1; 771 tran_high_t a1;
772 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); 772 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
773 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); 773 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
774 a1 = ROUND_POWER_OF_TWO(out, 6); 774 a1 = ROUND_POWER_OF_TWO(out, 6);
775 for (j = 0; j < 16; ++j) { 775 for (j = 0; j < 16; ++j) {
776 for (i = 0; i < 16; ++i) 776 for (i = 0; i < 16; ++i)
777 dest[i] = clip_pixel_add(dest[i], a1); 777 dest[i] = clip_pixel_add(dest[i], a1);
778 dest += stride; 778 dest += stride;
779 } 779 }
(...skipping 359 matching lines...) Expand 10 before | Expand all | Expand 10 after
1139 output[24] = WRAPLOW(step1[7] - step1[24], 8); 1139 output[24] = WRAPLOW(step1[7] - step1[24], 8);
1140 output[25] = WRAPLOW(step1[6] - step1[25], 8); 1140 output[25] = WRAPLOW(step1[6] - step1[25], 8);
1141 output[26] = WRAPLOW(step1[5] - step1[26], 8); 1141 output[26] = WRAPLOW(step1[5] - step1[26], 8);
1142 output[27] = WRAPLOW(step1[4] - step1[27], 8); 1142 output[27] = WRAPLOW(step1[4] - step1[27], 8);
1143 output[28] = WRAPLOW(step1[3] - step1[28], 8); 1143 output[28] = WRAPLOW(step1[3] - step1[28], 8);
1144 output[29] = WRAPLOW(step1[2] - step1[29], 8); 1144 output[29] = WRAPLOW(step1[2] - step1[29], 8);
1145 output[30] = WRAPLOW(step1[1] - step1[30], 8); 1145 output[30] = WRAPLOW(step1[1] - step1[30], 8);
1146 output[31] = WRAPLOW(step1[0] - step1[31], 8); 1146 output[31] = WRAPLOW(step1[0] - step1[31], 8);
1147 } 1147 }
1148 1148
1149 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, 1149 void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
1150 int stride) { 1150 int stride) {
1151 tran_low_t out[32 * 32]; 1151 tran_low_t out[32 * 32];
1152 tran_low_t *outptr = out; 1152 tran_low_t *outptr = out;
1153 int i, j; 1153 int i, j;
1154 tran_low_t temp_in[32], temp_out[32]; 1154 tran_low_t temp_in[32], temp_out[32];
1155 1155
1156 // Rows 1156 // Rows
1157 for (i = 0; i < 32; ++i) { 1157 for (i = 0; i < 32; ++i) {
1158 int16_t zero_coeff[16]; 1158 int16_t zero_coeff[16];
1159 for (j = 0; j < 16; ++j) 1159 for (j = 0; j < 16; ++j)
(...skipping 18 matching lines...) Expand all
1178 for (j = 0; j < 32; ++j) 1178 for (j = 0; j < 32; ++j)
1179 temp_in[j] = out[j * 32 + i]; 1179 temp_in[j] = out[j * 32 + i];
1180 idct32_c(temp_in, temp_out); 1180 idct32_c(temp_in, temp_out);
1181 for (j = 0; j < 32; ++j) { 1181 for (j = 0; j < 32; ++j) {
1182 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 1182 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
1183 ROUND_POWER_OF_TWO(temp_out[j], 6)); 1183 ROUND_POWER_OF_TWO(temp_out[j], 6));
1184 } 1184 }
1185 } 1185 }
1186 } 1186 }
1187 1187
1188 void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, 1188 void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
1189 int stride) { 1189 int stride) {
1190 tran_low_t out[32 * 32] = {0}; 1190 tran_low_t out[32 * 32] = {0};
1191 tran_low_t *outptr = out; 1191 tran_low_t *outptr = out;
1192 int i, j; 1192 int i, j;
1193 tran_low_t temp_in[32], temp_out[32]; 1193 tran_low_t temp_in[32], temp_out[32];
1194 1194
1195 // Rows 1195 // Rows
1196 // only upper-left 8x8 has non-zero coeff 1196 // only upper-left 8x8 has non-zero coeff
1197 for (i = 0; i < 8; ++i) { 1197 for (i = 0; i < 8; ++i) {
1198 idct32_c(input, outptr); 1198 idct32_c(input, outptr);
1199 input += 32; 1199 input += 32;
1200 outptr += 32; 1200 outptr += 32;
1201 } 1201 }
1202 1202
1203 // Columns 1203 // Columns
1204 for (i = 0; i < 32; ++i) { 1204 for (i = 0; i < 32; ++i) {
1205 for (j = 0; j < 32; ++j) 1205 for (j = 0; j < 32; ++j)
1206 temp_in[j] = out[j * 32 + i]; 1206 temp_in[j] = out[j * 32 + i];
1207 idct32_c(temp_in, temp_out); 1207 idct32_c(temp_in, temp_out);
1208 for (j = 0; j < 32; ++j) { 1208 for (j = 0; j < 32; ++j) {
1209 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 1209 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
1210 ROUND_POWER_OF_TWO(temp_out[j], 6)); 1210 ROUND_POWER_OF_TWO(temp_out[j], 6));
1211 } 1211 }
1212 } 1212 }
1213 } 1213 }
1214 1214
1215 void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { 1215 void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
1216 int i, j; 1216 int i, j;
1217 tran_high_t a1; 1217 tran_high_t a1;
1218 1218
1219 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); 1219 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
1220 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); 1220 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
1221 a1 = ROUND_POWER_OF_TWO(out, 6); 1221 a1 = ROUND_POWER_OF_TWO(out, 6);
1222 1222
1223 for (j = 0; j < 32; ++j) { 1223 for (j = 0; j < 32; ++j) {
1224 for (i = 0; i < 32; ++i) 1224 for (i = 0; i < 32; ++i)
1225 dest[i] = clip_pixel_add(dest[i], a1); 1225 dest[i] = clip_pixel_add(dest[i], a1);
1226 dest += stride; 1226 dest += stride;
1227 } 1227 }
1228 } 1228 }
1229 1229
1230 #if CONFIG_VP9_HIGHBITDEPTH 1230 #if CONFIG_VP9_HIGHBITDEPTH
1231 void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, 1231 void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
1232 int stride, int bd) { 1232 int stride, int bd) {
1233 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, 1233 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
1234 0.5 shifts per pixel. */ 1234 0.5 shifts per pixel. */
1235 int i; 1235 int i;
1236 tran_low_t output[16]; 1236 tran_low_t output[16];
1237 tran_high_t a1, b1, c1, d1, e1; 1237 tran_high_t a1, b1, c1, d1, e1;
1238 const tran_low_t *ip = input; 1238 const tran_low_t *ip = input;
1239 tran_low_t *op = output; 1239 tran_low_t *op = output;
1240 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1240 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1241 1241
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1275 dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); 1275 dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd);
1276 dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); 1276 dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd);
1277 dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); 1277 dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd);
1278 dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); 1278 dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd);
1279 1279
1280 ip++; 1280 ip++;
1281 dest++; 1281 dest++;
1282 } 1282 }
1283 } 1283 }
1284 1284
1285 void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, 1285 void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
1286 int dest_stride, int bd) { 1286 int dest_stride, int bd) {
1287 int i; 1287 int i;
1288 tran_high_t a1, e1; 1288 tran_high_t a1, e1;
1289 tran_low_t tmp[4]; 1289 tran_low_t tmp[4];
1290 const tran_low_t *ip = in; 1290 const tran_low_t *ip = in;
1291 tran_low_t *op = tmp; 1291 tran_low_t *op = tmp;
1292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1293 (void) bd; 1293 (void) bd;
1294 1294
1295 a1 = ip[0] >> UNIT_QUANT_SHIFT; 1295 a1 = ip[0] >> UNIT_QUANT_SHIFT;
(...skipping 12 matching lines...) Expand all
1308 dest[dest_stride * 1], e1, bd); 1308 dest[dest_stride * 1], e1, bd);
1309 dest[dest_stride * 2] = highbd_clip_pixel_add( 1309 dest[dest_stride * 2] = highbd_clip_pixel_add(
1310 dest[dest_stride * 2], e1, bd); 1310 dest[dest_stride * 2], e1, bd);
1311 dest[dest_stride * 3] = highbd_clip_pixel_add( 1311 dest[dest_stride * 3] = highbd_clip_pixel_add(
1312 dest[dest_stride * 3], e1, bd); 1312 dest[dest_stride * 3], e1, bd);
1313 ip++; 1313 ip++;
1314 dest++; 1314 dest++;
1315 } 1315 }
1316 } 1316 }
1317 1317
1318 void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { 1318 void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
1319 tran_low_t step[4]; 1319 tran_low_t step[4];
1320 tran_high_t temp1, temp2; 1320 tran_high_t temp1, temp2;
1321 (void) bd; 1321 (void) bd;
1322 // stage 1 1322 // stage 1
1323 temp1 = (input[0] + input[2]) * cospi_16_64; 1323 temp1 = (input[0] + input[2]) * cospi_16_64;
1324 temp2 = (input[0] - input[2]) * cospi_16_64; 1324 temp2 = (input[0] - input[2]) * cospi_16_64;
1325 step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); 1325 step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1326 step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); 1326 step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1327 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; 1327 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
1328 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; 1328 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
1329 step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); 1329 step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1330 step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); 1330 step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1331 1331
1332 // stage 2 1332 // stage 2
1333 output[0] = WRAPLOW(step[0] + step[3], bd); 1333 output[0] = WRAPLOW(step[0] + step[3], bd);
1334 output[1] = WRAPLOW(step[1] + step[2], bd); 1334 output[1] = WRAPLOW(step[1] + step[2], bd);
1335 output[2] = WRAPLOW(step[1] - step[2], bd); 1335 output[2] = WRAPLOW(step[1] - step[2], bd);
1336 output[3] = WRAPLOW(step[0] - step[3], bd); 1336 output[3] = WRAPLOW(step[0] - step[3], bd);
1337 } 1337 }
1338 1338
1339 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, 1339 void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
1340 int stride, int bd) { 1340 int stride, int bd) {
1341 tran_low_t out[4 * 4]; 1341 tran_low_t out[4 * 4];
1342 tran_low_t *outptr = out; 1342 tran_low_t *outptr = out;
1343 int i, j; 1343 int i, j;
1344 tran_low_t temp_in[4], temp_out[4]; 1344 tran_low_t temp_in[4], temp_out[4];
1345 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1345 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1346 1346
1347 // Rows 1347 // Rows
1348 for (i = 0; i < 4; ++i) { 1348 for (i = 0; i < 4; ++i) {
1349 vp9_highbd_idct4_c(input, outptr, bd); 1349 vpx_highbd_idct4_c(input, outptr, bd);
1350 input += 4; 1350 input += 4;
1351 outptr += 4; 1351 outptr += 4;
1352 } 1352 }
1353 1353
1354 // Columns 1354 // Columns
1355 for (i = 0; i < 4; ++i) { 1355 for (i = 0; i < 4; ++i) {
1356 for (j = 0; j < 4; ++j) 1356 for (j = 0; j < 4; ++j)
1357 temp_in[j] = out[j * 4 + i]; 1357 temp_in[j] = out[j * 4 + i];
1358 vp9_highbd_idct4_c(temp_in, temp_out, bd); 1358 vpx_highbd_idct4_c(temp_in, temp_out, bd);
1359 for (j = 0; j < 4; ++j) { 1359 for (j = 0; j < 4; ++j) {
1360 dest[j * stride + i] = highbd_clip_pixel_add( 1360 dest[j * stride + i] = highbd_clip_pixel_add(
1361 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); 1361 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
1362 } 1362 }
1363 } 1363 }
1364 } 1364 }
1365 1365
1366 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, 1366 void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
1367 int dest_stride, int bd) { 1367 int dest_stride, int bd) {
1368 int i; 1368 int i;
1369 tran_high_t a1; 1369 tran_high_t a1;
1370 tran_low_t out = WRAPLOW( 1370 tran_low_t out = WRAPLOW(
1371 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); 1371 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
1372 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1372 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1373 1373
1374 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); 1374 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
1375 a1 = ROUND_POWER_OF_TWO(out, 4); 1375 a1 = ROUND_POWER_OF_TWO(out, 4);
1376 1376
1377 for (i = 0; i < 4; i++) { 1377 for (i = 0; i < 4; i++) {
1378 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); 1378 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd);
1379 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); 1379 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd);
1380 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); 1380 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd);
1381 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); 1381 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd);
1382 dest += dest_stride; 1382 dest += dest_stride;
1383 } 1383 }
1384 } 1384 }
1385 1385
1386 void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { 1386 void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
1387 tran_low_t step1[8], step2[8]; 1387 tran_low_t step1[8], step2[8];
1388 tran_high_t temp1, temp2; 1388 tran_high_t temp1, temp2;
1389 // stage 1 1389 // stage 1
1390 step1[0] = input[0]; 1390 step1[0] = input[0];
1391 step1[2] = input[4]; 1391 step1[2] = input[4];
1392 step1[1] = input[2]; 1392 step1[1] = input[2];
1393 step1[3] = input[6]; 1393 step1[3] = input[6];
1394 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; 1394 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
1395 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; 1395 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
1396 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); 1396 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1397 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); 1397 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1398 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; 1398 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
1399 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; 1399 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
1400 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); 1400 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1401 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); 1401 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1402 1402
1403 // stage 2 & stage 3 - even half 1403 // stage 2 & stage 3 - even half
1404 vp9_highbd_idct4_c(step1, step1, bd); 1404 vpx_highbd_idct4_c(step1, step1, bd);
1405 1405
1406 // stage 2 - odd half 1406 // stage 2 - odd half
1407 step2[4] = WRAPLOW(step1[4] + step1[5], bd); 1407 step2[4] = WRAPLOW(step1[4] + step1[5], bd);
1408 step2[5] = WRAPLOW(step1[4] - step1[5], bd); 1408 step2[5] = WRAPLOW(step1[4] - step1[5], bd);
1409 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); 1409 step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
1410 step2[7] = WRAPLOW(step1[6] + step1[7], bd); 1410 step2[7] = WRAPLOW(step1[6] + step1[7], bd);
1411 1411
1412 // stage 3 - odd half 1412 // stage 3 - odd half
1413 step1[4] = step2[4]; 1413 step1[4] = step2[4];
1414 temp1 = (step2[6] - step2[5]) * cospi_16_64; 1414 temp1 = (step2[6] - step2[5]) * cospi_16_64;
1415 temp2 = (step2[5] + step2[6]) * cospi_16_64; 1415 temp2 = (step2[5] + step2[6]) * cospi_16_64;
1416 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); 1416 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1417 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); 1417 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1418 step1[7] = step2[7]; 1418 step1[7] = step2[7];
1419 1419
1420 // stage 4 1420 // stage 4
1421 output[0] = WRAPLOW(step1[0] + step1[7], bd); 1421 output[0] = WRAPLOW(step1[0] + step1[7], bd);
1422 output[1] = WRAPLOW(step1[1] + step1[6], bd); 1422 output[1] = WRAPLOW(step1[1] + step1[6], bd);
1423 output[2] = WRAPLOW(step1[2] + step1[5], bd); 1423 output[2] = WRAPLOW(step1[2] + step1[5], bd);
1424 output[3] = WRAPLOW(step1[3] + step1[4], bd); 1424 output[3] = WRAPLOW(step1[3] + step1[4], bd);
1425 output[4] = WRAPLOW(step1[3] - step1[4], bd); 1425 output[4] = WRAPLOW(step1[3] - step1[4], bd);
1426 output[5] = WRAPLOW(step1[2] - step1[5], bd); 1426 output[5] = WRAPLOW(step1[2] - step1[5], bd);
1427 output[6] = WRAPLOW(step1[1] - step1[6], bd); 1427 output[6] = WRAPLOW(step1[1] - step1[6], bd);
1428 output[7] = WRAPLOW(step1[0] - step1[7], bd); 1428 output[7] = WRAPLOW(step1[0] - step1[7], bd);
1429 } 1429 }
1430 1430
1431 void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, 1431 void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
1432 int stride, int bd) { 1432 int stride, int bd) {
1433 tran_low_t out[8 * 8]; 1433 tran_low_t out[8 * 8];
1434 tran_low_t *outptr = out; 1434 tran_low_t *outptr = out;
1435 int i, j; 1435 int i, j;
1436 tran_low_t temp_in[8], temp_out[8]; 1436 tran_low_t temp_in[8], temp_out[8];
1437 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1437 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1438 1438
1439 // First transform rows. 1439 // First transform rows.
1440 for (i = 0; i < 8; ++i) { 1440 for (i = 0; i < 8; ++i) {
1441 vp9_highbd_idct8_c(input, outptr, bd); 1441 vpx_highbd_idct8_c(input, outptr, bd);
1442 input += 8; 1442 input += 8;
1443 outptr += 8; 1443 outptr += 8;
1444 } 1444 }
1445 1445
1446 // Then transform columns. 1446 // Then transform columns.
1447 for (i = 0; i < 8; ++i) { 1447 for (i = 0; i < 8; ++i) {
1448 for (j = 0; j < 8; ++j) 1448 for (j = 0; j < 8; ++j)
1449 temp_in[j] = out[j * 8 + i]; 1449 temp_in[j] = out[j * 8 + i];
1450 vp9_highbd_idct8_c(temp_in, temp_out, bd); 1450 vpx_highbd_idct8_c(temp_in, temp_out, bd);
1451 for (j = 0; j < 8; ++j) { 1451 for (j = 0; j < 8; ++j) {
1452 dest[j * stride + i] = highbd_clip_pixel_add( 1452 dest[j * stride + i] = highbd_clip_pixel_add(
1453 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); 1453 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
1454 } 1454 }
1455 } 1455 }
1456 } 1456 }
1457 1457
1458 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, 1458 void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
1459 int stride, int bd) { 1459 int stride, int bd) {
1460 int i, j; 1460 int i, j;
1461 tran_high_t a1; 1461 tran_high_t a1;
1462 tran_low_t out = WRAPLOW( 1462 tran_low_t out = WRAPLOW(
1463 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); 1463 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
1464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1465 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); 1465 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
1466 a1 = ROUND_POWER_OF_TWO(out, 5); 1466 a1 = ROUND_POWER_OF_TWO(out, 5);
1467 for (j = 0; j < 8; ++j) { 1467 for (j = 0; j < 8; ++j) {
1468 for (i = 0; i < 8; ++i) 1468 for (i = 0; i < 8; ++i)
1469 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); 1469 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
1470 dest += stride; 1470 dest += stride;
1471 } 1471 }
1472 } 1472 }
1473 1473
1474 void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { 1474 void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
1475 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; 1475 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
1476 1476
1477 tran_low_t x0 = input[0]; 1477 tran_low_t x0 = input[0];
1478 tran_low_t x1 = input[1]; 1478 tran_low_t x1 = input[1];
1479 tran_low_t x2 = input[2]; 1479 tran_low_t x2 = input[2];
1480 tran_low_t x3 = input[3]; 1480 tran_low_t x3 = input[3];
1481 (void) bd; 1481 (void) bd;
1482 1482
1483 if (!(x0 | x1 | x2 | x3)) { 1483 if (!(x0 | x1 | x2 | x3)) {
1484 memset(output, 0, 4 * sizeof(*output)); 1484 memset(output, 0, 4 * sizeof(*output));
(...skipping 17 matching lines...) Expand all
1502 // 1-D transform scaling factor is sqrt(2). 1502 // 1-D transform scaling factor is sqrt(2).
1503 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) 1503 // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
1504 // + 1b (addition) = 29b. 1504 // + 1b (addition) = 29b.
1505 // Hence the output bit depth is 15b. 1505 // Hence the output bit depth is 15b.
1506 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); 1506 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd);
1507 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); 1507 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd);
1508 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); 1508 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd);
1509 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); 1509 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd);
1510 } 1510 }
1511 1511
1512 void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { 1512 void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
1513 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; 1513 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
1514 1514
1515 tran_low_t x0 = input[7]; 1515 tran_low_t x0 = input[7];
1516 tran_low_t x1 = input[0]; 1516 tran_low_t x1 = input[0];
1517 tran_low_t x2 = input[5]; 1517 tran_low_t x2 = input[5];
1518 tran_low_t x3 = input[2]; 1518 tran_low_t x3 = input[2];
1519 tran_low_t x4 = input[3]; 1519 tran_low_t x4 = input[3];
1520 tran_low_t x5 = input[4]; 1520 tran_low_t x5 = input[4];
1521 tran_low_t x6 = input[1]; 1521 tran_low_t x6 = input[1];
1522 tran_low_t x7 = input[6]; 1522 tran_low_t x7 = input[6];
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
1579 output[0] = WRAPLOW(x0, bd); 1579 output[0] = WRAPLOW(x0, bd);
1580 output[1] = WRAPLOW(-x4, bd); 1580 output[1] = WRAPLOW(-x4, bd);
1581 output[2] = WRAPLOW(x6, bd); 1581 output[2] = WRAPLOW(x6, bd);
1582 output[3] = WRAPLOW(-x2, bd); 1582 output[3] = WRAPLOW(-x2, bd);
1583 output[4] = WRAPLOW(x3, bd); 1583 output[4] = WRAPLOW(x3, bd);
1584 output[5] = WRAPLOW(-x7, bd); 1584 output[5] = WRAPLOW(-x7, bd);
1585 output[6] = WRAPLOW(x5, bd); 1585 output[6] = WRAPLOW(x5, bd);
1586 output[7] = WRAPLOW(-x1, bd); 1586 output[7] = WRAPLOW(-x1, bd);
1587 } 1587 }
1588 1588
1589 void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, 1589 void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
1590 int stride, int bd) { 1590 int stride, int bd) {
1591 tran_low_t out[8 * 8] = { 0 }; 1591 tran_low_t out[8 * 8] = { 0 };
1592 tran_low_t *outptr = out; 1592 tran_low_t *outptr = out;
1593 int i, j; 1593 int i, j;
1594 tran_low_t temp_in[8], temp_out[8]; 1594 tran_low_t temp_in[8], temp_out[8];
1595 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1595 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1596 1596
1597 // First transform rows. 1597 // First transform rows.
1598 // Only first 4 row has non-zero coefs. 1598 // Only first 4 row has non-zero coefs.
1599 for (i = 0; i < 4; ++i) { 1599 for (i = 0; i < 4; ++i) {
1600 vp9_highbd_idct8_c(input, outptr, bd); 1600 vpx_highbd_idct8_c(input, outptr, bd);
1601 input += 8; 1601 input += 8;
1602 outptr += 8; 1602 outptr += 8;
1603 } 1603 }
1604 // Then transform columns. 1604 // Then transform columns.
1605 for (i = 0; i < 8; ++i) { 1605 for (i = 0; i < 8; ++i) {
1606 for (j = 0; j < 8; ++j) 1606 for (j = 0; j < 8; ++j)
1607 temp_in[j] = out[j * 8 + i]; 1607 temp_in[j] = out[j * 8 + i];
1608 vp9_highbd_idct8_c(temp_in, temp_out, bd); 1608 vpx_highbd_idct8_c(temp_in, temp_out, bd);
1609 for (j = 0; j < 8; ++j) { 1609 for (j = 0; j < 8; ++j) {
1610 dest[j * stride + i] = highbd_clip_pixel_add( 1610 dest[j * stride + i] = highbd_clip_pixel_add(
1611 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); 1611 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
1612 } 1612 }
1613 } 1613 }
1614 } 1614 }
1615 1615
1616 void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { 1616 void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
1617 tran_low_t step1[16], step2[16]; 1617 tran_low_t step1[16], step2[16];
1618 tran_high_t temp1, temp2; 1618 tran_high_t temp1, temp2;
1619 (void) bd; 1619 (void) bd;
1620 1620
1621 // stage 1 1621 // stage 1
1622 step1[0] = input[0/2]; 1622 step1[0] = input[0/2];
1623 step1[1] = input[16/2]; 1623 step1[1] = input[16/2];
1624 step1[2] = input[8/2]; 1624 step1[2] = input[8/2];
1625 step1[3] = input[24/2]; 1625 step1[3] = input[24/2];
1626 step1[4] = input[4/2]; 1626 step1[4] = input[4/2];
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
1772 output[8] = WRAPLOW(step2[7] - step2[8], bd); 1772 output[8] = WRAPLOW(step2[7] - step2[8], bd);
1773 output[9] = WRAPLOW(step2[6] - step2[9], bd); 1773 output[9] = WRAPLOW(step2[6] - step2[9], bd);
1774 output[10] = WRAPLOW(step2[5] - step2[10], bd); 1774 output[10] = WRAPLOW(step2[5] - step2[10], bd);
1775 output[11] = WRAPLOW(step2[4] - step2[11], bd); 1775 output[11] = WRAPLOW(step2[4] - step2[11], bd);
1776 output[12] = WRAPLOW(step2[3] - step2[12], bd); 1776 output[12] = WRAPLOW(step2[3] - step2[12], bd);
1777 output[13] = WRAPLOW(step2[2] - step2[13], bd); 1777 output[13] = WRAPLOW(step2[2] - step2[13], bd);
1778 output[14] = WRAPLOW(step2[1] - step2[14], bd); 1778 output[14] = WRAPLOW(step2[1] - step2[14], bd);
1779 output[15] = WRAPLOW(step2[0] - step2[15], bd); 1779 output[15] = WRAPLOW(step2[0] - step2[15], bd);
1780 } 1780 }
1781 1781
1782 void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, 1782 void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
1783 int stride, int bd) { 1783 int stride, int bd) {
1784 tran_low_t out[16 * 16]; 1784 tran_low_t out[16 * 16];
1785 tran_low_t *outptr = out; 1785 tran_low_t *outptr = out;
1786 int i, j; 1786 int i, j;
1787 tran_low_t temp_in[16], temp_out[16]; 1787 tran_low_t temp_in[16], temp_out[16];
1788 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1788 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1789 1789
1790 // First transform rows. 1790 // First transform rows.
1791 for (i = 0; i < 16; ++i) { 1791 for (i = 0; i < 16; ++i) {
1792 vp9_highbd_idct16_c(input, outptr, bd); 1792 vpx_highbd_idct16_c(input, outptr, bd);
1793 input += 16; 1793 input += 16;
1794 outptr += 16; 1794 outptr += 16;
1795 } 1795 }
1796 1796
1797 // Then transform columns. 1797 // Then transform columns.
1798 for (i = 0; i < 16; ++i) { 1798 for (i = 0; i < 16; ++i) {
1799 for (j = 0; j < 16; ++j) 1799 for (j = 0; j < 16; ++j)
1800 temp_in[j] = out[j * 16 + i]; 1800 temp_in[j] = out[j * 16 + i];
1801 vp9_highbd_idct16_c(temp_in, temp_out, bd); 1801 vpx_highbd_idct16_c(temp_in, temp_out, bd);
1802 for (j = 0; j < 16; ++j) { 1802 for (j = 0; j < 16; ++j) {
1803 dest[j * stride + i] = highbd_clip_pixel_add( 1803 dest[j * stride + i] = highbd_clip_pixel_add(
1804 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); 1804 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
1805 } 1805 }
1806 } 1806 }
1807 } 1807 }
1808 1808
1809 void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { 1809 void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
1810 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; 1810 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
1811 tran_high_t s9, s10, s11, s12, s13, s14, s15; 1811 tran_high_t s9, s10, s11, s12, s13, s14, s15;
1812 1812
1813 tran_low_t x0 = input[15]; 1813 tran_low_t x0 = input[15];
1814 tran_low_t x1 = input[0]; 1814 tran_low_t x1 = input[0];
1815 tran_low_t x2 = input[13]; 1815 tran_low_t x2 = input[13];
1816 tran_low_t x3 = input[2]; 1816 tran_low_t x3 = input[2];
1817 tran_low_t x4 = input[11]; 1817 tran_low_t x4 = input[11];
1818 tran_low_t x5 = input[4]; 1818 tran_low_t x5 = input[4];
1819 tran_low_t x6 = input[9]; 1819 tran_low_t x6 = input[9];
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
1969 output[8] = WRAPLOW(x3, bd); 1969 output[8] = WRAPLOW(x3, bd);
1970 output[9] = WRAPLOW(x11, bd); 1970 output[9] = WRAPLOW(x11, bd);
1971 output[10] = WRAPLOW(x15, bd); 1971 output[10] = WRAPLOW(x15, bd);
1972 output[11] = WRAPLOW(x7, bd); 1972 output[11] = WRAPLOW(x7, bd);
1973 output[12] = WRAPLOW(x5, bd); 1973 output[12] = WRAPLOW(x5, bd);
1974 output[13] = WRAPLOW(-x13, bd); 1974 output[13] = WRAPLOW(-x13, bd);
1975 output[14] = WRAPLOW(x9, bd); 1975 output[14] = WRAPLOW(x9, bd);
1976 output[15] = WRAPLOW(-x1, bd); 1976 output[15] = WRAPLOW(-x1, bd);
1977 } 1977 }
1978 1978
1979 void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, 1979 void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
1980 int stride, int bd) { 1980 int stride, int bd) {
1981 tran_low_t out[16 * 16] = { 0 }; 1981 tran_low_t out[16 * 16] = { 0 };
1982 tran_low_t *outptr = out; 1982 tran_low_t *outptr = out;
1983 int i, j; 1983 int i, j;
1984 tran_low_t temp_in[16], temp_out[16]; 1984 tran_low_t temp_in[16], temp_out[16];
1985 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1985 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1986 1986
1987 // First transform rows. Since all non-zero dct coefficients are in 1987 // First transform rows. Since all non-zero dct coefficients are in
1988 // upper-left 4x4 area, we only need to calculate first 4 rows here. 1988 // upper-left 4x4 area, we only need to calculate first 4 rows here.
1989 for (i = 0; i < 4; ++i) { 1989 for (i = 0; i < 4; ++i) {
1990 vp9_highbd_idct16_c(input, outptr, bd); 1990 vpx_highbd_idct16_c(input, outptr, bd);
1991 input += 16; 1991 input += 16;
1992 outptr += 16; 1992 outptr += 16;
1993 } 1993 }
1994 1994
1995 // Then transform columns. 1995 // Then transform columns.
1996 for (i = 0; i < 16; ++i) { 1996 for (i = 0; i < 16; ++i) {
1997 for (j = 0; j < 16; ++j) 1997 for (j = 0; j < 16; ++j)
1998 temp_in[j] = out[j*16 + i]; 1998 temp_in[j] = out[j*16 + i];
1999 vp9_highbd_idct16_c(temp_in, temp_out, bd); 1999 vpx_highbd_idct16_c(temp_in, temp_out, bd);
2000 for (j = 0; j < 16; ++j) { 2000 for (j = 0; j < 16; ++j) {
2001 dest[j * stride + i] = highbd_clip_pixel_add( 2001 dest[j * stride + i] = highbd_clip_pixel_add(
2002 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); 2002 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
2003 } 2003 }
2004 } 2004 }
2005 } 2005 }
2006 2006
2007 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, 2007 void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
2008 int stride, int bd) { 2008 int stride, int bd) {
2009 int i, j; 2009 int i, j;
2010 tran_high_t a1; 2010 tran_high_t a1;
2011 tran_low_t out = WRAPLOW( 2011 tran_low_t out = WRAPLOW(
2012 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); 2012 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
2013 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 2013 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2014 2014
2015 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); 2015 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
2016 a1 = ROUND_POWER_OF_TWO(out, 6); 2016 a1 = ROUND_POWER_OF_TWO(out, 6);
2017 for (j = 0; j < 16; ++j) { 2017 for (j = 0; j < 16; ++j) {
2018 for (i = 0; i < 16; ++i) 2018 for (i = 0; i < 16; ++i)
2019 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); 2019 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
2020 dest += stride; 2020 dest += stride;
2021 } 2021 }
2022 } 2022 }
2023 2023
2024 void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) { 2024 static void highbd_idct32_c(const tran_low_t *input,
2025 tran_low_t *output, int bd) {
2025 tran_low_t step1[32], step2[32]; 2026 tran_low_t step1[32], step2[32];
2026 tran_high_t temp1, temp2; 2027 tran_high_t temp1, temp2;
2027 (void) bd; 2028 (void) bd;
2028 2029
2029 // stage 1 2030 // stage 1
2030 step1[0] = input[0]; 2031 step1[0] = input[0];
2031 step1[1] = input[16]; 2032 step1[1] = input[16];
2032 step1[2] = input[8]; 2033 step1[2] = input[8];
2033 step1[3] = input[24]; 2034 step1[3] = input[24];
2034 step1[4] = input[4]; 2035 step1[4] = input[4];
(...skipping 347 matching lines...) Expand 10 before | Expand all | Expand 10 after
2382 output[24] = WRAPLOW(step1[7] - step1[24], bd); 2383 output[24] = WRAPLOW(step1[7] - step1[24], bd);
2383 output[25] = WRAPLOW(step1[6] - step1[25], bd); 2384 output[25] = WRAPLOW(step1[6] - step1[25], bd);
2384 output[26] = WRAPLOW(step1[5] - step1[26], bd); 2385 output[26] = WRAPLOW(step1[5] - step1[26], bd);
2385 output[27] = WRAPLOW(step1[4] - step1[27], bd); 2386 output[27] = WRAPLOW(step1[4] - step1[27], bd);
2386 output[28] = WRAPLOW(step1[3] - step1[28], bd); 2387 output[28] = WRAPLOW(step1[3] - step1[28], bd);
2387 output[29] = WRAPLOW(step1[2] - step1[29], bd); 2388 output[29] = WRAPLOW(step1[2] - step1[29], bd);
2388 output[30] = WRAPLOW(step1[1] - step1[30], bd); 2389 output[30] = WRAPLOW(step1[1] - step1[30], bd);
2389 output[31] = WRAPLOW(step1[0] - step1[31], bd); 2390 output[31] = WRAPLOW(step1[0] - step1[31], bd);
2390 } 2391 }
2391 2392
2392 void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, 2393 void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
2393 int stride, int bd) { 2394 int stride, int bd) {
2394 tran_low_t out[32 * 32]; 2395 tran_low_t out[32 * 32];
2395 tran_low_t *outptr = out; 2396 tran_low_t *outptr = out;
2396 int i, j; 2397 int i, j;
2397 tran_low_t temp_in[32], temp_out[32]; 2398 tran_low_t temp_in[32], temp_out[32];
2398 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 2399 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2399 2400
2400 // Rows 2401 // Rows
2401 for (i = 0; i < 32; ++i) { 2402 for (i = 0; i < 32; ++i) {
2402 tran_low_t zero_coeff[16]; 2403 tran_low_t zero_coeff[16];
(...skipping 19 matching lines...) Expand all
2422 for (j = 0; j < 32; ++j) 2423 for (j = 0; j < 32; ++j)
2423 temp_in[j] = out[j * 32 + i]; 2424 temp_in[j] = out[j * 32 + i];
2424 highbd_idct32_c(temp_in, temp_out, bd); 2425 highbd_idct32_c(temp_in, temp_out, bd);
2425 for (j = 0; j < 32; ++j) { 2426 for (j = 0; j < 32; ++j) {
2426 dest[j * stride + i] = highbd_clip_pixel_add( 2427 dest[j * stride + i] = highbd_clip_pixel_add(
2427 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); 2428 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
2428 } 2429 }
2429 } 2430 }
2430 } 2431 }
2431 2432
2432 void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, 2433 void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
2433 int stride, int bd) { 2434 int stride, int bd) {
2434 tran_low_t out[32 * 32] = {0}; 2435 tran_low_t out[32 * 32] = {0};
2435 tran_low_t *outptr = out; 2436 tran_low_t *outptr = out;
2436 int i, j; 2437 int i, j;
2437 tran_low_t temp_in[32], temp_out[32]; 2438 tran_low_t temp_in[32], temp_out[32];
2438 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 2439 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2439 2440
2440 // Rows 2441 // Rows
2441 // Only upper-left 8x8 has non-zero coeff. 2442 // Only upper-left 8x8 has non-zero coeff.
2442 for (i = 0; i < 8; ++i) { 2443 for (i = 0; i < 8; ++i) {
2443 highbd_idct32_c(input, outptr, bd); 2444 highbd_idct32_c(input, outptr, bd);
2444 input += 32; 2445 input += 32;
2445 outptr += 32; 2446 outptr += 32;
2446 } 2447 }
2447 // Columns 2448 // Columns
2448 for (i = 0; i < 32; ++i) { 2449 for (i = 0; i < 32; ++i) {
2449 for (j = 0; j < 32; ++j) 2450 for (j = 0; j < 32; ++j)
2450 temp_in[j] = out[j * 32 + i]; 2451 temp_in[j] = out[j * 32 + i];
2451 highbd_idct32_c(temp_in, temp_out, bd); 2452 highbd_idct32_c(temp_in, temp_out, bd);
2452 for (j = 0; j < 32; ++j) { 2453 for (j = 0; j < 32; ++j) {
2453 dest[j * stride + i] = highbd_clip_pixel_add( 2454 dest[j * stride + i] = highbd_clip_pixel_add(
2454 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); 2455 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
2455 } 2456 }
2456 } 2457 }
2457 } 2458 }
2458 2459
2459 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, 2460 void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
2460 int stride, int bd) { 2461 int stride, int bd) {
2461 int i, j; 2462 int i, j;
2462 int a1; 2463 int a1;
2463 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 2464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2464 2465
2465 tran_low_t out = WRAPLOW( 2466 tran_low_t out = WRAPLOW(
2466 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); 2467 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
2467 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); 2468 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
2468 a1 = ROUND_POWER_OF_TWO(out, 6); 2469 a1 = ROUND_POWER_OF_TWO(out, 6);
2469 2470
2470 for (j = 0; j < 32; ++j) { 2471 for (j = 0; j < 32; ++j) {
2471 for (i = 0; i < 32; ++i) 2472 for (i = 0; i < 32; ++i)
2472 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); 2473 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
2473 dest += stride; 2474 dest += stride;
2474 } 2475 }
2475 } 2476 }
2476 #endif // CONFIG_VP9_HIGHBITDEPTH 2477 #endif // CONFIG_VP9_HIGHBITDEPTH
OLDNEW
« no previous file with comments | « source/libvpx/vpx_dsp/inv_txfm.h ('k') | source/libvpx/vpx_dsp/mips/common_dspr2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698