| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <math.h> | 11 #include <math.h> |
| 12 #include <string.h> | 12 #include <string.h> |
| 13 | 13 |
| 14 #include "vpx_dsp/inv_txfm.h" | 14 #include "vpx_dsp/inv_txfm.h" |
| 15 | 15 |
| 16 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 16 void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 17 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 17 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
| 18 0.5 shifts per pixel. */ | 18 0.5 shifts per pixel. */ |
| 19 int i; | 19 int i; |
| 20 tran_low_t output[16]; | 20 tran_low_t output[16]; |
| 21 tran_high_t a1, b1, c1, d1, e1; | 21 tran_high_t a1, b1, c1, d1, e1; |
| 22 const tran_low_t *ip = input; | 22 const tran_low_t *ip = input; |
| 23 tran_low_t *op = output; | 23 tran_low_t *op = output; |
| 24 | 24 |
| 25 for (i = 0; i < 4; i++) { | 25 for (i = 0; i < 4; i++) { |
| 26 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 26 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 58 dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); | 58 dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); |
| 59 dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1); | 59 dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1); |
| 60 dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1); | 60 dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1); |
| 61 dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1); | 61 dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1); |
| 62 | 62 |
| 63 ip++; | 63 ip++; |
| 64 dest++; | 64 dest++; |
| 65 } | 65 } |
| 66 } | 66 } |
| 67 | 67 |
| 68 void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { | 68 void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { |
| 69 int i; | 69 int i; |
| 70 tran_high_t a1, e1; | 70 tran_high_t a1, e1; |
| 71 tran_low_t tmp[4]; | 71 tran_low_t tmp[4]; |
| 72 const tran_low_t *ip = in; | 72 const tran_low_t *ip = in; |
| 73 tran_low_t *op = tmp; | 73 tran_low_t *op = tmp; |
| 74 | 74 |
| 75 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 75 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
| 76 e1 = a1 >> 1; | 76 e1 = a1 >> 1; |
| 77 a1 -= e1; | 77 a1 -= e1; |
| 78 op[0] = WRAPLOW(a1, 8); | 78 op[0] = WRAPLOW(a1, 8); |
| (...skipping 25 matching lines...) Expand all Loading... |
| 104 step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); | 104 step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); |
| 105 step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); | 105 step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); |
| 106 | 106 |
| 107 // stage 2 | 107 // stage 2 |
| 108 output[0] = WRAPLOW(step[0] + step[3], 8); | 108 output[0] = WRAPLOW(step[0] + step[3], 8); |
| 109 output[1] = WRAPLOW(step[1] + step[2], 8); | 109 output[1] = WRAPLOW(step[1] + step[2], 8); |
| 110 output[2] = WRAPLOW(step[1] - step[2], 8); | 110 output[2] = WRAPLOW(step[1] - step[2], 8); |
| 111 output[3] = WRAPLOW(step[0] - step[3], 8); | 111 output[3] = WRAPLOW(step[0] - step[3], 8); |
| 112 } | 112 } |
| 113 | 113 |
| 114 void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 114 void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 115 tran_low_t out[4 * 4]; | 115 tran_low_t out[4 * 4]; |
| 116 tran_low_t *outptr = out; | 116 tran_low_t *outptr = out; |
| 117 int i, j; | 117 int i, j; |
| 118 tran_low_t temp_in[4], temp_out[4]; | 118 tran_low_t temp_in[4], temp_out[4]; |
| 119 | 119 |
| 120 // Rows | 120 // Rows |
| 121 for (i = 0; i < 4; ++i) { | 121 for (i = 0; i < 4; ++i) { |
| 122 idct4_c(input, outptr); | 122 idct4_c(input, outptr); |
| 123 input += 4; | 123 input += 4; |
| 124 outptr += 4; | 124 outptr += 4; |
| 125 } | 125 } |
| 126 | 126 |
| 127 // Columns | 127 // Columns |
| 128 for (i = 0; i < 4; ++i) { | 128 for (i = 0; i < 4; ++i) { |
| 129 for (j = 0; j < 4; ++j) | 129 for (j = 0; j < 4; ++j) |
| 130 temp_in[j] = out[j * 4 + i]; | 130 temp_in[j] = out[j * 4 + i]; |
| 131 idct4_c(temp_in, temp_out); | 131 idct4_c(temp_in, temp_out); |
| 132 for (j = 0; j < 4; ++j) { | 132 for (j = 0; j < 4; ++j) { |
| 133 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 133 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 134 ROUND_POWER_OF_TWO(temp_out[j], 4)); | 134 ROUND_POWER_OF_TWO(temp_out[j], 4)); |
| 135 } | 135 } |
| 136 } | 136 } |
| 137 } | 137 } |
| 138 | 138 |
| 139 void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, | 139 void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, |
| 140 int dest_stride) { | 140 int dest_stride) { |
| 141 int i; | 141 int i; |
| 142 tran_high_t a1; | 142 tran_high_t a1; |
| 143 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 143 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
| 144 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 144 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
| 145 a1 = ROUND_POWER_OF_TWO(out, 4); | 145 a1 = ROUND_POWER_OF_TWO(out, 4); |
| 146 | 146 |
| 147 for (i = 0; i < 4; i++) { | 147 for (i = 0; i < 4; i++) { |
| 148 dest[0] = clip_pixel_add(dest[0], a1); | 148 dest[0] = clip_pixel_add(dest[0], a1); |
| 149 dest[1] = clip_pixel_add(dest[1], a1); | 149 dest[1] = clip_pixel_add(dest[1], a1); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 191 output[0] = WRAPLOW(step1[0] + step1[7], 8); | 191 output[0] = WRAPLOW(step1[0] + step1[7], 8); |
| 192 output[1] = WRAPLOW(step1[1] + step1[6], 8); | 192 output[1] = WRAPLOW(step1[1] + step1[6], 8); |
| 193 output[2] = WRAPLOW(step1[2] + step1[5], 8); | 193 output[2] = WRAPLOW(step1[2] + step1[5], 8); |
| 194 output[3] = WRAPLOW(step1[3] + step1[4], 8); | 194 output[3] = WRAPLOW(step1[3] + step1[4], 8); |
| 195 output[4] = WRAPLOW(step1[3] - step1[4], 8); | 195 output[4] = WRAPLOW(step1[3] - step1[4], 8); |
| 196 output[5] = WRAPLOW(step1[2] - step1[5], 8); | 196 output[5] = WRAPLOW(step1[2] - step1[5], 8); |
| 197 output[6] = WRAPLOW(step1[1] - step1[6], 8); | 197 output[6] = WRAPLOW(step1[1] - step1[6], 8); |
| 198 output[7] = WRAPLOW(step1[0] - step1[7], 8); | 198 output[7] = WRAPLOW(step1[0] - step1[7], 8); |
| 199 } | 199 } |
| 200 | 200 |
| 201 void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 201 void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 202 tran_low_t out[8 * 8]; | 202 tran_low_t out[8 * 8]; |
| 203 tran_low_t *outptr = out; | 203 tran_low_t *outptr = out; |
| 204 int i, j; | 204 int i, j; |
| 205 tran_low_t temp_in[8], temp_out[8]; | 205 tran_low_t temp_in[8], temp_out[8]; |
| 206 | 206 |
| 207 // First transform rows | 207 // First transform rows |
| 208 for (i = 0; i < 8; ++i) { | 208 for (i = 0; i < 8; ++i) { |
| 209 idct8_c(input, outptr); | 209 idct8_c(input, outptr); |
| 210 input += 8; | 210 input += 8; |
| 211 outptr += 8; | 211 outptr += 8; |
| 212 } | 212 } |
| 213 | 213 |
| 214 // Then transform columns | 214 // Then transform columns |
| 215 for (i = 0; i < 8; ++i) { | 215 for (i = 0; i < 8; ++i) { |
| 216 for (j = 0; j < 8; ++j) | 216 for (j = 0; j < 8; ++j) |
| 217 temp_in[j] = out[j * 8 + i]; | 217 temp_in[j] = out[j * 8 + i]; |
| 218 idct8_c(temp_in, temp_out); | 218 idct8_c(temp_in, temp_out); |
| 219 for (j = 0; j < 8; ++j) { | 219 for (j = 0; j < 8; ++j) { |
| 220 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 220 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 221 ROUND_POWER_OF_TWO(temp_out[j], 5)); | 221 ROUND_POWER_OF_TWO(temp_out[j], 5)); |
| 222 } | 222 } |
| 223 } | 223 } |
| 224 } | 224 } |
| 225 | 225 |
| 226 void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 226 void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 227 int i, j; | 227 int i, j; |
| 228 tran_high_t a1; | 228 tran_high_t a1; |
| 229 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 229 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
| 230 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 230 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
| 231 a1 = ROUND_POWER_OF_TWO(out, 5); | 231 a1 = ROUND_POWER_OF_TWO(out, 5); |
| 232 for (j = 0; j < 8; ++j) { | 232 for (j = 0; j < 8; ++j) { |
| 233 for (i = 0; i < 8; ++i) | 233 for (i = 0; i < 8; ++i) |
| 234 dest[i] = clip_pixel_add(dest[i], a1); | 234 dest[i] = clip_pixel_add(dest[i], a1); |
| 235 dest += stride; | 235 dest += stride; |
| 236 } | 236 } |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 343 output[0] = WRAPLOW(x0, 8); | 343 output[0] = WRAPLOW(x0, 8); |
| 344 output[1] = WRAPLOW(-x4, 8); | 344 output[1] = WRAPLOW(-x4, 8); |
| 345 output[2] = WRAPLOW(x6, 8); | 345 output[2] = WRAPLOW(x6, 8); |
| 346 output[3] = WRAPLOW(-x2, 8); | 346 output[3] = WRAPLOW(-x2, 8); |
| 347 output[4] = WRAPLOW(x3, 8); | 347 output[4] = WRAPLOW(x3, 8); |
| 348 output[5] = WRAPLOW(-x7, 8); | 348 output[5] = WRAPLOW(-x7, 8); |
| 349 output[6] = WRAPLOW(x5, 8); | 349 output[6] = WRAPLOW(x5, 8); |
| 350 output[7] = WRAPLOW(-x1, 8); | 350 output[7] = WRAPLOW(-x1, 8); |
| 351 } | 351 } |
| 352 | 352 |
| 353 void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 353 void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 354 tran_low_t out[8 * 8] = { 0 }; | 354 tran_low_t out[8 * 8] = { 0 }; |
| 355 tran_low_t *outptr = out; | 355 tran_low_t *outptr = out; |
| 356 int i, j; | 356 int i, j; |
| 357 tran_low_t temp_in[8], temp_out[8]; | 357 tran_low_t temp_in[8], temp_out[8]; |
| 358 | 358 |
| 359 // First transform rows | 359 // First transform rows |
| 360 // only first 4 row has non-zero coefs | 360 // only first 4 row has non-zero coefs |
| 361 for (i = 0; i < 4; ++i) { | 361 for (i = 0; i < 4; ++i) { |
| 362 idct8_c(input, outptr); | 362 idct8_c(input, outptr); |
| 363 input += 8; | 363 input += 8; |
| (...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 534 output[8] = WRAPLOW(step2[7] - step2[8], 8); | 534 output[8] = WRAPLOW(step2[7] - step2[8], 8); |
| 535 output[9] = WRAPLOW(step2[6] - step2[9], 8); | 535 output[9] = WRAPLOW(step2[6] - step2[9], 8); |
| 536 output[10] = WRAPLOW(step2[5] - step2[10], 8); | 536 output[10] = WRAPLOW(step2[5] - step2[10], 8); |
| 537 output[11] = WRAPLOW(step2[4] - step2[11], 8); | 537 output[11] = WRAPLOW(step2[4] - step2[11], 8); |
| 538 output[12] = WRAPLOW(step2[3] - step2[12], 8); | 538 output[12] = WRAPLOW(step2[3] - step2[12], 8); |
| 539 output[13] = WRAPLOW(step2[2] - step2[13], 8); | 539 output[13] = WRAPLOW(step2[2] - step2[13], 8); |
| 540 output[14] = WRAPLOW(step2[1] - step2[14], 8); | 540 output[14] = WRAPLOW(step2[1] - step2[14], 8); |
| 541 output[15] = WRAPLOW(step2[0] - step2[15], 8); | 541 output[15] = WRAPLOW(step2[0] - step2[15], 8); |
| 542 } | 542 } |
| 543 | 543 |
| 544 void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, | 544 void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, |
| 545 int stride) { | 545 int stride) { |
| 546 tran_low_t out[16 * 16]; | 546 tran_low_t out[16 * 16]; |
| 547 tran_low_t *outptr = out; | 547 tran_low_t *outptr = out; |
| 548 int i, j; | 548 int i, j; |
| 549 tran_low_t temp_in[16], temp_out[16]; | 549 tran_low_t temp_in[16], temp_out[16]; |
| 550 | 550 |
| 551 // First transform rows | 551 // First transform rows |
| 552 for (i = 0; i < 16; ++i) { | 552 for (i = 0; i < 16; ++i) { |
| 553 idct16_c(input, outptr); | 553 idct16_c(input, outptr); |
| 554 input += 16; | 554 input += 16; |
| (...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 732 output[8] = WRAPLOW(x3, 8); | 732 output[8] = WRAPLOW(x3, 8); |
| 733 output[9] = WRAPLOW(x11, 8); | 733 output[9] = WRAPLOW(x11, 8); |
| 734 output[10] = WRAPLOW(x15, 8); | 734 output[10] = WRAPLOW(x15, 8); |
| 735 output[11] = WRAPLOW(x7, 8); | 735 output[11] = WRAPLOW(x7, 8); |
| 736 output[12] = WRAPLOW(x5, 8); | 736 output[12] = WRAPLOW(x5, 8); |
| 737 output[13] = WRAPLOW(-x13, 8); | 737 output[13] = WRAPLOW(-x13, 8); |
| 738 output[14] = WRAPLOW(x9, 8); | 738 output[14] = WRAPLOW(x9, 8); |
| 739 output[15] = WRAPLOW(-x1, 8); | 739 output[15] = WRAPLOW(-x1, 8); |
| 740 } | 740 } |
| 741 | 741 |
| 742 void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, | 742 void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, |
| 743 int stride) { | 743 int stride) { |
| 744 tran_low_t out[16 * 16] = { 0 }; | 744 tran_low_t out[16 * 16] = { 0 }; |
| 745 tran_low_t *outptr = out; | 745 tran_low_t *outptr = out; |
| 746 int i, j; | 746 int i, j; |
| 747 tran_low_t temp_in[16], temp_out[16]; | 747 tran_low_t temp_in[16], temp_out[16]; |
| 748 | 748 |
| 749 // First transform rows. Since all non-zero dct coefficients are in | 749 // First transform rows. Since all non-zero dct coefficients are in |
| 750 // upper-left 4x4 area, we only need to calculate first 4 rows here. | 750 // upper-left 4x4 area, we only need to calculate first 4 rows here. |
| 751 for (i = 0; i < 4; ++i) { | 751 for (i = 0; i < 4; ++i) { |
| 752 idct16_c(input, outptr); | 752 idct16_c(input, outptr); |
| 753 input += 16; | 753 input += 16; |
| 754 outptr += 16; | 754 outptr += 16; |
| 755 } | 755 } |
| 756 | 756 |
| 757 // Then transform columns | 757 // Then transform columns |
| 758 for (i = 0; i < 16; ++i) { | 758 for (i = 0; i < 16; ++i) { |
| 759 for (j = 0; j < 16; ++j) | 759 for (j = 0; j < 16; ++j) |
| 760 temp_in[j] = out[j*16 + i]; | 760 temp_in[j] = out[j*16 + i]; |
| 761 idct16_c(temp_in, temp_out); | 761 idct16_c(temp_in, temp_out); |
| 762 for (j = 0; j < 16; ++j) { | 762 for (j = 0; j < 16; ++j) { |
| 763 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 763 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 764 ROUND_POWER_OF_TWO(temp_out[j], 6)); | 764 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 765 } | 765 } |
| 766 } | 766 } |
| 767 } | 767 } |
| 768 | 768 |
| 769 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 769 void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 770 int i, j; | 770 int i, j; |
| 771 tran_high_t a1; | 771 tran_high_t a1; |
| 772 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 772 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
| 773 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 773 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
| 774 a1 = ROUND_POWER_OF_TWO(out, 6); | 774 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 775 for (j = 0; j < 16; ++j) { | 775 for (j = 0; j < 16; ++j) { |
| 776 for (i = 0; i < 16; ++i) | 776 for (i = 0; i < 16; ++i) |
| 777 dest[i] = clip_pixel_add(dest[i], a1); | 777 dest[i] = clip_pixel_add(dest[i], a1); |
| 778 dest += stride; | 778 dest += stride; |
| 779 } | 779 } |
| (...skipping 359 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1139 output[24] = WRAPLOW(step1[7] - step1[24], 8); | 1139 output[24] = WRAPLOW(step1[7] - step1[24], 8); |
| 1140 output[25] = WRAPLOW(step1[6] - step1[25], 8); | 1140 output[25] = WRAPLOW(step1[6] - step1[25], 8); |
| 1141 output[26] = WRAPLOW(step1[5] - step1[26], 8); | 1141 output[26] = WRAPLOW(step1[5] - step1[26], 8); |
| 1142 output[27] = WRAPLOW(step1[4] - step1[27], 8); | 1142 output[27] = WRAPLOW(step1[4] - step1[27], 8); |
| 1143 output[28] = WRAPLOW(step1[3] - step1[28], 8); | 1143 output[28] = WRAPLOW(step1[3] - step1[28], 8); |
| 1144 output[29] = WRAPLOW(step1[2] - step1[29], 8); | 1144 output[29] = WRAPLOW(step1[2] - step1[29], 8); |
| 1145 output[30] = WRAPLOW(step1[1] - step1[30], 8); | 1145 output[30] = WRAPLOW(step1[1] - step1[30], 8); |
| 1146 output[31] = WRAPLOW(step1[0] - step1[31], 8); | 1146 output[31] = WRAPLOW(step1[0] - step1[31], 8); |
| 1147 } | 1147 } |
| 1148 | 1148 |
| 1149 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, | 1149 void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, |
| 1150 int stride) { | 1150 int stride) { |
| 1151 tran_low_t out[32 * 32]; | 1151 tran_low_t out[32 * 32]; |
| 1152 tran_low_t *outptr = out; | 1152 tran_low_t *outptr = out; |
| 1153 int i, j; | 1153 int i, j; |
| 1154 tran_low_t temp_in[32], temp_out[32]; | 1154 tran_low_t temp_in[32], temp_out[32]; |
| 1155 | 1155 |
| 1156 // Rows | 1156 // Rows |
| 1157 for (i = 0; i < 32; ++i) { | 1157 for (i = 0; i < 32; ++i) { |
| 1158 int16_t zero_coeff[16]; | 1158 int16_t zero_coeff[16]; |
| 1159 for (j = 0; j < 16; ++j) | 1159 for (j = 0; j < 16; ++j) |
| (...skipping 18 matching lines...) Expand all Loading... |
| 1178 for (j = 0; j < 32; ++j) | 1178 for (j = 0; j < 32; ++j) |
| 1179 temp_in[j] = out[j * 32 + i]; | 1179 temp_in[j] = out[j * 32 + i]; |
| 1180 idct32_c(temp_in, temp_out); | 1180 idct32_c(temp_in, temp_out); |
| 1181 for (j = 0; j < 32; ++j) { | 1181 for (j = 0; j < 32; ++j) { |
| 1182 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 1182 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 1183 ROUND_POWER_OF_TWO(temp_out[j], 6)); | 1183 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 1184 } | 1184 } |
| 1185 } | 1185 } |
| 1186 } | 1186 } |
| 1187 | 1187 |
| 1188 void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, | 1188 void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, |
| 1189 int stride) { | 1189 int stride) { |
| 1190 tran_low_t out[32 * 32] = {0}; | 1190 tran_low_t out[32 * 32] = {0}; |
| 1191 tran_low_t *outptr = out; | 1191 tran_low_t *outptr = out; |
| 1192 int i, j; | 1192 int i, j; |
| 1193 tran_low_t temp_in[32], temp_out[32]; | 1193 tran_low_t temp_in[32], temp_out[32]; |
| 1194 | 1194 |
| 1195 // Rows | 1195 // Rows |
| 1196 // only upper-left 8x8 has non-zero coeff | 1196 // only upper-left 8x8 has non-zero coeff |
| 1197 for (i = 0; i < 8; ++i) { | 1197 for (i = 0; i < 8; ++i) { |
| 1198 idct32_c(input, outptr); | 1198 idct32_c(input, outptr); |
| 1199 input += 32; | 1199 input += 32; |
| 1200 outptr += 32; | 1200 outptr += 32; |
| 1201 } | 1201 } |
| 1202 | 1202 |
| 1203 // Columns | 1203 // Columns |
| 1204 for (i = 0; i < 32; ++i) { | 1204 for (i = 0; i < 32; ++i) { |
| 1205 for (j = 0; j < 32; ++j) | 1205 for (j = 0; j < 32; ++j) |
| 1206 temp_in[j] = out[j * 32 + i]; | 1206 temp_in[j] = out[j * 32 + i]; |
| 1207 idct32_c(temp_in, temp_out); | 1207 idct32_c(temp_in, temp_out); |
| 1208 for (j = 0; j < 32; ++j) { | 1208 for (j = 0; j < 32; ++j) { |
| 1209 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], | 1209 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
| 1210 ROUND_POWER_OF_TWO(temp_out[j], 6)); | 1210 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 1211 } | 1211 } |
| 1212 } | 1212 } |
| 1213 } | 1213 } |
| 1214 | 1214 |
| 1215 void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 1215 void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
| 1216 int i, j; | 1216 int i, j; |
| 1217 tran_high_t a1; | 1217 tran_high_t a1; |
| 1218 | 1218 |
| 1219 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); | 1219 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
| 1220 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); | 1220 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
| 1221 a1 = ROUND_POWER_OF_TWO(out, 6); | 1221 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 1222 | 1222 |
| 1223 for (j = 0; j < 32; ++j) { | 1223 for (j = 0; j < 32; ++j) { |
| 1224 for (i = 0; i < 32; ++i) | 1224 for (i = 0; i < 32; ++i) |
| 1225 dest[i] = clip_pixel_add(dest[i], a1); | 1225 dest[i] = clip_pixel_add(dest[i], a1); |
| 1226 dest += stride; | 1226 dest += stride; |
| 1227 } | 1227 } |
| 1228 } | 1228 } |
| 1229 | 1229 |
| 1230 #if CONFIG_VP9_HIGHBITDEPTH | 1230 #if CONFIG_VP9_HIGHBITDEPTH |
| 1231 void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1231 void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1232 int stride, int bd) { | 1232 int stride, int bd) { |
| 1233 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 1233 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
| 1234 0.5 shifts per pixel. */ | 1234 0.5 shifts per pixel. */ |
| 1235 int i; | 1235 int i; |
| 1236 tran_low_t output[16]; | 1236 tran_low_t output[16]; |
| 1237 tran_high_t a1, b1, c1, d1, e1; | 1237 tran_high_t a1, b1, c1, d1, e1; |
| 1238 const tran_low_t *ip = input; | 1238 const tran_low_t *ip = input; |
| 1239 tran_low_t *op = output; | 1239 tran_low_t *op = output; |
| 1240 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1240 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1241 | 1241 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1275 dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); | 1275 dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); |
| 1276 dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); | 1276 dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); |
| 1277 dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); | 1277 dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); |
| 1278 dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); | 1278 dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); |
| 1279 | 1279 |
| 1280 ip++; | 1280 ip++; |
| 1281 dest++; | 1281 dest++; |
| 1282 } | 1282 } |
| 1283 } | 1283 } |
| 1284 | 1284 |
| 1285 void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, | 1285 void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, |
| 1286 int dest_stride, int bd) { | 1286 int dest_stride, int bd) { |
| 1287 int i; | 1287 int i; |
| 1288 tran_high_t a1, e1; | 1288 tran_high_t a1, e1; |
| 1289 tran_low_t tmp[4]; | 1289 tran_low_t tmp[4]; |
| 1290 const tran_low_t *ip = in; | 1290 const tran_low_t *ip = in; |
| 1291 tran_low_t *op = tmp; | 1291 tran_low_t *op = tmp; |
| 1292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1293 (void) bd; | 1293 (void) bd; |
| 1294 | 1294 |
| 1295 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 1295 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
| (...skipping 12 matching lines...) Expand all Loading... |
| 1308 dest[dest_stride * 1], e1, bd); | 1308 dest[dest_stride * 1], e1, bd); |
| 1309 dest[dest_stride * 2] = highbd_clip_pixel_add( | 1309 dest[dest_stride * 2] = highbd_clip_pixel_add( |
| 1310 dest[dest_stride * 2], e1, bd); | 1310 dest[dest_stride * 2], e1, bd); |
| 1311 dest[dest_stride * 3] = highbd_clip_pixel_add( | 1311 dest[dest_stride * 3] = highbd_clip_pixel_add( |
| 1312 dest[dest_stride * 3], e1, bd); | 1312 dest[dest_stride * 3], e1, bd); |
| 1313 ip++; | 1313 ip++; |
| 1314 dest++; | 1314 dest++; |
| 1315 } | 1315 } |
| 1316 } | 1316 } |
| 1317 | 1317 |
// High-bitdepth 1-D 4-point inverse DCT. Reads 4 coefficients from |input|
// and writes 4 reconstructed values to |output|; |bd| is the bit depth used
// for intermediate clamping inside WRAPLOW/highbd_dct_const_round_shift.
void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t step[4];
  tran_high_t temp1, temp2;
  (void) bd;  // NOTE(review): redundant — bd is used below; harmless to keep.
  // stage 1: butterflies on the even (0,2) and odd (1,3) coefficients.
  temp1 = (input[0] + input[2]) * cospi_16_64;
  temp2 = (input[0] - input[2]) * cospi_16_64;
  step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
  temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
  temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
  step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);

  // stage 2: recombine the stage-1 results into the output samples.
  output[0] = WRAPLOW(step[0] + step[3], bd);
  output[1] = WRAPLOW(step[1] + step[2], bd);
  output[2] = WRAPLOW(step[1] - step[2], bd);
  output[3] = WRAPLOW(step[0] - step[3], bd);
}
| 1338 | 1338 |
| 1339 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1339 void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1340 int stride, int bd) { | 1340 int stride, int bd) { |
| 1341 tran_low_t out[4 * 4]; | 1341 tran_low_t out[4 * 4]; |
| 1342 tran_low_t *outptr = out; | 1342 tran_low_t *outptr = out; |
| 1343 int i, j; | 1343 int i, j; |
| 1344 tran_low_t temp_in[4], temp_out[4]; | 1344 tran_low_t temp_in[4], temp_out[4]; |
| 1345 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1345 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1346 | 1346 |
| 1347 // Rows | 1347 // Rows |
| 1348 for (i = 0; i < 4; ++i) { | 1348 for (i = 0; i < 4; ++i) { |
| 1349 vp9_highbd_idct4_c(input, outptr, bd); | 1349 vpx_highbd_idct4_c(input, outptr, bd); |
| 1350 input += 4; | 1350 input += 4; |
| 1351 outptr += 4; | 1351 outptr += 4; |
| 1352 } | 1352 } |
| 1353 | 1353 |
| 1354 // Columns | 1354 // Columns |
| 1355 for (i = 0; i < 4; ++i) { | 1355 for (i = 0; i < 4; ++i) { |
| 1356 for (j = 0; j < 4; ++j) | 1356 for (j = 0; j < 4; ++j) |
| 1357 temp_in[j] = out[j * 4 + i]; | 1357 temp_in[j] = out[j * 4 + i]; |
| 1358 vp9_highbd_idct4_c(temp_in, temp_out, bd); | 1358 vpx_highbd_idct4_c(temp_in, temp_out, bd); |
| 1359 for (j = 0; j < 4; ++j) { | 1359 for (j = 0; j < 4; ++j) { |
| 1360 dest[j * stride + i] = highbd_clip_pixel_add( | 1360 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1361 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); | 1361 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
| 1362 } | 1362 } |
| 1363 } | 1363 } |
| 1364 } | 1364 } |
| 1365 | 1365 |
| 1366 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1366 void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1367 int dest_stride, int bd) { | 1367 int dest_stride, int bd) { |
| 1368 int i; | 1368 int i; |
| 1369 tran_high_t a1; | 1369 tran_high_t a1; |
| 1370 tran_low_t out = WRAPLOW( | 1370 tran_low_t out = WRAPLOW( |
| 1371 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 1371 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 1372 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1372 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1373 | 1373 |
| 1374 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 1374 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 1375 a1 = ROUND_POWER_OF_TWO(out, 4); | 1375 a1 = ROUND_POWER_OF_TWO(out, 4); |
| 1376 | 1376 |
| 1377 for (i = 0; i < 4; i++) { | 1377 for (i = 0; i < 4; i++) { |
| 1378 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); | 1378 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); |
| 1379 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); | 1379 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); |
| 1380 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); | 1380 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); |
| 1381 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); | 1381 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); |
| 1382 dest += dest_stride; | 1382 dest += dest_stride; |
| 1383 } | 1383 } |
| 1384 } | 1384 } |
| 1385 | 1385 |
// High-bitdepth 1-D 8-point inverse DCT. Reads 8 coefficients from |input|
// and writes 8 reconstructed values to |output|; |bd| is the bit depth used
// for intermediate clamping. The even half reuses the 4-point transform; the
// odd half is computed with explicit butterfly stages.
void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t step1[8], step2[8];
  tran_high_t temp1, temp2;
  // stage 1: reorder the even coefficients for the idct4 call below and
  // rotate the odd coefficients (1,7) and (5,3).
  step1[0] = input[0];
  step1[2] = input[4];
  step1[1] = input[2];
  step1[3] = input[6];
  temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
  temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
  step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
  temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
  temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
  step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);

  // stage 2 & stage 3 - even half
  // In-place 4-point transform on step1[0..3].
  vpx_highbd_idct4_c(step1, step1, bd);

  // stage 2 - odd half: sum/difference butterflies on step1[4..7].
  step2[4] = WRAPLOW(step1[4] + step1[5], bd);
  step2[5] = WRAPLOW(step1[4] - step1[5], bd);
  step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
  step2[7] = WRAPLOW(step1[6] + step1[7], bd);

  // stage 3 - odd half: rotate the middle pair (5,6) by cospi_16_64.
  step1[4] = step2[4];
  temp1 = (step2[6] - step2[5]) * cospi_16_64;
  temp2 = (step2[5] + step2[6]) * cospi_16_64;
  step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
  step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
  step1[7] = step2[7];

  // stage 4: combine even (0..3) and odd (4..7) halves into the output.
  output[0] = WRAPLOW(step1[0] + step1[7], bd);
  output[1] = WRAPLOW(step1[1] + step1[6], bd);
  output[2] = WRAPLOW(step1[2] + step1[5], bd);
  output[3] = WRAPLOW(step1[3] + step1[4], bd);
  output[4] = WRAPLOW(step1[3] - step1[4], bd);
  output[5] = WRAPLOW(step1[2] - step1[5], bd);
  output[6] = WRAPLOW(step1[1] - step1[6], bd);
  output[7] = WRAPLOW(step1[0] - step1[7], bd);
}
| 1430 | 1430 |
| 1431 void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, | 1431 void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1432 int stride, int bd) { | 1432 int stride, int bd) { |
| 1433 tran_low_t out[8 * 8]; | 1433 tran_low_t out[8 * 8]; |
| 1434 tran_low_t *outptr = out; | 1434 tran_low_t *outptr = out; |
| 1435 int i, j; | 1435 int i, j; |
| 1436 tran_low_t temp_in[8], temp_out[8]; | 1436 tran_low_t temp_in[8], temp_out[8]; |
| 1437 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1437 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1438 | 1438 |
| 1439 // First transform rows. | 1439 // First transform rows. |
| 1440 for (i = 0; i < 8; ++i) { | 1440 for (i = 0; i < 8; ++i) { |
| 1441 vp9_highbd_idct8_c(input, outptr, bd); | 1441 vpx_highbd_idct8_c(input, outptr, bd); |
| 1442 input += 8; | 1442 input += 8; |
| 1443 outptr += 8; | 1443 outptr += 8; |
| 1444 } | 1444 } |
| 1445 | 1445 |
| 1446 // Then transform columns. | 1446 // Then transform columns. |
| 1447 for (i = 0; i < 8; ++i) { | 1447 for (i = 0; i < 8; ++i) { |
| 1448 for (j = 0; j < 8; ++j) | 1448 for (j = 0; j < 8; ++j) |
| 1449 temp_in[j] = out[j * 8 + i]; | 1449 temp_in[j] = out[j * 8 + i]; |
| 1450 vp9_highbd_idct8_c(temp_in, temp_out, bd); | 1450 vpx_highbd_idct8_c(temp_in, temp_out, bd); |
| 1451 for (j = 0; j < 8; ++j) { | 1451 for (j = 0; j < 8; ++j) { |
| 1452 dest[j * stride + i] = highbd_clip_pixel_add( | 1452 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1453 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1453 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
| 1454 } | 1454 } |
| 1455 } | 1455 } |
| 1456 } | 1456 } |
| 1457 | 1457 |
| 1458 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1458 void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1459 int stride, int bd) { | 1459 int stride, int bd) { |
| 1460 int i, j; | 1460 int i, j; |
| 1461 tran_high_t a1; | 1461 tran_high_t a1; |
| 1462 tran_low_t out = WRAPLOW( | 1462 tran_low_t out = WRAPLOW( |
| 1463 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 1463 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 1464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1465 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 1465 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 1466 a1 = ROUND_POWER_OF_TWO(out, 5); | 1466 a1 = ROUND_POWER_OF_TWO(out, 5); |
| 1467 for (j = 0; j < 8; ++j) { | 1467 for (j = 0; j < 8; ++j) { |
| 1468 for (i = 0; i < 8; ++i) | 1468 for (i = 0; i < 8; ++i) |
| 1469 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 1469 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 1470 dest += stride; | 1470 dest += stride; |
| 1471 } | 1471 } |
| 1472 } | 1472 } |
| 1473 | 1473 |
| 1474 void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1474 void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1475 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1475 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
| 1476 | 1476 |
| 1477 tran_low_t x0 = input[0]; | 1477 tran_low_t x0 = input[0]; |
| 1478 tran_low_t x1 = input[1]; | 1478 tran_low_t x1 = input[1]; |
| 1479 tran_low_t x2 = input[2]; | 1479 tran_low_t x2 = input[2]; |
| 1480 tran_low_t x3 = input[3]; | 1480 tran_low_t x3 = input[3]; |
| 1481 (void) bd; | 1481 (void) bd; |
| 1482 | 1482 |
| 1483 if (!(x0 | x1 | x2 | x3)) { | 1483 if (!(x0 | x1 | x2 | x3)) { |
| 1484 memset(output, 0, 4 * sizeof(*output)); | 1484 memset(output, 0, 4 * sizeof(*output)); |
| (...skipping 17 matching lines...) Expand all Loading... |
| 1502 // 1-D transform scaling factor is sqrt(2). | 1502 // 1-D transform scaling factor is sqrt(2). |
| 1503 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 1503 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
| 1504 // + 1b (addition) = 29b. | 1504 // + 1b (addition) = 29b. |
| 1505 // Hence the output bit depth is 15b. | 1505 // Hence the output bit depth is 15b. |
| 1506 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); | 1506 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); |
| 1507 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); | 1507 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); |
| 1508 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); | 1508 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); |
| 1509 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); | 1509 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); |
| 1510 } | 1510 } |
| 1511 | 1511 |
| 1512 void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1512 void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1513 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1513 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
| 1514 | 1514 |
| 1515 tran_low_t x0 = input[7]; | 1515 tran_low_t x0 = input[7]; |
| 1516 tran_low_t x1 = input[0]; | 1516 tran_low_t x1 = input[0]; |
| 1517 tran_low_t x2 = input[5]; | 1517 tran_low_t x2 = input[5]; |
| 1518 tran_low_t x3 = input[2]; | 1518 tran_low_t x3 = input[2]; |
| 1519 tran_low_t x4 = input[3]; | 1519 tran_low_t x4 = input[3]; |
| 1520 tran_low_t x5 = input[4]; | 1520 tran_low_t x5 = input[4]; |
| 1521 tran_low_t x6 = input[1]; | 1521 tran_low_t x6 = input[1]; |
| 1522 tran_low_t x7 = input[6]; | 1522 tran_low_t x7 = input[6]; |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1579 output[0] = WRAPLOW(x0, bd); | 1579 output[0] = WRAPLOW(x0, bd); |
| 1580 output[1] = WRAPLOW(-x4, bd); | 1580 output[1] = WRAPLOW(-x4, bd); |
| 1581 output[2] = WRAPLOW(x6, bd); | 1581 output[2] = WRAPLOW(x6, bd); |
| 1582 output[3] = WRAPLOW(-x2, bd); | 1582 output[3] = WRAPLOW(-x2, bd); |
| 1583 output[4] = WRAPLOW(x3, bd); | 1583 output[4] = WRAPLOW(x3, bd); |
| 1584 output[5] = WRAPLOW(-x7, bd); | 1584 output[5] = WRAPLOW(-x7, bd); |
| 1585 output[6] = WRAPLOW(x5, bd); | 1585 output[6] = WRAPLOW(x5, bd); |
| 1586 output[7] = WRAPLOW(-x1, bd); | 1586 output[7] = WRAPLOW(-x1, bd); |
| 1587 } | 1587 } |
| 1588 | 1588 |
| 1589 void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, | 1589 void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1590 int stride, int bd) { | 1590 int stride, int bd) { |
| 1591 tran_low_t out[8 * 8] = { 0 }; | 1591 tran_low_t out[8 * 8] = { 0 }; |
| 1592 tran_low_t *outptr = out; | 1592 tran_low_t *outptr = out; |
| 1593 int i, j; | 1593 int i, j; |
| 1594 tran_low_t temp_in[8], temp_out[8]; | 1594 tran_low_t temp_in[8], temp_out[8]; |
| 1595 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1595 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1596 | 1596 |
| 1597 // First transform rows. | 1597 // First transform rows. |
| 1598 // Only first 4 row has non-zero coefs. | 1598 // Only first 4 row has non-zero coefs. |
| 1599 for (i = 0; i < 4; ++i) { | 1599 for (i = 0; i < 4; ++i) { |
| 1600 vp9_highbd_idct8_c(input, outptr, bd); | 1600 vpx_highbd_idct8_c(input, outptr, bd); |
| 1601 input += 8; | 1601 input += 8; |
| 1602 outptr += 8; | 1602 outptr += 8; |
| 1603 } | 1603 } |
| 1604 // Then transform columns. | 1604 // Then transform columns. |
| 1605 for (i = 0; i < 8; ++i) { | 1605 for (i = 0; i < 8; ++i) { |
| 1606 for (j = 0; j < 8; ++j) | 1606 for (j = 0; j < 8; ++j) |
| 1607 temp_in[j] = out[j * 8 + i]; | 1607 temp_in[j] = out[j * 8 + i]; |
| 1608 vp9_highbd_idct8_c(temp_in, temp_out, bd); | 1608 vpx_highbd_idct8_c(temp_in, temp_out, bd); |
| 1609 for (j = 0; j < 8; ++j) { | 1609 for (j = 0; j < 8; ++j) { |
| 1610 dest[j * stride + i] = highbd_clip_pixel_add( | 1610 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1611 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1611 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
| 1612 } | 1612 } |
| 1613 } | 1613 } |
| 1614 } | 1614 } |
| 1615 | 1615 |
| 1616 void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1616 void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1617 tran_low_t step1[16], step2[16]; | 1617 tran_low_t step1[16], step2[16]; |
| 1618 tran_high_t temp1, temp2; | 1618 tran_high_t temp1, temp2; |
| 1619 (void) bd; | 1619 (void) bd; |
| 1620 | 1620 |
| 1621 // stage 1 | 1621 // stage 1 |
| 1622 step1[0] = input[0/2]; | 1622 step1[0] = input[0/2]; |
| 1623 step1[1] = input[16/2]; | 1623 step1[1] = input[16/2]; |
| 1624 step1[2] = input[8/2]; | 1624 step1[2] = input[8/2]; |
| 1625 step1[3] = input[24/2]; | 1625 step1[3] = input[24/2]; |
| 1626 step1[4] = input[4/2]; | 1626 step1[4] = input[4/2]; |
| (...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1772 output[8] = WRAPLOW(step2[7] - step2[8], bd); | 1772 output[8] = WRAPLOW(step2[7] - step2[8], bd); |
| 1773 output[9] = WRAPLOW(step2[6] - step2[9], bd); | 1773 output[9] = WRAPLOW(step2[6] - step2[9], bd); |
| 1774 output[10] = WRAPLOW(step2[5] - step2[10], bd); | 1774 output[10] = WRAPLOW(step2[5] - step2[10], bd); |
| 1775 output[11] = WRAPLOW(step2[4] - step2[11], bd); | 1775 output[11] = WRAPLOW(step2[4] - step2[11], bd); |
| 1776 output[12] = WRAPLOW(step2[3] - step2[12], bd); | 1776 output[12] = WRAPLOW(step2[3] - step2[12], bd); |
| 1777 output[13] = WRAPLOW(step2[2] - step2[13], bd); | 1777 output[13] = WRAPLOW(step2[2] - step2[13], bd); |
| 1778 output[14] = WRAPLOW(step2[1] - step2[14], bd); | 1778 output[14] = WRAPLOW(step2[1] - step2[14], bd); |
| 1779 output[15] = WRAPLOW(step2[0] - step2[15], bd); | 1779 output[15] = WRAPLOW(step2[0] - step2[15], bd); |
| 1780 } | 1780 } |
| 1781 | 1781 |
| 1782 void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, | 1782 void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1783 int stride, int bd) { | 1783 int stride, int bd) { |
| 1784 tran_low_t out[16 * 16]; | 1784 tran_low_t out[16 * 16]; |
| 1785 tran_low_t *outptr = out; | 1785 tran_low_t *outptr = out; |
| 1786 int i, j; | 1786 int i, j; |
| 1787 tran_low_t temp_in[16], temp_out[16]; | 1787 tran_low_t temp_in[16], temp_out[16]; |
| 1788 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1788 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1789 | 1789 |
| 1790 // First transform rows. | 1790 // First transform rows. |
| 1791 for (i = 0; i < 16; ++i) { | 1791 for (i = 0; i < 16; ++i) { |
| 1792 vp9_highbd_idct16_c(input, outptr, bd); | 1792 vpx_highbd_idct16_c(input, outptr, bd); |
| 1793 input += 16; | 1793 input += 16; |
| 1794 outptr += 16; | 1794 outptr += 16; |
| 1795 } | 1795 } |
| 1796 | 1796 |
| 1797 // Then transform columns. | 1797 // Then transform columns. |
| 1798 for (i = 0; i < 16; ++i) { | 1798 for (i = 0; i < 16; ++i) { |
| 1799 for (j = 0; j < 16; ++j) | 1799 for (j = 0; j < 16; ++j) |
| 1800 temp_in[j] = out[j * 16 + i]; | 1800 temp_in[j] = out[j * 16 + i]; |
| 1801 vp9_highbd_idct16_c(temp_in, temp_out, bd); | 1801 vpx_highbd_idct16_c(temp_in, temp_out, bd); |
| 1802 for (j = 0; j < 16; ++j) { | 1802 for (j = 0; j < 16; ++j) { |
| 1803 dest[j * stride + i] = highbd_clip_pixel_add( | 1803 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1804 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 1804 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 1805 } | 1805 } |
| 1806 } | 1806 } |
| 1807 } | 1807 } |
| 1808 | 1808 |
| 1809 void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { | 1809 void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1810 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; | 1810 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
| 1811 tran_high_t s9, s10, s11, s12, s13, s14, s15; | 1811 tran_high_t s9, s10, s11, s12, s13, s14, s15; |
| 1812 | 1812 |
| 1813 tran_low_t x0 = input[15]; | 1813 tran_low_t x0 = input[15]; |
| 1814 tran_low_t x1 = input[0]; | 1814 tran_low_t x1 = input[0]; |
| 1815 tran_low_t x2 = input[13]; | 1815 tran_low_t x2 = input[13]; |
| 1816 tran_low_t x3 = input[2]; | 1816 tran_low_t x3 = input[2]; |
| 1817 tran_low_t x4 = input[11]; | 1817 tran_low_t x4 = input[11]; |
| 1818 tran_low_t x5 = input[4]; | 1818 tran_low_t x5 = input[4]; |
| 1819 tran_low_t x6 = input[9]; | 1819 tran_low_t x6 = input[9]; |
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1969 output[8] = WRAPLOW(x3, bd); | 1969 output[8] = WRAPLOW(x3, bd); |
| 1970 output[9] = WRAPLOW(x11, bd); | 1970 output[9] = WRAPLOW(x11, bd); |
| 1971 output[10] = WRAPLOW(x15, bd); | 1971 output[10] = WRAPLOW(x15, bd); |
| 1972 output[11] = WRAPLOW(x7, bd); | 1972 output[11] = WRAPLOW(x7, bd); |
| 1973 output[12] = WRAPLOW(x5, bd); | 1973 output[12] = WRAPLOW(x5, bd); |
| 1974 output[13] = WRAPLOW(-x13, bd); | 1974 output[13] = WRAPLOW(-x13, bd); |
| 1975 output[14] = WRAPLOW(x9, bd); | 1975 output[14] = WRAPLOW(x9, bd); |
| 1976 output[15] = WRAPLOW(-x1, bd); | 1976 output[15] = WRAPLOW(-x1, bd); |
| 1977 } | 1977 } |
| 1978 | 1978 |
| 1979 void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, | 1979 void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1980 int stride, int bd) { | 1980 int stride, int bd) { |
| 1981 tran_low_t out[16 * 16] = { 0 }; | 1981 tran_low_t out[16 * 16] = { 0 }; |
| 1982 tran_low_t *outptr = out; | 1982 tran_low_t *outptr = out; |
| 1983 int i, j; | 1983 int i, j; |
| 1984 tran_low_t temp_in[16], temp_out[16]; | 1984 tran_low_t temp_in[16], temp_out[16]; |
| 1985 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1985 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1986 | 1986 |
| 1987 // First transform rows. Since all non-zero dct coefficients are in | 1987 // First transform rows. Since all non-zero dct coefficients are in |
| 1988 // upper-left 4x4 area, we only need to calculate first 4 rows here. | 1988 // upper-left 4x4 area, we only need to calculate first 4 rows here. |
| 1989 for (i = 0; i < 4; ++i) { | 1989 for (i = 0; i < 4; ++i) { |
| 1990 vp9_highbd_idct16_c(input, outptr, bd); | 1990 vpx_highbd_idct16_c(input, outptr, bd); |
| 1991 input += 16; | 1991 input += 16; |
| 1992 outptr += 16; | 1992 outptr += 16; |
| 1993 } | 1993 } |
| 1994 | 1994 |
| 1995 // Then transform columns. | 1995 // Then transform columns. |
| 1996 for (i = 0; i < 16; ++i) { | 1996 for (i = 0; i < 16; ++i) { |
| 1997 for (j = 0; j < 16; ++j) | 1997 for (j = 0; j < 16; ++j) |
| 1998 temp_in[j] = out[j*16 + i]; | 1998 temp_in[j] = out[j*16 + i]; |
| 1999 vp9_highbd_idct16_c(temp_in, temp_out, bd); | 1999 vpx_highbd_idct16_c(temp_in, temp_out, bd); |
| 2000 for (j = 0; j < 16; ++j) { | 2000 for (j = 0; j < 16; ++j) { |
| 2001 dest[j * stride + i] = highbd_clip_pixel_add( | 2001 dest[j * stride + i] = highbd_clip_pixel_add( |
| 2002 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2002 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2003 } | 2003 } |
| 2004 } | 2004 } |
| 2005 } | 2005 } |
| 2006 | 2006 |
| 2007 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2007 void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2008 int stride, int bd) { | 2008 int stride, int bd) { |
| 2009 int i, j; | 2009 int i, j; |
| 2010 tran_high_t a1; | 2010 tran_high_t a1; |
| 2011 tran_low_t out = WRAPLOW( | 2011 tran_low_t out = WRAPLOW( |
| 2012 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 2012 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 2013 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2013 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2014 | 2014 |
| 2015 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 2015 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 2016 a1 = ROUND_POWER_OF_TWO(out, 6); | 2016 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 2017 for (j = 0; j < 16; ++j) { | 2017 for (j = 0; j < 16; ++j) { |
| 2018 for (i = 0; i < 16; ++i) | 2018 for (i = 0; i < 16; ++i) |
| 2019 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2019 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 2020 dest += stride; | 2020 dest += stride; |
| 2021 } | 2021 } |
| 2022 } | 2022 } |
| 2023 | 2023 |
| 2024 void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) { | 2024 static void highbd_idct32_c(const tran_low_t *input, |
| 2025 tran_low_t *output, int bd) { |
| 2025 tran_low_t step1[32], step2[32]; | 2026 tran_low_t step1[32], step2[32]; |
| 2026 tran_high_t temp1, temp2; | 2027 tran_high_t temp1, temp2; |
| 2027 (void) bd; | 2028 (void) bd; |
| 2028 | 2029 |
| 2029 // stage 1 | 2030 // stage 1 |
| 2030 step1[0] = input[0]; | 2031 step1[0] = input[0]; |
| 2031 step1[1] = input[16]; | 2032 step1[1] = input[16]; |
| 2032 step1[2] = input[8]; | 2033 step1[2] = input[8]; |
| 2033 step1[3] = input[24]; | 2034 step1[3] = input[24]; |
| 2034 step1[4] = input[4]; | 2035 step1[4] = input[4]; |
| (...skipping 347 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2382 output[24] = WRAPLOW(step1[7] - step1[24], bd); | 2383 output[24] = WRAPLOW(step1[7] - step1[24], bd); |
| 2383 output[25] = WRAPLOW(step1[6] - step1[25], bd); | 2384 output[25] = WRAPLOW(step1[6] - step1[25], bd); |
| 2384 output[26] = WRAPLOW(step1[5] - step1[26], bd); | 2385 output[26] = WRAPLOW(step1[5] - step1[26], bd); |
| 2385 output[27] = WRAPLOW(step1[4] - step1[27], bd); | 2386 output[27] = WRAPLOW(step1[4] - step1[27], bd); |
| 2386 output[28] = WRAPLOW(step1[3] - step1[28], bd); | 2387 output[28] = WRAPLOW(step1[3] - step1[28], bd); |
| 2387 output[29] = WRAPLOW(step1[2] - step1[29], bd); | 2388 output[29] = WRAPLOW(step1[2] - step1[29], bd); |
| 2388 output[30] = WRAPLOW(step1[1] - step1[30], bd); | 2389 output[30] = WRAPLOW(step1[1] - step1[30], bd); |
| 2389 output[31] = WRAPLOW(step1[0] - step1[31], bd); | 2390 output[31] = WRAPLOW(step1[0] - step1[31], bd); |
| 2390 } | 2391 } |
| 2391 | 2392 |
| 2392 void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, | 2393 void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2393 int stride, int bd) { | 2394 int stride, int bd) { |
| 2394 tran_low_t out[32 * 32]; | 2395 tran_low_t out[32 * 32]; |
| 2395 tran_low_t *outptr = out; | 2396 tran_low_t *outptr = out; |
| 2396 int i, j; | 2397 int i, j; |
| 2397 tran_low_t temp_in[32], temp_out[32]; | 2398 tran_low_t temp_in[32], temp_out[32]; |
| 2398 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2399 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2399 | 2400 |
| 2400 // Rows | 2401 // Rows |
| 2401 for (i = 0; i < 32; ++i) { | 2402 for (i = 0; i < 32; ++i) { |
| 2402 tran_low_t zero_coeff[16]; | 2403 tran_low_t zero_coeff[16]; |
| (...skipping 19 matching lines...) Expand all Loading... |
| 2422 for (j = 0; j < 32; ++j) | 2423 for (j = 0; j < 32; ++j) |
| 2423 temp_in[j] = out[j * 32 + i]; | 2424 temp_in[j] = out[j * 32 + i]; |
| 2424 highbd_idct32_c(temp_in, temp_out, bd); | 2425 highbd_idct32_c(temp_in, temp_out, bd); |
| 2425 for (j = 0; j < 32; ++j) { | 2426 for (j = 0; j < 32; ++j) { |
| 2426 dest[j * stride + i] = highbd_clip_pixel_add( | 2427 dest[j * stride + i] = highbd_clip_pixel_add( |
| 2427 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2428 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2428 } | 2429 } |
| 2429 } | 2430 } |
| 2430 } | 2431 } |
| 2431 | 2432 |
| 2432 void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, | 2433 void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2433 int stride, int bd) { | 2434 int stride, int bd) { |
| 2434 tran_low_t out[32 * 32] = {0}; | 2435 tran_low_t out[32 * 32] = {0}; |
| 2435 tran_low_t *outptr = out; | 2436 tran_low_t *outptr = out; |
| 2436 int i, j; | 2437 int i, j; |
| 2437 tran_low_t temp_in[32], temp_out[32]; | 2438 tran_low_t temp_in[32], temp_out[32]; |
| 2438 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2439 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2439 | 2440 |
| 2440 // Rows | 2441 // Rows |
| 2441 // Only upper-left 8x8 has non-zero coeff. | 2442 // Only upper-left 8x8 has non-zero coeff. |
| 2442 for (i = 0; i < 8; ++i) { | 2443 for (i = 0; i < 8; ++i) { |
| 2443 highbd_idct32_c(input, outptr, bd); | 2444 highbd_idct32_c(input, outptr, bd); |
| 2444 input += 32; | 2445 input += 32; |
| 2445 outptr += 32; | 2446 outptr += 32; |
| 2446 } | 2447 } |
| 2447 // Columns | 2448 // Columns |
| 2448 for (i = 0; i < 32; ++i) { | 2449 for (i = 0; i < 32; ++i) { |
| 2449 for (j = 0; j < 32; ++j) | 2450 for (j = 0; j < 32; ++j) |
| 2450 temp_in[j] = out[j * 32 + i]; | 2451 temp_in[j] = out[j * 32 + i]; |
| 2451 highbd_idct32_c(temp_in, temp_out, bd); | 2452 highbd_idct32_c(temp_in, temp_out, bd); |
| 2452 for (j = 0; j < 32; ++j) { | 2453 for (j = 0; j < 32; ++j) { |
| 2453 dest[j * stride + i] = highbd_clip_pixel_add( | 2454 dest[j * stride + i] = highbd_clip_pixel_add( |
| 2454 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2455 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2455 } | 2456 } |
| 2456 } | 2457 } |
| 2457 } | 2458 } |
| 2458 | 2459 |
| 2459 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2460 void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2460 int stride, int bd) { | 2461 int stride, int bd) { |
| 2461 int i, j; | 2462 int i, j; |
| 2462 int a1; | 2463 int a1; |
| 2463 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2464 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2464 | 2465 |
| 2465 tran_low_t out = WRAPLOW( | 2466 tran_low_t out = WRAPLOW( |
| 2466 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); | 2467 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 2467 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); | 2468 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 2468 a1 = ROUND_POWER_OF_TWO(out, 6); | 2469 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 2469 | 2470 |
| 2470 for (j = 0; j < 32; ++j) { | 2471 for (j = 0; j < 32; ++j) { |
| 2471 for (i = 0; i < 32; ++i) | 2472 for (i = 0; i < 32; ++i) |
| 2472 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2473 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 2473 dest += stride; | 2474 dest += stride; |
| 2474 } | 2475 } |
| 2475 } | 2476 } |
| 2476 #endif // CONFIG_VP9_HIGHBITDEPTH | 2477 #endif // CONFIG_VP9_HIGHBITDEPTH |
| OLD | NEW |