| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <assert.h> | 11 #include <assert.h> |
| 12 #include <math.h> | 12 #include <math.h> |
| 13 | 13 |
| 14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
| 15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
| 16 #include "vp9/common/vp9_systemdependent.h" | 16 #include "vp9/common/vp9_systemdependent.h" |
| 17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
| 18 #include "vp9/common/vp9_common.h" | 18 #include "vp9/common/vp9_common.h" |
| 19 #include "vp9/common/vp9_idct.h" | 19 #include "vp9/common/vp9_idct.h" |
| 20 | 20 |
| 21 void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 21 void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 22 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 22 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
| 23 0.5 shifts per pixel. */ | 23 0.5 shifts per pixel. */ |
| 24 int i; | 24 int i; |
| 25 int16_t output[16]; | 25 int16_t output[16]; |
| 26 int a1, b1, c1, d1, e1; | 26 int a1, b1, c1, d1, e1; |
| 27 int16_t *ip = input; | 27 const int16_t *ip = input; |
| 28 int16_t *op = output; | 28 int16_t *op = output; |
| 29 | 29 |
| 30 for (i = 0; i < 4; i++) { | 30 for (i = 0; i < 4; i++) { |
| 31 a1 = ip[0] >> WHT_UPSCALE_FACTOR; | 31 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
| 32 c1 = ip[1] >> WHT_UPSCALE_FACTOR; | 32 c1 = ip[1] >> UNIT_QUANT_SHIFT; |
| 33 d1 = ip[2] >> WHT_UPSCALE_FACTOR; | 33 d1 = ip[2] >> UNIT_QUANT_SHIFT; |
| 34 b1 = ip[3] >> WHT_UPSCALE_FACTOR; | 34 b1 = ip[3] >> UNIT_QUANT_SHIFT; |
| 35 a1 += c1; | 35 a1 += c1; |
| 36 d1 -= b1; | 36 d1 -= b1; |
| 37 e1 = (a1 - d1) >> 1; | 37 e1 = (a1 - d1) >> 1; |
| 38 b1 = e1 - b1; | 38 b1 = e1 - b1; |
| 39 c1 = e1 - c1; | 39 c1 = e1 - c1; |
| 40 a1 -= b1; | 40 a1 -= b1; |
| 41 d1 += c1; | 41 d1 += c1; |
| 42 op[0] = a1; | 42 op[0] = a1; |
| 43 op[1] = b1; | 43 op[1] = b1; |
| 44 op[2] = c1; | 44 op[2] = c1; |
| 45 op[3] = d1; | 45 op[3] = d1; |
| 46 ip += 4; | 46 ip += 4; |
| 47 op += 4; | 47 op += 4; |
| 48 } | 48 } |
| 49 | 49 |
| 50 ip = output; | 50 ip = output; |
| 51 for (i = 0; i < 4; i++) { | 51 for (i = 0; i < 4; i++) { |
| 52 a1 = ip[4 * 0]; | 52 a1 = ip[4 * 0]; |
| 53 c1 = ip[4 * 1]; | 53 c1 = ip[4 * 1]; |
| 54 d1 = ip[4 * 2]; | 54 d1 = ip[4 * 2]; |
| 55 b1 = ip[4 * 3]; | 55 b1 = ip[4 * 3]; |
| 56 a1 += c1; | 56 a1 += c1; |
| 57 d1 -= b1; | 57 d1 -= b1; |
| 58 e1 = (a1 - d1) >> 1; | 58 e1 = (a1 - d1) >> 1; |
| 59 b1 = e1 - b1; | 59 b1 = e1 - b1; |
| 60 c1 = e1 - c1; | 60 c1 = e1 - c1; |
| 61 a1 -= b1; | 61 a1 -= b1; |
| 62 d1 += c1; | 62 d1 += c1; |
| 63 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); | 63 dest[stride * 0] = clip_pixel(dest[stride * 0] + a1); |
| 64 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + b1); | 64 dest[stride * 1] = clip_pixel(dest[stride * 1] + b1); |
| 65 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + c1); | 65 dest[stride * 2] = clip_pixel(dest[stride * 2] + c1); |
| 66 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + d1); | 66 dest[stride * 3] = clip_pixel(dest[stride * 3] + d1); |
| 67 | 67 |
| 68 ip++; | 68 ip++; |
| 69 dest++; | 69 dest++; |
| 70 } | 70 } |
| 71 } | 71 } |
| 72 | 72 |
| 73 void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) { | 73 void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) { |
| 74 int i; | 74 int i; |
| 75 int a1, e1; | 75 int a1, e1; |
| 76 int16_t tmp[4]; | 76 int16_t tmp[4]; |
| 77 int16_t *ip = in; | 77 const int16_t *ip = in; |
| 78 int16_t *op = tmp; | 78 int16_t *op = tmp; |
| 79 | 79 |
| 80 a1 = ip[0] >> WHT_UPSCALE_FACTOR; | 80 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
| 81 e1 = a1 >> 1; | 81 e1 = a1 >> 1; |
| 82 a1 -= e1; | 82 a1 -= e1; |
| 83 op[0] = a1; | 83 op[0] = a1; |
| 84 op[1] = op[2] = op[3] = e1; | 84 op[1] = op[2] = op[3] = e1; |
| 85 | 85 |
| 86 ip = tmp; | 86 ip = tmp; |
| 87 for (i = 0; i < 4; i++) { | 87 for (i = 0; i < 4; i++) { |
| 88 e1 = ip[0] >> 1; | 88 e1 = ip[0] >> 1; |
| 89 a1 = ip[0] - e1; | 89 a1 = ip[0] - e1; |
| 90 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); | 90 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); |
| 91 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1); | 91 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1); |
| 92 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1); | 92 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1); |
| 93 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1); | 93 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1); |
| 94 ip++; | 94 ip++; |
| 95 dest++; | 95 dest++; |
| 96 } | 96 } |
| 97 } | 97 } |
| 98 | 98 |
| 99 void vp9_idct4_1d_c(int16_t *input, int16_t *output) { | 99 static void idct4_1d(const int16_t *input, int16_t *output) { |
| 100 int16_t step[4]; | 100 int16_t step[4]; |
| 101 int temp1, temp2; | 101 int temp1, temp2; |
| 102 // stage 1 | 102 // stage 1 |
| 103 temp1 = (input[0] + input[2]) * cospi_16_64; | 103 temp1 = (input[0] + input[2]) * cospi_16_64; |
| 104 temp2 = (input[0] - input[2]) * cospi_16_64; | 104 temp2 = (input[0] - input[2]) * cospi_16_64; |
| 105 step[0] = dct_const_round_shift(temp1); | 105 step[0] = dct_const_round_shift(temp1); |
| 106 step[1] = dct_const_round_shift(temp2); | 106 step[1] = dct_const_round_shift(temp2); |
| 107 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; | 107 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; |
| 108 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; | 108 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; |
| 109 step[2] = dct_const_round_shift(temp1); | 109 step[2] = dct_const_round_shift(temp1); |
| 110 step[3] = dct_const_round_shift(temp2); | 110 step[3] = dct_const_round_shift(temp2); |
| 111 | 111 |
| 112 // stage 2 | 112 // stage 2 |
| 113 output[0] = step[0] + step[3]; | 113 output[0] = step[0] + step[3]; |
| 114 output[1] = step[1] + step[2]; | 114 output[1] = step[1] + step[2]; |
| 115 output[2] = step[1] - step[2]; | 115 output[2] = step[1] - step[2]; |
| 116 output[3] = step[0] - step[3]; | 116 output[3] = step[0] - step[3]; |
| 117 } | 117 } |
| 118 | 118 |
| 119 void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 119 void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 120 int16_t out[4 * 4]; | 120 int16_t out[4 * 4]; |
| 121 int16_t *outptr = out; | 121 int16_t *outptr = out; |
| 122 int i, j; | 122 int i, j; |
| 123 int16_t temp_in[4], temp_out[4]; | 123 int16_t temp_in[4], temp_out[4]; |
| 124 | 124 |
| 125 // Rows | 125 // Rows |
| 126 for (i = 0; i < 4; ++i) { | 126 for (i = 0; i < 4; ++i) { |
| 127 vp9_idct4_1d(input, outptr); | 127 idct4_1d(input, outptr); |
| 128 input += 4; | 128 input += 4; |
| 129 outptr += 4; | 129 outptr += 4; |
| 130 } | 130 } |
| 131 | 131 |
| 132 // Columns | 132 // Columns |
| 133 for (i = 0; i < 4; ++i) { | 133 for (i = 0; i < 4; ++i) { |
| 134 for (j = 0; j < 4; ++j) | 134 for (j = 0; j < 4; ++j) |
| 135 temp_in[j] = out[j * 4 + i]; | 135 temp_in[j] = out[j * 4 + i]; |
| 136 vp9_idct4_1d(temp_in, temp_out); | 136 idct4_1d(temp_in, temp_out); |
| 137 for (j = 0; j < 4; ++j) | 137 for (j = 0; j < 4; ++j) |
| 138 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 138 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) |
| 139 + dest[j * dest_stride + i]); | 139 + dest[j * stride + i]); |
| 140 } | 140 } |
| 141 } | 141 } |
| 142 | 142 |
| 143 void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 143 void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) { |
| 144 int i; | 144 int i; |
| 145 int a1; | 145 int a1; |
| 146 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); | 146 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
| 147 out = dct_const_round_shift(out * cospi_16_64); | 147 out = dct_const_round_shift(out * cospi_16_64); |
| 148 a1 = ROUND_POWER_OF_TWO(out, 4); | 148 a1 = ROUND_POWER_OF_TWO(out, 4); |
| 149 | 149 |
| 150 for (i = 0; i < 4; i++) { | 150 for (i = 0; i < 4; i++) { |
| 151 dest[0] = clip_pixel(dest[0] + a1); | 151 dest[0] = clip_pixel(dest[0] + a1); |
| 152 dest[1] = clip_pixel(dest[1] + a1); | 152 dest[1] = clip_pixel(dest[1] + a1); |
| 153 dest[2] = clip_pixel(dest[2] + a1); | 153 dest[2] = clip_pixel(dest[2] + a1); |
| 154 dest[3] = clip_pixel(dest[3] + a1); | 154 dest[3] = clip_pixel(dest[3] + a1); |
| 155 dest += dest_stride; | 155 dest += dest_stride; |
| 156 } | 156 } |
| 157 } | 157 } |
| 158 | 158 |
| 159 static void idct8_1d(int16_t *input, int16_t *output) { | 159 static void idct8_1d(const int16_t *input, int16_t *output) { |
| 160 int16_t step1[8], step2[8]; | 160 int16_t step1[8], step2[8]; |
| 161 int temp1, temp2; | 161 int temp1, temp2; |
| 162 // stage 1 | 162 // stage 1 |
| 163 step1[0] = input[0]; | 163 step1[0] = input[0]; |
| 164 step1[2] = input[4]; | 164 step1[2] = input[4]; |
| 165 step1[1] = input[2]; | 165 step1[1] = input[2]; |
| 166 step1[3] = input[6]; | 166 step1[3] = input[6]; |
| 167 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; | 167 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; |
| 168 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; | 168 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; |
| 169 step1[4] = dct_const_round_shift(temp1); | 169 step1[4] = dct_const_round_shift(temp1); |
| 170 step1[7] = dct_const_round_shift(temp2); | 170 step1[7] = dct_const_round_shift(temp2); |
| 171 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; | 171 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; |
| 172 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; | 172 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; |
| 173 step1[5] = dct_const_round_shift(temp1); | 173 step1[5] = dct_const_round_shift(temp1); |
| 174 step1[6] = dct_const_round_shift(temp2); | 174 step1[6] = dct_const_round_shift(temp2); |
| 175 | 175 |
| 176 // stage 2 & stage 3 - even half | 176 // stage 2 & stage 3 - even half |
| 177 vp9_idct4_1d(step1, step1); | 177 idct4_1d(step1, step1); |
| 178 | 178 |
| 179 // stage 2 - odd half | 179 // stage 2 - odd half |
| 180 step2[4] = step1[4] + step1[5]; | 180 step2[4] = step1[4] + step1[5]; |
| 181 step2[5] = step1[4] - step1[5]; | 181 step2[5] = step1[4] - step1[5]; |
| 182 step2[6] = -step1[6] + step1[7]; | 182 step2[6] = -step1[6] + step1[7]; |
| 183 step2[7] = step1[6] + step1[7]; | 183 step2[7] = step1[6] + step1[7]; |
| 184 | 184 |
| 185 // stage 3 -odd half | 185 // stage 3 -odd half |
| 186 step1[4] = step2[4]; | 186 step1[4] = step2[4]; |
| 187 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 187 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
| 188 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 188 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
| 189 step1[5] = dct_const_round_shift(temp1); | 189 step1[5] = dct_const_round_shift(temp1); |
| 190 step1[6] = dct_const_round_shift(temp2); | 190 step1[6] = dct_const_round_shift(temp2); |
| 191 step1[7] = step2[7]; | 191 step1[7] = step2[7]; |
| 192 | 192 |
| 193 // stage 4 | 193 // stage 4 |
| 194 output[0] = step1[0] + step1[7]; | 194 output[0] = step1[0] + step1[7]; |
| 195 output[1] = step1[1] + step1[6]; | 195 output[1] = step1[1] + step1[6]; |
| 196 output[2] = step1[2] + step1[5]; | 196 output[2] = step1[2] + step1[5]; |
| 197 output[3] = step1[3] + step1[4]; | 197 output[3] = step1[3] + step1[4]; |
| 198 output[4] = step1[3] - step1[4]; | 198 output[4] = step1[3] - step1[4]; |
| 199 output[5] = step1[2] - step1[5]; | 199 output[5] = step1[2] - step1[5]; |
| 200 output[6] = step1[1] - step1[6]; | 200 output[6] = step1[1] - step1[6]; |
| 201 output[7] = step1[0] - step1[7]; | 201 output[7] = step1[0] - step1[7]; |
| 202 } | 202 } |
| 203 | 203 |
| 204 void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 204 void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 205 int16_t out[8 * 8]; | 205 int16_t out[8 * 8]; |
| 206 int16_t *outptr = out; | 206 int16_t *outptr = out; |
| 207 int i, j; | 207 int i, j; |
| 208 int16_t temp_in[8], temp_out[8]; | 208 int16_t temp_in[8], temp_out[8]; |
| 209 | 209 |
| 210 // First transform rows | 210 // First transform rows |
| 211 for (i = 0; i < 8; ++i) { | 211 for (i = 0; i < 8; ++i) { |
| 212 idct8_1d(input, outptr); | 212 idct8_1d(input, outptr); |
| 213 input += 8; | 213 input += 8; |
| 214 outptr += 8; | 214 outptr += 8; |
| 215 } | 215 } |
| 216 | 216 |
| 217 // Then transform columns | 217 // Then transform columns |
| 218 for (i = 0; i < 8; ++i) { | 218 for (i = 0; i < 8; ++i) { |
| 219 for (j = 0; j < 8; ++j) | 219 for (j = 0; j < 8; ++j) |
| 220 temp_in[j] = out[j * 8 + i]; | 220 temp_in[j] = out[j * 8 + i]; |
| 221 idct8_1d(temp_in, temp_out); | 221 idct8_1d(temp_in, temp_out); |
| 222 for (j = 0; j < 8; ++j) | 222 for (j = 0; j < 8; ++j) |
| 223 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 223 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) |
| 224 + dest[j * dest_stride + i]); | 224 + dest[j * stride + i]); |
| 225 } | 225 } |
| 226 } | 226 } |
| 227 | 227 |
| 228 void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 228 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 229 int i, j; | 229 int i, j; |
| 230 int a1; | 230 int a1; |
| 231 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); | 231 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
| 232 out = dct_const_round_shift(out * cospi_16_64); | 232 out = dct_const_round_shift(out * cospi_16_64); |
| 233 a1 = ROUND_POWER_OF_TWO(out, 5); | 233 a1 = ROUND_POWER_OF_TWO(out, 5); |
| 234 for (j = 0; j < 8; ++j) { | 234 for (j = 0; j < 8; ++j) { |
| 235 for (i = 0; i < 8; ++i) | 235 for (i = 0; i < 8; ++i) |
| 236 dest[i] = clip_pixel(dest[i] + a1); | 236 dest[i] = clip_pixel(dest[i] + a1); |
| 237 dest += dest_stride; | 237 dest += stride; |
| 238 } | 238 } |
| 239 } | 239 } |
| 240 | 240 |
| 241 static void iadst4_1d(int16_t *input, int16_t *output) { | 241 static void iadst4_1d(const int16_t *input, int16_t *output) { |
| 242 int s0, s1, s2, s3, s4, s5, s6, s7; | 242 int s0, s1, s2, s3, s4, s5, s6, s7; |
| 243 | 243 |
| 244 int x0 = input[0]; | 244 int x0 = input[0]; |
| 245 int x1 = input[1]; | 245 int x1 = input[1]; |
| 246 int x2 = input[2]; | 246 int x2 = input[2]; |
| 247 int x3 = input[3]; | 247 int x3 = input[3]; |
| 248 | 248 |
| 249 if (!(x0 | x1 | x2 | x3)) { | 249 if (!(x0 | x1 | x2 | x3)) { |
| 250 output[0] = output[1] = output[2] = output[3] = 0; | 250 output[0] = output[1] = output[2] = output[3] = 0; |
| 251 return; | 251 return; |
| (...skipping 21 matching lines...) | (...skipping 21 matching lines...) |
| 273 // 1-D transform scaling factor is sqrt(2). | 273 // 1-D transform scaling factor is sqrt(2). |
| 274 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 274 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
| 275 // + 1b (addition) = 29b. | 275 // + 1b (addition) = 29b. |
| 276 // Hence the output bit depth is 15b. | 276 // Hence the output bit depth is 15b. |
| 277 output[0] = dct_const_round_shift(s0); | 277 output[0] = dct_const_round_shift(s0); |
| 278 output[1] = dct_const_round_shift(s1); | 278 output[1] = dct_const_round_shift(s1); |
| 279 output[2] = dct_const_round_shift(s2); | 279 output[2] = dct_const_round_shift(s2); |
| 280 output[3] = dct_const_round_shift(s3); | 280 output[3] = dct_const_round_shift(s3); |
| 281 } | 281 } |
| 282 | 282 |
| 283 void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride, | 283 void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride, |
| 284 int tx_type) { | 284 int tx_type) { |
| 285 const transform_2d IHT_4[] = { | 285 const transform_2d IHT_4[] = { |
| 286 { vp9_idct4_1d, vp9_idct4_1d }, // DCT_DCT = 0 | 286 { idct4_1d, idct4_1d }, // DCT_DCT = 0 |
| 287 { iadst4_1d, vp9_idct4_1d }, // ADST_DCT = 1 | 287 { iadst4_1d, idct4_1d }, // ADST_DCT = 1 |
| 288 { vp9_idct4_1d, iadst4_1d }, // DCT_ADST = 2 | 288 { idct4_1d, iadst4_1d }, // DCT_ADST = 2 |
| 289 { iadst4_1d, iadst4_1d } // ADST_ADST = 3 | 289 { iadst4_1d, iadst4_1d } // ADST_ADST = 3 |
| 290 }; | 290 }; |
| 291 | 291 |
| 292 int i, j; | 292 int i, j; |
| 293 int16_t out[4 * 4]; | 293 int16_t out[4 * 4]; |
| 294 int16_t *outptr = out; | 294 int16_t *outptr = out; |
| 295 int16_t temp_in[4], temp_out[4]; | 295 int16_t temp_in[4], temp_out[4]; |
| 296 | 296 |
| 297 // inverse transform row vectors | 297 // inverse transform row vectors |
| 298 for (i = 0; i < 4; ++i) { | 298 for (i = 0; i < 4; ++i) { |
| 299 IHT_4[tx_type].rows(input, outptr); | 299 IHT_4[tx_type].rows(input, outptr); |
| 300 input += 4; | 300 input += 4; |
| 301 outptr += 4; | 301 outptr += 4; |
| 302 } | 302 } |
| 303 | 303 |
| 304 // inverse transform column vectors | 304 // inverse transform column vectors |
| 305 for (i = 0; i < 4; ++i) { | 305 for (i = 0; i < 4; ++i) { |
| 306 for (j = 0; j < 4; ++j) | 306 for (j = 0; j < 4; ++j) |
| 307 temp_in[j] = out[j * 4 + i]; | 307 temp_in[j] = out[j * 4 + i]; |
| 308 IHT_4[tx_type].cols(temp_in, temp_out); | 308 IHT_4[tx_type].cols(temp_in, temp_out); |
| 309 for (j = 0; j < 4; ++j) | 309 for (j = 0; j < 4; ++j) |
| 310 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 310 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) |
| 311 + dest[j * dest_stride + i]); | 311 + dest[j * stride + i]); |
| 312 } | 312 } |
| 313 } | 313 } |
| 314 static void iadst8_1d(int16_t *input, int16_t *output) { | 314 static void iadst8_1d(const int16_t *input, int16_t *output) { |
| 315 int s0, s1, s2, s3, s4, s5, s6, s7; | 315 int s0, s1, s2, s3, s4, s5, s6, s7; |
| 316 | 316 |
| 317 int x0 = input[7]; | 317 int x0 = input[7]; |
| 318 int x1 = input[0]; | 318 int x1 = input[0]; |
| 319 int x2 = input[5]; | 319 int x2 = input[5]; |
| 320 int x3 = input[2]; | 320 int x3 = input[2]; |
| 321 int x4 = input[3]; | 321 int x4 = input[3]; |
| 322 int x5 = input[4]; | 322 int x5 = input[4]; |
| 323 int x6 = input[1]; | 323 int x6 = input[1]; |
| 324 int x7 = input[6]; | 324 int x7 = input[6]; |
| (...skipping 63 matching lines...) | (...skipping 63 matching lines...) |
| 388 output[7] = -x1; | 388 output[7] = -x1; |
| 389 } | 389 } |
| 390 | 390 |
| 391 static const transform_2d IHT_8[] = { | 391 static const transform_2d IHT_8[] = { |
| 392 { idct8_1d, idct8_1d }, // DCT_DCT = 0 | 392 { idct8_1d, idct8_1d }, // DCT_DCT = 0 |
| 393 { iadst8_1d, idct8_1d }, // ADST_DCT = 1 | 393 { iadst8_1d, idct8_1d }, // ADST_DCT = 1 |
| 394 { idct8_1d, iadst8_1d }, // DCT_ADST = 2 | 394 { idct8_1d, iadst8_1d }, // DCT_ADST = 2 |
| 395 { iadst8_1d, iadst8_1d } // ADST_ADST = 3 | 395 { iadst8_1d, iadst8_1d } // ADST_ADST = 3 |
| 396 }; | 396 }; |
| 397 | 397 |
| 398 void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride, | 398 void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride, |
| 399 int tx_type) { | 399 int tx_type) { |
| 400 int i, j; | 400 int i, j; |
| 401 int16_t out[8 * 8]; | 401 int16_t out[8 * 8]; |
| 402 int16_t *outptr = out; | 402 int16_t *outptr = out; |
| 403 int16_t temp_in[8], temp_out[8]; | 403 int16_t temp_in[8], temp_out[8]; |
| 404 const transform_2d ht = IHT_8[tx_type]; | 404 const transform_2d ht = IHT_8[tx_type]; |
| 405 | 405 |
| 406 // inverse transform row vectors | 406 // inverse transform row vectors |
| 407 for (i = 0; i < 8; ++i) { | 407 for (i = 0; i < 8; ++i) { |
| 408 ht.rows(input, outptr); | 408 ht.rows(input, outptr); |
| 409 input += 8; | 409 input += 8; |
| 410 outptr += 8; | 410 outptr += 8; |
| 411 } | 411 } |
| 412 | 412 |
| 413 // inverse transform column vectors | 413 // inverse transform column vectors |
| 414 for (i = 0; i < 8; ++i) { | 414 for (i = 0; i < 8; ++i) { |
| 415 for (j = 0; j < 8; ++j) | 415 for (j = 0; j < 8; ++j) |
| 416 temp_in[j] = out[j * 8 + i]; | 416 temp_in[j] = out[j * 8 + i]; |
| 417 ht.cols(temp_in, temp_out); | 417 ht.cols(temp_in, temp_out); |
| 418 for (j = 0; j < 8; ++j) | 418 for (j = 0; j < 8; ++j) |
| 419 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 419 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) |
| 420 + dest[j * dest_stride + i]); } | 420 + dest[j * stride + i]); |
| 421 } |
| 421 } | 422 } |
| 422 | 423 |
| 423 void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest, | 424 void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 424 int dest_stride) { | |
| 425 int16_t out[8 * 8] = { 0 }; | 425 int16_t out[8 * 8] = { 0 }; |
| 426 int16_t *outptr = out; | 426 int16_t *outptr = out; |
| 427 int i, j; | 427 int i, j; |
| 428 int16_t temp_in[8], temp_out[8]; | 428 int16_t temp_in[8], temp_out[8]; |
| 429 | 429 |
| 430 // First transform rows | 430 // First transform rows |
| 431 // only first 4 row has non-zero coefs | 431 // only first 4 row has non-zero coefs |
| 432 for (i = 0; i < 4; ++i) { | 432 for (i = 0; i < 4; ++i) { |
| 433 idct8_1d(input, outptr); | 433 idct8_1d(input, outptr); |
| 434 input += 8; | 434 input += 8; |
| 435 outptr += 8; | 435 outptr += 8; |
| 436 } | 436 } |
| 437 | 437 |
| 438 // Then transform columns | 438 // Then transform columns |
| 439 for (i = 0; i < 8; ++i) { | 439 for (i = 0; i < 8; ++i) { |
| 440 for (j = 0; j < 8; ++j) | 440 for (j = 0; j < 8; ++j) |
| 441 temp_in[j] = out[j * 8 + i]; | 441 temp_in[j] = out[j * 8 + i]; |
| 442 idct8_1d(temp_in, temp_out); | 442 idct8_1d(temp_in, temp_out); |
| 443 for (j = 0; j < 8; ++j) | 443 for (j = 0; j < 8; ++j) |
| 444 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 444 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) |
| 445 + dest[j * dest_stride + i]); | 445 + dest[j * stride + i]); |
| 446 } | 446 } |
| 447 } | 447 } |
| 448 | 448 |
| 449 static void idct16_1d(int16_t *input, int16_t *output) { | 449 static void idct16_1d(const int16_t *input, int16_t *output) { |
| 450 int16_t step1[16], step2[16]; | 450 int16_t step1[16], step2[16]; |
| 451 int temp1, temp2; | 451 int temp1, temp2; |
| 452 | 452 |
| 453 // stage 1 | 453 // stage 1 |
| 454 step1[0] = input[0/2]; | 454 step1[0] = input[0/2]; |
| 455 step1[1] = input[16/2]; | 455 step1[1] = input[16/2]; |
| 456 step1[2] = input[8/2]; | 456 step1[2] = input[8/2]; |
| 457 step1[3] = input[24/2]; | 457 step1[3] = input[24/2]; |
| 458 step1[4] = input[4/2]; | 458 step1[4] = input[4/2]; |
| 459 step1[5] = input[20/2]; | 459 step1[5] = input[20/2]; |
| (...skipping 144 matching lines...) | (...skipping 144 matching lines...) |
| 604 output[8] = step2[7] - step2[8]; | 604 output[8] = step2[7] - step2[8]; |
| 605 output[9] = step2[6] - step2[9]; | 605 output[9] = step2[6] - step2[9]; |
| 606 output[10] = step2[5] - step2[10]; | 606 output[10] = step2[5] - step2[10]; |
| 607 output[11] = step2[4] - step2[11]; | 607 output[11] = step2[4] - step2[11]; |
| 608 output[12] = step2[3] - step2[12]; | 608 output[12] = step2[3] - step2[12]; |
| 609 output[13] = step2[2] - step2[13]; | 609 output[13] = step2[2] - step2[13]; |
| 610 output[14] = step2[1] - step2[14]; | 610 output[14] = step2[1] - step2[14]; |
| 611 output[15] = step2[0] - step2[15]; | 611 output[15] = step2[0] - step2[15]; |
| 612 } | 612 } |
| 613 | 613 |
| 614 void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 614 void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 615 int16_t out[16 * 16]; | 615 int16_t out[16 * 16]; |
| 616 int16_t *outptr = out; | 616 int16_t *outptr = out; |
| 617 int i, j; | 617 int i, j; |
| 618 int16_t temp_in[16], temp_out[16]; | 618 int16_t temp_in[16], temp_out[16]; |
| 619 | 619 |
| 620 // First transform rows | 620 // First transform rows |
| 621 for (i = 0; i < 16; ++i) { | 621 for (i = 0; i < 16; ++i) { |
| 622 idct16_1d(input, outptr); | 622 idct16_1d(input, outptr); |
| 623 input += 16; | 623 input += 16; |
| 624 outptr += 16; | 624 outptr += 16; |
| 625 } | 625 } |
| 626 | 626 |
| 627 // Then transform columns | 627 // Then transform columns |
| 628 for (i = 0; i < 16; ++i) { | 628 for (i = 0; i < 16; ++i) { |
| 629 for (j = 0; j < 16; ++j) | 629 for (j = 0; j < 16; ++j) |
| 630 temp_in[j] = out[j * 16 + i]; | 630 temp_in[j] = out[j * 16 + i]; |
| 631 idct16_1d(temp_in, temp_out); | 631 idct16_1d(temp_in, temp_out); |
| 632 for (j = 0; j < 16; ++j) | 632 for (j = 0; j < 16; ++j) |
| 633 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 633 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
| 634 + dest[j * dest_stride + i]); | 634 + dest[j * stride + i]); |
| 635 } | 635 } |
| 636 } | 636 } |
| 637 | 637 |
| 638 void iadst16_1d(int16_t *input, int16_t *output) { | 638 static void iadst16_1d(const int16_t *input, int16_t *output) { |
| 639 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; | 639 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; |
| 640 | 640 |
| 641 int x0 = input[15]; | 641 int x0 = input[15]; |
| 642 int x1 = input[0]; | 642 int x1 = input[0]; |
| 643 int x2 = input[13]; | 643 int x2 = input[13]; |
| 644 int x3 = input[2]; | 644 int x3 = input[2]; |
| 645 int x4 = input[11]; | 645 int x4 = input[11]; |
| 646 int x5 = input[4]; | 646 int x5 = input[4]; |
| 647 int x6 = input[9]; | 647 int x6 = input[9]; |
| 648 int x7 = input[6]; | 648 int x7 = input[6]; |
| (...skipping 157 matching lines...) | (...skipping 157 matching lines...) |
| 806 output[15] = -x1; | 806 output[15] = -x1; |
| 807 } | 807 } |
| 808 | 808 |
| 809 static const transform_2d IHT_16[] = { | 809 static const transform_2d IHT_16[] = { |
| 810 { idct16_1d, idct16_1d }, // DCT_DCT = 0 | 810 { idct16_1d, idct16_1d }, // DCT_DCT = 0 |
| 811 { iadst16_1d, idct16_1d }, // ADST_DCT = 1 | 811 { iadst16_1d, idct16_1d }, // ADST_DCT = 1 |
| 812 { idct16_1d, iadst16_1d }, // DCT_ADST = 2 | 812 { idct16_1d, iadst16_1d }, // DCT_ADST = 2 |
| 813 { iadst16_1d, iadst16_1d } // ADST_ADST = 3 | 813 { iadst16_1d, iadst16_1d } // ADST_ADST = 3 |
| 814 }; | 814 }; |
| 815 | 815 |
| 816 void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride, | 816 void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride, |
| 817 int tx_type) { | 817 int tx_type) { |
| 818 int i, j; | 818 int i, j; |
| 819 int16_t out[16 * 16]; | 819 int16_t out[16 * 16]; |
| 820 int16_t *outptr = out; | 820 int16_t *outptr = out; |
| 821 int16_t temp_in[16], temp_out[16]; | 821 int16_t temp_in[16], temp_out[16]; |
| 822 const transform_2d ht = IHT_16[tx_type]; | 822 const transform_2d ht = IHT_16[tx_type]; |
| 823 | 823 |
| 824 // Rows | 824 // Rows |
| 825 for (i = 0; i < 16; ++i) { | 825 for (i = 0; i < 16; ++i) { |
| 826 ht.rows(input, outptr); | 826 ht.rows(input, outptr); |
| 827 input += 16; | 827 input += 16; |
| 828 outptr += 16; | 828 outptr += 16; |
| 829 } | 829 } |
| 830 | 830 |
| 831 // Columns | 831 // Columns |
| 832 for (i = 0; i < 16; ++i) { | 832 for (i = 0; i < 16; ++i) { |
| 833 for (j = 0; j < 16; ++j) | 833 for (j = 0; j < 16; ++j) |
| 834 temp_in[j] = out[j * 16 + i]; | 834 temp_in[j] = out[j * 16 + i]; |
| 835 ht.cols(temp_in, temp_out); | 835 ht.cols(temp_in, temp_out); |
| 836 for (j = 0; j < 16; ++j) | 836 for (j = 0; j < 16; ++j) |
| 837 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 837 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
| 838 + dest[j * dest_stride + i]); } | 838 + dest[j * stride + i]); } |
| 839 } | 839 } |
| 840 | 840 |
| 841 void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, | 841 void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 842 int dest_stride) { | |
| 843 int16_t out[16 * 16] = { 0 }; | 842 int16_t out[16 * 16] = { 0 }; |
| 844 int16_t *outptr = out; | 843 int16_t *outptr = out; |
| 845 int i, j; | 844 int i, j; |
| 846 int16_t temp_in[16], temp_out[16]; | 845 int16_t temp_in[16], temp_out[16]; |
| 847 | 846 |
| 848 // First transform rows. Since all non-zero dct coefficients are in | 847 // First transform rows. Since all non-zero dct coefficients are in |
| 849 // upper-left 4x4 area, we only need to calculate first 4 rows here. | 848 // upper-left 4x4 area, we only need to calculate first 4 rows here. |
| 850 for (i = 0; i < 4; ++i) { | 849 for (i = 0; i < 4; ++i) { |
| 851 idct16_1d(input, outptr); | 850 idct16_1d(input, outptr); |
| 852 input += 16; | 851 input += 16; |
| 853 outptr += 16; | 852 outptr += 16; |
| 854 } | 853 } |
| 855 | 854 |
| 856 // Then transform columns | 855 // Then transform columns |
| 857 for (i = 0; i < 16; ++i) { | 856 for (i = 0; i < 16; ++i) { |
| 858 for (j = 0; j < 16; ++j) | 857 for (j = 0; j < 16; ++j) |
| 859 temp_in[j] = out[j*16 + i]; | 858 temp_in[j] = out[j*16 + i]; |
| 860 idct16_1d(temp_in, temp_out); | 859 idct16_1d(temp_in, temp_out); |
| 861 for (j = 0; j < 16; ++j) | 860 for (j = 0; j < 16; ++j) |
| 862 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 861 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
| 863 + dest[j * dest_stride + i]); | 862 + dest[j * stride + i]); |
| 864 } | 863 } |
| 865 } | 864 } |
| 866 | 865 |
| 867 void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest, | 866 void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 868 int dest_stride) { | |
| 869 int i, j; | 867 int i, j; |
| 870 int a1; | 868 int a1; |
| 871 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); | 869 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
| 872 out = dct_const_round_shift(out * cospi_16_64); | 870 out = dct_const_round_shift(out * cospi_16_64); |
| 873 a1 = ROUND_POWER_OF_TWO(out, 6); | 871 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 874 for (j = 0; j < 16; ++j) { | 872 for (j = 0; j < 16; ++j) { |
| 875 for (i = 0; i < 16; ++i) | 873 for (i = 0; i < 16; ++i) |
| 876 dest[i] = clip_pixel(dest[i] + a1); | 874 dest[i] = clip_pixel(dest[i] + a1); |
| 877 dest += dest_stride; | 875 dest += stride; |
| 878 } | 876 } |
| 879 } | 877 } |
| 880 | 878 |
| 881 static void idct32_1d(int16_t *input, int16_t *output) { | 879 static void idct32_1d(const int16_t *input, int16_t *output) { |
| 882 int16_t step1[32], step2[32]; | 880 int16_t step1[32], step2[32]; |
| 883 int temp1, temp2; | 881 int temp1, temp2; |
| 884 | 882 |
| 885 // stage 1 | 883 // stage 1 |
| 886 step1[0] = input[0]; | 884 step1[0] = input[0]; |
| 887 step1[1] = input[16]; | 885 step1[1] = input[16]; |
| 888 step1[2] = input[8]; | 886 step1[2] = input[8]; |
| 889 step1[3] = input[24]; | 887 step1[3] = input[24]; |
| 890 step1[4] = input[4]; | 888 step1[4] = input[4]; |
| 891 step1[5] = input[20]; | 889 step1[5] = input[20]; |
| (...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1238 output[24] = step1[7] - step1[24]; | 1236 output[24] = step1[7] - step1[24]; |
| 1239 output[25] = step1[6] - step1[25]; | 1237 output[25] = step1[6] - step1[25]; |
| 1240 output[26] = step1[5] - step1[26]; | 1238 output[26] = step1[5] - step1[26]; |
| 1241 output[27] = step1[4] - step1[27]; | 1239 output[27] = step1[4] - step1[27]; |
| 1242 output[28] = step1[3] - step1[28]; | 1240 output[28] = step1[3] - step1[28]; |
| 1243 output[29] = step1[2] - step1[29]; | 1241 output[29] = step1[2] - step1[29]; |
| 1244 output[30] = step1[1] - step1[30]; | 1242 output[30] = step1[1] - step1[30]; |
| 1245 output[31] = step1[0] - step1[31]; | 1243 output[31] = step1[0] - step1[31]; |
| 1246 } | 1244 } |
| 1247 | 1245 |
| 1248 void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride) { | 1246 void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 1249 int16_t out[32 * 32]; | 1247 int16_t out[32 * 32]; |
| 1250 int16_t *outptr = out; | 1248 int16_t *outptr = out; |
| 1251 int i, j; | 1249 int i, j; |
| 1252 int16_t temp_in[32], temp_out[32]; | 1250 int16_t temp_in[32], temp_out[32]; |
| 1253 | 1251 |
| 1254 // Rows | 1252 // Rows |
| 1255 for (i = 0; i < 32; ++i) { | 1253 for (i = 0; i < 32; ++i) { |
| 1254 int16_t zero_coeff[16]; |
| 1255 for (j = 0; j < 16; ++j) |
| 1256 zero_coeff[j] = input[2 * j] | input[2 * j + 1]; |
| 1257 for (j = 0; j < 8; ++j) |
| 1258 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; |
| 1259 for (j = 0; j < 4; ++j) |
| 1260 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; |
| 1261 for (j = 0; j < 2; ++j) |
| 1262 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; |
| 1263 |
| 1264 if (zero_coeff[0] | zero_coeff[1]) |
| 1265 idct32_1d(input, outptr); |
| 1266 else |
| 1267 vpx_memset(outptr, 0, sizeof(int16_t) * 32); |
| 1268 input += 32; |
| 1269 outptr += 32; |
| 1270 } |
| 1271 |
| 1272 // Columns |
| 1273 for (i = 0; i < 32; ++i) { |
| 1274 for (j = 0; j < 32; ++j) |
| 1275 temp_in[j] = out[j * 32 + i]; |
| 1276 idct32_1d(temp_in, temp_out); |
| 1277 for (j = 0; j < 32; ++j) |
| 1278 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
| 1279 + dest[j * stride + i]); |
| 1280 } |
| 1281 } |
| 1282 |
| 1283 void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 1284 int16_t out[32 * 32] = {0}; |
| 1285 int16_t *outptr = out; |
| 1286 int i, j; |
| 1287 int16_t temp_in[32], temp_out[32]; |
| 1288 |
| 1289 // Rows |
| 1290 // only upper-left 8x8 has non-zero coeff |
| 1291 for (i = 0; i < 8; ++i) { |
| 1256 idct32_1d(input, outptr); | 1292 idct32_1d(input, outptr); |
| 1257 input += 32; | 1293 input += 32; |
| 1258 outptr += 32; | 1294 outptr += 32; |
| 1259 } | 1295 } |
| 1260 | 1296 |
| 1261 // Columns | 1297 // Columns |
| 1262 for (i = 0; i < 32; ++i) { | 1298 for (i = 0; i < 32; ++i) { |
| 1263 for (j = 0; j < 32; ++j) | 1299 for (j = 0; j < 32; ++j) |
| 1264 temp_in[j] = out[j * 32 + i]; | 1300 temp_in[j] = out[j * 32 + i]; |
| 1265 idct32_1d(temp_in, temp_out); | 1301 idct32_1d(temp_in, temp_out); |
| 1266 for (j = 0; j < 32; ++j) | 1302 for (j = 0; j < 32; ++j) |
| 1267 dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 1303 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
| 1268 + dest[j * dest_stride + i]); | 1304 + dest[j * stride + i]); |
| 1269 } | 1305 } |
| 1270 } | 1306 } |
| 1271 | 1307 |
| 1272 void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) { | 1308 void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) { |
| 1309 int i, j; |
| 1310 int a1; |
| 1311 |
| 1273 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); | 1312 int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
| 1274 out = dct_const_round_shift(out * cospi_16_64); | 1313 out = dct_const_round_shift(out * cospi_16_64); |
| 1275 output[0] = ROUND_POWER_OF_TWO(out, 6); | 1314 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 1315 |
| 1316 for (j = 0; j < 32; ++j) { |
| 1317 for (i = 0; i < 32; ++i) |
| 1318 dest[i] = clip_pixel(dest[i] + a1); |
| 1319 dest += stride; |
| 1320 } |
| 1276 } | 1321 } |
| 1322 |
| 1323 // idct |
| 1324 void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) { |
| 1325 if (eob > 1) |
| 1326 vp9_idct4x4_16_add(input, dest, stride); |
| 1327 else |
| 1328 vp9_idct4x4_1_add(input, dest, stride); |
| 1329 } |
| 1330 |
| 1331 |
| 1332 void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) { |
| 1333 if (eob > 1) |
| 1334 vp9_iwht4x4_16_add(input, dest, stride); |
| 1335 else |
| 1336 vp9_iwht4x4_1_add(input, dest, stride); |
| 1337 } |
| 1338 |
| 1339 void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) { |
| 1340 // If dc is 1, then input[0] is the reconstructed value, do not need |
| 1341 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. |
| 1342 |
| 1343 // The calculation can be simplified if there are not many non-zero dct |
| 1344 // coefficients. Use eobs to decide what to do. |
| 1345 // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. |
| 1346 // Combine that with code here. |
| 1347 if (eob) { |
| 1348 if (eob == 1) |
| 1349 // DC only DCT coefficient |
| 1350 vp9_idct8x8_1_add(input, dest, stride); |
| 1351 else if (eob <= 10) |
| 1352 vp9_idct8x8_10_add(input, dest, stride); |
| 1353 else |
| 1354 vp9_idct8x8_64_add(input, dest, stride); |
| 1355 } |
| 1356 } |
| 1357 |
| 1358 void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, |
| 1359 int eob) { |
| 1360 /* The calculation can be simplified if there are not many non-zero dct |
| 1361 * coefficients. Use eobs to separate different cases. */ |
| 1362 if (eob) { |
| 1363 if (eob == 1) |
| 1364 /* DC only DCT coefficient. */ |
| 1365 vp9_idct16x16_1_add(input, dest, stride); |
| 1366 else if (eob <= 10) |
| 1367 vp9_idct16x16_10_add(input, dest, stride); |
| 1368 else |
| 1369 vp9_idct16x16_256_add(input, dest, stride); |
| 1370 } |
| 1371 } |
| 1372 |
| 1373 void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride, |
| 1374 int eob) { |
| 1375 if (eob) { |
| 1376 if (eob == 1) |
| 1377 vp9_idct32x32_1_add(input, dest, stride); |
| 1378 else if (eob <= 34) |
| 1379 // non-zero coeff only in upper-left 8x8 |
| 1380 vp9_idct32x32_34_add(input, dest, stride); |
| 1381 else |
| 1382 vp9_idct32x32_1024_add(input, dest, stride); |
| 1383 } |
| 1384 } |
| 1385 |
| 1386 // iht |
| 1387 void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
| 1388 int stride, int eob) { |
| 1389 if (tx_type == DCT_DCT) |
| 1390 vp9_idct4x4_add(input, dest, stride, eob); |
| 1391 else |
| 1392 vp9_iht4x4_16_add(input, dest, stride, tx_type); |
| 1393 } |
| 1394 |
| 1395 void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
| 1396 int stride, int eob) { |
| 1397 if (tx_type == DCT_DCT) { |
| 1398 vp9_idct8x8_add(input, dest, stride, eob); |
| 1399 } else { |
| 1400 if (eob > 0) { |
| 1401 vp9_iht8x8_64_add(input, dest, stride, tx_type); |
| 1402 } |
| 1403 } |
| 1404 } |
| 1405 |
| 1406 void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
| 1407 int stride, int eob) { |
| 1408 if (tx_type == DCT_DCT) { |
| 1409 vp9_idct16x16_add(input, dest, stride, eob); |
| 1410 } else { |
| 1411 if (eob > 0) { |
| 1412 vp9_iht16x16_256_add(input, dest, stride, tx_type); |
| 1413 } |
| 1414 } |
| 1415 } |
| OLD | NEW |