| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <assert.h> | 11 #include <assert.h> |
| 12 #include <math.h> | 12 #include <math.h> |
| 13 | 13 |
| 14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
| 15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
| 16 | 16 |
| 17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
| 18 #include "vp9/common/vp9_idct.h" | 18 #include "vp9/common/vp9_idct.h" |
| 19 #include "vp9/common/vp9_systemdependent.h" | 19 #include "vp9/common/vp9_systemdependent.h" |
| 20 | 20 |
| 21 #include "vp9/encoder/vp9_dct.h" | |
| 22 | |
| 23 static INLINE int fdct_round_shift(int input) { | 21 static INLINE int fdct_round_shift(int input) { |
| 24 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); | 22 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
| 25 assert(INT16_MIN <= rv && rv <= INT16_MAX); | 23 assert(INT16_MIN <= rv && rv <= INT16_MAX); |
| 26 return rv; | 24 return rv; |
| 27 } | 25 } |
| 28 | 26 |
| 29 static void fdct4(const int16_t *input, int16_t *output) { | 27 static void fdct4(const int16_t *input, int16_t *output) { |
| 30 int16_t step[4]; | 28 int16_t step[4]; |
| 31 int temp1, temp2; | 29 int temp1, temp2; |
| 32 | 30 |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 150 output[3] = fdct_round_shift(s3); | 148 output[3] = fdct_round_shift(s3); |
| 151 } | 149 } |
| 152 | 150 |
| 153 static const transform_2d FHT_4[] = { | 151 static const transform_2d FHT_4[] = { |
| 154 { fdct4, fdct4 }, // DCT_DCT = 0 | 152 { fdct4, fdct4 }, // DCT_DCT = 0 |
| 155 { fadst4, fdct4 }, // ADST_DCT = 1 | 153 { fadst4, fdct4 }, // ADST_DCT = 1 |
| 156 { fdct4, fadst4 }, // DCT_ADST = 2 | 154 { fdct4, fadst4 }, // DCT_ADST = 2 |
| 157 { fadst4, fadst4 } // ADST_ADST = 3 | 155 { fadst4, fadst4 } // ADST_ADST = 3 |
| 158 }; | 156 }; |
| 159 | 157 |
| 160 void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, | 158 void vp9_fht4x4_c(const int16_t *input, int16_t *output, |
| 161 int stride, int tx_type) { | 159 int stride, int tx_type) { |
| 162 int16_t out[4 * 4]; | 160 if (tx_type == DCT_DCT) { |
| 163 int16_t *outptr = &out[0]; | 161 vp9_fdct4x4_c(input, output, stride); |
| 164 int i, j; | 162 } else { |
| 165 int16_t temp_in[4], temp_out[4]; | 163 int16_t out[4 * 4]; |
| 166 const transform_2d ht = FHT_4[tx_type]; | 164 int16_t *outptr = &out[0]; |
| 165 int i, j; |
| 166 int16_t temp_in[4], temp_out[4]; |
| 167 const transform_2d ht = FHT_4[tx_type]; |
| 167 | 168 |
| 168 // Columns | 169 // Columns |
| 169 for (i = 0; i < 4; ++i) { | 170 for (i = 0; i < 4; ++i) { |
| 170 for (j = 0; j < 4; ++j) | 171 for (j = 0; j < 4; ++j) |
| 171 temp_in[j] = input[j * stride + i] * 16; | 172 temp_in[j] = input[j * stride + i] * 16; |
| 172 if (i == 0 && temp_in[0]) | 173 if (i == 0 && temp_in[0]) |
| 173 temp_in[0] += 1; | 174 temp_in[0] += 1; |
| 174 ht.cols(temp_in, temp_out); | 175 ht.cols(temp_in, temp_out); |
| 175 for (j = 0; j < 4; ++j) | 176 for (j = 0; j < 4; ++j) |
| 176 outptr[j * 4 + i] = temp_out[j]; | 177 outptr[j * 4 + i] = temp_out[j]; |
| 177 } | 178 } |
| 178 | 179 |
| 179 // Rows | 180 // Rows |
| 180 for (i = 0; i < 4; ++i) { | 181 for (i = 0; i < 4; ++i) { |
| 181 for (j = 0; j < 4; ++j) | 182 for (j = 0; j < 4; ++j) |
| 182 temp_in[j] = out[j + i * 4]; | 183 temp_in[j] = out[j + i * 4]; |
| 183 ht.rows(temp_in, temp_out); | 184 ht.rows(temp_in, temp_out); |
| 184 for (j = 0; j < 4; ++j) | 185 for (j = 0; j < 4; ++j) |
| 185 output[j + i * 4] = (temp_out[j] + 1) >> 2; | 186 output[j + i * 4] = (temp_out[j] + 1) >> 2; |
| 187 } |
| 186 } | 188 } |
| 187 } | 189 } |
| 188 | 190 |
| 189 static void fdct8(const int16_t *input, int16_t *output) { | 191 static void fdct8(const int16_t *input, int16_t *output) { |
| 190 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; | 192 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; |
| 191 /*needs32*/ int t0, t1, t2, t3; | 193 /*needs32*/ int t0, t1, t2, t3; |
| 192 /*canbe16*/ int x0, x1, x2, x3; | 194 /*canbe16*/ int x0, x1, x2, x3; |
| 193 | 195 |
| 194 // stage 1 | 196 // stage 1 |
| 195 s0 = input[0] + input[7]; | 197 s0 = input[0] + input[7]; |
| (...skipping 362 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 558 output[7] = - x1; | 560 output[7] = - x1; |
| 559 } | 561 } |
| 560 | 562 |
| 561 static const transform_2d FHT_8[] = { | 563 static const transform_2d FHT_8[] = { |
| 562 { fdct8, fdct8 }, // DCT_DCT = 0 | 564 { fdct8, fdct8 }, // DCT_DCT = 0 |
| 563 { fadst8, fdct8 }, // ADST_DCT = 1 | 565 { fadst8, fdct8 }, // ADST_DCT = 1 |
| 564 { fdct8, fadst8 }, // DCT_ADST = 2 | 566 { fdct8, fadst8 }, // DCT_ADST = 2 |
| 565 { fadst8, fadst8 } // ADST_ADST = 3 | 567 { fadst8, fadst8 } // ADST_ADST = 3 |
| 566 }; | 568 }; |
| 567 | 569 |
| 568 void vp9_short_fht8x8_c(const int16_t *input, int16_t *output, | 570 void vp9_fht8x8_c(const int16_t *input, int16_t *output, |
| 569 int stride, int tx_type) { | 571 int stride, int tx_type) { |
| 570 int16_t out[64]; | 572 if (tx_type == DCT_DCT) { |
| 571 int16_t *outptr = &out[0]; | 573 vp9_fdct8x8_c(input, output, stride); |
| 572 int i, j; | 574 } else { |
| 573 int16_t temp_in[8], temp_out[8]; | 575 int16_t out[64]; |
| 574 const transform_2d ht = FHT_8[tx_type]; | 576 int16_t *outptr = &out[0]; |
| 577 int i, j; |
| 578 int16_t temp_in[8], temp_out[8]; |
| 579 const transform_2d ht = FHT_8[tx_type]; |
| 575 | 580 |
| 576 // Columns | 581 // Columns |
| 577 for (i = 0; i < 8; ++i) { | 582 for (i = 0; i < 8; ++i) { |
| 578 for (j = 0; j < 8; ++j) | 583 for (j = 0; j < 8; ++j) |
| 579 temp_in[j] = input[j * stride + i] * 4; | 584 temp_in[j] = input[j * stride + i] * 4; |
| 580 ht.cols(temp_in, temp_out); | 585 ht.cols(temp_in, temp_out); |
| 581 for (j = 0; j < 8; ++j) | 586 for (j = 0; j < 8; ++j) |
| 582 outptr[j * 8 + i] = temp_out[j]; | 587 outptr[j * 8 + i] = temp_out[j]; |
| 583 } | 588 } |
| 584 | 589 |
| 585 // Rows | 590 // Rows |
| 586 for (i = 0; i < 8; ++i) { | 591 for (i = 0; i < 8; ++i) { |
| 587 for (j = 0; j < 8; ++j) | 592 for (j = 0; j < 8; ++j) |
| 588 temp_in[j] = out[j + i * 8]; | 593 temp_in[j] = out[j + i * 8]; |
| 589 ht.rows(temp_in, temp_out); | 594 ht.rows(temp_in, temp_out); |
| 590 for (j = 0; j < 8; ++j) | 595 for (j = 0; j < 8; ++j) |
| 591 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; | 596 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; |
| 597 } |
| 592 } | 598 } |
| 593 } | 599 } |
| 594 | 600 |
| 595 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per | 601 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per |
| 596 pixel. */ | 602 pixel. */ |
| 597 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) { | 603 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) { |
| 598 int i; | 604 int i; |
| 599 int a1, b1, c1, d1, e1; | 605 int a1, b1, c1, d1, e1; |
| 600 const int16_t *ip = input; | 606 const int16_t *ip = input; |
| 601 int16_t *op = output; | 607 int16_t *op = output; |
| (...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 951 output[15] = - x1; | 957 output[15] = - x1; |
| 952 } | 958 } |
| 953 | 959 |
| 954 static const transform_2d FHT_16[] = { | 960 static const transform_2d FHT_16[] = { |
| 955 { fdct16, fdct16 }, // DCT_DCT = 0 | 961 { fdct16, fdct16 }, // DCT_DCT = 0 |
| 956 { fadst16, fdct16 }, // ADST_DCT = 1 | 962 { fadst16, fdct16 }, // ADST_DCT = 1 |
| 957 { fdct16, fadst16 }, // DCT_ADST = 2 | 963 { fdct16, fadst16 }, // DCT_ADST = 2 |
| 958 { fadst16, fadst16 } // ADST_ADST = 3 | 964 { fadst16, fadst16 } // ADST_ADST = 3 |
| 959 }; | 965 }; |
| 960 | 966 |
| 961 void vp9_short_fht16x16_c(const int16_t *input, int16_t *output, | 967 void vp9_fht16x16_c(const int16_t *input, int16_t *output, |
| 962 int stride, int tx_type) { | 968 int stride, int tx_type) { |
| 963 int16_t out[256]; | 969 if (tx_type == DCT_DCT) { |
| 964 int16_t *outptr = &out[0]; | 970 vp9_fdct16x16_c(input, output, stride); |
| 965 int i, j; | 971 } else { |
| 966 int16_t temp_in[16], temp_out[16]; | 972 int16_t out[256]; |
| 967 const transform_2d ht = FHT_16[tx_type]; | 973 int16_t *outptr = &out[0]; |
| 974 int i, j; |
| 975 int16_t temp_in[16], temp_out[16]; |
| 976 const transform_2d ht = FHT_16[tx_type]; |
| 968 | 977 |
| 969 // Columns | 978 // Columns |
| 970 for (i = 0; i < 16; ++i) { | 979 for (i = 0; i < 16; ++i) { |
| 971 for (j = 0; j < 16; ++j) | 980 for (j = 0; j < 16; ++j) |
| 972 temp_in[j] = input[j * stride + i] * 4; | 981 temp_in[j] = input[j * stride + i] * 4; |
| 973 ht.cols(temp_in, temp_out); | 982 ht.cols(temp_in, temp_out); |
| 974 for (j = 0; j < 16; ++j) | 983 for (j = 0; j < 16; ++j) |
| 975 outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; | 984 outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; |
| 976 // outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; | 985 } |
| 977 } | |
| 978 | 986 |
| 979 // Rows | 987 // Rows |
| 980 for (i = 0; i < 16; ++i) { | 988 for (i = 0; i < 16; ++i) { |
| 981 for (j = 0; j < 16; ++j) | 989 for (j = 0; j < 16; ++j) |
| 982 temp_in[j] = out[j + i * 16]; | 990 temp_in[j] = out[j + i * 16]; |
| 983 ht.rows(temp_in, temp_out); | 991 ht.rows(temp_in, temp_out); |
| 984 for (j = 0; j < 16; ++j) | 992 for (j = 0; j < 16; ++j) |
| 985 output[j + i * 16] = temp_out[j]; | 993 output[j + i * 16] = temp_out[j]; |
| 994 } |
| 986 } | 995 } |
| 987 } | 996 } |
| 988 | 997 |
| 989 static INLINE int dct_32_round(int input) { | 998 static INLINE int dct_32_round(int input) { |
| 990 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); | 999 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
| 991 assert(-131072 <= rv && rv <= 131071); | 1000 assert(-131072 <= rv && rv <= 131071); |
| 992 return rv; | 1001 return rv; |
| 993 } | 1002 } |
| 994 | 1003 |
| 995 static INLINE int half_round_shift(int input) { | 1004 static INLINE int half_round_shift(int input) { |
| (...skipping 372 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1368 // Rows | 1377 // Rows |
| 1369 for (i = 0; i < 32; ++i) { | 1378 for (i = 0; i < 32; ++i) { |
| 1370 int temp_in[32], temp_out[32]; | 1379 int temp_in[32], temp_out[32]; |
| 1371 for (j = 0; j < 32; ++j) | 1380 for (j = 0; j < 32; ++j) |
| 1372 temp_in[j] = output[j + i * 32]; | 1381 temp_in[j] = output[j + i * 32]; |
| 1373 fdct32(temp_in, temp_out, 1); | 1382 fdct32(temp_in, temp_out, 1); |
| 1374 for (j = 0; j < 32; ++j) | 1383 for (j = 0; j < 32; ++j) |
| 1375 out[j + i * 32] = temp_out[j]; | 1384 out[j + i * 32] = temp_out[j]; |
| 1376 } | 1385 } |
| 1377 } | 1386 } |
| 1378 | |
| 1379 void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output, | |
| 1380 int stride) { | |
| 1381 if (tx_type == DCT_DCT) | |
| 1382 vp9_fdct4x4(input, output, stride); | |
| 1383 else | |
| 1384 vp9_short_fht4x4(input, output, stride, tx_type); | |
| 1385 } | |
| 1386 | |
| 1387 void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output, | |
| 1388 int stride) { | |
| 1389 if (tx_type == DCT_DCT) | |
| 1390 vp9_fdct8x8(input, output, stride); | |
| 1391 else | |
| 1392 vp9_short_fht8x8(input, output, stride, tx_type); | |
| 1393 } | |
| 1394 | |
| 1395 void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, | |
| 1396 int stride) { | |
| 1397 if (tx_type == DCT_DCT) | |
| 1398 vp9_fdct16x16(input, output, stride); | |
| 1399 else | |
| 1400 vp9_short_fht16x16(input, output, stride, tx_type); | |
| 1401 } | |
| OLD | NEW |