OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <assert.h> | 11 #include <assert.h> |
12 #include <math.h> | 12 #include <math.h> |
13 | 13 |
14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
16 | 16 |
17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
18 #include "vp9/common/vp9_idct.h" | 18 #include "vp9/common/vp9_idct.h" |
19 #include "vp9/common/vp9_systemdependent.h" | 19 #include "vp9/common/vp9_systemdependent.h" |
20 | 20 |
21 #include "vp9/encoder/vp9_dct.h" | |
22 | |
23 static INLINE int fdct_round_shift(int input) { | 21 static INLINE int fdct_round_shift(int input) { |
24 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); | 22 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
25 assert(INT16_MIN <= rv && rv <= INT16_MAX); | 23 assert(INT16_MIN <= rv && rv <= INT16_MAX); |
26 return rv; | 24 return rv; |
27 } | 25 } |
28 | 26 |
29 static void fdct4(const int16_t *input, int16_t *output) { | 27 static void fdct4(const int16_t *input, int16_t *output) { |
30 int16_t step[4]; | 28 int16_t step[4]; |
31 int temp1, temp2; | 29 int temp1, temp2; |
32 | 30 |
(...skipping 117 matching lines...)
150 output[3] = fdct_round_shift(s3); | 148 output[3] = fdct_round_shift(s3); |
151 } | 149 } |
152 | 150 |
153 static const transform_2d FHT_4[] = { | 151 static const transform_2d FHT_4[] = { |
154 { fdct4, fdct4 }, // DCT_DCT = 0 | 152 { fdct4, fdct4 }, // DCT_DCT = 0 |
155 { fadst4, fdct4 }, // ADST_DCT = 1 | 153 { fadst4, fdct4 }, // ADST_DCT = 1 |
156 { fdct4, fadst4 }, // DCT_ADST = 2 | 154 { fdct4, fadst4 }, // DCT_ADST = 2 |
157 { fadst4, fadst4 } // ADST_ADST = 3 | 155 { fadst4, fadst4 } // ADST_ADST = 3 |
158 }; | 156 }; |
159 | 157 |
160 void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, | 158 void vp9_fht4x4_c(const int16_t *input, int16_t *output, |
161 int stride, int tx_type) { | 159 int stride, int tx_type) { |
162 int16_t out[4 * 4]; | 160 if (tx_type == DCT_DCT) { |
163 int16_t *outptr = &out[0]; | 161 vp9_fdct4x4_c(input, output, stride); |
164 int i, j; | 162 } else { |
165 int16_t temp_in[4], temp_out[4]; | 163 int16_t out[4 * 4]; |
166 const transform_2d ht = FHT_4[tx_type]; | 164 int16_t *outptr = &out[0]; |
| 165 int i, j; |
| 166 int16_t temp_in[4], temp_out[4]; |
| 167 const transform_2d ht = FHT_4[tx_type]; |
167 | 168 |
168 // Columns | 169 // Columns |
169 for (i = 0; i < 4; ++i) { | 170 for (i = 0; i < 4; ++i) { |
170 for (j = 0; j < 4; ++j) | 171 for (j = 0; j < 4; ++j) |
171 temp_in[j] = input[j * stride + i] * 16; | 172 temp_in[j] = input[j * stride + i] * 16; |
172 if (i == 0 && temp_in[0]) | 173 if (i == 0 && temp_in[0]) |
173 temp_in[0] += 1; | 174 temp_in[0] += 1; |
174 ht.cols(temp_in, temp_out); | 175 ht.cols(temp_in, temp_out); |
175 for (j = 0; j < 4; ++j) | 176 for (j = 0; j < 4; ++j) |
176 outptr[j * 4 + i] = temp_out[j]; | 177 outptr[j * 4 + i] = temp_out[j]; |
177 } | 178 } |
178 | 179 |
179 // Rows | 180 // Rows |
180 for (i = 0; i < 4; ++i) { | 181 for (i = 0; i < 4; ++i) { |
181 for (j = 0; j < 4; ++j) | 182 for (j = 0; j < 4; ++j) |
182 temp_in[j] = out[j + i * 4]; | 183 temp_in[j] = out[j + i * 4]; |
183 ht.rows(temp_in, temp_out); | 184 ht.rows(temp_in, temp_out); |
184 for (j = 0; j < 4; ++j) | 185 for (j = 0; j < 4; ++j) |
185 output[j + i * 4] = (temp_out[j] + 1) >> 2; | 186 output[j + i * 4] = (temp_out[j] + 1) >> 2; |
| 187 } |
186 } | 188 } |
187 } | 189 } |
188 | 190 |
189 static void fdct8(const int16_t *input, int16_t *output) { | 191 static void fdct8(const int16_t *input, int16_t *output) { |
190 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; | 192 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; |
191 /*needs32*/ int t0, t1, t2, t3; | 193 /*needs32*/ int t0, t1, t2, t3; |
192 /*canbe16*/ int x0, x1, x2, x3; | 194 /*canbe16*/ int x0, x1, x2, x3; |
193 | 195 |
194 // stage 1 | 196 // stage 1 |
195 s0 = input[0] + input[7]; | 197 s0 = input[0] + input[7]; |
(...skipping 362 matching lines...)
558 output[7] = - x1; | 560 output[7] = - x1; |
559 } | 561 } |
560 | 562 |
561 static const transform_2d FHT_8[] = { | 563 static const transform_2d FHT_8[] = { |
562 { fdct8, fdct8 }, // DCT_DCT = 0 | 564 { fdct8, fdct8 }, // DCT_DCT = 0 |
563 { fadst8, fdct8 }, // ADST_DCT = 1 | 565 { fadst8, fdct8 }, // ADST_DCT = 1 |
564 { fdct8, fadst8 }, // DCT_ADST = 2 | 566 { fdct8, fadst8 }, // DCT_ADST = 2 |
565 { fadst8, fadst8 } // ADST_ADST = 3 | 567 { fadst8, fadst8 } // ADST_ADST = 3 |
566 }; | 568 }; |
567 | 569 |
568 void vp9_short_fht8x8_c(const int16_t *input, int16_t *output, | 570 void vp9_fht8x8_c(const int16_t *input, int16_t *output, |
569 int stride, int tx_type) { | 571 int stride, int tx_type) { |
570 int16_t out[64]; | 572 if (tx_type == DCT_DCT) { |
571 int16_t *outptr = &out[0]; | 573 vp9_fdct8x8_c(input, output, stride); |
572 int i, j; | 574 } else { |
573 int16_t temp_in[8], temp_out[8]; | 575 int16_t out[64]; |
574 const transform_2d ht = FHT_8[tx_type]; | 576 int16_t *outptr = &out[0]; |
| 577 int i, j; |
| 578 int16_t temp_in[8], temp_out[8]; |
| 579 const transform_2d ht = FHT_8[tx_type]; |
575 | 580 |
576 // Columns | 581 // Columns |
577 for (i = 0; i < 8; ++i) { | 582 for (i = 0; i < 8; ++i) { |
578 for (j = 0; j < 8; ++j) | 583 for (j = 0; j < 8; ++j) |
579 temp_in[j] = input[j * stride + i] * 4; | 584 temp_in[j] = input[j * stride + i] * 4; |
580 ht.cols(temp_in, temp_out); | 585 ht.cols(temp_in, temp_out); |
581 for (j = 0; j < 8; ++j) | 586 for (j = 0; j < 8; ++j) |
582 outptr[j * 8 + i] = temp_out[j]; | 587 outptr[j * 8 + i] = temp_out[j]; |
583 } | 588 } |
584 | 589 |
585 // Rows | 590 // Rows |
586 for (i = 0; i < 8; ++i) { | 591 for (i = 0; i < 8; ++i) { |
587 for (j = 0; j < 8; ++j) | 592 for (j = 0; j < 8; ++j) |
588 temp_in[j] = out[j + i * 8]; | 593 temp_in[j] = out[j + i * 8]; |
589 ht.rows(temp_in, temp_out); | 594 ht.rows(temp_in, temp_out); |
590 for (j = 0; j < 8; ++j) | 595 for (j = 0; j < 8; ++j) |
591 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; | 596 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; |
| 597 } |
592 } | 598 } |
593 } | 599 } |
594 | 600 |
595 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per | 601 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per |
596 pixel. */ | 602 pixel. */ |
597 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) { | 603 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) { |
598 int i; | 604 int i; |
599 int a1, b1, c1, d1, e1; | 605 int a1, b1, c1, d1, e1; |
600 const int16_t *ip = input; | 606 const int16_t *ip = input; |
601 int16_t *op = output; | 607 int16_t *op = output; |
(...skipping 349 matching lines...)
951 output[15] = - x1; | 957 output[15] = - x1; |
952 } | 958 } |
953 | 959 |
954 static const transform_2d FHT_16[] = { | 960 static const transform_2d FHT_16[] = { |
955 { fdct16, fdct16 }, // DCT_DCT = 0 | 961 { fdct16, fdct16 }, // DCT_DCT = 0 |
956 { fadst16, fdct16 }, // ADST_DCT = 1 | 962 { fadst16, fdct16 }, // ADST_DCT = 1 |
957 { fdct16, fadst16 }, // DCT_ADST = 2 | 963 { fdct16, fadst16 }, // DCT_ADST = 2 |
958 { fadst16, fadst16 } // ADST_ADST = 3 | 964 { fadst16, fadst16 } // ADST_ADST = 3 |
959 }; | 965 }; |
960 | 966 |
961 void vp9_short_fht16x16_c(const int16_t *input, int16_t *output, | 967 void vp9_fht16x16_c(const int16_t *input, int16_t *output, |
962 int stride, int tx_type) { | 968 int stride, int tx_type) { |
963 int16_t out[256]; | 969 if (tx_type == DCT_DCT) { |
964 int16_t *outptr = &out[0]; | 970 vp9_fdct16x16_c(input, output, stride); |
965 int i, j; | 971 } else { |
966 int16_t temp_in[16], temp_out[16]; | 972 int16_t out[256]; |
967 const transform_2d ht = FHT_16[tx_type]; | 973 int16_t *outptr = &out[0]; |
| 974 int i, j; |
| 975 int16_t temp_in[16], temp_out[16]; |
| 976 const transform_2d ht = FHT_16[tx_type]; |
968 | 977 |
969 // Columns | 978 // Columns |
970 for (i = 0; i < 16; ++i) { | 979 for (i = 0; i < 16; ++i) { |
971 for (j = 0; j < 16; ++j) | 980 for (j = 0; j < 16; ++j) |
972 temp_in[j] = input[j * stride + i] * 4; | 981 temp_in[j] = input[j * stride + i] * 4; |
973 ht.cols(temp_in, temp_out); | 982 ht.cols(temp_in, temp_out); |
974 for (j = 0; j < 16; ++j) | 983 for (j = 0; j < 16; ++j) |
975 outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; | 984 outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; |
976 // outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; | 985 } |
977 } | |
978 | 986 |
979 // Rows | 987 // Rows |
980 for (i = 0; i < 16; ++i) { | 988 for (i = 0; i < 16; ++i) { |
981 for (j = 0; j < 16; ++j) | 989 for (j = 0; j < 16; ++j) |
982 temp_in[j] = out[j + i * 16]; | 990 temp_in[j] = out[j + i * 16]; |
983 ht.rows(temp_in, temp_out); | 991 ht.rows(temp_in, temp_out); |
984 for (j = 0; j < 16; ++j) | 992 for (j = 0; j < 16; ++j) |
985 output[j + i * 16] = temp_out[j]; | 993 output[j + i * 16] = temp_out[j]; |
| 994 } |
986 } | 995 } |
987 } | 996 } |
988 | 997 |
989 static INLINE int dct_32_round(int input) { | 998 static INLINE int dct_32_round(int input) { |
990 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); | 999 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
991 assert(-131072 <= rv && rv <= 131071); | 1000 assert(-131072 <= rv && rv <= 131071); |
992 return rv; | 1001 return rv; |
993 } | 1002 } |
994 | 1003 |
995 static INLINE int half_round_shift(int input) { | 1004 static INLINE int half_round_shift(int input) { |
(...skipping 372 matching lines...)
1368 // Rows | 1377 // Rows |
1369 for (i = 0; i < 32; ++i) { | 1378 for (i = 0; i < 32; ++i) { |
1370 int temp_in[32], temp_out[32]; | 1379 int temp_in[32], temp_out[32]; |
1371 for (j = 0; j < 32; ++j) | 1380 for (j = 0; j < 32; ++j) |
1372 temp_in[j] = output[j + i * 32]; | 1381 temp_in[j] = output[j + i * 32]; |
1373 fdct32(temp_in, temp_out, 1); | 1382 fdct32(temp_in, temp_out, 1); |
1374 for (j = 0; j < 32; ++j) | 1383 for (j = 0; j < 32; ++j) |
1375 out[j + i * 32] = temp_out[j]; | 1384 out[j + i * 32] = temp_out[j]; |
1376 } | 1385 } |
1377 } | 1386 } |
1378 | |
1379 void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output, | |
1380 int stride) { | |
1381 if (tx_type == DCT_DCT) | |
1382 vp9_fdct4x4(input, output, stride); | |
1383 else | |
1384 vp9_short_fht4x4(input, output, stride, tx_type); | |
1385 } | |
1386 | |
1387 void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output, | |
1388 int stride) { | |
1389 if (tx_type == DCT_DCT) | |
1390 vp9_fdct8x8(input, output, stride); | |
1391 else | |
1392 vp9_short_fht8x8(input, output, stride, tx_type); | |
1393 } | |
1394 | |
1395 void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, | |
1396 int stride) { | |
1397 if (tx_type == DCT_DCT) | |
1398 vp9_fdct16x16(input, output, stride); | |
1399 else | |
1400 vp9_short_fht16x16(input, output, stride, tx_type); | |
1401 } | |
OLD | NEW |