| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 560 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 571 | 571 |
| 572 void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { | 572 void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { |
| 573 // The 2D transform is done with two passes which are actually pretty | 573 // The 2D transform is done with two passes which are actually pretty |
| 574 // similar. In the first one, we transform the columns and transpose | 574 // similar. In the first one, we transform the columns and transpose |
| 575 // the results. In the second one, we transform the rows. To achieve that, | 575 // the results. In the second one, we transform the rows. To achieve that, |
| 576 // as the first pass results are transposed, we transform the columns (that | 576 // as the first pass results are transposed, we transform the columns (that |
| 577 // is the transposed rows) and transpose the results (so that it goes back | 577 // is the transposed rows) and transpose the results (so that it goes back |
| 578 // in normal/row positions). | 578 // in normal/row positions). |
| 579 int pass; | 579 int pass; |
| 580 // We need an intermediate buffer between passes. | 580 // We need an intermediate buffer between passes. |
| 581 DECLARE_ALIGNED_ARRAY(16, int16_t, intermediate, 256); | 581 DECLARE_ALIGNED(16, int16_t, intermediate[256]); |
| 582 const int16_t *in = input; | 582 const int16_t *in = input; |
| 583 int16_t *out0 = intermediate; | 583 int16_t *out0 = intermediate; |
| 584 tran_low_t *out1 = output; | 584 tran_low_t *out1 = output; |
| 585 // Constants | 585 // Constants |
| 586 // When we use them, in one case, they are all the same. In all others | 586 // When we use them, in one case, they are all the same. In all others |
| 587 // it's a pair of them that we need to repeat four times. This is done | 587 // it's a pair of them that we need to repeat four times. This is done |
| 588 // by constructing the 32 bit constant corresponding to that pair. | 588 // by constructing the 32 bit constant corresponding to that pair. |
| 589 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); | 589 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); |
| 590 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); | 590 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); |
| 591 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); | 591 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); |
| (...skipping 421 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1013 out1 += 8*16; | 1013 out1 += 8*16; |
| 1014 } | 1014 } |
| 1015 } | 1015 } |
| 1016 // Setup in/out for next pass. | 1016 // Setup in/out for next pass. |
| 1017 in = intermediate; | 1017 in = intermediate; |
| 1018 } | 1018 } |
| 1019 } | 1019 } |
| 1020 | 1020 |
| 1021 #undef ADD_EPI16 | 1021 #undef ADD_EPI16 |
| 1022 #undef SUB_EPI16 | 1022 #undef SUB_EPI16 |
| OLD | NEW |