source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c - Issue 1124333011: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: only update to last night's LKGR Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c » ('j') | no next file with comments »

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 560 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
571	571

572 void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {	572 void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {

573 // The 2D transform is done with two passes which are actually pretty	573 // The 2D transform is done with two passes which are actually pretty

574 // similar. In the first one, we transform the columns and transpose	574 // similar. In the first one, we transform the columns and transpose

575 // the results. In the second one, we transform the rows. To achieve that,	575 // the results. In the second one, we transform the rows. To achieve that,

576 // as the first pass results are transposed, we transpose the columns (that	576 // as the first pass results are transposed, we transpose the columns (that

577 // is the transposed rows) and transpose the results (so that it goes back	577 // is the transposed rows) and transpose the results (so that it goes back

578 // in normal/row positions).	578 // in normal/row positions).

579 int pass;	579 int pass;

580 // We need an intermediate buffer between passes.	580 // We need an intermediate buffer between passes.

581 DECLARE_ALIGNED_ARRAY(16, int16_t, intermediate, 256);	581 DECLARE_ALIGNED(16, int16_t, intermediate[256]);

582 const int16_t *in = input;	582 const int16_t *in = input;

583 int16_t *out0 = intermediate;	583 int16_t *out0 = intermediate;

584 tran_low_t *out1 = output;	584 tran_low_t *out1 = output;

585 // Constants	585 // Constants

586 // When we use them, in one case, they are all the same. In all others	586 // When we use them, in one case, they are all the same. In all others

587 // it's a pair of them that we need to repeat four times. This is done	587 // it's a pair of them that we need to repeat four times. This is done

588 // by constructing the 32 bit constant corresponding to that pair.	588 // by constructing the 32 bit constant corresponding to that pair.

589 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);	589 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);

590 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);	590 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);

591 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);	591 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);

(...skipping 421 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1013 out1 += 8*16;	1013 out1 += 8*16;

1014 }	1014 }

1015 }	1015 }

1016 // Setup in/out for next pass.	1016 // Setup in/out for next pass.

1017 in = intermediate;	1017 in = intermediate;

1018 }	1018 }

1019 }	1019 }

1020	1020

1021 #undef ADD_EPI16	1021 #undef ADD_EPI16

1022 #undef SUB_EPI16	1022 #undef SUB_EPI16

OLD	NEW