Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(461)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c

Issue 756673003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <emmintrin.h> // SSE2 11 #include <emmintrin.h> // SSE2
12 #include "vp9/common/vp9_idct.h" // for cospi constants 12 #include "vp9/common/vp9_idct.h" // for cospi constants
13 #include "vpx_ports/mem.h" 13 #include "vpx_ports/mem.h"
14 14
15 #define pair_set_epi32(a, b) \ 15 #define pair_set_epi32(a, b) \
16 _mm_set_epi32(b, a, b, a) 16 _mm_set_epi32((int)(b), (int)(a), (int)(b), (int)(a))
17 17
18 #if FDCT32x32_HIGH_PRECISION 18 #if FDCT32x32_HIGH_PRECISION
19 static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) { 19 static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) {
20 __m128i buf0, buf1; 20 __m128i buf0, buf1;
21 buf0 = _mm_mul_epu32(a, b); 21 buf0 = _mm_mul_epu32(a, b);
22 a = _mm_srli_epi64(a, 32); 22 a = _mm_srli_epi64(a, 32);
23 b = _mm_srli_epi64(b, 32); 23 b = _mm_srli_epi64(b, 32);
24 buf1 = _mm_mul_epu32(a, b); 24 buf1 = _mm_mul_epu32(a, b);
25 return _mm_add_epi64(buf0, buf1); 25 return _mm_add_epi64(buf0, buf1);
26 } 26 }
(...skipping 10 matching lines...) Expand all
37 // Calculate pre-multiplied strides 37 // Calculate pre-multiplied strides
38 const int str1 = stride; 38 const int str1 = stride;
39 const int str2 = 2 * stride; 39 const int str2 = 2 * stride;
40 const int str3 = 2 * stride + str1; 40 const int str3 = 2 * stride + str1;
41 // We need an intermediate buffer between passes. 41 // We need an intermediate buffer between passes.
42 DECLARE_ALIGNED(16, int16_t, intermediate[32 * 32]); 42 DECLARE_ALIGNED(16, int16_t, intermediate[32 * 32]);
43 // Constants 43 // Constants
44 // When we use them, in one case, they are all the same. In all others 44 // When we use them, in one case, they are all the same. In all others
45 // it's a pair of them that we need to repeat four times. This is done 45 // it's a pair of them that we need to repeat four times. This is done
46 // by constructing the 32 bit constant corresponding to that pair. 46 // by constructing the 32 bit constant corresponding to that pair.
47 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(+cospi_16_64); 47 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
48 const __m128i k__cospi_p16_m16 = pair_set_epi16(+cospi_16_64, -cospi_16_64); 48 const __m128i k__cospi_p16_m16 = pair_set_epi16(+cospi_16_64, -cospi_16_64);
49 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); 49 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
50 const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); 50 const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
51 const __m128i k__cospi_p24_p08 = pair_set_epi16(+cospi_24_64, cospi_8_64); 51 const __m128i k__cospi_p24_p08 = pair_set_epi16(+cospi_24_64, cospi_8_64);
52 const __m128i k__cospi_p12_p20 = pair_set_epi16(+cospi_12_64, cospi_20_64); 52 const __m128i k__cospi_p12_p20 = pair_set_epi16(+cospi_12_64, cospi_20_64);
53 const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); 53 const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
54 const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); 54 const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
55 const __m128i k__cospi_p28_p04 = pair_set_epi16(+cospi_28_64, cospi_4_64); 55 const __m128i k__cospi_p28_p04 = pair_set_epi16(+cospi_28_64, cospi_4_64);
56 const __m128i k__cospi_m28_m04 = pair_set_epi16(-cospi_28_64, -cospi_4_64); 56 const __m128i k__cospi_m28_m04 = pair_set_epi16(-cospi_28_64, -cospi_4_64);
57 const __m128i k__cospi_m12_m20 = pair_set_epi16(-cospi_12_64, -cospi_20_64); 57 const __m128i k__cospi_m12_m20 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
(...skipping 2624 matching lines...) Expand 10 before | Expand all | Expand 10 after
2682 _mm_storeu_si128((__m128i *)(output + 5 * 32), tr2_5); 2682 _mm_storeu_si128((__m128i *)(output + 5 * 32), tr2_5);
2683 _mm_storeu_si128((__m128i *)(output + 6 * 32), tr2_6); 2683 _mm_storeu_si128((__m128i *)(output + 6 * 32), tr2_6);
2684 _mm_storeu_si128((__m128i *)(output + 7 * 32), tr2_7); 2684 _mm_storeu_si128((__m128i *)(output + 7 * 32), tr2_7);
2685 // Process next 8x8 2685 // Process next 8x8
2686 output += 8; 2686 output += 8;
2687 } 2687 }
2688 } 2688 }
2689 } 2689 }
2690 } 2690 }
2691 } // NOLINT 2691 } // NOLINT
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698