Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(18)

Side by Side Diff: source/libvpx/vpx_dsp/x86/variance_impl_avx2.c

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <immintrin.h> // AVX2 11 #include <immintrin.h> // AVX2
12 12
13 void vp9_get16x16var_avx2(const unsigned char *src_ptr, 13 #include "./vpx_dsp_rtcd.h"
14
15 void vpx_get16x16var_avx2(const unsigned char *src_ptr,
14 int source_stride, 16 int source_stride,
15 const unsigned char *ref_ptr, 17 const unsigned char *ref_ptr,
16 int recon_stride, 18 int recon_stride,
17 unsigned int *SSE, 19 unsigned int *SSE,
18 int *Sum) { 20 int *Sum) {
19 __m256i src, src_expand_low, src_expand_high, ref, ref_expand_low; 21 __m256i src, src_expand_low, src_expand_high, ref, ref_expand_low;
20 __m256i ref_expand_high, madd_low, madd_high; 22 __m256i ref_expand_high, madd_low, madd_high;
21 unsigned int i, src_2strides, ref_2strides; 23 unsigned int i, src_2strides, ref_2strides;
22 __m256i zero_reg = _mm256_set1_epi16(0); 24 __m256i zero_reg = _mm256_set1_epi16(0);
23 __m256i sum_ref_src = _mm256_set1_epi16(0); 25 __m256i sum_ref_src = _mm256_set1_epi16(0);
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
114 116
115 madd_res = _mm_add_epi32(madd_res, expand_madd); 117 madd_res = _mm_add_epi32(madd_res, expand_madd);
116 sum_res = _mm_add_epi32(sum_res, ex_expand_sum); 118 sum_res = _mm_add_epi32(sum_res, ex_expand_sum);
117 119
118 *((int*)SSE)= _mm_cvtsi128_si32(madd_res); 120 *((int*)SSE)= _mm_cvtsi128_si32(madd_res);
119 121
120 *((int*)Sum)= _mm_cvtsi128_si32(sum_res); 122 *((int*)Sum)= _mm_cvtsi128_si32(sum_res);
121 } 123 }
122 } 124 }
123 125
124 void vp9_get32x32var_avx2(const unsigned char *src_ptr, 126 void vpx_get32x32var_avx2(const unsigned char *src_ptr,
125 int source_stride, 127 int source_stride,
126 const unsigned char *ref_ptr, 128 const unsigned char *ref_ptr,
127 int recon_stride, 129 int recon_stride,
128 unsigned int *SSE, 130 unsigned int *SSE,
129 int *Sum) { 131 int *Sum) {
130 __m256i src, src_expand_low, src_expand_high, ref, ref_expand_low; 132 __m256i src, src_expand_low, src_expand_high, ref, ref_expand_low;
131 __m256i ref_expand_high, madd_low, madd_high; 133 __m256i ref_expand_high, madd_low, madd_high;
132 unsigned int i; 134 unsigned int i;
133 __m256i zero_reg = _mm256_set1_epi16(0); 135 __m256i zero_reg = _mm256_set1_epi16(0);
134 __m256i sum_ref_src = _mm256_set1_epi16(0); 136 __m256i sum_ref_src = _mm256_set1_epi16(0);
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
204 sum_ref_src = _mm256_add_epi32(sum_ref_src, ex_expand_sum); 206 sum_ref_src = _mm256_add_epi32(sum_ref_src, ex_expand_sum);
205 207
206 // extract the low lane and the high lane and add the results 208 // extract the low lane and the high lane and add the results
207 *((int*)SSE)= _mm_cvtsi128_si32(_mm256_castsi256_si128(madd_ref_src)) + 209 *((int*)SSE)= _mm_cvtsi128_si32(_mm256_castsi256_si128(madd_ref_src)) +
208 _mm_cvtsi128_si32(_mm256_extractf128_si256(madd_ref_src, 1)); 210 _mm_cvtsi128_si32(_mm256_extractf128_si256(madd_ref_src, 1));
209 211
210 *((int*)Sum)= _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_ref_src)) + 212 *((int*)Sum)= _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_ref_src)) +
211 _mm_cvtsi128_si32(_mm256_extractf128_si256(sum_ref_src, 1)); 213 _mm_cvtsi128_si32(_mm256_extractf128_si256(sum_ref_src, 1));
212 } 214 }
213 } 215 }
OLD | NEW
« no previous file with comments | « source/libvpx/vpx_dsp/x86/variance_avx2.c ('k') | source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698