Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(99)

Side by Side Diff: source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vp8/common/arm/neon/sad_neon.c ('k') | source/libvpx/vp8/common/common.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 14 matching lines...) Expand all
25 25
26 unsigned int vp8_sub_pixel_variance16x16_neon_func( 26 unsigned int vp8_sub_pixel_variance16x16_neon_func(
27 const unsigned char *src_ptr, 27 const unsigned char *src_ptr,
28 int src_pixels_per_line, 28 int src_pixels_per_line,
29 int xoffset, 29 int xoffset,
30 int yoffset, 30 int yoffset,
31 const unsigned char *dst_ptr, 31 const unsigned char *dst_ptr,
32 int dst_pixels_per_line, 32 int dst_pixels_per_line,
33 unsigned int *sse) { 33 unsigned int *sse) {
34 int i; 34 int i;
35 DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 528); 35 DECLARE_ALIGNED(16, unsigned char, tmp[528]);
36 unsigned char *tmpp; 36 unsigned char *tmpp;
37 unsigned char *tmpp2; 37 unsigned char *tmpp2;
38 uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; 38 uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
39 uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8; 39 uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8;
40 uint8x8_t d19u8, d20u8, d21u8; 40 uint8x8_t d19u8, d20u8, d21u8;
41 int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; 41 int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
42 uint32x2_t d0u32, d10u32; 42 uint32x2_t d0u32, d10u32;
43 int64x1_t d0s64, d1s64, d2s64, d3s64; 43 int64x1_t d0s64, d1s64, d2s64, d3s64;
44 uint8x16_t q0u8, q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8; 44 uint8x16_t q0u8, q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8;
45 uint8x16_t q10u8, q11u8, q12u8, q13u8, q14u8, q15u8; 45 uint8x16_t q10u8, q11u8, q12u8, q13u8, q14u8, q15u8;
(...skipping 858 matching lines...) Expand 10 before | Expand all | Expand 10 after
904 q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), 904 q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
905 vreinterpret_s32_s64(d0s64)); 905 vreinterpret_s32_s64(d0s64));
906 vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); 906 vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
907 907
908 d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8); 908 d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8);
909 d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); 909 d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
910 910
911 return vget_lane_u32(d0u32, 0); 911 return vget_lane_u32(d0u32, 0);
912 } 912 }
913 913
914 enum { kWidth8 = 8 };
915 enum { kHeight8 = 8 };
916 enum { kHeight8PlusOne = 9 };
917 enum { kPixelStepOne = 1 };
918 enum { kAlign16 = 16 };
919
920 #define FILTER_BITS 7 914 #define FILTER_BITS 7
921 915
922 static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) { 916 static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
923 const int32x4_t a = vpaddlq_s16(v_16x8); 917 const int32x4_t a = vpaddlq_s16(v_16x8);
924 const int64x2_t b = vpaddlq_s32(a); 918 const int64x2_t b = vpaddlq_s32(a);
925 const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), 919 const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
926 vreinterpret_s32_s64(vget_high_s64(b))); 920 vreinterpret_s32_s64(vget_high_s64(b)));
927 return vget_lane_s32(c, 0); 921 return vget_lane_s32(c, 0);
928 } 922 }
929 923
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
961 } 955 }
962 956
963 *sum = horizontal_add_s16x8(v_sum); 957 *sum = horizontal_add_s16x8(v_sum);
964 *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi)); 958 *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi));
965 } 959 }
966 960
967 static unsigned int variance8x8_neon(const uint8_t *a, int a_stride, 961 static unsigned int variance8x8_neon(const uint8_t *a, int a_stride,
968 const uint8_t *b, int b_stride, 962 const uint8_t *b, int b_stride,
969 unsigned int *sse) { 963 unsigned int *sse) {
970 int sum; 964 int sum;
971 variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum); 965 variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum);
972 return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8)); 966 return *sse - (((int64_t)sum * sum) / (8 * 8));
973 } 967 }
974 968
975 static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, 969 static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
976 uint8_t *output_ptr, 970 uint8_t *output_ptr,
977 unsigned int src_pixels_per_line, 971 unsigned int src_pixels_per_line,
978 int pixel_step, 972 int pixel_step,
979 unsigned int output_height, 973 unsigned int output_height,
980 unsigned int output_width, 974 unsigned int output_width,
981 const uint16_t *vpx_filter) { 975 const uint16_t *vpx_filter) {
982 const uint8x8_t f0 = vmov_n_u8((uint8_t)vpx_filter[0]); 976 const uint8x8_t f0 = vmov_n_u8((uint8_t)vpx_filter[0]);
(...skipping 13 matching lines...) Expand all
996 } 990 }
997 991
998 unsigned int vp8_sub_pixel_variance8x8_neon( 992 unsigned int vp8_sub_pixel_variance8x8_neon(
999 const unsigned char *src, 993 const unsigned char *src,
1000 int src_stride, 994 int src_stride,
1001 int xoffset, 995 int xoffset,
1002 int yoffset, 996 int yoffset,
1003 const unsigned char *dst, 997 const unsigned char *dst,
1004 int dst_stride, 998 int dst_stride,
1005 unsigned int *sse) { 999 unsigned int *sse) {
1006 DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8PlusOne * kWidth8); 1000 DECLARE_ALIGNED(16, uint8_t, temp2[9 * 8]);
1007 DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8); 1001 DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]);
1008 if (xoffset == 0) { 1002 if (xoffset == 0) {
1009 var_filter_block2d_bil_w8(src, temp2, src_stride, kWidth8, kHeight8, 1003 var_filter_block2d_bil_w8(src, temp2, src_stride, 8, 8,
1010 kWidth8, bilinear_taps_coeff[yoffset]); 1004 8, bilinear_taps_coeff[yoffset]);
1011 } else if (yoffset == 0) { 1005 } else if (yoffset == 0) {
1012 var_filter_block2d_bil_w8(src, temp2, src_stride, kPixelStepOne, 1006 var_filter_block2d_bil_w8(src, temp2, src_stride, 1,
1013 kHeight8PlusOne, kWidth8, 1007 9, 8,
1014 bilinear_taps_coeff[xoffset]); 1008 bilinear_taps_coeff[xoffset]);
1015 } else { 1009 } else {
1016 var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne, 1010 var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
1017 kHeight8PlusOne, kWidth8, 1011 9, 8,
1018 bilinear_taps_coeff[xoffset]); 1012 bilinear_taps_coeff[xoffset]);
1019 var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8, 1013 var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
1020 kWidth8, bilinear_taps_coeff[yoffset]); 1014 8, bilinear_taps_coeff[yoffset]);
1021 } 1015 }
1022 return variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse); 1016 return variance8x8_neon(temp2, 8, dst, dst_stride, sse);
1023 } 1017 }
OLDNEW
« no previous file with comments | « source/libvpx/vp8/common/arm/neon/sad_neon.c ('k') | source/libvpx/vp8/common/common.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698