Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(502)

Side by Side Diff: source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c

Issue 1169543007: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vp9/decoder/vp9_detokenize.c ('k') | source/libvpx/vp9/encoder/vp9_aq_variance.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <arm_neon.h> 11 #include <arm_neon.h>
12 #include "./vp9_rtcd.h" 12 #include "./vp9_rtcd.h"
13 #include "./vpx_dsp_rtcd.h" 13 #include "./vpx_dsp_rtcd.h"
14 #include "./vpx_config.h" 14 #include "./vpx_config.h"
15 15
16 #include "vpx_ports/mem.h" 16 #include "vpx_ports/mem.h"
17 #include "vpx/vpx_integer.h" 17 #include "vpx/vpx_integer.h"
18 18
19 #include "vp9/common/vp9_common.h"
20 #include "vp9/common/vp9_filter.h" 19 #include "vp9/common/vp9_filter.h"
21 20
22 #include "vp9/encoder/vp9_variance.h" 21 static uint8_t bilinear_filters[8][2] = {
22 { 128, 0, },
23 { 112, 16, },
24 { 96, 32, },
25 { 80, 48, },
26 { 64, 64, },
27 { 48, 80, },
28 { 32, 96, },
29 { 16, 112, },
30 };
23 31
24 static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, 32 static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
25 uint8_t *output_ptr, 33 uint8_t *output_ptr,
26 unsigned int src_pixels_per_line, 34 unsigned int src_pixels_per_line,
27 int pixel_step, 35 int pixel_step,
28 unsigned int output_height, 36 unsigned int output_height,
29 unsigned int output_width, 37 unsigned int output_width,
30 const int16_t *vp9_filter) { 38 const uint8_t *vp9_filter) {
31 const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); 39 const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
32 const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); 40 const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
33 unsigned int i; 41 unsigned int i;
34 for (i = 0; i < output_height; ++i) { 42 for (i = 0; i < output_height; ++i) {
35 const uint8x8_t src_0 = vld1_u8(&src_ptr[0]); 43 const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
36 const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]); 44 const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]);
37 const uint16x8_t a = vmull_u8(src_0, f0); 45 const uint16x8_t a = vmull_u8(src_0, f0);
38 const uint16x8_t b = vmlal_u8(a, src_1, f1); 46 const uint16x8_t b = vmlal_u8(a, src_1, f1);
39 const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS); 47 const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS);
40 vst1_u8(&output_ptr[0], out); 48 vst1_u8(&output_ptr[0], out);
41 // Next row... 49 // Next row...
42 src_ptr += src_pixels_per_line; 50 src_ptr += src_pixels_per_line;
43 output_ptr += output_width; 51 output_ptr += output_width;
44 } 52 }
45 } 53 }
46 54
47 static void var_filter_block2d_bil_w16(const uint8_t *src_ptr, 55 static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
48 uint8_t *output_ptr, 56 uint8_t *output_ptr,
49 unsigned int src_pixels_per_line, 57 unsigned int src_pixels_per_line,
50 int pixel_step, 58 int pixel_step,
51 unsigned int output_height, 59 unsigned int output_height,
52 unsigned int output_width, 60 unsigned int output_width,
53 const int16_t *vp9_filter) { 61 const uint8_t *vp9_filter) {
54 const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]); 62 const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
55 const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]); 63 const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
56 unsigned int i, j; 64 unsigned int i, j;
57 for (i = 0; i < output_height; ++i) { 65 for (i = 0; i < output_height; ++i) {
58 for (j = 0; j < output_width; j += 16) { 66 for (j = 0; j < output_width; j += 16) {
59 const uint8x16_t src_0 = vld1q_u8(&src_ptr[j]); 67 const uint8x16_t src_0 = vld1q_u8(&src_ptr[j]);
60 const uint8x16_t src_1 = vld1q_u8(&src_ptr[j + pixel_step]); 68 const uint8x16_t src_1 = vld1q_u8(&src_ptr[j + pixel_step]);
61 const uint16x8_t a = vmull_u8(vget_low_u8(src_0), f0); 69 const uint16x8_t a = vmull_u8(vget_low_u8(src_0), f0);
62 const uint16x8_t b = vmlal_u8(a, vget_low_u8(src_1), f1); 70 const uint16x8_t b = vmlal_u8(a, vget_low_u8(src_1), f1);
63 const uint8x8_t out_lo = vrshrn_n_u16(b, FILTER_BITS); 71 const uint8x8_t out_lo = vrshrn_n_u16(b, FILTER_BITS);
64 const uint16x8_t c = vmull_u8(vget_high_u8(src_0), f0); 72 const uint16x8_t c = vmull_u8(vget_high_u8(src_0), f0);
65 const uint16x8_t d = vmlal_u8(c, vget_high_u8(src_1), f1); 73 const uint16x8_t d = vmlal_u8(c, vget_high_u8(src_1), f1);
(...skipping 11 matching lines...) Expand all
77 int xoffset, 85 int xoffset,
78 int yoffset, 86 int yoffset,
79 const uint8_t *dst, 87 const uint8_t *dst,
80 int dst_stride, 88 int dst_stride,
81 unsigned int *sse) { 89 unsigned int *sse) {
82 DECLARE_ALIGNED(16, uint8_t, temp2[8 * 8]); 90 DECLARE_ALIGNED(16, uint8_t, temp2[8 * 8]);
83 DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]); 91 DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]);
84 92
85 var_filter_block2d_bil_w8(src, fdata3, src_stride, 1, 93 var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
86 9, 8, 94 9, 8,
87 BILINEAR_FILTERS_2TAP(xoffset)); 95 bilinear_filters[xoffset]);
88 var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8, 96 var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
89 8, BILINEAR_FILTERS_2TAP(yoffset)); 97 8, bilinear_filters[yoffset]);
90 return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse); 98 return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
91 } 99 }
92 100
93 unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src, 101 unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
94 int src_stride, 102 int src_stride,
95 int xoffset, 103 int xoffset,
96 int yoffset, 104 int yoffset,
97 const uint8_t *dst, 105 const uint8_t *dst,
98 int dst_stride, 106 int dst_stride,
99 unsigned int *sse) { 107 unsigned int *sse) {
100 DECLARE_ALIGNED(16, uint8_t, temp2[16 * 16]); 108 DECLARE_ALIGNED(16, uint8_t, temp2[16 * 16]);
101 DECLARE_ALIGNED(16, uint8_t, fdata3[17 * 16]); 109 DECLARE_ALIGNED(16, uint8_t, fdata3[17 * 16]);
102 110
103 var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 111 var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
104 17, 16, 112 17, 16,
105 BILINEAR_FILTERS_2TAP(xoffset)); 113 bilinear_filters[xoffset]);
106 var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16, 114 var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
107 16, BILINEAR_FILTERS_2TAP(yoffset)); 115 16, bilinear_filters[yoffset]);
108 return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse); 116 return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
109 } 117 }
110 118
111 unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src, 119 unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
112 int src_stride, 120 int src_stride,
113 int xoffset, 121 int xoffset,
114 int yoffset, 122 int yoffset,
115 const uint8_t *dst, 123 const uint8_t *dst,
116 int dst_stride, 124 int dst_stride,
117 unsigned int *sse) { 125 unsigned int *sse) {
118 DECLARE_ALIGNED(16, uint8_t, temp2[32 * 32]); 126 DECLARE_ALIGNED(16, uint8_t, temp2[32 * 32]);
119 DECLARE_ALIGNED(16, uint8_t, fdata3[33 * 32]); 127 DECLARE_ALIGNED(16, uint8_t, fdata3[33 * 32]);
120 128
121 var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 129 var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
122 33, 32, 130 33, 32,
123 BILINEAR_FILTERS_2TAP(xoffset)); 131 bilinear_filters[xoffset]);
124 var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32, 132 var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
125 32, BILINEAR_FILTERS_2TAP(yoffset)); 133 32, bilinear_filters[yoffset]);
126 return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse); 134 return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
127 } 135 }
128 136
129 unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src, 137 unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
130 int src_stride, 138 int src_stride,
131 int xoffset, 139 int xoffset,
132 int yoffset, 140 int yoffset,
133 const uint8_t *dst, 141 const uint8_t *dst,
134 int dst_stride, 142 int dst_stride,
135 unsigned int *sse) { 143 unsigned int *sse) {
136 DECLARE_ALIGNED(16, uint8_t, temp2[64 * 64]); 144 DECLARE_ALIGNED(16, uint8_t, temp2[64 * 64]);
137 DECLARE_ALIGNED(16, uint8_t, fdata3[65 * 64]); 145 DECLARE_ALIGNED(16, uint8_t, fdata3[65 * 64]);
138 146
139 var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 147 var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
140 65, 64, 148 65, 64,
141 BILINEAR_FILTERS_2TAP(xoffset)); 149 bilinear_filters[xoffset]);
142 var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64, 150 var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
143 64, BILINEAR_FILTERS_2TAP(yoffset)); 151 64, bilinear_filters[yoffset]);
144 return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse); 152 return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
145 } 153 }
OLD NEW
« no previous file with comments | « source/libvpx/vp9/decoder/vp9_detokenize.c ('k') | source/libvpx/vp9/encoder/vp9_aq_variance.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698