Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(260)

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_loopfilter_4_neon.c

Issue 812033011: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <arm_neon.h>
12
13 #include "./vpx_config.h"
14
15 static INLINE void vp9_loop_filter_neon(
16 uint8x8_t dblimit, // flimit
17 uint8x8_t dlimit, // limit
18 uint8x8_t dthresh, // thresh
19 uint8x8_t d3u8, // p3
20 uint8x8_t d4u8, // p2
21 uint8x8_t d5u8, // p1
22 uint8x8_t d6u8, // p0
23 uint8x8_t d7u8, // q0
24 uint8x8_t d16u8, // q1
25 uint8x8_t d17u8, // q2
26 uint8x8_t d18u8, // q3
27 uint8x8_t *d4ru8, // p1
28 uint8x8_t *d5ru8, // p0
29 uint8x8_t *d6ru8, // q0
30 uint8x8_t *d7ru8) { // q1
31 uint8x8_t d19u8, d20u8, d21u8, d22u8, d23u8, d27u8, d28u8;
32 int16x8_t q12s16;
33 int8x8_t d19s8, d20s8, d21s8, d26s8, d27s8, d28s8;
34
35 d19u8 = vabd_u8(d3u8, d4u8);
36 d20u8 = vabd_u8(d4u8, d5u8);
37 d21u8 = vabd_u8(d5u8, d6u8);
38 d22u8 = vabd_u8(d16u8, d7u8);
39 d3u8 = vabd_u8(d17u8, d16u8);
40 d4u8 = vabd_u8(d18u8, d17u8);
41
42 d19u8 = vmax_u8(d19u8, d20u8);
43 d20u8 = vmax_u8(d21u8, d22u8);
44 d3u8 = vmax_u8(d3u8, d4u8);
45 d23u8 = vmax_u8(d19u8, d20u8);
46
47 d17u8 = vabd_u8(d6u8, d7u8);
48
49 d21u8 = vcgt_u8(d21u8, dthresh);
50 d22u8 = vcgt_u8(d22u8, dthresh);
51 d23u8 = vmax_u8(d23u8, d3u8);
52
53 d28u8 = vabd_u8(d5u8, d16u8);
54 d17u8 = vqadd_u8(d17u8, d17u8);
55
56 d23u8 = vcge_u8(dlimit, d23u8);
57
58 d18u8 = vdup_n_u8(0x80);
59 d5u8 = veor_u8(d5u8, d18u8);
60 d6u8 = veor_u8(d6u8, d18u8);
61 d7u8 = veor_u8(d7u8, d18u8);
62 d16u8 = veor_u8(d16u8, d18u8);
63
64 d28u8 = vshr_n_u8(d28u8, 1);
65 d17u8 = vqadd_u8(d17u8, d28u8);
66
67 d19u8 = vdup_n_u8(3);
68
69 d28s8 = vsub_s8(vreinterpret_s8_u8(d7u8),
70 vreinterpret_s8_u8(d6u8));
71
72 d17u8 = vcge_u8(dblimit, d17u8);
73
74 d27s8 = vqsub_s8(vreinterpret_s8_u8(d5u8),
75 vreinterpret_s8_u8(d16u8));
76
77 d22u8 = vorr_u8(d21u8, d22u8);
78
79 q12s16 = vmull_s8(d28s8, vreinterpret_s8_u8(d19u8));
80
81 d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d22u8);
82 d23u8 = vand_u8(d23u8, d17u8);
83
84 q12s16 = vaddw_s8(q12s16, vreinterpret_s8_u8(d27u8));
85
86 d17u8 = vdup_n_u8(4);
87
88 d27s8 = vqmovn_s16(q12s16);
89 d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d23u8);
90 d27s8 = vreinterpret_s8_u8(d27u8);
91
92 d28s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d19u8));
93 d27s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d17u8));
94 d28s8 = vshr_n_s8(d28s8, 3);
95 d27s8 = vshr_n_s8(d27s8, 3);
96
97 d19s8 = vqadd_s8(vreinterpret_s8_u8(d6u8), d28s8);
98 d26s8 = vqsub_s8(vreinterpret_s8_u8(d7u8), d27s8);
99
100 d27s8 = vrshr_n_s8(d27s8, 1);
101 d27s8 = vbic_s8(d27s8, vreinterpret_s8_u8(d22u8));
102
103 d21s8 = vqadd_s8(vreinterpret_s8_u8(d5u8), d27s8);
104 d20s8 = vqsub_s8(vreinterpret_s8_u8(d16u8), d27s8);
105
106 *d4ru8 = veor_u8(vreinterpret_u8_s8(d21s8), d18u8);
107 *d5ru8 = veor_u8(vreinterpret_u8_s8(d19s8), d18u8);
108 *d6ru8 = veor_u8(vreinterpret_u8_s8(d26s8), d18u8);
109 *d7ru8 = veor_u8(vreinterpret_u8_s8(d20s8), d18u8);
110 return;
111 }
112
113 void vp9_lpf_horizontal_4_neon(
114 unsigned char *src,
115 int pitch,
116 unsigned char *blimit,
117 unsigned char *limit,
118 unsigned char *thresh,
119 int count) {
120 int i;
121 uint8_t *s, *psrc;
122 uint8x8_t dblimit, dlimit, dthresh;
123 uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
124
125 if (count == 0) // end_vp9_lf_h_edge
126 return;
127
128 dblimit = vld1_u8(blimit);
129 dlimit = vld1_u8(limit);
130 dthresh = vld1_u8(thresh);
131
132 psrc = src - (pitch << 2);
133 for (i = 0; i < count; i++) {
134 s = psrc + i * 8;
135
136 d3u8 = vld1_u8(s);
137 s += pitch;
138 d4u8 = vld1_u8(s);
139 s += pitch;
140 d5u8 = vld1_u8(s);
141 s += pitch;
142 d6u8 = vld1_u8(s);
143 s += pitch;
144 d7u8 = vld1_u8(s);
145 s += pitch;
146 d16u8 = vld1_u8(s);
147 s += pitch;
148 d17u8 = vld1_u8(s);
149 s += pitch;
150 d18u8 = vld1_u8(s);
151
152 vp9_loop_filter_neon(dblimit, dlimit, dthresh,
153 d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
154 &d4u8, &d5u8, &d6u8, &d7u8);
155
156 s -= (pitch * 5);
157 vst1_u8(s, d4u8);
158 s += pitch;
159 vst1_u8(s, d5u8);
160 s += pitch;
161 vst1_u8(s, d6u8);
162 s += pitch;
163 vst1_u8(s, d7u8);
164 }
165 return;
166 }
167
168 void vp9_lpf_vertical_4_neon(
169 unsigned char *src,
170 int pitch,
171 unsigned char *blimit,
172 unsigned char *limit,
173 unsigned char *thresh,
174 int count) {
175 int i, pitch8;
176 uint8_t *s;
177 uint8x8_t dblimit, dlimit, dthresh;
178 uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
179 uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3;
180 uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7;
181 uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
182 uint8x8x4_t d4Result;
183
184 if (count == 0) // end_vp9_lf_h_edge
185 return;
186
187 dblimit = vld1_u8(blimit);
188 dlimit = vld1_u8(limit);
189 dthresh = vld1_u8(thresh);
190
191 pitch8 = pitch * 8;
192 for (i = 0; i < count; i++, src += pitch8) {
193 s = src - (i + 1) * 4;
194
195 d3u8 = vld1_u8(s);
196 s += pitch;
197 d4u8 = vld1_u8(s);
198 s += pitch;
199 d5u8 = vld1_u8(s);
200 s += pitch;
201 d6u8 = vld1_u8(s);
202 s += pitch;
203 d7u8 = vld1_u8(s);
204 s += pitch;
205 d16u8 = vld1_u8(s);
206 s += pitch;
207 d17u8 = vld1_u8(s);
208 s += pitch;
209 d18u8 = vld1_u8(s);
210
211 d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8),
212 vreinterpret_u32_u8(d7u8));
213 d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8),
214 vreinterpret_u32_u8(d16u8));
215 d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8),
216 vreinterpret_u32_u8(d17u8));
217 d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8),
218 vreinterpret_u32_u8(d18u8));
219
220 d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]),
221 vreinterpret_u16_u32(d2tmp2.val[0]));
222 d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]),
223 vreinterpret_u16_u32(d2tmp3.val[0]));
224 d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]),
225 vreinterpret_u16_u32(d2tmp2.val[1]));
226 d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]),
227 vreinterpret_u16_u32(d2tmp3.val[1]));
228
229 d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]),
230 vreinterpret_u8_u16(d2tmp5.val[0]));
231 d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]),
232 vreinterpret_u8_u16(d2tmp5.val[1]));
233 d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]),
234 vreinterpret_u8_u16(d2tmp7.val[0]));
235 d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]),
236 vreinterpret_u8_u16(d2tmp7.val[1]));
237
238 d3u8 = d2tmp8.val[0];
239 d4u8 = d2tmp8.val[1];
240 d5u8 = d2tmp9.val[0];
241 d6u8 = d2tmp9.val[1];
242 d7u8 = d2tmp10.val[0];
243 d16u8 = d2tmp10.val[1];
244 d17u8 = d2tmp11.val[0];
245 d18u8 = d2tmp11.val[1];
246
247 vp9_loop_filter_neon(dblimit, dlimit, dthresh,
248 d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
249 &d4u8, &d5u8, &d6u8, &d7u8);
250
251 d4Result.val[0] = d4u8;
252 d4Result.val[1] = d5u8;
253 d4Result.val[2] = d6u8;
254 d4Result.val[3] = d7u8;
255
256 src -= 2;
257 vst4_lane_u8(src, d4Result, 0);
258 src += pitch;
259 vst4_lane_u8(src, d4Result, 1);
260 src += pitch;
261 vst4_lane_u8(src, d4Result, 2);
262 src += pitch;
263 vst4_lane_u8(src, d4Result, 3);
264 src += pitch;
265 vst4_lane_u8(src, d4Result, 4);
266 src += pitch;
267 vst4_lane_u8(src, d4Result, 5);
268 src += pitch;
269 vst4_lane_u8(src, d4Result, 6);
270 src += pitch;
271 vst4_lane_u8(src, d4Result, 7);
272 }
273 return;
274 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698