Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(125)

Side by Side Diff: source/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c

Issue 554673004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <arm_neon.h> 11 #include <arm_neon.h>
12 12
13 static const uint16_t bifilter4_coeff[8][2] = { 13 static const uint8_t bifilter4_coeff[8][2] = {
14 {128, 0}, 14 {128, 0},
15 {112, 16}, 15 {112, 16},
16 { 96, 32}, 16 { 96, 32},
17 { 80, 48}, 17 { 80, 48},
18 { 64, 64}, 18 { 64, 64},
19 { 48, 80}, 19 { 48, 80},
20 { 32, 96}, 20 { 32, 96},
21 { 16, 112} 21 { 16, 112}
22 }; 22 };
23 23
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
57 } else { 57 } else {
58 d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; 58 d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
59 d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; 59 d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
60 d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; 60 d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
61 d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; 61 d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
62 d6u8 = vld1_u8(src_ptr); 62 d6u8 = vld1_u8(src_ptr);
63 63
64 q1u8 = vcombine_u8(d2u8, d3u8); 64 q1u8 = vcombine_u8(d2u8, d3u8);
65 q2u8 = vcombine_u8(d4u8, d5u8); 65 q2u8 = vcombine_u8(d4u8, d5u8);
66 66
67 d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]); 67 d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
68 d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]); 68 d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
69 69
70 q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8); 70 q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
71 q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8); 71 q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
72 d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8); 72 d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8);
73 73
74 d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)), 74 d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)),
75 vreinterpret_u32_u8(vget_high_u8(q1u8))); 75 vreinterpret_u32_u8(vget_high_u8(q1u8)));
76 d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)), 76 d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)),
77 vreinterpret_u32_u8(vget_high_u8(q2u8))); 77 vreinterpret_u32_u8(vget_high_u8(q2u8)));
78 d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)), 78 d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)),
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
148 d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; 148 d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
149 d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; 149 d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
150 d26u8 = vld1_u8(src_ptr); 150 d26u8 = vld1_u8(src_ptr);
151 } else { 151 } else {
152 q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; 152 q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
153 q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; 153 q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
154 q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; 154 q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
155 q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; 155 q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
156 q5u8 = vld1q_u8(src_ptr); 156 q5u8 = vld1q_u8(src_ptr);
157 157
158 d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]); 158 d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
159 d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]); 159 d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
160 160
161 q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); 161 q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
162 q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); 162 q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
163 q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); 163 q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
164 q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); 164 q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
165 q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); 165 q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
166 166
167 d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); 167 d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
168 d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); 168 d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
169 d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); 169 d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
238 d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; 238 d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
239 d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; 239 d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
240 d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; 240 d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
241 d30u8 = vld1_u8(src_ptr); 241 d30u8 = vld1_u8(src_ptr);
242 } else { 242 } else {
243 q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; 243 q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
244 q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; 244 q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
245 q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; 245 q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
246 q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; 246 q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
247 247
248 d0u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][0]); 248 d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
249 d1u8 = vdup_n_u8((uint8_t)bifilter4_coeff[xoffset][1]); 249 d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
250 250
251 q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); 251 q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
252 q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); 252 q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
253 q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); 253 q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
254 q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); 254 q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
255 255
256 d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); 256 d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
257 d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); 257 d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
258 d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); 258 d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
259 d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); 259 d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
(...skipping 430 matching lines...) Expand 10 before | Expand all | Expand 10 after
690 690
691 q11u8 = q15u8; 691 q11u8 = q15u8;
692 692
693 vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; 693 vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
694 vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; 694 vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
695 vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; 695 vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
696 vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; 696 vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
697 } 697 }
698 return; 698 return;
699 } 699 }
OLD | NEW
« no previous file with comments | « source/libvpx/vp8/common/arm/loopfilter_arm.c ('k') | source/libvpx/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698