Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(84)

Side by Side Diff: source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <arm_neon.h> 11 #include <arm_neon.h>
12 #include <assert.h>
12 13
13 #include "./vpx_config.h" 14 #include "./vpx_config.h"
14 #include "./vpx_dsp_rtcd.h" 15 #include "./vpx_dsp_rtcd.h"
15 #include "vpx/vpx_integer.h" 16 #include "vpx/vpx_integer.h"
16 #include "vpx_ports/mem.h" 17 #include "vpx_ports/mem.h"
17 18
18 void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
19 uint8_t *dst, ptrdiff_t dst_stride,
20 const int16_t *filter_x, int x_step_q4,
21 const int16_t *filter_y, int y_step_q4,
22 int w, int h);
23 void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
24 uint8_t *dst, ptrdiff_t dst_stride,
25 const int16_t *filter_x, int x_step_q4,
26 const int16_t *filter_y, int y_step_q4,
27 int w, int h);
28
29 static INLINE int32x4_t MULTIPLY_BY_Q0( 19 static INLINE int32x4_t MULTIPLY_BY_Q0(
30 int16x4_t dsrc0, 20 int16x4_t dsrc0,
31 int16x4_t dsrc1, 21 int16x4_t dsrc1,
32 int16x4_t dsrc2, 22 int16x4_t dsrc2,
33 int16x4_t dsrc3, 23 int16x4_t dsrc3,
34 int16x4_t dsrc4, 24 int16x4_t dsrc4,
35 int16x4_t dsrc5, 25 int16x4_t dsrc5,
36 int16x4_t dsrc6, 26 int16x4_t dsrc6,
37 int16x4_t dsrc7, 27 int16x4_t dsrc7,
38 int16x8_t q0s16) { 28 int16x8_t q0s16) {
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
75 uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16; 65 uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
76 int16x8_t q0s16; 66 int16x8_t q0s16;
77 uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; 67 uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
78 int32x4_t q1s32, q2s32, q14s32, q15s32; 68 int32x4_t q1s32, q2s32, q14s32, q15s32;
79 uint16x8x2_t q0x2u16; 69 uint16x8x2_t q0x2u16;
80 uint8x8x2_t d0x2u8, d1x2u8; 70 uint8x8x2_t d0x2u8, d1x2u8;
81 uint32x2x2_t d0x2u32; 71 uint32x2x2_t d0x2u32;
82 uint16x4x2_t d0x2u16, d1x2u16; 72 uint16x4x2_t d0x2u16, d1x2u16;
83 uint32x4x2_t q0x2u32; 73 uint32x4x2_t q0x2u32;
84 74
85 if (x_step_q4 != 16) { 75 assert(x_step_q4 == 16);
86 vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
87 filter_x, x_step_q4,
88 filter_y, y_step_q4, w, h);
89 return;
90 }
91 76
92 q0s16 = vld1q_s16(filter_x); 77 q0s16 = vld1q_s16(filter_x);
93 78
94 src -= 3; // adjust for taps 79 src -= 3; // adjust for taps
95 for (; h > 0; h -= 4, 80 for (; h > 0; h -= 4,
96 src += src_stride * 4, 81 src += src_stride * 4,
97 dst += dst_stride * 4) { // loop_horiz_v 82 dst += dst_stride * 4) { // loop_horiz_v
98 s = src; 83 s = src;
99 d24u8 = vld1_u8(s); 84 d24u8 = vld1_u8(s);
100 s += src_stride; 85 s += src_stride;
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
248 uint8_t *d; 233 uint8_t *d;
249 uint32x2_t d2u32, d3u32; 234 uint32x2_t d2u32, d3u32;
250 uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32; 235 uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
251 int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16; 236 int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
252 int16x4_t d24s16, d25s16, d26s16, d27s16; 237 int16x4_t d24s16, d25s16, d26s16, d27s16;
253 uint16x4_t d2u16, d3u16, d4u16, d5u16; 238 uint16x4_t d2u16, d3u16, d4u16, d5u16;
254 int16x8_t q0s16; 239 int16x8_t q0s16;
255 uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; 240 uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
256 int32x4_t q1s32, q2s32, q14s32, q15s32; 241 int32x4_t q1s32, q2s32, q14s32, q15s32;
257 242
258 if (y_step_q4 != 16) { 243 assert(y_step_q4 == 16);
259 vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
260 filter_x, x_step_q4,
261 filter_y, y_step_q4, w, h);
262 return;
263 }
264 244
265 src -= src_stride * 3; 245 src -= src_stride * 3;
266 q0s16 = vld1q_s16(filter_y); 246 q0s16 = vld1q_s16(filter_y);
267 for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h 247 for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h
268 s = src; 248 s = src;
269 d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0); 249 d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
270 s += src_stride; 250 s += src_stride;
271 d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1); 251 d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
272 s += src_stride; 252 s += src_stride;
273 d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0); 253 d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
351 331
352 q8u16 = q10u16; 332 q8u16 = q10u16;
353 d18s16 = d22s16; 333 d18s16 = d22s16;
354 d19s16 = d24s16; 334 d19s16 = d24s16;
355 q10u16 = q13u16; 335 q10u16 = q13u16;
356 d22s16 = d25s16; 336 d22s16 = d25s16;
357 } 337 }
358 } 338 }
359 return; 339 return;
360 } 340 }
OLD | NEW
« no previous file with comments | « source/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm ('k') | source/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698