Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(273)

Side by Side Diff: source/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <arm_neon.h> 11 #include <arm_neon.h>
12 #include <assert.h>
12 13
13 #include "./vpx_config.h" 14 #include "./vpx_config.h"
14 #include "./vpx_dsp_rtcd.h" 15 #include "./vpx_dsp_rtcd.h"
15 #include "vpx/vpx_integer.h" 16 #include "vpx/vpx_integer.h"
16 #include "vpx_ports/mem.h" 17 #include "vpx_ports/mem.h"
17 18
18 void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
19 uint8_t *dst, ptrdiff_t dst_stride,
20 const int16_t *filter_x, int x_step_q4,
21 const int16_t *filter_y, int y_step_q4,
22 int w, int h);
23 void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
24 uint8_t *dst, ptrdiff_t dst_stride,
25 const int16_t *filter_x, int x_step_q4,
26 const int16_t *filter_y, int y_step_q4,
27 int w, int h);
28
29 static INLINE int32x4_t MULTIPLY_BY_Q0( 19 static INLINE int32x4_t MULTIPLY_BY_Q0(
30 int16x4_t dsrc0, 20 int16x4_t dsrc0,
31 int16x4_t dsrc1, 21 int16x4_t dsrc1,
32 int16x4_t dsrc2, 22 int16x4_t dsrc2,
33 int16x4_t dsrc3, 23 int16x4_t dsrc3,
34 int16x4_t dsrc4, 24 int16x4_t dsrc4,
35 int16x4_t dsrc5, 25 int16x4_t dsrc5,
36 int16x4_t dsrc6, 26 int16x4_t dsrc6,
37 int16x4_t dsrc7, 27 int16x4_t dsrc7,
38 int16x8_t q0s16) { 28 int16x8_t q0s16) {
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
75 uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16; 65 uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
76 int16x8_t q0s16; 66 int16x8_t q0s16;
77 uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; 67 uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
78 int32x4_t q1s32, q2s32, q14s32, q15s32; 68 int32x4_t q1s32, q2s32, q14s32, q15s32;
79 uint16x8x2_t q0x2u16; 69 uint16x8x2_t q0x2u16;
80 uint8x8x2_t d0x2u8, d1x2u8; 70 uint8x8x2_t d0x2u8, d1x2u8;
81 uint32x2x2_t d0x2u32; 71 uint32x2x2_t d0x2u32;
82 uint16x4x2_t d0x2u16, d1x2u16; 72 uint16x4x2_t d0x2u16, d1x2u16;
83 uint32x4x2_t q0x2u32; 73 uint32x4x2_t q0x2u32;
84 74
85 if (x_step_q4 != 16) { 75 assert(x_step_q4 == 16);
86 vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
87 filter_x, x_step_q4,
88 filter_y, y_step_q4, w, h);
89 return;
90 }
91 76
92 q0s16 = vld1q_s16(filter_x); 77 q0s16 = vld1q_s16(filter_x);
93 78
94 src -= 3; // adjust for taps 79 src -= 3; // adjust for taps
95 for (; h > 0; h -= 4) { // loop_horiz_v 80 for (; h > 0; h -= 4) { // loop_horiz_v
96 s = src; 81 s = src;
97 d24u8 = vld1_u8(s); 82 d24u8 = vld1_u8(s);
98 s += src_stride; 83 s += src_stride;
99 d25u8 = vld1_u8(s); 84 d25u8 = vld1_u8(s);
100 s += src_stride; 85 s += src_stride;
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
264 uint32x2_t d2u32, d3u32, d6u32, d7u32; 249 uint32x2_t d2u32, d3u32, d6u32, d7u32;
265 uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32; 250 uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
266 uint8x16_t q1u8, q3u8; 251 uint8x16_t q1u8, q3u8;
267 int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16; 252 int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
268 int16x4_t d24s16, d25s16, d26s16, d27s16; 253 int16x4_t d24s16, d25s16, d26s16, d27s16;
269 uint16x4_t d2u16, d3u16, d4u16, d5u16; 254 uint16x4_t d2u16, d3u16, d4u16, d5u16;
270 int16x8_t q0s16; 255 int16x8_t q0s16;
271 uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; 256 uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
272 int32x4_t q1s32, q2s32, q14s32, q15s32; 257 int32x4_t q1s32, q2s32, q14s32, q15s32;
273 258
274 if (y_step_q4 != 16) { 259 assert(y_step_q4 == 16);
275 vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
276 filter_x, x_step_q4,
277 filter_y, y_step_q4, w, h);
278 return;
279 }
280 260
281 src -= src_stride * 3; 261 src -= src_stride * 3;
282 q0s16 = vld1q_s16(filter_y); 262 q0s16 = vld1q_s16(filter_y);
283 for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h 263 for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h
284 s = src; 264 s = src;
285 d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0); 265 d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
286 s += src_stride; 266 s += src_stride;
287 d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1); 267 d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
288 s += src_stride; 268 s += src_stride;
289 d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0); 269 d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
384 364
385 q8u16 = q10u16; 365 q8u16 = q10u16;
386 d18s16 = d22s16; 366 d18s16 = d22s16;
387 d19s16 = d24s16; 367 d19s16 = d24s16;
388 q10u16 = q13u16; 368 q10u16 = q13u16;
389 d22s16 = d25s16; 369 d22s16 = d25s16;
390 } 370 }
391 } 371 }
392 return; 372 return;
393 } 373 }
OLDNEW
« no previous file with comments | « source/libvpx/vpx_dsp/arm/save_reg_neon.asm ('k') | source/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698