OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
12 | 12 #include "vpx_ports/mem.h" |
13 #ifdef _MSC_VER | |
14 #define __builtin_prefetch(x) | |
15 #endif | |
16 | 13 |
17 unsigned int vp8_variance16x16_neon( | 14 unsigned int vp8_variance16x16_neon( |
18 const unsigned char *src_ptr, | 15 const unsigned char *src_ptr, |
19 int source_stride, | 16 int source_stride, |
20 const unsigned char *ref_ptr, | 17 const unsigned char *ref_ptr, |
21 int recon_stride, | 18 int recon_stride, |
22 unsigned int *sse) { | 19 unsigned int *sse) { |
23 int i; | 20 int i; |
24 int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; | 21 int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; |
25 uint32x2_t d0u32, d10u32; | 22 uint32x2_t d0u32, d10u32; |
(...skipping 288 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
314 | 311 |
315 q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), | 312 q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), |
316 vreinterpret_s32_s64(d0s64)); | 313 vreinterpret_s32_s64(d0s64)); |
317 vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); | 314 vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0); |
318 | 315 |
319 d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6); | 316 d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6); |
320 d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); | 317 d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32); |
321 | 318 |
322 return vget_lane_u32(d0u32, 0); | 319 return vget_lane_u32(d0u32, 0); |
323 } | 320 } |
OLD | NEW |