OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 14 matching lines...) Expand all Loading... |
25 "eor v18.16b, v18.16b, v18.16b \n" | 25 "eor v18.16b, v18.16b, v18.16b \n" |
26 "eor v17.16b, v17.16b, v17.16b \n" | 26 "eor v17.16b, v17.16b, v17.16b \n" |
27 "eor v19.16b, v19.16b, v19.16b \n" | 27 "eor v19.16b, v19.16b, v19.16b \n" |
28 | 28 |
29 ".p2align 2 \n" | 29 ".p2align 2 \n" |
30 "1: \n" | 30 "1: \n" |
31 MEMACCESS(0) | 31 MEMACCESS(0) |
32 "ld1 {v0.16b}, [%0], #16 \n" | 32 "ld1 {v0.16b}, [%0], #16 \n" |
33 MEMACCESS(1) | 33 MEMACCESS(1) |
34 "ld1 {v1.16b}, [%1], #16 \n" | 34 "ld1 {v1.16b}, [%1], #16 \n" |
35 "subs %2, %2, #16 \n" | 35 "subs %w2, %w2, #16 \n" |
36 "usubl v2.8h, v0.8b, v1.8b \n" | 36 "usubl v2.8h, v0.8b, v1.8b \n" |
37 "usubl2 v3.8h, v0.16b, v1.16b \n" | 37 "usubl2 v3.8h, v0.16b, v1.16b \n" |
38 "smlal v16.4s, v2.4h, v2.4h \n" | 38 "smlal v16.4s, v2.4h, v2.4h \n" |
39 "smlal v17.4s, v3.4h, v3.4h \n" | 39 "smlal v17.4s, v3.4h, v3.4h \n" |
40 "smlal2 v18.4s, v2.8h, v2.8h \n" | 40 "smlal2 v18.4s, v2.8h, v2.8h \n" |
41 "smlal2 v19.4s, v3.8h, v3.8h \n" | 41 "smlal2 v19.4s, v3.8h, v3.8h \n" |
42 "b.gt 1b \n" | 42 "b.gt 1b \n" |
43 | 43 |
44 "add v16.4s, v16.4s, v17.4s \n" | 44 "add v16.4s, v16.4s, v17.4s \n" |
45 "add v18.4s, v18.4s, v19.4s \n" | 45 "add v18.4s, v18.4s, v19.4s \n" |
46 "add v19.4s, v16.4s, v18.4s \n" | 46 "add v19.4s, v16.4s, v18.4s \n" |
47 "addv s0, v19.4s \n" | 47 "addv s0, v19.4s \n" |
48 "fmov %w3, s0 \n" | 48 "fmov %w3, s0 \n" |
49 : "+r"(src_a), | 49 : "+r"(src_a), |
50 "+r"(src_b), | 50 "+r"(src_b), |
51 "+r"(count), | 51 "+r"(count), |
52 "=r"(sse) | 52 "=r"(sse) |
53 : | 53 : |
54 : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); | 54 : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); |
55 return sse; | 55 return sse; |
56 } | 56 } |
57 | 57 |
58 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 58 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
59 | 59 |
60 #ifdef __cplusplus | 60 #ifdef __cplusplus |
61 } // extern "C" | 61 } // extern "C" |
62 } // namespace libyuv | 62 } // namespace libyuv |
63 #endif | 63 #endif |
OLD | NEW |