OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
49 "vmov.32 %3, d0[0] \n" | 49 "vmov.32 %3, d0[0] \n" |
50 : "+r"(src_a), | 50 : "+r"(src_a), |
51 "+r"(src_b), | 51 "+r"(src_b), |
52 "+r"(count), | 52 "+r"(count), |
53 "=r"(sse) | 53 "=r"(sse) |
54 : | 54 : |
55 : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); | 55 : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); |
56 return sse; | 56 return sse; |
57 } | 57 } |
58 | 58 |
| 59 #elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
| 60 |
| 61 uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { |
| 62 volatile uint32 sse; |
| 63 asm volatile ( |
| 64 "eor v16.16b, v16.16b, v16.16b \n" |
| 65 "eor v18.16b, v18.16b, v18.16b \n" |
| 66 "eor v17.16b, v17.16b, v17.16b \n" |
| 67 "eor v19.16b, v19.16b, v19.16b \n" |
| 68 |
| 69 ".p2align 2 \n" |
| 70 "1: \n" |
| 71 MEMACCESS(0) |
| 72 "ld1 {v0.16b}, [%0], #16 \n" |
| 73 MEMACCESS(1) |
| 74 "ld1 {v1.16b}, [%1], #16 \n" |
| 75 "subs %2, %2, #16 \n" |
| 76 "usubl v2.8h, v0.8b, v1.8b \n" |
| 77 "usubl2 v3.8h, v0.16b, v1.16b \n" |
| 78 "smlal v16.4s, v2.4h, v2.4h \n" |
| 79 "smlal v17.4s, v3.4h, v3.4h \n" |
| 80 "smlal2 v18.4s, v2.8h, v2.8h \n" |
| 81 "smlal2 v19.4s, v3.8h, v3.8h \n" |
| 82 "bgt 1b \n" |
| 83 |
| 84 "add v16.4s, v16.4s, v17.4s \n" |
| 85 "add v18.4s, v18.4s, v19.4s \n" |
| 86 "add v19.4s, v16.4s, v18.4s \n" |
| 87 "addv s0, v19.4s \n" |
| 88 "fmov %w3, s0 \n" |
| 89 : "+r"(src_a), |
| 90 "+r"(src_b), |
| 91 "+r"(count), |
| 92 "=r"(sse) |
| 93 : |
| 94 : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); |
| 95 return sse; |
| 96 } |
| 97 |
59 #endif // __ARM_NEON__ | 98 #endif // __ARM_NEON__ |
60 | 99 |
61 #ifdef __cplusplus | 100 #ifdef __cplusplus |
62 } // extern "C" | 101 } // extern "C" |
63 } // namespace libyuv | 102 } // namespace libyuv |
64 #endif | 103 #endif |
OLD | NEW |