Index: source/row_neon.cc |
diff --git a/source/row_neon.cc b/source/row_neon.cc |
index 7574cee85e252159238fd6bac14c73ab7604a13f..9e60237ec4aa63c830a41fe154e3c157c43822f1 100644 |
--- a/source/row_neon.cc |
+++ b/source/row_neon.cc |
@@ -1302,16 +1302,18 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { |
asm volatile ( |
"1: \n" |
MEMACCESS(0) |
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels |
- "subs %2, %2, #8 \n" // 8 processed per loop |
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB, A's in d6 |
+ MEMACCESS(0) |
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // next 8 ARGB, A's in d7 |
+ "subs %2, %2, #16 \n" // 16 processed per loop |
MEMACCESS(1) |
- "vst1.8 {d3}, [%1]! \n" // store 8 A's. |
+ "vst1.8 {q3}, [%1]! \n" // store 16 A's (d6, d7). |
"bgt 1b \n" |
: "+r"(src_argb), // %0 |
"+r"(dst_a), // %1 |
"+r"(width) // %2 |
: |
- : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List |
+ : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List |
); |
} |