Index: source/row_neon64.cc |
diff --git a/source/row_neon64.cc b/source/row_neon64.cc |
index e5f2dc8f396a17a93c0c890c875770fbdec677a0..608241691a4c8a08289629cdb484590d88ab9975 100644 |
--- a/source/row_neon64.cc |
+++ b/source/row_neon64.cc |
@@ -1404,10 +1404,10 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { |
asm volatile ( |
"1: \n" |
MEMACCESS(0) |
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load row 8 pixels |
- "subs %w2, %w2, #8 \n" // 8 processed per loop |
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 pixels |
+ "subs %w2, %w2, #16 \n" // 16 processed per loop |
MEMACCESS(1) |
- "st1 {v3.8b}, [%1], #8 \n" // store 8 A's. |
+ "st1 {v3.16b}, [%1], #16 \n" // store 16 A's. |
"b.gt 1b \n" |
: "+r"(src_argb), // %0 |
"+r"(dst_a), // %1 |