Index: source/row_neon64.cc |
diff --git a/source/row_neon64.cc b/source/row_neon64.cc |
index e5f2dc8f396a17a93c0c890c875770fbdec677a0..80e1515b296b502b8e61db5178a5dd7f9d79694c 100644 |
--- a/source/row_neon64.cc |
+++ b/source/row_neon64.cc |
@@ -450,7 +450,6 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, |
void I400ToARGBRow_NEON(const uint8* src_y, |
uint8* dst_argb, |
int width) { |
- int64 width64 = (int64)(width); |
asm volatile ( |
YUVTORGB_SETUP |
"movi v23.8b, #255 \n" |
@@ -463,7 +462,7 @@ void I400ToARGBRow_NEON(const uint8* src_y, |
"b.gt 1b \n" |
: "+r"(src_y), // %0 |
"+r"(dst_argb), // %1 |
- "+r"(width64) // %2 |
+ "+r"(width) // %2 |
: [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), |
[kUVToG]"r"(&kYuvI601Constants.kUVToG), |
[kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), |
@@ -1404,10 +1403,10 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { |
asm volatile ( |
"1: \n" |
MEMACCESS(0) |
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load row 8 pixels |
- "subs %w2, %w2, #8 \n" // 8 processed per loop |
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 pixels |
+ "subs %w2, %w2, #16 \n" // 16 processed per loop |
MEMACCESS(1) |
- "st1 {v3.8b}, [%1], #8 \n" // store 8 A's. |
+ "st1 {v3.16b}, [%1], #16 \n" // store 16 A's. |
"b.gt 1b \n" |
: "+r"(src_argb), // %0 |
"+r"(dst_a), // %1 |