Chromium Code Reviews | Index: source/row_neon64.cc |
| diff --git a/source/row_neon64.cc b/source/row_neon64.cc |
| index 406dd6a9b5d61dbba82dc1e025fd58010bee8e0a..3d5853a2464e7d6b8d11495bc11e6a90515b95af 100644 |
| --- a/source/row_neon64.cc |
| +++ b/source/row_neon64.cc |
| @@ -563,7 +563,6 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, |
| uint8* dst_argb, |
| const struct YuvConstants* yuvconstants, |
| int width) { |
| - int64 width64 = (int64)(width); |
| asm volatile ( |
| YUVTORGB_SETUP |
| "movi v23.8b, #255 \n" |
| @@ -576,7 +575,7 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, |
| "b.gt 1b \n" |
| : "+r"(src_yuy2), // %0 |
| "+r"(dst_argb), // %1 |
| - "+r"(width64) // %2 |
| + "+r"(width) // %2 |
| : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| [kUVToG]"r"(&yuvconstants->kUVToG), |
| [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| @@ -590,7 +589,6 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, |
| uint8* dst_argb, |
| const struct YuvConstants* yuvconstants, |
| int width) { |
| - int64 width64 = (int64)(width); |
| asm volatile ( |
| YUVTORGB_SETUP |
| "movi v23.8b, #255 \n" |
| @@ -603,7 +601,7 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, |
| "b.gt 1b \n" |
| : "+r"(src_uyvy), // %0 |
| "+r"(dst_argb), // %1 |
| - "+r"(width64) // %2 |
| + "+r"(width) // %2 |
| : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| [kUVToG]"r"(&yuvconstants->kUVToG), |
| [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| @@ -708,42 +706,35 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { |
| } |
| void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { |
| - int64 width64 = (int64) width; |
| + src += width - 16; |
| asm volatile ( |
| - // Start at end of source row. |
| - "add %0, %0, %2 \n" |
| - "sub %0, %0, #16 \n" |
| - |
| "1: \n" |
| MEMACCESS(0) |
| "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 |
| - "subs %2, %2, #16 \n" // 16 pixels per loop. |
| + "subs %w2, %w2, #16 \n" // 16 pixels per loop. |
| "rev64 v0.16b, v0.16b \n" |
| MEMACCESS(1) |
| "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 |
| MEMACCESS(1) |
| "st1 {v0.D}[0], [%1], #8 \n" |
| "b.gt 1b \n" |
| - : "+r"(src), // %0 |
| - "+r"(dst), // %1 |
| - "+r"(width64) // %2 |
| + : "+&r"(src), // %0 |
| + "+&r"(dst), // %1 |
| + "+&r"(width) // %2 |
| : "r"((ptrdiff_t)-16) // %3 |
| : "cc", "memory", "v0" |
| ); |
| } |
| +// TODO(fbarchard): Consider single rev64 |
| void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, |
| int width) { |
| - int64 width64 = (int64) width; |
| + src_uv += width * 2 - 16; |
| asm volatile ( |
| - // Start at end of source row. |
| - "add %0, %0, %3, lsl #1 \n" |
| - "sub %0, %0, #16 \n" |
| - |
| "1: \n" |
| MEMACCESS(0) |
| "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16 |
| - "subs %3, %3, #8 \n" // 8 pixels per loop. |
| + "subs %w3, %w3, #8 \n" // 8 pixels per loop. |
| "rev64 v0.8b, v0.8b \n" |
| "rev64 v1.8b, v1.8b \n" |
| MEMACCESS(1) |
| @@ -751,35 +742,32 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, |
| MEMACCESS(2) |
| "st1 {v1.8b}, [%2], #8 \n" |
| "b.gt 1b \n" |
| - : "+r"(src_uv), // %0 |
| - "+r"(dst_u), // %1 |
| - "+r"(dst_v), // %2 |
| - "+r"(width64) // %3 |
| + : "+&r"(src_uv), // %0 |
| + "+&r"(dst_u), // %1 |
| + "+&r"(dst_v), // %2 |
| + "+&r"(width) // %3 |
| : "r"((ptrdiff_t)-16) // %4 |
| : "cc", "memory", "v0", "v1" |
| ); |
| } |
| void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { |
| - int64 width64 = (int64) width; |
| + // Start at end of source row. |
| + src += width * 4 - 16; |
|
fbarchard1
2016/06/07 22:24:50
Code generated is:
0000000000000000 <ARGBMirrorRow_NEON>: [remainder of disassembly truncated in this view]
|
| asm volatile ( |
| - // Start at end of source row. |
| - "add %0, %0, %2, lsl #2 \n" |
| - "sub %0, %0, #16 \n" |
| - |
| "1: \n" |
| MEMACCESS(0) |
| "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 |
| - "subs %2, %2, #4 \n" // 4 pixels per loop. |
| + "subs %w2, %w2, #4 \n" // 4 pixels per loop. |
| "rev64 v0.4s, v0.4s \n" |
| MEMACCESS(1) |
| "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 |
| MEMACCESS(1) |
| "st1 {v0.D}[0], [%1], #8 \n" |
| "b.gt 1b \n" |
| - : "+r"(src), // %0 |
| - "+r"(dst), // %1 |
| - "+r"(width64) // %2 |
| + : "+&r"(src), // %0 |
| + "+&r"(dst), // %1 |
| + "+&r"(width) // %2 |
| : "r"((ptrdiff_t)-16) // %3 |
| : "cc", "memory", "v0" |
| ); |
| @@ -797,7 +785,7 @@ void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { |
| "b.gt 1b \n" |
| : "+r"(src_rgb24), // %0 |
| "+r"(dst_argb), // %1 |
| - "+r"(width) // %2 |
| + "+r"(width) // %2 |
| : |
| : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List |
| ); |
| @@ -817,7 +805,7 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { |
| "b.gt 1b \n" |
| : "+r"(src_raw), // %0 |
| "+r"(dst_argb), // %1 |
| - "+r"(width) // %2 |
| + "+r"(width) // %2 |
| : |
| : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List |
| ); |