Index: source/row_neon64.cc |
diff --git a/source/row_neon64.cc b/source/row_neon64.cc |
index a9801f2e435f615e34fce7d45bded96d40be6133..f62a34bfba01d030c7cb3a17668e5f23c465c6bf 100644 |
--- a/source/row_neon64.cc |
+++ b/source/row_neon64.cc |
@@ -2336,18 +2336,14 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { |
void InterpolateRow_NEON(uint8* dst_ptr, |
const uint8* src_ptr, ptrdiff_t src_stride, |
int dst_width, int source_y_fraction) { |
- int y1_fraction = source_y_fraction; |
- int y0_fraction = 256 - y1_fraction; |
+ int y1_fraction = source_y_fraction >> 1; |
+ int y0_fraction = 128 - y1_fraction; |
const uint8* src_ptr1 = src_ptr + src_stride; |
asm volatile ( |
"cmp %w4, #0 \n" |
"b.eq 100f \n" |
"cmp %w4, #64 \n" |
- "b.eq 75f \n" |
- "cmp %w4, #128 \n" |
"b.eq 50f \n" |
- "cmp %w4, #192 \n" |
- "b.eq 25f \n" |
"dup v5.16b, %w4 \n" |
"dup v4.16b, %w5 \n" |
@@ -2362,27 +2358,13 @@ void InterpolateRow_NEON(uint8* dst_ptr, |
"umull2 v3.8h, v0.16b, v4.16b \n" |
"umlal v2.8h, v1.8b, v5.8b \n" |
"umlal2 v3.8h, v1.16b, v5.16b \n" |
- "rshrn v0.8b, v2.8h, #8 \n" |
- "rshrn2 v0.16b, v3.8h, #8 \n" |
+ "rshrn v0.8b, v2.8h, #7 \n" |
+ "rshrn2 v0.16b, v3.8h, #7 \n" |
MEMACCESS(0) |
"st1 {v0.16b}, [%0], #16 \n" |
"b.gt 1b \n" |
"b 99f \n" |
- // Blend 25 / 75. |
- "25: \n" |
- MEMACCESS(1) |
- "ld1 {v0.16b}, [%1], #16 \n" |
- MEMACCESS(2) |
- "ld1 {v1.16b}, [%2], #16 \n" |
- "subs %w3, %w3, #16 \n" |
- "urhadd v0.16b, v0.16b, v1.16b \n" |
- "urhadd v0.16b, v0.16b, v1.16b \n" |
- MEMACCESS(0) |
- "st1 {v0.16b}, [%0], #16 \n" |
- "b.gt 25b \n" |
- "b 99f \n" |
- |
// Blend 50 / 50. |
"50: \n" |
MEMACCESS(1) |
@@ -2396,20 +2378,6 @@ void InterpolateRow_NEON(uint8* dst_ptr, |
"b.gt 50b \n" |
"b 99f \n" |
- // Blend 75 / 25. |
- "75: \n" |
- MEMACCESS(1) |
- "ld1 {v1.16b}, [%1], #16 \n" |
- MEMACCESS(2) |
- "ld1 {v0.16b}, [%2], #16 \n" |
- "subs %w3, %w3, #16 \n" |
- "urhadd v0.16b, v0.16b, v1.16b \n" |
- "urhadd v0.16b, v0.16b, v1.16b \n" |
- MEMACCESS(0) |
- "st1 {v0.16b}, [%0], #16 \n" |
- "b.gt 75b \n" |
- "b 99f \n" |
- |
// Blend 100 / 0 - Copy row unchanged. |
"100: \n" |
MEMACCESS(1) |