| Index: source/scale_neon64.cc
|
| diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc
|
| index ff277f26ff6e46fa829965c4a7d4021725781b3e..2362f0874650ff21271fee4a943b880693317b5f 100644
|
| --- a/source/scale_neon64.cc
|
| +++ b/source/scale_neon64.cc
|
| @@ -587,6 +587,10 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
| MEMACCESS(6) \
|
| "ld2 {v4.b, v5.b}["#n"], [%6] \n"
|
|
|
| +// The NEON version mimics this formula (from row_common.cc):
|
| +// #define BLENDER(a, b, f) (uint8)((int)(a) + \
|
| +// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
|
| +
|
| void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
| int dst_width, int x, int dx) {
|
| int dx_offset[4] = {0, 1, 2, 3};
|
| @@ -626,8 +630,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
| "ushll2 v6.4s, v6.8h, #0 \n"
|
| "mul v16.4s, v16.4s, v7.4s \n"
|
| "mul v17.4s, v17.4s, v6.4s \n"
|
| - "rshrn v6.4h, v16.4s, #16 \n"
|
| - "rshrn2 v6.8h, v17.4s, #16 \n"
|
| + "rshrn v6.4h, v16.4s, #16 \n"
|
| + "rshrn2 v6.8h, v17.4s, #16 \n"
|
| "add v4.8h, v4.8h, v6.8h \n"
|
| "xtn v4.8b, v4.8h \n"
|
|
|
|
|