Index: source/row_neon64.cc |
diff --git a/source/row_neon64.cc b/source/row_neon64.cc |
index 3ec6bab8c50886a31603a9da940f224b95103859..4ed4e61de91d50f93ccc796727ca489a9ff84035 100644 |
--- a/source/row_neon64.cc |
+++ b/source/row_neon64.cc |
@@ -2711,6 +2711,7 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, |
); |
} |
+// Caveat: this rounds float to half float, whereas the scaling version truncates. |
void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { |
asm volatile ( |
"1: \n" |
@@ -2721,7 +2722,7 @@ void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { |
"uxtl2 v3.4s, v1.8h \n" |
"scvtf v2.4s, v2.4s \n" // 8 floats |
"scvtf v3.4s, v3.4s \n" |
- "fcvtn v1.4h, v2.4s \n" // 8 floatsgit |
+ "fcvtn v1.4h, v2.4s \n" // 8 half floats |
"fcvtn2 v1.8h, v3.4s \n" |
MEMACCESS(1) |
"st1 {v1.16b}, [%1], #16 \n" // store 8 shorts |