Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3317)

Unified Diff: source/row_neon64.cc

Issue 2467723002: HalfFloat Neon for ARMv7. (Closed)
Patch Set: bump version Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/row_neon.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/row_neon64.cc
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 2f0227cf06124f9461c02ed60e59659c01dde750..3ec6bab8c50886a31603a9da940f224b95103859 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -2718,19 +2718,19 @@ void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
"ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts
"subs %w2, %w2, #8 \n" // 8 pixels per loop
"uxtl v2.4s, v1.4h \n" // 8 int's
- "uxtl2 v1.4s, v1.8h \n"
+ "uxtl2 v3.4s, v1.8h \n"
"scvtf v2.4s, v2.4s \n" // 8 floats
- "scvtf v1.4s, v1.4s \n"
- "fcvtn v4.4h, v2.4s \n" // 8 floatsgit
- "fcvtn2 v4.8h, v1.4s \n"
+ "scvtf v3.4s, v3.4s \n"
+ "fcvtn v1.4h, v2.4s \n" // 8 half floats
+ "fcvtn2 v1.8h, v3.4s \n"
MEMACCESS(1)
- "st1 {v4.16b}, [%1], #16 \n" // store 8 shorts
+ "st1 {v1.16b}, [%1], #16 \n" // store 8 shorts
"b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
:
- : "cc", "memory", "v1", "v2", "v4"
+ : "cc", "memory", "v1", "v2", "v3"
);
}
@@ -2741,21 +2741,21 @@ void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
"ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts
"subs %w2, %w2, #8 \n" // 8 pixels per loop
"uxtl v2.4s, v1.4h \n" // 8 int's
- "uxtl2 v1.4s, v1.8h \n"
+ "uxtl2 v3.4s, v1.8h \n"
"scvtf v2.4s, v2.4s \n" // 8 floats
- "scvtf v1.4s, v1.4s \n"
+ "scvtf v3.4s, v3.4s \n"
"fmul v2.4s, v2.4s, %3.s[0] \n" // adjust exponent
- "fmul v1.4s, v1.4s, %3.s[0] \n"
- "uqshrn v4.4h, v2.4s, #13 \n" // isolate halffloat
- "uqshrn2 v4.8h, v1.4s, #13 \n"
+ "fmul v3.4s, v3.4s, %3.s[0] \n"
+ "uqshrn v1.4h, v2.4s, #13 \n" // isolate halffloat
+ "uqshrn2 v1.8h, v3.4s, #13 \n"
MEMACCESS(1)
- "st1 {v4.16b}, [%1], #16 \n" // store 8 shorts
+ "st1 {v1.16b}, [%1], #16 \n" // store 8 shorts
"b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "w"(scale * 1.9259299444e-34f) // %3
- : "cc", "memory", "v1", "v2", "v4"
+ : "cc", "memory", "v1", "v2", "v3"
);
}
« no previous file with comments | « source/row_neon.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698