Index: src/opts/SkFontHost_FreeType_common_opts_neon.cpp |
diff --git a/src/opts/SkFontHost_FreeType_common_opts_neon.cpp b/src/opts/SkFontHost_FreeType_common_opts_neon.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f881ca50dfa4e9e049c4206189c18e8eb71296d9 |
--- /dev/null |
+++ b/src/opts/SkFontHost_FreeType_common_opts_neon.cpp |
@@ -0,0 +1,72 @@ |
+#include <arm_neon.h> |
+ |
+#include "SkColorPriv.h" |
+ |
+// Copies a height x width block of 32-bit pixels from src to dst using NEON. |
+// srcRowBytes / dstRowBytes are the byte strides between consecutive rows. |
+// When SK_PMCOLOR_IS_RGBA is defined, bytes 0 and 2 of every pixel are swapped |
+// (BGRA -> RGBA); otherwise pixels are copied unchanged. A height or width <= 0 |
+// copies nothing. Numeric local labels avoid duplicate symbols if asm is cloned. |
+void SkPackARGB32_neon(const uint32_t* src, SkPMColor* dst, int height, int width, |
+                       size_t dstRowBytes, size_t srcRowBytes) |
+{ |
+    asm volatile ( |
+        // 6: exit. Signed test, so a negative height cannot wrap the row counter. |
+        "cmp        %[height], #0                           \n\t" |
+        "ble        6f                                      \n\t" |
+        // 1: row loop. r1 = pixels left in this row, r2/r3 = src/dst cursors. |
+        "1:                                                 \n\t" |
+        "mov        r1, %[width]                            \n\t" |
+        "mov        r2, %[src]                              \n\t" |
+        "mov        r3, %[dst]                              \n\t" |
+ |
+        // 2: batch loop, while (r1 >= 8) handle 8 pixels per iteration. |
+        "2:                                                 \n\t" |
+        "cmp        r1, #8                                  \n\t" |
+        "blt        3f                                      \n\t" |
+        // De-interleave 8 pixels (32 bytes) into one register per byte position: |
+        // d0: B0B1B2B3...B7 d1: G0G1G2G3...G7 d2: R0R1R2R3...R7 d3: A0A1A2A3...A7 |
+        "vld4.8     {d0, d1, d2, d3}, [r2]!                 \n\t" |
+        // swap d0 and d2 so we have RGBA if SkPMColor is RGBA, otherwise keep BGRA |
+#ifdef SK_PMCOLOR_IS_RGBA |
+        "vswp       d0, d2                                  \n\t" |
+#endif |
+        "vst4.8     {d0, d1, d2, d3}, [r3]!                 \n\t" |
+        // r1 -= 8 because we are processing 8 pixels at a time |
+        "sub        r1, r1, #8                              \n\t" |
+        "b          2b                                      \n\t" |
+ |
+        // 3: tail. Skip it when nothing is left; ble also rejects a negative width, |
+        // which would otherwise never reach zero in the countdown below. |
+        "3:                                                 \n\t" |
+        "cmp        r1, #0                                  \n\t" |
+        "ble        5f                                      \n\t" |
+ |
+        // 4: pixel loop, one 32-bit pixel per iteration through lane 0 of d0-d3. |
+        "4:                                                 \n\t" |
+        "vld4.8     {d0[0], d1[0], d2[0], d3[0]}, [r2]!     \n\t" |
+#ifdef SK_PMCOLOR_IS_RGBA |
+        "vswp       d0, d2                                  \n\t" |
+#endif |
+        "vst4.8     {d0[0], d1[0], d2[0], d3[0]}, [r3]!     \n\t" |
+        // pixel counter decrement |
+        "subs       r1, r1, #1                              \n\t" |
+        "bne        4b                                      \n\t" |
+ |
+        // 5: end of row. Step both base pointers by their byte strides. |
+        "5:                                                 \n\t" |
+        "add        %[src], %[src], %[srcRowBytes]          \n\t" |
+        "add        %[dst], %[dst], %[dstRowBytes]          \n\t" |
+        // while (height) process next line |
+        "subs       %[height], %[height], #1                \n\t" |
+        "bne        1b                                      \n\t" |
+ |
+        "6:                                                 \n\t" |
+        : [height] "+r" (height), [dst] "+r" (dst), [src] "+r" (src) |
+        : [width] "r" (width), [srcRowBytes] "r" (srcRowBytes), [dstRowBytes] "r" (dstRowBytes) |
+        : "cc", "memory", |
+          "r1", "r2", "r3", |
+          "d0", "d1", "d2", "d3"); |
+} |
+ |
+ |