Chromium Code Reviews| Index: src/opts/SkFontHost_FreeType_common_opts_neon.cpp |
| diff --git a/src/opts/SkFontHost_FreeType_common_opts_neon.cpp b/src/opts/SkFontHost_FreeType_common_opts_neon.cpp |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..22a9fdfc0ad316a3cb928ecdb3b6e630bf7a9468 |
| --- /dev/null |
| +++ b/src/opts/SkFontHost_FreeType_common_opts_neon.cpp |
| @@ -0,0 +1,60 @@ |
| +#include <arm_neon.h> |
| + |
| +// Copies `height` rows of `width` 32-bit pixels from src to dst, swapping the first and third |
| +// byte of every pixel (B,G,R,A byte order in src becomes R,G,B,A byte order in dst). |
| +// |
| +// src, srcPitch    : first source row, and the signed byte offset added to reach the next row. |
| +// dst, dstRowBytes : first destination row, and the byte offset added to reach the next row. |
| +// height, width    : number of rows, and number of pixels converted in each row. |
| +// |
| +// Each row is processed 8 pixels (32 bytes) at a time with vld4.8 / vst4.8; the remaining |
| +// (width % 8) pixels are processed one at a time. Nothing is read or written when height is 0. |
| +// The pointer and height arguments are passed by value, so the caller's copies are unchanged. |
| +void BGRA2RGBA_Neon(const uint8_t* src, uint8_t* dst, size_t height, const size_t width, |
| + const size_t dstRowBytes, const int srcPitch) |
|
reed1
2015/01/29 15:47:19
1. col max is 100
2. why is dst..rowBytes and src.
frederic.ma
2015/01/29 18:33:09
1. Col max fixed
2. dst..rowBytes and src..Pitch r
|
| +{ |
| + // Labels are numeric local labels ("1:" .. "6:", referenced as 1b / 6f). Named labels become |
| + // real assembler symbols, so a second copy of this asm (function inlined or cloned by the |
| + // compiler) would fail to assemble with duplicate symbol definitions. |
| + asm volatile ( |
| + |
| + "cmp %[height], #0 \n\t" // if (height == 0) exit |
| + "beq 6f \n\t" |
| + |
| + "1: \n\t" // row loop |
| + |
| + "mov r1, %[width] \n\t" // r1 = pixels left in the current row |
| + "mov r2, %[src] \n\t" // r2 = read cursor inside the current row |
| + "mov r3, %[dst] \n\t" // r3 = write cursor inside the current row |
| + |
| + "2: \n\t" // 8-pixel batch loop |
| + |
| + "cmp r1, #8 \n\t" // while (r1 >= 8) |
| + "blt 3f \n\t" |
| + |
| + // load src, 8 pixels at a time, each pixel taking a 32-bit value |
| + "vld4.8 {d0, d1, d2, d3}, [r2]! \n\t" // d0: B0..B7 d1: G0..G7 d2: R0..R7 d3: A0..A7 |
| + "vswp d0, d2 \n\t" // swap d0 and d2 so we have RGBA format |
| + "vst4.8 {d0, d1, d2, d3}, [r3]! \n\t" // write back to dst |
| + |
| + "sub r1, r1, #8 \n\t" // r1 -= 8 because we are processing 8 pixels at a time |
| + "b 2b \n\t" |
| + |
| + "3: \n\t" // fewer than 8 pixels left in the row |
| + |
| + "cmp r1, #0 \n\t" // while (r1 > 0) |
| + "beq 5f \n\t" |
| + |
| + "4: \n\t" // single-pixel loop |
| + |
| + "vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2]! \n\t" // load one 32-bit pixel into lane 0 |
| + "vswp d0, d2 \n\t" // same B <-> R swap as above |
| + "vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r3]! \n\t" // store lane 0 as one pixel |
| + |
| + "subs r1, r1, #1 \n\t" // pixel counter decrement |
| + "bne 4b \n\t" |
| + |
| + "5: \n\t" // end of row |
| + |
| + "add %[src], %[src], %[srcPitch] \n\t" // advance both row starts to the next row |
| + "add %[dst], %[dst], %[dstRowBytes] \n\t" |
| + |
| + "subs %[height], %[height], #1 \n\t" // while (height) |
| + "bne 1b \n\t" // process next line |
| + |
| + "6: \n\t" // exit |
| + |
| + // "+&r" (early clobber): height, dst and src are written inside the loop while width, |
| + // srcPitch and dstRowBytes are still being read, so they must never share a register |
| + // with one of those input-only operands. |
| + : [height] "+&r" (height), [dst] "+&r" (dst), [src] "+&r" (src) |
| + : [width] "r" (width), [srcPitch] "r" (srcPitch), [dstRowBytes] "r" (dstRowBytes) |
| + : "cc", "memory", |
| + "r1", "r2", "r3", |
| + "d0", "d1", "d2", "d3"); |
| +} |
| + |
| + |