Index: source/row_gcc.cc |
diff --git a/source/row_gcc.cc b/source/row_gcc.cc |
index 3ce0f0a492a99bbbb145b951629adcdcb36315bf..e4084e41ca6aee913dfe3c452eea74ee96bef6b8 100644 |
--- a/source/row_gcc.cc |
+++ b/source/row_gcc.cc |
@@ -526,6 +526,52 @@ void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) { |
); |
} |
+void ARGBToRGB565DitherRow_SSE2(const uint8* src, uint8* dst, /* src: 32-bit pixels, read 16 bytes at a time; dst: 16-bit pixels, written 8 bytes at a time */ |
+ const uint32 dither4, int pix) { /* dither4: four dither bytes, byte k is added to pixel k of every group of 4; pix: pixel count, decremented by 4 per iteration */ |
+ asm volatile ( /* Adds a dither byte to every byte of each pixel with unsigned saturation, then packs bytes 0/1/2 of each pixel into 5:6:5 bit fields. */ |
+ "movd %3,%%xmm6 \n" /* xmm6 = dither4 in the low dword, remaining bits zero */ |
+ "punpcklbw %%xmm6,%%xmm6 \n" /* duplicate each dither byte: d0 d0 d1 d1 d2 d2 d3 d3 */ |
+ "movdqa %%xmm6,%%xmm7 \n" /* copy into xmm7 */ |
+ "punpcklwd %%xmm6,%%xmm6 \n" /* duplicate each low word: dword k of xmm6 = dither byte k repeated 4 times */ |
+ "punpckhwd %%xmm7,%%xmm7 \n" /* NOTE(review): xmm7 is not read again in this function; looks like dead setup - confirm */ |
+ "pcmpeqb %%xmm3,%%xmm3 \n" /* xmm3 = all ones */ |
+ "psrld $0x1b,%%xmm3 \n" /* xmm3 = 0x0000001f per dword: mask for bits 0-4 */ |
+ "pcmpeqb %%xmm4,%%xmm4 \n" /* xmm4 = all ones */ |
+ "psrld $0x1a,%%xmm4 \n" /* 0x0000003f per dword */ |
+ "pslld $0x5,%%xmm4 \n" /* xmm4 = 0x000007e0 per dword: mask for bits 5-10 */ |
+ "pcmpeqb %%xmm5,%%xmm5 \n" /* xmm5 = all ones */ |
+ "pslld $0xb,%%xmm5 \n" /* xmm5 = 0xfffff800 per dword: mask for bits 11 and up */ |
+ /* Loop: the body runs before pix is tested, so at least 4 pixels are always processed. */ |
+ LABELALIGN |
+ "1: \n" |
+ "movdqu (%0),%%xmm0 \n" /* unaligned load of 4 source pixels */ |
+ "paddusb %%xmm6,%%xmm0 \n" /* add dither to every byte, saturating at 255 (the top byte is dithered too but discarded below) */ |
+ "movdqa %%xmm0,%%xmm1 \n" /* copy used for the bits 0-4 field */ |
+ "movdqa %%xmm0,%%xmm2 \n" /* copy used for the bits 5-10 field */ |
+ "pslld $0x8,%%xmm0 \n" /* shift the top byte out so byte 2 becomes the most significant byte */ |
+ "psrld $0x3,%%xmm1 \n" /* top 5 bits of byte 0 land in bits 0-4 */ |
+ "psrld $0x5,%%xmm2 \n" /* top 6 bits of byte 1 land in bits 5-10 */ |
+ "psrad $0x10,%%xmm0 \n" /* arithmetic shift: top 5 bits of byte 2 land in bits 11-15, sign-extended above */ |
+ "pand %%xmm3,%%xmm1 \n" /* keep bits 0-4 */ |
+ "pand %%xmm4,%%xmm2 \n" /* keep bits 5-10 */ |
+ "pand %%xmm5,%%xmm0 \n" /* keep bits 11+; the sign-extension bits stay so the signed pack below does not saturate */ |
+ "por %%xmm2,%%xmm1 \n" /* merge the two low fields */ |
+ "por %%xmm1,%%xmm0 \n" /* merge all three fields into each dword */ |
+ "packssdw %%xmm0,%%xmm0 \n" /* narrow 4 dwords to 4 words in the low 8 bytes */ |
+ "lea 0x10(%0),%0 \n" /* src += 16 bytes (lea leaves flags untouched) */ |
+ "movq %%xmm0,(%1) \n" /* store 4 packed 16-bit pixels */ |
+ "lea 0x8(%1),%1 \n" /* dst += 8 bytes */ |
+ "sub $0x4,%2 \n" /* pix -= 4 */ |
+ "jg 1b \n" /* repeat while pix > 0 */ |
+ : "+r"(src), // %0 |
+ "+r"(dst), // %1 |
+ "+r"(pix) // %2 |
+ : "m"(dither4) // %3 |
+ : "memory", "cc", |
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
+ ); |
+} |
+ |
void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { |
asm volatile ( |
"pcmpeqb %%xmm4,%%xmm4 \n" |