Index: media/base/simd/convert_yuva_to_argb_mmx.inc |
diff --git a/media/base/simd/convert_yuva_to_argb_mmx.inc b/media/base/simd/convert_yuva_to_argb_mmx.inc |
index 5faa6a54236858c03b05c486694baa11d4ba9394..d4933836ca89bec26134e015048ae8151bd3531b 100644 |
--- a/media/base/simd/convert_yuva_to_argb_mmx.inc |
+++ b/media/base/simd/convert_yuva_to_argb_mmx.inc |
@@ -9,9 +9,10 @@ |
mangle(SYMBOL): |
%assign stack_offset 0 |
- PROLOGUE 7, 7, 3, Y, U, V, A, ARGB, WIDTH, TABLE, TEMP |
+ PROLOGUE 7, 7, 3, Y, U, V, A, ARGB, WIDTH, TABLE |
PUSH WIDTHq |
DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP |
+ mov TABLEq, TEMPq |
jmp .convertend |
.convertloop: |
@@ -39,11 +40,25 @@ mangle(SYMBOL): |
psraw mm2, 6 |
packuswb mm1, mm2 |
- ; Unpack and multiply by alpha value, then repack high bytes of words. |
+ ; Unpack |
movq mm0, mm1 |
pxor mm2, mm2 |
punpcklbw mm0, mm2 |
punpckhbw mm1, mm2 |
+ |
+ ; Add one to our alpha values. This is a somewhat unfortunate hack: while |
+ ; the pack/unpack above saturates any negative numbers to 0, it also |
+ ; clamps the alpha value to 255. The math ahead wants to produce the same |
+ ; ARGB alpha value as the source pixel in YUVA, but this depends on the alpha |
+ ; value in |mm0| and |mm1| being 256 (let A be the source image alpha; then |
+ ; (256 * A) >> 8 == A, whereas (255 * A) >> 8 is off by one except at 0). |
+ mov TEMPq, 0x00010000 |
+ movd mm2, TEMPd |
+ psllq mm2, 32 |
+ paddsw mm0, mm2 |
+ paddsw mm1, mm2 |
+ |
+ ; Multiply by alpha value, then repack high bytes of words. |
movzx TEMPd, BYTE [Aq] |
movq mm2, [TABLEq + 6144 + 8 * TEMPq] |
pmullw mm0, mm2 |
@@ -79,6 +94,13 @@ mangle(SYMBOL): |
; Multiply ARGB by alpha value. |
pxor mm0, mm0 |
punpcklbw mm1, mm0 |
+ |
+ ; Add one to the alpha value; see the note above about this hack. |
+ mov TEMPq, 0x00010000 |
+ movd mm0, TEMPd |
+ psllq mm0, 32 |
+ paddsw mm1, mm0 |
+ |
movzx TEMPd, BYTE [Aq] |
movq mm0, [TABLEq + 6144 + 8 * TEMPq] |
pmullw mm1, mm0 |