Index: trunk/src/media/base/simd/convert_yuva_to_argb_mmx.inc |
=================================================================== |
--- trunk/src/media/base/simd/convert_yuva_to_argb_mmx.inc (revision 194468) |
+++ trunk/src/media/base/simd/convert_yuva_to_argb_mmx.inc (working copy) |
@@ -1,174 +0,0 @@ |
-; Copyright (c) 2011 The Chromium Authors. All rights reserved. |
-; Use of this source code is governed by a BSD-style license that can be |
-; found in the LICENSE file. |
- |
- global mangle(SYMBOL) PRIVATE |
- align function_align |
- |
-; Non-PIC code is the fastest so use this if possible. |
-%ifndef PIC |
-mangle(SYMBOL): |
- %assign stack_offset 0 |
- PROLOGUE 6, 7, 3, Y, U, V, A, ARGB, WIDTH, TEMP |
- extern mangle(kCoefficientsRgbY) |
- jmp .convertend |
- |
-.convertloop: |
- movzx TEMPd, BYTE [Uq] |
- movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPq] |
- add Uq, 1 |
- movzx TEMPd, BYTE [Vq] |
- paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPq] |
- add Vq, 1 |
- movzx TEMPd, BYTE [Yq] |
- movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPq] |
- movzx TEMPd, BYTE [Yq + 1] |
- movq mm2, [mangle(kCoefficientsRgbY) + 8 * TEMPq] |
- add Yq, 2 |
- paddsw mm1, mm0 |
- paddsw mm2, mm0 |
- psraw mm1, 6 |
- psraw mm2, 6 |
- packuswb mm1, mm2 |
- |
- ; Multiply ARGB by alpha value. |
- movq mm0, mm1 |
- pxor mm2, mm2 |
- punpcklbw mm0, mm2 |
- punpckhbw mm1, mm2 |
- movzx TEMPd, BYTE [Aq] |
- movq mm2, [mangle(kCoefficientsRgbY) + 6144 + 8 * TEMPq] |
- pmullw mm0, mm2 |
- psrlw mm0, 8 |
- movzx TEMPd, BYTE [Aq + 1] |
- movq mm2, [mangle(kCoefficientsRgbY) + 6144 + 8 * TEMPq] |
- add Aq, 2 |
- pmullw mm1, mm2 |
- psrlw mm1, 8 |
- packuswb mm0, mm1 |
- |
- MOVQ [ARGBq], mm0 |
- add ARGBq, 8 |
- |
-.convertend: |
- sub WIDTHq, 2 |
- jns .convertloop |
- |
- ; If number of pixels is odd then compute it. |
- and WIDTHq, 1 |
- jz .convertdone |
- |
- movzx TEMPd, BYTE [Uq] |
- movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPq] |
- movzx TEMPd, BYTE [Vq] |
- paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPq] |
- movzx TEMPd, BYTE [Yq] |
- movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPq] |
- paddsw mm1, mm0 |
- psraw mm1, 6 |
- packuswb mm1, mm1 |
- |
- ; Multiply ARGB by alpha value. |
- pxor mm0, mm0 |
- punpcklbw mm1, mm0 |
- movzx TEMPd, BYTE [Aq] |
- movq mm0, [mangle(kCoefficientsRgbY) + 6144 + 8 * TEMPq] |
- pmullw mm1, mm0 |
- psrlw mm1, 8 |
- packuswb mm1, mm1 |
- |
- movd [ARGBq], mm1 |
- |
-.convertdone: |
- RET |
-%endif |
- |
-; With PIC code we need to load the address of mangle(kCoefficientsRgbY). |
-; This code is slower than the above version. |
-%ifdef PIC |
-mangle(SYMBOL): |
- %assign stack_offset 0 |
- PROLOGUE 6, 7, 3, Y, U, V, A, ARGB, WIDTH, TEMP |
- extern mangle(kCoefficientsRgbY) |
- PUSH WIDTHq |
- DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP |
- LOAD_SYM TABLEq, mangle(kCoefficientsRgbY) |
- jmp .convertend |
- |
-.convertloop: |
- movzx TEMPd, BYTE [Uq] |
- movq mm0, [TABLEq + 2048 + 8 * TEMPq] |
- add Uq, 1 |
- |
- movzx TEMPd, BYTE [Vq] |
- paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] |
- add Vq, 1 |
- |
- movzx TEMPd, BYTE [Yq] |
- movq mm1, [TABLEq + 8 * TEMPq] |
- |
- movzx TEMPd, BYTE [Yq + 1] |
- movq mm2, [TABLEq + 8 * TEMPq] |
- add Yq, 2 |
- |
- ; Add UV components to Y component. |
- paddsw mm1, mm0 |
- paddsw mm2, mm0 |
- |
- ; Down shift and then pack. |
- psraw mm1, 6 |
- psraw mm2, 6 |
- packuswb mm1, mm2 |
- |
- ; Unpack and multiply by alpha value, then repack high bytes of words. |
- movq mm0, mm1 |
- pxor mm2, mm2 |
- punpcklbw mm0, mm2 |
- punpckhbw mm1, mm2 |
- movzx TEMPd, BYTE [Aq] |
- movq mm2, [TABLEq + 6144 + 8 * TEMPq] |
- pmullw mm0, mm2 |
- psrlw mm0, 8 |
- movzx TEMPd, BYTE [Aq + 1] |
- movq mm2, [TABLEq + 6144 + 8 * TEMPq] |
- add Aq, 2 |
- pmullw mm1, mm2 |
- psrlw mm1, 8 |
- packuswb mm0, mm1 |
- |
- MOVQ [ARGBq], mm0 |
- add ARGBq, 8 |
- |
-.convertend: |
- sub dword [rsp], 2 |
- jns .convertloop |
- |
- ; If number of pixels is odd then compute it. |
- and dword [rsp], 1 |
- jz .convertdone |
- |
- movzx TEMPd, BYTE [Uq] |
- movq mm0, [TABLEq + 2048 + 8 * TEMPq] |
- movzx TEMPd, BYTE [Vq] |
- paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] |
- movzx TEMPd, BYTE [Yq] |
- movq mm1, [TABLEq + 8 * TEMPq] |
- paddsw mm1, mm0 |
- psraw mm1, 6 |
- packuswb mm1, mm1 |
- |
- ; Multiply ARGB by alpha value. |
- pxor mm0, mm0 |
- punpcklbw mm1, mm0 |
- movzx TEMPd, BYTE [Aq] |
- movq mm0, [TABLEq + 6144 + 8 * TEMPq] |
- pmullw mm1, mm0 |
- psrlw mm1, 8 |
- packuswb mm1, mm1 |
- |
- movd [ARGBq], mm1 |
- |
-.convertdone: |
- POP TABLEq |
- RET |
-%endif |