OLD | NEW |
1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 ; Use of this source code is governed by a BSD-style license that can be | 2 ; Use of this source code is governed by a BSD-style license that can be |
3 ; found in the LICENSE file. | 3 ; found in the LICENSE file. |
4 | 4 |
5 %include "media/base/simd/media_export.asm" | 5 %include "media/base/simd/media_export.asm" |
6 | 6 |
; Review note: this is a side-by-side diff. On each row the text left of the
; first '|' is the OLD line and the text right of it is the NEW line, each with
; its own line number. Rows whose left column is empty exist only in NEW.
;
; Overview of the visible loop body: one U byte and one V byte are combined
; with two Y bytes and two A bytes. Every byte indexes an 8-byte entry in a
; lookup table addressed through TABLEq (Y at +0, U at +2048, V at +4096,
; A at +6144).
7 EXPORT SYMBOL | 7 EXPORT SYMBOL |
8 align function_align | 8 align function_align |
9 | 9 |
10 mangle(SYMBOL): | 10 mangle(SYMBOL): |
11 %assign stack_offset 0 | 11 %assign stack_offset 0 |
; Review note: both sides pass 7, 7, 3 as the leading PROLOGUE operands, but
; OLD lists eight names (ending in TEMP) while NEW lists seven.
12 PROLOGUE 7, 7, 3, Y, U, V, A, ARGB, WIDTH, TABLE, TEMP | 12 PROLOGUE 7, 7, 3, Y, U, V, A, ARGB, WIDTH, TABLE |
; Review note: WIDTH is saved on the stack before the names are redefined.
13 PUSH WIDTHq | 13 PUSH WIDTHq |
14 DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP | 14 DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP |
; Review note: NEW-only row. After DEFINE_ARGS the name TABLE sits in the sixth
; position (where PROLOGUE had WIDTH) and TEMP in the seventh (where PROLOGUE
; had TABLE). Presumably names bind to registers by position, so this copy
; moves the table pointer into the register now called TABLE before TEMP is
; reused as scratch -- confirm against the PROLOGUE/DEFINE_ARGS macro
; definitions, which are not visible here.
| 15 mov TABLEq, TEMPq |
; Review note: the .convertend label is not in the visible rows.
15 jmp .convertend | 16 jmp .convertend |
16 | 17 |
17 .convertloop: | 18 .convertloop: |
; Review note: the U byte selects an 8-byte entry in the +2048 table section.
18 movzx TEMPd, BYTE [Uq] | 19 movzx TEMPd, BYTE [Uq] |
19 movq mm0, [TABLEq + 2048 + 8 * TEMPq] | 20 movq mm0, [TABLEq + 2048 + 8 * TEMPq] |
20 add Uq, 1 | 21 add Uq, 1 |
21 | 22 |
; Review note: the V entry (+4096 section) is added to the U entry with signed
; word saturation; mm0 is then shared by both Y values below.
22 movzx TEMPd, BYTE [Vq] | 23 movzx TEMPd, BYTE [Vq] |
23 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] | 24 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] |
24 add Vq, 1 | 25 add Vq, 1 |
25 | 26 |
; Review note: two consecutive Y bytes are looked up at offset +0 of the table,
; giving mm1 (first Y) and mm2 (second Y).
26 movzx TEMPd, BYTE [Yq] | 27 movzx TEMPd, BYTE [Yq] |
27 movq mm1, [TABLEq + 8 * TEMPq] | 28 movq mm1, [TABLEq + 8 * TEMPq] |
28 | 29 |
29 movzx TEMPd, BYTE [Yq + 1] | 30 movzx TEMPd, BYTE [Yq + 1] |
30 movq mm2, [TABLEq + 8 * TEMPq] | 31 movq mm2, [TABLEq + 8 * TEMPq] |
31 add Yq, 2 | 32 add Yq, 2 |
32 | 33 |
33 ; Add UV components to Y component. | 34 ; Add UV components to Y component. |
34 paddsw mm1, mm0 | 35 paddsw mm1, mm0 |
35 paddsw mm2, mm0 | 36 paddsw mm2, mm0 |
36 | 37 |
; Review note: psraw is an arithmetic (sign-preserving) shift. packuswb then
; saturates each signed word to the range 0..255, leaving eight bytes in mm1:
; four from the first Y value and four from the second.
37 ; Down shift and then pack. | 38 ; Down shift and then pack. |
38 psraw mm1, 6 | 39 psraw mm1, 6 |
39 psraw mm2, 6 | 40 psraw mm2, 6 |
40 packuswb mm1, mm2 | 41 packuswb mm1, mm2 |
41 | 42 |
; Review note: mm2 is zeroed so the byte unpacks below zero-extend. mm0
; receives the low four bytes as words and mm1 the high four.
42 ; Unpack and multiply by alpha value, then repack high bytes of words. | 43 ; Unpack |
43 movq mm0, mm1 | 44 movq mm0, mm1 |
44 pxor mm2, mm2 | 45 pxor mm2, mm2 |
45 punpcklbw mm0, mm2 | 46 punpcklbw mm0, mm2 |
46 punpckhbw mm1, mm2 | 47 punpckhbw mm1, mm2 |
| 48 |
| 49 ; Add one to our alpha values, this is a somewhat unfortunate hack; while |
| 50 ; the pack/unpack above handle saturating any negative numbers to 0, they also |
| 51 ; truncate the alpha value to 255. The math ahead wants to produce the same |
| 52 ; ARGB alpha value as the source pixel in YUVA, but this depends on the alpha |
| 53 ; value in |mm0| and |mm1| being 256, (let A be the source image alpha, |
| 54 ; 256 * A >> 8 == A, whereas 255 * A >> 8 is off by one except at 0). |
; Review note: 0x00010000 in the low dword, shifted left by 32 bits, leaves 1
; in the top 16-bit word of mm2 and 0 in the other three words. Only the top
; word of mm0 and of mm1 is therefore incremented.
| 55 mov TEMPq, 0x00010000 |
| 56 movd mm2, TEMPd |
| 57 psllq mm2, 32 |
| 58 paddsw mm0, mm2 |
| 59 paddsw mm1, mm2 |
| 60 |
| 61 ; Multiply by alpha value, then repack high bytes of words. |
; Review note: each A byte selects an 8-byte entry in the +6144 section.
; pmullw keeps the low 16 bits of each word product and psrlw 8 leaves
; (product >> 8) in each word, ready for the final pack.
47 movzx TEMPd, BYTE [Aq] | 62 movzx TEMPd, BYTE [Aq] |
48 movq mm2, [TABLEq + 6144 + 8 * TEMPq] | 63 movq mm2, [TABLEq + 6144 + 8 * TEMPq] |
49 pmullw mm0, mm2 | 64 pmullw mm0, mm2 |
50 psrlw mm0, 8 | 65 psrlw mm0, 8 |
51 movzx TEMPd, BYTE [Aq + 1] | 66 movzx TEMPd, BYTE [Aq + 1] |
52 movq mm2, [TABLEq + 6144 + 8 * TEMPq] | 67 movq mm2, [TABLEq + 6144 + 8 * TEMPq] |
53 add Aq, 2 | 68 add Aq, 2 |
54 pmullw mm1, mm2 | 69 pmullw mm1, mm2 |
55 psrlw mm1, 8 | 70 psrlw mm1, 8 |
56 packuswb mm0, mm1 | 71 packuswb mm0, mm1 |
(...skipping 15 matching lines...) |
; Review note: the rows below are the visible tail of the routine; the
; instructions that start this section fall inside the elided rows. What is
; visible handles one Y byte and one A byte and writes four bytes to [ARGBq].
; The first row adds a +4096-section table entry (indexed by TEMP) into mm0.
72 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] | 87 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] |
; Review note: the Y entry (offset +0) is added to mm0 with signed saturation,
; shifted right arithmetically by 6, then packed with unsigned saturation.
; Packing mm1 with itself duplicates the four result bytes in both halves.
73 movzx TEMPd, BYTE [Yq] | 88 movzx TEMPd, BYTE [Yq] |
74 movq mm1, [TABLEq + 8 * TEMPq] | 89 movq mm1, [TABLEq + 8 * TEMPq] |
75 paddsw mm1, mm0 | 90 paddsw mm1, mm0 |
76 psraw mm1, 6 | 91 psraw mm1, 6 |
77 packuswb mm1, mm1 | 92 packuswb mm1, mm1 |
78 | 93 |
79 ; Multiply ARGB by alpha value. | 94 ; Multiply ARGB by alpha value. |
; Review note: mm0 is zeroed so the unpack zero-extends the low four bytes of
; mm1 into four 16-bit words.
80 pxor mm0, mm0 | 95 pxor mm0, mm0 |
81 punpcklbw mm1, mm0 | 96 punpcklbw mm1, mm0 |
| 97 |
| 98 ; See above note about this hack. |
; Review note: 0x00010000 in the low dword, shifted left by 32 bits, leaves 1
; in the top 16-bit word of mm0 and 0 in the other three words, so only the top
; word of mm1 is incremented.
| 99 mov TEMPq, 0x00010000 |
| 100 movd mm0, TEMPd |
| 101 psllq mm0, 32 |
| 102 paddsw mm1, mm0 |
| 103 |
; Review note: the A byte selects an 8-byte entry in the +6144 section.
; pmullw keeps the low 16 bits of each word product and psrlw 8 leaves
; (product >> 8) in each word before the pack.
82 movzx TEMPd, BYTE [Aq] | 104 movzx TEMPd, BYTE [Aq] |
83 movq mm0, [TABLEq + 6144 + 8 * TEMPq] | 105 movq mm0, [TABLEq + 6144 + 8 * TEMPq] |
84 pmullw mm1, mm0 | 106 pmullw mm1, mm0 |
85 psrlw mm1, 8 | 107 psrlw mm1, 8 |
86 packuswb mm1, mm1 | 108 packuswb mm1, mm1 |
87 | 109 |
; Review note: movd stores only the low four bytes of mm1.
88 movd [ARGBq], mm1 | 110 movd [ARGBq], mm1 |
89 | 111 |
90 .convertdone: | 112 .convertdone: |
; Review note: pops one stack slot into the register named TABLE. The only push
; visible in this view is PUSH WIDTHq at function entry, so this is presumably
; its counterpart -- the elided rows were not checked for stack balance.
91 POP TABLEq | 113 POP TABLEq |
92 RET | 114 RET |
OLD | NEW |