OLD | NEW |
| (Empty) |
1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 ; Use of this source code is governed by a BSD-style license that can be | |
3 ; found in the LICENSE file. | |
4 | |
5 %include "media/base/simd/media_export.asm" | |
6 | |
; ---------------------------------------------------------------------------
; Row converter: planar Y/U/V/A bytes -> packed 32-bit pixels, using MMX and a
; lookup table.
;
; SYMBOL, mangle, EXPORT, PROLOGUE, DEFINE_ARGS, PUSH/POP, RET and MOVQ are not
; defined in this file; they presumably come from the %include above and from
; whichever file includes this one -- TODO confirm.
;
; Arguments, in the order named on the PROLOGUE line:
;   Y, U, V, A  - source byte pointers (read via movzx BYTE [...])
;   ARGB        - destination pointer; 8 bytes are stored per pixel pair
;   WIDTH       - pixel count; only its low 32 bits are examined
;   TABLE       - base of the lookup table
;
; Table layout as implied by the addressing below (8-byte entries indexed by a
; byte value, i.e. 256 * 8 = 2048 bytes per sub-table):
;   TABLE + 0     indexed by Y
;   TABLE + 2048  indexed by U
;   TABLE + 4096  indexed by V
;   TABLE + 6144  indexed by A
; Each entry is treated as four 16-bit lanes. The Y+U+V sums are arithmetically
; shifted right by 6 before packing, which suggests the Y/U/V entries carry
; 6 fractional bits -- confirm against the code that builds the table.
;
; Each loop iteration consumes 2 Y bytes, 1 U byte, 1 V byte and 2 A bytes and
; produces 2 pixels. A trailing odd pixel is handled after the loop.
;
; NOTE(review): mm0-mm2 are used and no EMMS is executed here; presumably the
; caller is responsible for clearing MMX state -- confirm.
; ---------------------------------------------------------------------------
7 EXPORT SYMBOL | |
8 align function_align | |
9 | |
10 mangle(SYMBOL): | |
11 %assign stack_offset 0 | |
12 PROLOGUE 7, 7, 3, Y, U, V, A, ARGB, WIDTH, TABLE | |
; The pixel counter is kept on the stack at [rsp] from here on, which frees a
; register name for TEMP below.
13 PUSH WIDTHq | |
; Rebind names: the sixth slot (previously WIDTH) becomes TABLE and the seventh
; (previously TABLE) becomes TEMP. Presumably names map positionally onto the
; same registers, which is why the table pointer is copied out of TEMP before
; TEMP is used as scratch -- TODO confirm against the macro definition.
14 DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP | |
15 mov TABLEq, TEMPq | |
; Enter at the loop test so that widths below 2 skip the pixel-pair loop.
16 jmp .convertend | |
17 | |
18 .convertloop: | |
; mm0 = U entry + V entry (saturating add); shared by both pixels of the pair.
19 movzx TEMPd, BYTE [Uq] | |
20 movq mm0, [TABLEq + 2048 + 8 * TEMPq] | |
21 add Uq, 1 | |
22 | |
23 movzx TEMPd, BYTE [Vq] | |
24 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] | |
25 add Vq, 1 | |
26 | |
; mm1 = Y entry for the first pixel, mm2 = Y entry for the second pixel.
27 movzx TEMPd, BYTE [Yq] | |
28 movq mm1, [TABLEq + 8 * TEMPq] | |
29 | |
30 movzx TEMPd, BYTE [Yq + 1] | |
31 movq mm2, [TABLEq + 8 * TEMPq] | |
32 add Yq, 2 | |
33 | |
34 ; Add UV components to Y component. | |
35 paddsw mm1, mm0 | |
36 paddsw mm2, mm0 | |
37 | |
38 ; Down shift and then pack. | |
39 psraw mm1, 6 | |
40 psraw mm2, 6 | |
; After the pack, mm1 holds 8 unsigned-saturated bytes: the first pixel in the
; low dword and the second pixel in the high dword.
41 packuswb mm1, mm2 | |
42 | |
43 ; Unpack | |
; Zero-extend every byte back to a 16-bit lane (mm2 is the zero source):
; mm0 = first pixel, mm1 = second pixel.
44 movq mm0, mm1 | |
45 pxor mm2, mm2 | |
46 punpcklbw mm0, mm2 | |
47 punpckhbw mm1, mm2 | |
48 | |
49 ; Add one to our alpha values, this is a somewhat unfortunate hack; while | |
50 ; the pack/unpack above handle saturating any negative numbers to 0, they also | |
51 ; truncate the alpha value to 255. The math ahead wants to produce the same | |
52 ; ARGB alpha value as the source pixel in YUVA, but this depends on the alpha | |
53 ; value in |mm0| and |mm1| being 256, (let A be the source image alpha, | |
54 ; 256 * A >> 8 == A, whereas 255 * A >> 8 is off by one except at 0). | |
; mm2 = 0x0001 in the top 16-bit lane only (0x00010000 << 32), zero elsewhere,
; so only the top lane of each pixel is incremented.
55 mov TEMPq, 0x00010000 | |
56 movd mm2, TEMPd | |
57 psllq mm2, 32 | |
58 paddsw mm0, mm2 | |
59 paddsw mm1, mm2 | |
60 | |
61 ; Multiply by alpha value, then repack high bytes of words. | |
; Each pixel is multiplied lane-wise by the A-table entry selected by its own
; alpha byte; psrlw 8 keeps bits 8..15 of each 16-bit product.
62 movzx TEMPd, BYTE [Aq] | |
63 movq mm2, [TABLEq + 6144 + 8 * TEMPq] | |
64 pmullw mm0, mm2 | |
65 psrlw mm0, 8 | |
66 movzx TEMPd, BYTE [Aq + 1] | |
67 movq mm2, [TABLEq + 6144 + 8 * TEMPq] | |
68 add Aq, 2 | |
69 pmullw mm1, mm2 | |
70 psrlw mm1, 8 | |
71 packuswb mm0, mm1 | |
72 | |
; Store both pixels (8 bytes) and advance the destination pointer.
73 MOVQ [ARGBq], mm0 | |
74 add ARGBq, 8 | |
75 | |
76 .convertend: | |
; Counter -= 2; keep looping while the result is non-negative, i.e. while at
; least two pixels remained before the subtraction.
77 sub dword [rsp], 2 | |
78 jns .convertloop | |
79 | |
80 ; If number of pixels is odd then compute it. | |
; For a non-negative starting width the counter is now -1 or -2; bit 0 is set
; only in the -1 case, which means exactly one pixel is left over.
81 and dword [rsp], 1 | |
82 jz .convertdone | |
83 | |
; Single-pixel version of the loop body. The source and destination pointers
; are not advanced because nothing is processed afterwards.
84 movzx TEMPd, BYTE [Uq] | |
85 movq mm0, [TABLEq + 2048 + 8 * TEMPq] | |
86 movzx TEMPd, BYTE [Vq] | |
87 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] | |
88 movzx TEMPd, BYTE [Yq] | |
89 movq mm1, [TABLEq + 8 * TEMPq] | |
90 paddsw mm1, mm0 | |
91 psraw mm1, 6 | |
92 packuswb mm1, mm1 | |
93 | |
94 ; Multiply ARGB by alpha value. | |
95 pxor mm0, mm0 | |
96 punpcklbw mm1, mm0 | |
97 | |
98 ; See above note about this hack. | |
99 mov TEMPq, 0x00010000 | |
100 movd mm0, TEMPd | |
101 psllq mm0, 32 | |
102 paddsw mm1, mm0 | |
103 | |
104 movzx TEMPd, BYTE [Aq] | |
105 movq mm0, [TABLEq + 6144 + 8 * TEMPq] | |
106 pmullw mm1, mm0 | |
107 psrlw mm1, 8 | |
108 packuswb mm1, mm1 | |
109 | |
; Store the single remaining pixel (4 bytes).
110 movd [ARGBq], mm1 | |
111 | |
112 .convertdone: | |
; Discard the counter pushed at entry to rebalance the stack; TABLE is no
; longer read after this point, so it serves as the pop destination.
113 POP TABLEq | |
114 RET | |
OLD | NEW |