Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(190)

Side by Side Diff: media/base/simd/convert_yuva_to_argb_mmx.inc

Issue 12263013: media: Add support for playback of VP8 Alpha video streams (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase and fixes for comments on previous patchset Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
scherkus (not reviewing) 2013/04/04 00:36:17 FYI I have no idea if this code is correct it loo
vignesh 2013/04/04 18:17:52 yes, that is correct. Although, I have to admit th
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
4
; Declare the routine's entry label as a global symbol and align its start.
; NOTE(review): mangle, SYMBOL, PRIVATE and function_align are not defined in
; this file; presumably the including file defines SYMBOL and the shared
; x86inc-style headers supply the rest -- confirm against the includer.
5 global mangle(SYMBOL) PRIVATE
6 align function_align
7
8 ; Non-PIC code is the fastest so use this if possible.
;
; Non-PIC variant, assembled only when PIC is not defined.
; Converts a row of separate Y, U, V and A byte streams into 4-byte pixels
; stored at ARGB. Each loop iteration consumes 2 Y bytes, 1 U byte, 1 V byte
; and 2 A bytes and stores 8 bytes (two pixels); a trailing odd pixel is
; handled after the loop and stored as 4 bytes.
; Lookup tables are addressed directly through their symbols; every lookup
; uses the sample byte as an index into 8-byte entries.
; NOTE(review): PROLOGUE, RET, MOVQ, mangle and SYMBOL are defined outside
; this file; presumably x86inc-style macros plus definitions from the
; including file -- confirm.
; NOTE(review): the PIC variant of this routine reads the alpha multiplier
; from kCoefficientsRgbY + 6144 rather than from kWordDup; confirm that both
; tables hold the same data so the two builds produce identical output.
; NOTE(review): no emms is executed before RET, so MMX state is left live;
; presumably the caller clears it -- confirm.
9 %ifndef PIC
10 mangle(SYMBOL):
11 %assign stack_offset 0  ; reset the assembler-side stack bookkeeping variable
12 PROLOGUE 6, 7, 3, Y, U, V, A, ARGB, WIDTH, TEMP  ; presumably: 6 args, 7 registers; names the args and the TEMP scratch register
13 extern mangle(kCoefficientsRgbY)
14 extern mangle(kWordDup)
15 jmp .convertend  ; check the pixel count before running the first iteration
16
17 .convertloop:
18 movzx TEMPd, BYTE [Uq]  ; TEMP = U sample, zero-extended
19 movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPq]  ; mm0 = U entry (offset 2048 = 256 entries * 8 bytes)
20 add Uq, 1  ; one U sample is shared by both pixels of the pair
21 movzx TEMPd, BYTE [Vq]  ; TEMP = V sample, zero-extended
22 paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPq]  ; mm0 += V entry (offset 4096), signed-saturating per word
23 add Vq, 1  ; one V sample is shared by both pixels of the pair
24 movzx TEMPd, BYTE [Yq]  ; TEMP = Y sample of the first pixel
25 movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPq]  ; mm1 = Y entry for the first pixel
26 movzx TEMPd, BYTE [Yq + 1]  ; TEMP = Y sample of the second pixel
27 movq mm2, [mangle(kCoefficientsRgbY) + 8 * TEMPq]  ; mm2 = Y entry for the second pixel
28 add Yq, 2
29 paddsw mm1, mm0  ; first pixel = Y entry + combined U/V entry
30 paddsw mm2, mm0  ; second pixel = Y entry + the same combined U/V entry
31 psraw mm1, 6  ; arithmetic shift right by 6 on each word
32 psraw mm2, 6
33 packuswb mm1, mm2  ; clamp words to 0..255; first pixel in the low 4 bytes, second in the high 4
34
35 ; Multiply ARGB by alpha value.
36 movq mm0, mm1  ; keep a copy so each pixel can be widened separately
37 pxor mm2, mm2  ; mm2 = 0, used as the high bytes when widening
38 punpcklbw mm0, mm2  ; mm0 = first pixel, bytes widened to words
39 punpckhbw mm1, mm2  ; mm1 = second pixel, bytes widened to words
40 movzx TEMPd, BYTE [Aq]  ; TEMP = alpha sample of the first pixel
41 movq mm2, [mangle(kWordDup) + 8 * TEMPq]  ; mm2 = 8-byte multiplier entry selected by that alpha
42 pmullw mm0, mm2  ; per-word multiply, low 16 bits of each product kept
43 psrlw mm0, 8  ; keep bits 8..15 of each product
44 movzx TEMPd, BYTE [Aq + 1]  ; TEMP = alpha sample of the second pixel
45 movq mm2, [mangle(kWordDup) + 8 * TEMPq]
46 add Aq, 2
47 pmullw mm1, mm2
48 psrlw mm1, 8
49 packuswb mm0, mm1  ; repack both pixels into 8 bytes
50
51 MOVQ [ARGBq], mm0  ; store two pixels
52 add ARGBq, 8
53
54 .convertend:
55 sub WIDTHq, 2  ; account for one pair of pixels
56 jns .convertloop  ; result still non-negative: a full pair was available
57
58 ; If number of pixels is odd then compute it.
59 and WIDTHq, 1  ; for a non-negative starting count WIDTH is now -1 (one pixel left) or -2 (none); bit 0 distinguishes them
60 jz .convertdone
61
62 movzx TEMPd, BYTE [Uq]
63 movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPq]  ; mm0 = U entry
64 movzx TEMPd, BYTE [Vq]
65 paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPq]  ; mm0 += V entry
66 movzx TEMPd, BYTE [Yq]
67 movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPq]  ; mm1 = Y entry
68 paddsw mm1, mm0
69 psraw mm1, 6
70 packuswb mm1, mm1  ; clamp to bytes; the pixel appears in both halves of mm1
71
72 ; Multiply ARGB by alpha value.
73 pxor mm0, mm0  ; mm0 = 0 for widening
74 punpcklbw mm1, mm0  ; widen the low 4 bytes (the pixel) to words
75 movzx TEMPd, BYTE [Aq]
76 movq mm0, [mangle(kWordDup) + 8 * TEMPq]  ; multiplier entry selected by the alpha sample
77 pmullw mm1, mm0
78 psrlw mm1, 8
79 packuswb mm1, mm1
80
81 movd [ARGBq], mm1  ; store the low 4 bytes (one pixel)
82
83 .convertdone:
84 RET
85 %endif
86
87 ; With PIC code we need to load the address of mangle(kCoefficientsRgbY).
88 ; This code is slower than the above version.
;
; PIC variant, assembled only when PIC is defined.
; Performs the same per-pixel steps as the non-PIC variant: each loop
; iteration consumes 2 Y bytes, 1 U byte, 1 V byte and 2 A bytes and stores
; 8 bytes (two pixels) at ARGB; a trailing odd pixel is stored as 4 bytes.
; The table base address is held in a register (TABLE). To free a register
; for it, the pixel count is pushed on the stack and updated in place there.
; NOTE(review): PROLOGUE, DEFINE_ARGS, LOAD_SYM, PUSH, POP, RET, MOVQ, mangle
; and SYMBOL are defined outside this file; presumably x86inc-style macros
; plus definitions from the including file -- confirm.
; NOTE(review): alpha multipliers are read from TABLE + 6144 here, whereas
; the non-PIC variant reads them from kWordDup; confirm that both locations
; hold the same data so the two builds produce identical output.
; NOTE(review): no emms is executed before RET, so MMX state is left live;
; presumably the caller clears it -- confirm.
89 %ifdef PIC
90 mangle(SYMBOL):
91 %assign stack_offset 0  ; reset the assembler-side stack bookkeeping variable
92 PROLOGUE 6, 7, 3, Y, U, V, A, ARGB, WIDTH, TEMP  ; presumably: 6 args, 7 registers; names the args and the TEMP scratch register
93 extern mangle(kCoefficientsRgbY)
94 PUSH WIDTHq  ; keep the pixel count in a stack slot from here on
95 DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP  ; presumably renames the register that held WIDTH to TABLE
96 LOAD_SYM TABLEq, mangle(kCoefficientsRgbY)  ; TABLE = address of the lookup table
97 jmp .convertend  ; check the pixel count before running the first iteration
98
99 .convertloop:
100 movzx TEMPd, BYTE [Uq]  ; TEMP = U sample, zero-extended
101 movq mm0, [TABLEq + 2048 + 8 * TEMPq]  ; mm0 = U entry (offset 2048 = 256 entries * 8 bytes)
102 add Uq, 1  ; one U sample is shared by both pixels of the pair
103
104 movzx TEMPd, BYTE [Vq]  ; TEMP = V sample, zero-extended
105 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq]  ; mm0 += V entry (offset 4096), signed-saturating per word
106 add Vq, 1  ; one V sample is shared by both pixels of the pair
107
108 movzx TEMPd, BYTE [Yq]  ; TEMP = Y sample of the first pixel
109 movq mm1, [TABLEq + 8 * TEMPq]  ; mm1 = Y entry for the first pixel
110
111 movzx TEMPd, BYTE [Yq + 1]  ; TEMP = Y sample of the second pixel
112 movq mm2, [TABLEq + 8 * TEMPq]  ; mm2 = Y entry for the second pixel
113 add Yq, 2
114
115 ; Add UV components to Y component.
116 paddsw mm1, mm0
117 paddsw mm2, mm0
118
119 ; Down shift and then pack.
120 psraw mm1, 6  ; arithmetic shift right by 6 on each word
121 psraw mm2, 6
122 packuswb mm1, mm2  ; clamp words to 0..255; first pixel in the low 4 bytes, second in the high 4
123
124 ; Unpack and multiply by alpha value, then repack high bytes of words.
125 movq mm0, mm1  ; keep a copy so each pixel can be widened separately
126 pxor mm2, mm2  ; mm2 = 0, used as the high bytes when widening
127 punpcklbw mm0, mm2  ; mm0 = first pixel, bytes widened to words
128 punpckhbw mm1, mm2  ; mm1 = second pixel, bytes widened to words
129 movzx TEMPd, BYTE [Aq]  ; TEMP = alpha sample of the first pixel
130 movq mm2, [TABLEq + 6144 + 8 * TEMPq]  ; multiplier entry (offset 6144 = 768 entries * 8 bytes)
131 pmullw mm0, mm2  ; per-word multiply, low 16 bits of each product kept
132 psrlw mm0, 8  ; keep bits 8..15 of each product
133 movzx TEMPd, BYTE [Aq + 1]  ; TEMP = alpha sample of the second pixel
134 movq mm2, [TABLEq + 6144 + 8 * TEMPq]
135 add Aq, 2
136 pmullw mm1, mm2
137 psrlw mm1, 8
138 packuswb mm0, mm1  ; repack both pixels into 8 bytes
139
140 MOVQ [ARGBq], mm0  ; store two pixels
141 add ARGBq, 8
142
143 .convertend:
144 sub dword [rsp], 2  ; the count lives in the slot written by PUSH; only its low 32 bits are updated
145 jns .convertloop  ; result still non-negative: a full pair was available
146
147 ; If number of pixels is odd then compute it.
148 and dword [rsp], 1  ; for a non-negative starting count the slot is now -1 (one pixel left) or -2 (none); bit 0 distinguishes them
149 jz .convertdone
150
151 movzx TEMPd, BYTE [Uq]
152 movq mm0, [TABLEq + 2048 + 8 * TEMPq]  ; mm0 = U entry
153 movzx TEMPd, BYTE [Vq]
154 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq]  ; mm0 += V entry
155 movzx TEMPd, BYTE [Yq]
156 movq mm1, [TABLEq + 8 * TEMPq]  ; mm1 = Y entry
157 paddsw mm1, mm0
158 psraw mm1, 6
159 packuswb mm1, mm1  ; clamp to bytes; the pixel appears in both halves of mm1
160
161 ; Multiply ARGB by alpha value.
162 pxor mm0, mm0  ; mm0 = 0 for widening
163 punpcklbw mm1, mm0  ; widen the low 4 bytes (the pixel) to words
164 movzx TEMPd, BYTE [Aq]
165 movq mm0, [TABLEq + 6144 + 8 * TEMPq]  ; multiplier entry selected by the alpha sample
166 pmullw mm1, mm0
167 psrlw mm1, 8
168 packuswb mm1, mm1
169
170 movd [ARGBq], mm1  ; store the low 4 bytes (one pixel)
171
172 .convertdone:
173 POP TABLEq  ; remove the pushed count from the stack; the popped value is not used afterwards
174 RET
175 %endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698