Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Side by Side Diff: media/base/simd/convert_yuva_to_argb_mmx.inc

Issue 12263013: media: Add support for playback of VP8 Alpha video streams (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « media/base/simd/convert_yuva_to_argb_mmx.asm ('k') | media/base/simd/yuv_to_rgb_table.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
4
5 global mangle(SYMBOL) PRIVATE ; export the routine; SYMBOL, mangle() and PRIVATE are not defined in this file (presumably set by the including .asm - confirm)
6 align function_align ; align the entry label that follows; function_align is defined elsewhere
7
8 ; Non-PIC code is the fastest so use this if possible.
9 %ifndef PIC
10 mangle(SYMBOL): ; entry point: converts a row of Y/U/V/A bytes into 4-byte pixels written at ARGB, two pixels per loop pass
11 %assign stack_offset 0
12 PROLOGUE 6, 7, 3, Y, U, V, A, ARGB, WIDTH, TEMP
13 extern mangle(kCoefficientsRgbY) ; table defined elsewhere; read below in 8-byte entries at byte offsets 0 (Y index), 2048 (U), 4096 (V) and 6144 (A)
14 jmp .convertend ; test the pixel count before the first pass so a width below 2 skips the loop
15
16 .convertloop:
17 movzx TEMPd, BYTE [Uq] ; TEMP = current U byte, zero-extended
18 movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPq] ; mm0 = four 16-bit words from the U-indexed entry
19 add Uq, 1 ; one U byte serves both pixels of this pass
20 movzx TEMPd, BYTE [Vq] ; TEMP = current V byte
21 paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPq] ; mm0 += V-indexed entry (signed saturating word add)
22 add Vq, 1 ; one V byte serves both pixels of this pass
23 movzx TEMPd, BYTE [Yq] ; TEMP = Y byte of the first pixel
24 movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPq] ; mm1 = Y-indexed entry for the first pixel
25 movzx TEMPd, BYTE [Yq + 1] ; TEMP = Y byte of the second pixel
26 movq mm2, [mangle(kCoefficientsRgbY) + 8 * TEMPq] ; mm2 = Y-indexed entry for the second pixel
27 add Yq, 2 ; two Y bytes consumed per pass
28 paddsw mm1, mm0 ; first pixel: Y words + combined U/V words
29 paddsw mm2, mm0 ; second pixel: Y words + the same U/V words
30 psraw mm1, 6 ; arithmetic shift drops the 6 low bits of each word
31 psraw mm2, 6
32 packuswb mm1, mm2 ; saturate words to unsigned bytes: low 4 bytes = first pixel, high 4 bytes = second pixel
33
34 ; Multiply ARGB by alpha value: widen each pixel's bytes to words, multiply by the A-indexed table entry, keep product >> 8.
35 movq mm0, mm1 ; copy so both pixels can be widened separately
36 pxor mm2, mm2 ; mm2 = 0, used as the high byte when widening
37 punpcklbw mm0, mm2 ; mm0 = first pixel as four zero-extended words
38 punpckhbw mm1, mm2 ; mm1 = second pixel as four zero-extended words
39 movzx TEMPd, BYTE [Aq] ; TEMP = alpha byte of the first pixel
40 movq mm2, [mangle(kCoefficientsRgbY) + 6144 + 8 * TEMPq] ; mm2 = A-indexed entry (four word multipliers)
41 pmullw mm0, mm2 ; per-word multiply, low 16 bits of each product kept
42 psrlw mm0, 8 ; logical shift right by 8 brings each product back to byte range
43 movzx TEMPd, BYTE [Aq + 1] ; TEMP = alpha byte of the second pixel
44 movq mm2, [mangle(kCoefficientsRgbY) + 6144 + 8 * TEMPq] ; mm2 = A-indexed entry for the second pixel
45 add Aq, 2 ; two alpha bytes consumed per pass
46 pmullw mm1, mm2
47 psrlw mm1, 8
48 packuswb mm0, mm1 ; repack both pixels into 8 bytes
49
50 MOVQ [ARGBq], mm0 ; store two pixels (8 bytes); MOVQ is a macro defined elsewhere - NOTE(review): presumably selects the store instruction, confirm
51 add ARGBq, 8 ; advance the output pointer past the two pixels
52
53 .convertend:
54 sub WIDTHq, 2 ; reserve two pixels for the next pass
55 jns .convertloop ; result still non-negative: at least two pixels were available
56
57 ; If number of pixels is odd then compute it.
58 and WIDTHq, 1 ; WIDTH is negative here (-1 or -2 for a non-negative starting count); bit 0 set means one pixel is left
59 jz .convertdone
60
61 movzx TEMPd, BYTE [Uq] ; single-pixel tail: same table lookups as the loop, pointers are not advanced
62 movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPq]
63 movzx TEMPd, BYTE [Vq]
64 paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPq]
65 movzx TEMPd, BYTE [Yq]
66 movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPq]
67 paddsw mm1, mm0
68 psraw mm1, 6
69 packuswb mm1, mm1 ; the pixel's 4 bytes end up in both halves of mm1
70
71 ; Multiply ARGB by alpha value.
72 pxor mm0, mm0 ; mm0 = 0 for widening
73 punpcklbw mm1, mm0 ; low 4 bytes -> four zero-extended words
74 movzx TEMPd, BYTE [Aq] ; TEMP = alpha byte of the last pixel
75 movq mm0, [mangle(kCoefficientsRgbY) + 6144 + 8 * TEMPq]
76 pmullw mm1, mm0
77 psrlw mm1, 8
78 packuswb mm1, mm1 ; back to bytes; only the low 4 bytes are stored
79
80 movd [ARGBq], mm1 ; store the final pixel (4 bytes)
81
82 .convertdone:
83 RET
84 %endif
85
86 ; With PIC code we need to load the address of mangle(kCoefficientsRgbY).
87 ; This code is slower than the above version.
88 %ifdef PIC
89 mangle(SYMBOL): ; entry point (PIC build): same conversion as the non-PIC variant, with the table address held in a register
90 %assign stack_offset 0
91 PROLOGUE 6, 7, 3, Y, U, V, A, ARGB, WIDTH, TEMP
92 extern mangle(kCoefficientsRgbY) ; table defined elsewhere; read below in 8-byte entries at byte offsets 0, 2048, 4096 and 6144
93 PUSH WIDTHq
94 DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP
95 LOAD_SYM TABLEq, mangle(kCoefficientsRgbY)
96 jmp .convertend ; test the pixel count before the first pass so a width below 2 skips the loop
97
98 .convertloop:
99 movzx TEMPd, BYTE [Uq] ; TEMP = current U byte, zero-extended
100 movq mm0, [TABLEq + 2048 + 8 * TEMPq] ; mm0 = four 16-bit words from the U-indexed entry
101 add Uq, 1 ; one U byte serves both pixels of this pass
102
103 movzx TEMPd, BYTE [Vq] ; TEMP = current V byte
104 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq] ; mm0 += V-indexed entry (signed saturating word add)
105 add Vq, 1 ; one V byte serves both pixels of this pass
106
107 movzx TEMPd, BYTE [Yq] ; TEMP = Y byte of the first pixel
108 movq mm1, [TABLEq + 8 * TEMPq] ; mm1 = Y-indexed entry for the first pixel
109
110 movzx TEMPd, BYTE [Yq + 1] ; TEMP = Y byte of the second pixel
111 movq mm2, [TABLEq + 8 * TEMPq] ; mm2 = Y-indexed entry for the second pixel
112 add Yq, 2 ; two Y bytes consumed per pass
113
114 ; Add UV components to Y component.
115 paddsw mm1, mm0
116 paddsw mm2, mm0
117
118 ; Down shift and then pack.
119 psraw mm1, 6 ; arithmetic shift drops the 6 low bits of each word
120 psraw mm2, 6
121 packuswb mm1, mm2 ; low 4 bytes = first pixel, high 4 bytes = second pixel
122
123 ; Unpack and multiply by alpha value, then shift each product right by 8 and repack to bytes.
124 movq mm0, mm1 ; copy so both pixels can be widened separately
125 pxor mm2, mm2 ; mm2 = 0, used as the high byte when widening
126 punpcklbw mm0, mm2 ; mm0 = first pixel as four zero-extended words
127 punpckhbw mm1, mm2 ; mm1 = second pixel as four zero-extended words
128 movzx TEMPd, BYTE [Aq] ; TEMP = alpha byte of the first pixel
129 movq mm2, [TABLEq + 6144 + 8 * TEMPq] ; mm2 = A-indexed entry (four word multipliers)
130 pmullw mm0, mm2 ; per-word multiply, low 16 bits of each product kept
131 psrlw mm0, 8
132 movzx TEMPd, BYTE [Aq + 1] ; TEMP = alpha byte of the second pixel
133 movq mm2, [TABLEq + 6144 + 8 * TEMPq]
134 add Aq, 2 ; two alpha bytes consumed per pass
135 pmullw mm1, mm2
136 psrlw mm1, 8
137 packuswb mm0, mm1 ; repack both pixels into 8 bytes
138
139 MOVQ [ARGBq], mm0 ; store two pixels (8 bytes); MOVQ is a macro defined elsewhere - NOTE(review): presumably selects the store instruction, confirm
140 add ARGBq, 8 ; advance the output pointer past the two pixels
141
142 .convertend:
143 sub dword [rsp], 2 ; the pixel count lives on the stack (pushed above as WIDTH); only its low 32 bits are updated
144 jns .convertloop ; result still non-negative: at least two pixels were available
145
146 ; If number of pixels is odd then compute it.
147 and dword [rsp], 1 ; count is negative here (-1 or -2 for a non-negative starting count); bit 0 set means one pixel is left
148 jz .convertdone
149
150 movzx TEMPd, BYTE [Uq] ; single-pixel tail: same table lookups as the loop, pointers are not advanced
151 movq mm0, [TABLEq + 2048 + 8 * TEMPq]
152 movzx TEMPd, BYTE [Vq]
153 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq]
154 movzx TEMPd, BYTE [Yq]
155 movq mm1, [TABLEq + 8 * TEMPq]
156 paddsw mm1, mm0
157 psraw mm1, 6
158 packuswb mm1, mm1 ; the pixel's 4 bytes end up in both halves of mm1
159
160 ; Multiply ARGB by alpha value.
161 pxor mm0, mm0 ; mm0 = 0 for widening
162 punpcklbw mm1, mm0 ; low 4 bytes -> four zero-extended words
163 movzx TEMPd, BYTE [Aq] ; TEMP = alpha byte of the last pixel
164 movq mm0, [TABLEq + 6144 + 8 * TEMPq]
165 pmullw mm1, mm0
166 psrlw mm1, 8
167 packuswb mm1, mm1 ; back to bytes; only the low 4 bytes are stored
168
169 movd [ARGBq], mm1 ; store the final pixel (4 bytes)
170
171 .convertdone:
172 POP TABLEq ; balances the PUSH of WIDTH above; the popped value is not read afterwards
173 RET
174 %endif
OLDNEW
« no previous file with comments | « media/base/simd/convert_yuva_to_argb_mmx.asm ('k') | media/base/simd/yuv_to_rgb_table.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698