OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
6 | 6 |
7 #ifdef _DEBUG | 7 #ifdef _DEBUG |
8 #include "base/logging.h" | 8 #include "base/logging.h" |
9 #else | 9 #else |
10 #define DCHECK(a) | 10 #define DCHECK(a) |
11 #endif | 11 #endif |
12 | 12 |
13 extern "C" { | 13 extern "C" { |
14 #if USE_MMX | |
15 | 14 |
16 #define RGBY(i) { \ | 15 #if USE_SSE2 && defined(ARCH_CPU_X86_64) |
17 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | |
18 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | |
19 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | |
20 0 \ | |
21 } | |
22 | |
23 #define RGBU(i) { \ | |
24 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ | |
25 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ | |
26 0, \ | |
27 static_cast<int16>(256 * 64 - 1) \ | |
28 } | |
29 | |
30 #define RGBV(i) { \ | |
31 0, \ | |
32 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ | |
33 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ | |
34 0 \ | |
35 } | |
36 | |
37 #define MMX_ALIGNED(var) var __attribute__((aligned(16))) | |
38 | |
39 | |
40 MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = { | |
41 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), | |
42 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), | |
43 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), | |
44 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), | |
45 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), | |
46 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), | |
47 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), | |
48 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), | |
49 RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23), | |
50 RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27), | |
51 RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B), | |
52 RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F), | |
53 RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33), | |
54 RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37), | |
55 RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B), | |
56 RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F), | |
57 RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43), | |
58 RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47), | |
59 RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B), | |
60 RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F), | |
61 RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53), | |
62 RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57), | |
63 RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B), | |
64 RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F), | |
65 RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63), | |
66 RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67), | |
67 RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B), | |
68 RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F), | |
69 RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73), | |
70 RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77), | |
71 RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B), | |
72 RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F), | |
73 RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83), | |
74 RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87), | |
75 RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B), | |
76 RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F), | |
77 RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93), | |
78 RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97), | |
79 RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B), | |
80 RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F), | |
81 RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3), | |
82 RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7), | |
83 RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB), | |
84 RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF), | |
85 RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3), | |
86 RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7), | |
87 RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB), | |
88 RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF), | |
89 RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3), | |
90 RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7), | |
91 RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB), | |
92 RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF), | |
93 RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3), | |
94 RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7), | |
95 RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB), | |
96 RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF), | |
97 RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3), | |
98 RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7), | |
99 RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB), | |
100 RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF), | |
101 RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3), | |
102 RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7), | |
103 RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB), | |
104 RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF), | |
105 | |
106 // Chroma U table. | |
107 RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), | |
108 RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), | |
109 RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), | |
110 RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F), | |
111 RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13), | |
112 RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17), | |
113 RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B), | |
114 RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F), | |
115 RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23), | |
116 RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27), | |
117 RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B), | |
118 RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F), | |
119 RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33), | |
120 RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37), | |
121 RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B), | |
122 RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F), | |
123 RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43), | |
124 RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47), | |
125 RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B), | |
126 RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F), | |
127 RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53), | |
128 RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57), | |
129 RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B), | |
130 RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F), | |
131 RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63), | |
132 RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67), | |
133 RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B), | |
134 RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F), | |
135 RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73), | |
136 RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77), | |
137 RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B), | |
138 RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F), | |
139 RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83), | |
140 RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87), | |
141 RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B), | |
142 RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F), | |
143 RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93), | |
144 RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97), | |
145 RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B), | |
146 RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F), | |
147 RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3), | |
148 RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7), | |
149 RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB), | |
150 RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF), | |
151 RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3), | |
152 RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7), | |
153 RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB), | |
154 RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF), | |
155 RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3), | |
156 RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7), | |
157 RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB), | |
158 RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF), | |
159 RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3), | |
160 RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7), | |
161 RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB), | |
162 RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF), | |
163 RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3), | |
164 RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7), | |
165 RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB), | |
166 RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF), | |
167 RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3), | |
168 RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7), | |
169 RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB), | |
170 RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), | |
171 | |
172 // Chroma V table. | |
173 RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), | |
174 RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), | |
175 RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), | |
176 RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F), | |
177 RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13), | |
178 RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17), | |
179 RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B), | |
180 RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F), | |
181 RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23), | |
182 RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27), | |
183 RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B), | |
184 RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F), | |
185 RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33), | |
186 RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37), | |
187 RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B), | |
188 RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F), | |
189 RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43), | |
190 RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47), | |
191 RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B), | |
192 RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F), | |
193 RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53), | |
194 RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57), | |
195 RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B), | |
196 RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F), | |
197 RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63), | |
198 RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67), | |
199 RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B), | |
200 RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F), | |
201 RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73), | |
202 RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77), | |
203 RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B), | |
204 RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F), | |
205 RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83), | |
206 RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87), | |
207 RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B), | |
208 RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F), | |
209 RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93), | |
210 RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97), | |
211 RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B), | |
212 RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F), | |
213 RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3), | |
214 RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7), | |
215 RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB), | |
216 RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF), | |
217 RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3), | |
218 RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7), | |
219 RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB), | |
220 RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF), | |
221 RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3), | |
222 RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7), | |
223 RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB), | |
224 RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF), | |
225 RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3), | |
226 RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7), | |
227 RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB), | |
228 RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF), | |
229 RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3), | |
230 RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7), | |
231 RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB), | |
232 RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF), | |
233 RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3), | |
234 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), | |
235 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), | |
236 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), | |
237 }; | |
238 | |
239 #undef RGBY | |
240 #undef RGBU | |
241 #undef RGBV | |
242 #undef MMX_ALIGNED | |
243 | |
244 #if defined(ARCH_CPU_X86_64) | |
245 | 16 |
246 // AMD64 ABI uses register parameters. | 17 // AMD64 ABI uses register parameters. |
247 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi | 18 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi |
248 const uint8* u_buf, // rsi | 19 const uint8* u_buf, // rsi |
249 const uint8* v_buf, // rdx | 20 const uint8* v_buf, // rdx |
250 uint8* rgb_buf, // rcx | 21 uint8* rgb_buf, // rcx |
251 int width) { // r8 | 22 int source_width) { // r8 |
252 asm( | 23 asm( |
253 "jmp convertend\n" | 24 "jmp convertend\n" |
254 "convertloop:" | 25 "convertloop:" |
255 "movzb (%1),%%r10\n" | 26 "movzb (%1),%%r10\n" |
256 "add $0x1,%1\n" | 27 "add $0x1,%1\n" |
257 "movzb (%2),%%r11\n" | 28 "movzb (%2),%%r11\n" |
258 "add $0x1,%2\n" | 29 "add $0x1,%2\n" |
259 "movq 2048(%5,%%r10,8),%%xmm0\n" | 30 "movq 2048(%5,%%r10,8),%%xmm0\n" |
260 "movzb (%0),%%r10\n" | 31 "movzb (%0),%%r10\n" |
261 "movq 4096(%5,%%r11,8),%%xmm1\n" | 32 "movq 4096(%5,%%r11,8),%%xmm1\n" |
(...skipping 27 matching lines...) Expand all Loading... |
289 "paddsw %%xmm0,%%xmm1\n" | 60 "paddsw %%xmm0,%%xmm1\n" |
290 "psraw $0x6,%%xmm1\n" | 61 "psraw $0x6,%%xmm1\n" |
291 "packuswb %%xmm1,%%xmm1\n" | 62 "packuswb %%xmm1,%%xmm1\n" |
292 "movd %%xmm1,0x0(%3)\n" | 63 "movd %%xmm1,0x0(%3)\n" |
293 "convertdone:" | 64 "convertdone:" |
294 : | 65 : |
295 : "r"(y_buf), // %0 | 66 : "r"(y_buf), // %0 |
296 "r"(u_buf), // %1 | 67 "r"(u_buf), // %1 |
297 "r"(v_buf), // %2 | 68 "r"(v_buf), // %2 |
298 "r"(rgb_buf), // %3 | 69 "r"(rgb_buf), // %3 |
299 "r"(width), // %4 | 70 "r"(source_width), // %4 |
300 "r" (kCoefficientsRgbY) // %5 | 71 "r" (kCoefficientsRgbY) // %5 |
301 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" | 72 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" |
302 ); | 73 ); |
303 } | 74 } |
304 | 75 |
305 void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi | 76 void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi |
306 const uint8* u_buf, // rsi | 77 const uint8* u_buf, // rsi |
307 const uint8* v_buf, // rdx | 78 const uint8* v_buf, // rdx |
308 uint8* rgb_buf, // rcx | 79 uint8* rgb_buf, // rcx |
309 int width, // r8 | 80 int source_width, // r8 |
310 int scaled_dx) { // r9 | 81 int source_dx) { // r9 |
311 asm( | 82 asm( |
312 "xor %%r11,%%r11\n" | 83 "xor %%r11,%%r11\n" |
313 "sub $0x2,%4\n" | 84 "sub $0x2,%4\n" |
314 "js scalenext\n" | 85 "js scalenext\n" |
315 | 86 |
316 "scaleloop:" | 87 "scaleloop:" |
317 "mov %%r11,%%r10\n" | 88 "mov %%r11,%%r10\n" |
318 "sar $0x11,%%r10\n" | 89 "sar $0x11,%%r10\n" |
319 "movzb (%1,%%r10,1),%%rax\n" | 90 "movzb (%1,%%r10,1),%%rax\n" |
320 "movq 2048(%5,%%rax,8),%%xmm0\n" | 91 "movq 2048(%5,%%rax,8),%%xmm0\n" |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
357 "psraw $0x6,%%xmm1\n" | 128 "psraw $0x6,%%xmm1\n" |
358 "packuswb %%xmm1,%%xmm1\n" | 129 "packuswb %%xmm1,%%xmm1\n" |
359 "movd %%xmm1,0x0(%3)\n" | 130 "movd %%xmm1,0x0(%3)\n" |
360 | 131 |
361 "scaledone:" | 132 "scaledone:" |
362 : | 133 : |
363 : "r"(y_buf), // %0 | 134 : "r"(y_buf), // %0 |
364 "r"(u_buf), // %1 | 135 "r"(u_buf), // %1 |
365 "r"(v_buf), // %2 | 136 "r"(v_buf), // %2 |
366 "r"(rgb_buf), // %3 | 137 "r"(rgb_buf), // %3 |
367 "r"(width), // %4 | 138 "r"(source_width), // %4 |
368 "r" (kCoefficientsRgbY), // %5 | 139 "r" (kCoefficientsRgbY), // %5 |
369 "r"(static_cast<long>(scaled_dx)) // %6 | 140 "r"(static_cast<long>(source_dx)) // %6 |
370 : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2" | 141 : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2" |
371 ); | 142 ); |
372 } | 143 } |
373 | 144 |
374 void LinearScaleYUVToRGB32Row(const uint8* y_buf, | 145 void LinearScaleYUVToRGB32Row(const uint8* y_buf, |
375 const uint8* u_buf, | 146 const uint8* u_buf, |
376 const uint8* v_buf, | 147 const uint8* v_buf, |
377 uint8* rgb_buf, | 148 uint8* rgb_buf, |
378 int width, | 149 int source_width, |
379 int scaled_dx) { | 150 int source_dx) { |
380 asm( | 151 asm( |
381 "xor %%r11,%%r11\n" | 152 "xor %%r11,%%r11\n" |
382 "sub $0x2,%4\n" | 153 "sub $0x2,%4\n" |
383 "js .lscalenext\n" | 154 "js .lscalenext\n" |
384 | 155 |
385 ".lscaleloop:" | 156 ".lscaleloop:" |
386 "mov %%r11,%%r10\n" | 157 "mov %%r11,%%r10\n" |
387 "sar $0x11,%%r10\n" | 158 "sar $0x11,%%r10\n" |
388 | 159 |
389 "movzb (%1, %%r10, 1), %%r13 \n" | 160 "movzb (%1, %%r10, 1), %%r13 \n" |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
470 "psraw $0x6,%%xmm1\n" | 241 "psraw $0x6,%%xmm1\n" |
471 "packuswb %%xmm1,%%xmm1\n" | 242 "packuswb %%xmm1,%%xmm1\n" |
472 "movd %%xmm1,0x0(%3)\n" | 243 "movd %%xmm1,0x0(%3)\n" |
473 | 244 |
474 ".lscaledone:" | 245 ".lscaledone:" |
475 : | 246 : |
476 : "r"(y_buf), // %0 | 247 : "r"(y_buf), // %0 |
477 "r"(u_buf), // %1 | 248 "r"(u_buf), // %1 |
478 "r"(v_buf), // %2 | 249 "r"(v_buf), // %2 |
479 "r"(rgb_buf), // %3 | 250 "r"(rgb_buf), // %3 |
480 "r"(width), // %4 | 251 "r"(source_width), // %4 |
481 "r" (kCoefficientsRgbY), // %5 | 252 "r" (kCoefficientsRgbY), // %5 |
482 "r"(static_cast<long>(scaled_dx)) // %6 | 253 "r"(static_cast<long>(source_dx)) // %6 |
483 : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2" | 254 : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2" |
484 ); | 255 ); |
485 } | 256 } |
486 | 257 |
487 #else // !AMD64 | 258 #elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__) |
488 | 259 |
489 // PIC version is slower because fewer registers are available, so | 260 // PIC version is slower because fewer registers are available, so |
490 // non-PIC is used on platforms where it is possible. | 261 // non-PIC is used on platforms where it is possible. |
491 | 262 |
492 #if !defined(__PIC__) | |
493 | |
494 void FastConvertYUVToRGB32Row(const uint8* y_buf, | 263 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
495 const uint8* u_buf, | 264 const uint8* u_buf, |
496 const uint8* v_buf, | 265 const uint8* v_buf, |
497 uint8* rgb_buf, | 266 uint8* rgb_buf, |
498 int width); | 267 int source_width); |
499 | 268 |
500 asm( | 269 asm( |
501 ".global FastConvertYUVToRGB32Row\n" | 270 ".global FastConvertYUVToRGB32Row\n" |
502 "FastConvertYUVToRGB32Row:\n" | 271 "FastConvertYUVToRGB32Row:\n" |
503 "pusha\n" | 272 "pusha\n" |
504 "mov 0x24(%esp),%edx\n" | 273 "mov 0x24(%esp),%edx\n" |
505 "mov 0x28(%esp),%edi\n" | 274 "mov 0x28(%esp),%edi\n" |
506 "mov 0x2c(%esp),%esi\n" | 275 "mov 0x2c(%esp),%esi\n" |
507 "mov 0x30(%esp),%ebp\n" | 276 "mov 0x30(%esp),%ebp\n" |
508 "mov 0x34(%esp),%ecx\n" | 277 "mov 0x34(%esp),%ecx\n" |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
547 "convertdone:" | 316 "convertdone:" |
548 "popa\n" | 317 "popa\n" |
549 "ret\n" | 318 "ret\n" |
550 ); | 319 ); |
551 | 320 |
552 | 321 |
553 void ScaleYUVToRGB32Row(const uint8* y_buf, | 322 void ScaleYUVToRGB32Row(const uint8* y_buf, |
554 const uint8* u_buf, | 323 const uint8* u_buf, |
555 const uint8* v_buf, | 324 const uint8* v_buf, |
556 uint8* rgb_buf, | 325 uint8* rgb_buf, |
557 int width, | 326 int source_width, |
558 int scaled_dx); | 327 int source_dx); |
559 | 328 |
560 asm( | 329 asm( |
561 ".global ScaleYUVToRGB32Row\n" | 330 ".global ScaleYUVToRGB32Row\n" |
562 "ScaleYUVToRGB32Row:\n" | 331 "ScaleYUVToRGB32Row:\n" |
563 "pusha\n" | 332 "pusha\n" |
564 "mov 0x24(%esp),%edx\n" | 333 "mov 0x24(%esp),%edx\n" |
565 "mov 0x28(%esp),%edi\n" | 334 "mov 0x28(%esp),%edi\n" |
566 "mov 0x2c(%esp),%esi\n" | 335 "mov 0x2c(%esp),%esi\n" |
567 "mov 0x30(%esp),%ebp\n" | 336 "mov 0x30(%esp),%ebp\n" |
568 "mov 0x34(%esp),%ecx\n" | 337 "mov 0x34(%esp),%ecx\n" |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
621 | 390 |
622 "scaledone:" | 391 "scaledone:" |
623 "popa\n" | 392 "popa\n" |
624 "ret\n" | 393 "ret\n" |
625 ); | 394 ); |
626 | 395 |
627 void LinearScaleYUVToRGB32Row(const uint8* y_buf, | 396 void LinearScaleYUVToRGB32Row(const uint8* y_buf, |
628 const uint8* u_buf, | 397 const uint8* u_buf, |
629 const uint8* v_buf, | 398 const uint8* v_buf, |
630 uint8* rgb_buf, | 399 uint8* rgb_buf, |
631 int width, | 400 int source_width, |
632 int scaled_dx); | 401 int source_dx); |
633 | 402 |
634 asm( | 403 asm( |
635 ".global LinearScaleYUVToRGB32Row\n" | 404 ".global LinearScaleYUVToRGB32Row\n" |
636 "LinearScaleYUVToRGB32Row:\n" | 405 "LinearScaleYUVToRGB32Row:\n" |
637 "pusha\n" | 406 "pusha\n" |
638 "mov 0x24(%esp),%edx\n" | 407 "mov 0x24(%esp),%edx\n" |
639 "mov 0x28(%esp),%edi\n" | 408 "mov 0x28(%esp),%edi\n" |
640 "mov 0x30(%esp),%ebp\n" | 409 "mov 0x30(%esp),%ebp\n" |
641 "xor %ebx,%ebx\n" | 410 "xor %ebx,%ebx\n" |
642 | 411 |
643 // width = width * scaled_dx + ebx | 412 // source_width = source_width * source_dx + ebx |
644 "mov 0x34(%esp), %ecx\n" | 413 "mov 0x34(%esp), %ecx\n" |
645 "imull 0x38(%esp), %ecx\n" | 414 "imull 0x38(%esp), %ecx\n" |
646 "addl %ebx, %ecx\n" | 415 "addl %ebx, %ecx\n" |
647 "mov %ecx, 0x34(%esp)\n" | 416 "mov %ecx, 0x34(%esp)\n" |
648 | 417 |
649 "jmp .lscaleend\n" | 418 "jmp .lscaleend\n" |
650 | 419 |
651 ".lscaleloop:" | 420 ".lscaleloop:" |
652 "mov %ebx,%eax\n" | 421 "mov %ebx,%eax\n" |
653 "sar $0x11,%eax\n" | 422 "sar $0x11,%eax\n" |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
725 | 494 |
726 ".lscalelastpixel:" | 495 ".lscalelastpixel:" |
727 "paddsw %mm0, %mm1\n" | 496 "paddsw %mm0, %mm1\n" |
728 "psraw $6, %mm1\n" | 497 "psraw $6, %mm1\n" |
729 "packuswb %mm1, %mm1\n" | 498 "packuswb %mm1, %mm1\n" |
730 "movd %mm1, (%ebp)\n" | 499 "movd %mm1, (%ebp)\n" |
731 "popa\n" | 500 "popa\n" |
732 "ret\n" | 501 "ret\n" |
733 ); | 502 ); |
734 | 503 |
735 #else // __PIC__ | 504 #elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__) |
736 | 505 |
737 extern void PICConvertYUVToRGB32Row(const uint8* y_buf, | 506 extern void PICConvertYUVToRGB32Row(const uint8* y_buf, |
738 const uint8* u_buf, | 507 const uint8* u_buf, |
739 const uint8* v_buf, | 508 const uint8* v_buf, |
740 uint8* rgb_buf, | 509 uint8* rgb_buf, |
741 int width, | 510 int source_width, |
742 int16 *kCoefficientsRgbY); | 511 int16 *kCoefficientsRgbY); |
743 __asm__( | 512 __asm__( |
744 "_PICConvertYUVToRGB32Row:\n" | 513 "_PICConvertYUVToRGB32Row:\n" |
745 "pusha\n" | 514 "pusha\n" |
746 "mov 0x24(%esp),%edx\n" | 515 "mov 0x24(%esp),%edx\n" |
747 "mov 0x28(%esp),%edi\n" | 516 "mov 0x28(%esp),%edi\n" |
748 "mov 0x2c(%esp),%esi\n" | 517 "mov 0x2c(%esp),%esi\n" |
749 "mov 0x30(%esp),%ebp\n" | 518 "mov 0x30(%esp),%ebp\n" |
750 "mov 0x38(%esp),%ecx\n" | 519 "mov 0x38(%esp),%ecx\n" |
751 | 520 |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
789 "movd %mm1,0x0(%ebp)\n" | 558 "movd %mm1,0x0(%ebp)\n" |
790 ".Lconvertdone:\n" | 559 ".Lconvertdone:\n" |
791 "popa\n" | 560 "popa\n" |
792 "ret\n" | 561 "ret\n" |
793 ); | 562 ); |
794 | 563 |
795 void FastConvertYUVToRGB32Row(const uint8* y_buf, | 564 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
796 const uint8* u_buf, | 565 const uint8* u_buf, |
797 const uint8* v_buf, | 566 const uint8* v_buf, |
798 uint8* rgb_buf, | 567 uint8* rgb_buf, |
799 int width) { | 568 int source_width) { |
800 PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, | 569 PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, source_width, |
801 &kCoefficientsRgbY[0][0]); | 570 &kCoefficientsRgbY[0][0]); |
802 } | 571 } |
803 | 572 |
804 extern void PICScaleYUVToRGB32Row(const uint8* y_buf, | 573 extern void PICScaleYUVToRGB32Row(const uint8* y_buf, |
805 const uint8* u_buf, | 574 const uint8* u_buf, |
806 const uint8* v_buf, | 575 const uint8* v_buf, |
807 uint8* rgb_buf, | 576 uint8* rgb_buf, |
808 int width, | 577 int source_width, |
809 int scaled_dx, | 578 int source_dx, |
810 int16 *kCoefficientsRgbY); | 579 int16 *kCoefficientsRgbY); |
811 | 580 |
812 __asm__( | 581 __asm__( |
813 "_PICScaleYUVToRGB32Row:\n" | 582 "_PICScaleYUVToRGB32Row:\n" |
814 "pusha\n" | 583 "pusha\n" |
815 "mov 0x24(%esp),%edx\n" | 584 "mov 0x24(%esp),%edx\n" |
816 "mov 0x28(%esp),%edi\n" | 585 "mov 0x28(%esp),%edi\n" |
817 "mov 0x2c(%esp),%esi\n" | 586 "mov 0x2c(%esp),%esi\n" |
818 "mov 0x30(%esp),%ebp\n" | 587 "mov 0x30(%esp),%ebp\n" |
819 "mov 0x3c(%esp),%ecx\n" | 588 "mov 0x3c(%esp),%ecx\n" |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
873 "Lscaledone:" | 642 "Lscaledone:" |
874 "popa\n" | 643 "popa\n" |
875 "ret\n" | 644 "ret\n" |
876 ); | 645 ); |
877 | 646 |
878 | 647 |
879 void ScaleYUVToRGB32Row(const uint8* y_buf, | 648 void ScaleYUVToRGB32Row(const uint8* y_buf, |
880 const uint8* u_buf, | 649 const uint8* u_buf, |
881 const uint8* v_buf, | 650 const uint8* v_buf, |
882 uint8* rgb_buf, | 651 uint8* rgb_buf, |
883 int width, | 652 int source_width, |
884 int scaled_dx) { | 653 int source_dx) { |
885 PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, | 654 PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, source_width, source_dx, |
886 &kCoefficientsRgbY[0][0]); | 655 &kCoefficientsRgbY[0][0]); |
887 } | 656 } |
888 | 657 |
889 void PICLinearScaleYUVToRGB32Row(const uint8* y_buf, | 658 void PICLinearScaleYUVToRGB32Row(const uint8* y_buf, |
890 const uint8* u_buf, | 659 const uint8* u_buf, |
891 const uint8* v_buf, | 660 const uint8* v_buf, |
892 uint8* rgb_buf, | 661 uint8* rgb_buf, |
893 int width, | 662 int source_width, |
894 int scaled_dx, | 663 int source_dx, |
895 int16 *kCoefficientsRgbY); | 664 int16 *kCoefficientsRgbY); |
896 | 665 |
897 asm( | 666 asm( |
898 "_PICLinearScaleYUVToRGB32Row:\n" | 667 "_PICLinearScaleYUVToRGB32Row:\n" |
899 "pusha\n" | 668 "pusha\n" |
900 "mov 0x24(%esp),%edx\n" | 669 "mov 0x24(%esp),%edx\n" |
901 "mov 0x30(%esp),%ebp\n" | 670 "mov 0x30(%esp),%ebp\n" |
902 "mov 0x34(%esp),%ecx\n" | 671 "mov 0x34(%esp),%ecx\n" |
903 "mov 0x3c(%esp),%edi\n" | 672 "mov 0x3c(%esp),%edi\n" |
904 "xor %ebx,%ebx\n" | 673 "xor %ebx,%ebx\n" |
905 | 674 |
906 // width = width * scaled_dx + ebx | 675 // source_width = source_width * source_dx + ebx |
907 "mov 0x34(%esp), %ecx\n" | 676 "mov 0x34(%esp), %ecx\n" |
908 "imull 0x38(%esp), %ecx\n" | 677 "imull 0x38(%esp), %ecx\n" |
909 "addl %ebx, %ecx\n" | 678 "addl %ebx, %ecx\n" |
910 "mov %ecx, 0x34(%esp)\n" | 679 "mov %ecx, 0x34(%esp)\n" |
911 | 680 |
912 "jmp .lscaleend\n" | 681 "jmp .lscaleend\n" |
913 | 682 |
914 ".lscaleloop:" | 683 ".lscaleloop:" |
915 "mov 0x28(%esp),%esi\n" | 684 "mov 0x28(%esp),%esi\n" |
916 "mov %ebx,%eax\n" | 685 "mov %ebx,%eax\n" |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
993 "packuswb %mm1, %mm1\n" | 762 "packuswb %mm1, %mm1\n" |
994 "movd %mm1, (%ebp)\n" | 763 "movd %mm1, (%ebp)\n" |
995 "popa\n" | 764 "popa\n" |
996 "ret\n" | 765 "ret\n" |
997 ); | 766 ); |
998 | 767 |
999 void LinearScaleYUVToRGB32Row(const uint8* y_buf, | 768 void LinearScaleYUVToRGB32Row(const uint8* y_buf, |
1000 const uint8* u_buf, | 769 const uint8* u_buf, |
1001 const uint8* v_buf, | 770 const uint8* v_buf, |
1002 uint8* rgb_buf, | 771 uint8* rgb_buf, |
1003 int width, | 772 int source_width, |
1004 int scaled_dx) { | 773 int source_dx) { |
1005 PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, | 774 PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, source_width, source
_dx, |
1006 &kCoefficientsRgbY[0][0]); | 775 &kCoefficientsRgbY[0][0]); |
1007 } | 776 } |
1008 | 777 |
1009 #endif // !__PIC__ | 778 #else // Use C code instead of MMX/SSE2. |
1010 | 779 |
1011 #endif // !AMD64 | 780 // C reference code that mimics the YUV assembly. |
1012 | 781 #define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) |
1013 #else // USE_MMX | 782 #define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \\ |
1014 | 783 (((x) + (y)) > 32767 ? 32767 : ((x) + (y)))) |
1015 // Reference version of YUV converter. | |
1016 static const int kClipTableSize = 256; | |
1017 static const int kClipOverflow = 288; // Cb max is 535. | |
1018 | |
1019 static uint8 kRgbClipTable[kClipOverflow + | |
1020 kClipTableSize + | |
1021 kClipOverflow] = { | |
1022 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 288 underflow values | |
1023 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // clipped to 0. | |
1024 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1025 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1026 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1027 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1028 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1029 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1030 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1031 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1032 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1033 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1034 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1035 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1036 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1037 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1038 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1039 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1040 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1041 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1042 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1043 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1044 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1045 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1046 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1047 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1048 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1049 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1050 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1051 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1052 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1053 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1054 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1055 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1056 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1057 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
1058 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // Unclipped values. | |
1059 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, | |
1060 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | |
1061 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, | |
1062 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, | |
1063 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, | |
1064 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, | |
1065 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, | |
1066 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, | |
1067 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, | |
1068 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, | |
1069 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, | |
1070 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | |
1071 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, | |
1072 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | |
1073 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, | |
1074 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, | |
1075 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, | |
1076 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, | |
1077 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, | |
1078 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, | |
1079 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, | |
1080 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, | |
1081 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, | |
1082 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, | |
1083 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, | |
1084 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, | |
1085 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | |
1086 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, | |
1087 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, | |
1088 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, | |
1089 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, | |
1090 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 288 overflow values | |
1091 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // clipped to 255. | |
1092 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1093 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1094 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1095 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1096 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1097 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1098 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1099 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1100 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1101 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1102 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1103 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1104 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1105 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1106 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1107 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1108 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1109 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1110 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1111 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1112 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1113 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1114 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1115 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1116 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1117 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1118 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1119 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1120 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1121 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1122 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1123 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1124 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1125 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
1126 }; | |
1127 | |
// Review note (OLD column only; the NEW column is empty on these rows):
// clip() shifts its argument right by 8, adds kClipOverflow, and uses the
// result as an index into kRgbClipTable, returning that entry widened to
// uint32. The two DCHECKs assert that the index is >= 0 and below
// kClipOverflow + kClipTableSize + kClipOverflow; DCHECK is defined to expand
// to nothing at the top of this file unless _DEBUG is defined.
// NOTE(review): the table rows above are annotated "288 overflow values",
// which does not match the 128 stated in the comment below - confirm against
// kClipOverflow, whose definition is not visible here.
1128 // Clip an rgb channel value to 0..255 range. | |
1129 // Source is signed fixed point 8.8. | |
1130 // Table allows for values to underflow or overflow by 128. | |
1131 // Therefore source range is -128 to 384. | |
1132 // Output clips to unsigned 0 to 255. | |
1133 static inline uint32 clip(int32 value) { | |
1134 DCHECK(((value >> 8) + kClipOverflow) >= 0); | |
1135 DCHECK(((value >> 8) + kClipOverflow) < | |
1136 (kClipOverflow + kClipTableSize + kClipOverflow)); | |
1137 return static_cast<uint32>(kRgbClipTable[((value) >> 8) + kClipOverflow]); | |
1138 } | |
1139 | 784 |
// Review note: converts one (y, u, v) sample into a single 32-bit value that
// is stored through rgb_buf via a uint32 pointer cast.
// OLD column: subtracts 128 from u and v (d, e), forms the three chroma terms
// 516*d + 128, -100*d - 208*e + 128 and 409*e + 128, adds each to the luma
// term (y - 16) * 298 + 128, and packs clip() of the three sums into bits
// 0-7, 8-15 and 16-23 with 0xff000000 OR-ed into the top byte.
// NEW column: reads the four entries of kCoefficientsRgbY rows 256+u, 512+v
// and y, combines them per channel with paddsw(), shifts each sum right by 6,
// and packs packuswb() of the four results into bits 0-7, 8-15, 16-23, 24-31.
// paddsw() and packuswb() are defined outside this view; presumably a
// saturating 16-bit add and a clamp to 0..255, like the x86 instructions of
// the same names - TODO confirm.
1140 static inline void YuvPixel(uint8 y, | 785 static inline void YuvPixel(uint8 y, |
1141 uint8 u, | 786 uint8 u, |
1142 uint8 v, | 787 uint8 v, |
1143 uint8* rgb_buf) { | 788 uint8* rgb_buf) { |
1144 int32 d = static_cast<int32>(u) - 128; | |
1145 int32 e = static_cast<int32>(v) - 128; | |
1146 | 789 |
1147 int32 cb = (516 * d + 128); | 790 int b = kCoefficientsRgbY[256+u][0]; |
1148 int32 cg = (- 100 * d - 208 * e + 128); | 791 int g = kCoefficientsRgbY[256+u][1]; |
1149 int32 cr = (409 * e + 128); | 792 int r = kCoefficientsRgbY[256+u][2]; |
| 793 int a = kCoefficientsRgbY[256+u][3]; |
1150 | 794 |
1151 int32 C298a = ((static_cast<int32>(y) - 16) * 298 + 128); | 795 b = paddsw(b, kCoefficientsRgbY[512+v][0]); |
1152 *reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) | | 796 g = paddsw(g, kCoefficientsRgbY[512+v][1]); |
1153 (clip(C298a + cg) << 8) | | 797 r = paddsw(r, kCoefficientsRgbY[512+v][2]); |
1154 (clip(C298a + cr) << 16) | | 798 a = paddsw(a, kCoefficientsRgbY[512+v][3]); |
1155 (0xff000000); | 799 |
| 800 b = paddsw(b, kCoefficientsRgbY[y][0]); |
| 801 g = paddsw(g, kCoefficientsRgbY[y][1]); |
| 802 r = paddsw(r, kCoefficientsRgbY[y][2]); |
| 803 a = paddsw(a, kCoefficientsRgbY[y][3]); |
| 804 |
| 805 b >>= 6; |
| 806 g >>= 6; |
| 807 r >>= 6; |
| 808 a >>= 6; |
| 809 |
| 810 *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) | |
| 811 (packuswb(g) << 8) | |
| 812 (packuswb(r) << 16) | |
| 813 (packuswb(a) << 24); |
1156 } | 814 } |
1157 | 815 |
// Review note (NEW column only): inline-assembly YuvPixel, compiled only when
// TEST_MMX_YUV is non-zero. It loads the 8 bytes at
// kCoefficientsRgbY + 2048 + 8*u into mm0, adds (paddsw) the 8 bytes at
// kCoefficientsRgbY + 4096 + 8*v, loads the 8 bytes at
// kCoefficientsRgbY + 8*y into mm1 and adds mm0 to it, shifts the words right
// arithmetically by 6 (psraw), packs them to bytes (packuswb), stores the low
// 4 bytes through rgb_buf (movd, with the pointer held in eax) and then
// executes emms.
// NOTE(review): this has the same name and parameter list as the YuvPixel
// defined just above; if both are compiled in one translation unit that is a
// redefinition - confirm how TEST_MMX_YUV is meant to be used.
| 816 #if TEST_MMX_YUV |
| 817 static inline void YuvPixel(uint8 y, |
| 818 uint8 u, |
| 819 uint8 v, |
| 820 uint8* rgb_buf) { |
| 821 |
| 822 __asm { |
| 823 movzx eax, u |
| 824 movq mm0, [kCoefficientsRgbY+2048 + 8 * eax] |
| 825 movzx eax, v |
| 826 paddsw mm0, [kCoefficientsRgbY+4096 + 8 * eax] |
| 827 movzx eax, y |
| 828 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 829 paddsw mm1, mm0 |
| 830 psraw mm1, 6 |
| 831 packuswb mm1, mm1 |
| 832 mov eax, rgb_buf |
| 833 movd [eax], mm1 |
| 834 emms |
| 835 } |
| 836 } |
| 837 #endif |
| 838 |
// Review note: the OLD and NEW columns differ only in the name of the last
// parameter (width -> source_width). Each loop iteration reads one u and one v
// sample at index x >> 1 and passes them to YuvPixel for the pixel at x and,
// when x + 1 is still below the width, for the pixel at x + 1 (written 4 bytes
// further on). rgb_buf advances by 8 bytes per iteration whether or not the
// second pixel was written.
1158 void FastConvertYUVToRGB32Row(const uint8* y_buf, | 839 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
1159 const uint8* u_buf, | 840 const uint8* u_buf, |
1160 const uint8* v_buf, | 841 const uint8* v_buf, |
1161 uint8* rgb_buf, | 842 uint8* rgb_buf, |
1162 int width) { | 843 int source_width) { |
1163 for (int x = 0; x < width; x += 2) { | 844 for (int x = 0; x < source_width; x += 2) { |
1164 uint8 u = u_buf[x >> 1]; | 845 uint8 u = u_buf[x >> 1]; |
1165 uint8 v = v_buf[x >> 1]; | 846 uint8 v = v_buf[x >> 1]; |
1166 uint8 y0 = y_buf[x]; | 847 uint8 y0 = y_buf[x]; |
1167 YuvPixel(y0, u, v, rgb_buf); | 848 YuvPixel(y0, u, v, rgb_buf); |
1168 if ((x + 1) < width) { | 849 if ((x + 1) < source_width) { |
1169 uint8 y1 = y_buf[x + 1]; | 850 uint8 y1 = y_buf[x + 1]; |
1170 YuvPixel(y1, u, v, rgb_buf + 4); | 851 YuvPixel(y1, u, v, rgb_buf + 4); |
1171 } | 852 } |
1172 rgb_buf += 8; // Advance 2 pixels. | 853 rgb_buf += 8; // Advance 2 pixels. |
1173 } | 854 } |
1174 } | 855 } |
1175 | 856 |
1176 // 16.16 fixed point is used. A shift by 16 isolates the integer. | 857 // 16.16 fixed point is used. A shift by 16 isolates the integer. |
1177 // A shift by 17 is used to further subsample the chrominance channels. | 858 // A shift by 17 is used to further subsample the chrominance channels. |
1178 // & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, | 859 // & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, |
1179 // for 1/65536 pixel accurate interpolation. | 860 // for 1/65536 pixel accurate interpolation. |
// Review note: the last two parameters are renamed between the columns
// (width -> source_width, scaled_dx -> dx).
// OLD column: one output pixel per iteration. y is read at scaled_x >> 16 and
// u, v at scaled_x >> 17; the pixel is written with YuvPixel, rgb_buf advances
// by 4 bytes and scaled_x by scaled_dx.
// NEW column: up to two output pixels per iteration. y is read at x >> 16 and
// u, v at x >> 17 for the first pixel, then x advances by dx. If i + 1 is
// still below source_width, a second y is read at the new x >> 16 and
// converted with the same u and v into rgb_buf + 4, and x advances by dx
// again. rgb_buf advances by 8 bytes per iteration.
1180 void ScaleYUVToRGB32Row(const uint8* y_buf, | 862 void ScaleYUVToRGB32Row(const uint8* y_buf, |
1181 const uint8* u_buf, | 863 const uint8* u_buf, |
1182 const uint8* v_buf, | 864 const uint8* v_buf, |
1183 uint8* rgb_buf, | 865 uint8* rgb_buf, |
1184 int width, | 866 int source_width, |
1185 int scaled_dx) { | 867 int dx) { |
1186 int scaled_x = 0; | 868 int x = 0; |
1187 for (int x = 0; x < width; ++x) { | 869 for (int i = 0; i < source_width; i += 2) { |
1188 uint8 u = u_buf[scaled_x >> 17]; | 870 int y = y_buf[x >> 16]; |
1189 uint8 v = v_buf[scaled_x >> 17]; | 871 int u = u_buf[(x >> 17)]; |
1190 uint8 y0 = y_buf[scaled_x >> 16]; | 872 int v = v_buf[(x >> 17)]; |
1191 YuvPixel(y0, u, v, rgb_buf); | 873 YuvPixel(y, u, v, rgb_buf); |
1192 rgb_buf += 4; | 874 x += dx; |
1193 scaled_x += scaled_dx; | 875 if ((i + 1) < source_width) { |
| 876 y = y_buf[x >> 16]; |
| 877 YuvPixel(y, u, v, rgb_buf+4); |
| 878 x += dx; |
| 879 } |
| 880 rgb_buf += 8; |
1194 } | 881 } |
1195 } | 882 } |
1196 | 882 |
// Review note: both columns blend each sample with its right-hand neighbour
// (index + 1), using a fraction f as the neighbour's weight and f ^ 65535 as
// the sample's own weight, then shift the sum right by 16.
// OLD column: x runs from 0 while x < width * dx in steps of dx, one output
// pixel per iteration; y, u and v all use x & 65535 as the fraction; rgb_buf
// advances by 4 bytes.
// NEW column: i counts output pixels two at a time and x advances by dx after
// each pixel. y uses x & 65535 as its fraction, while u and v use
// (x >> 1) & 65535. The second pixel of a pair (only when i + 1 is below
// source_width) re-interpolates y at the new x and reuses the u and v computed
// for the first pixel. rgb_buf advances by 8 bytes per iteration.
// NOTE(review): the + 1 neighbour reads can touch one element beyond the last
// sampled index - confirm callers provide readable data there.
1197 void LinearScaleYUVToRGB32Row(const uint8* y_buf, | 884 void LinearScaleYUVToRGB32Row(const uint8* y_buf, |
1198 const uint8* u_buf, | 885 const uint8* u_buf, |
1199 const uint8* v_buf, | 886 const uint8* v_buf, |
1200 uint8* rgb_buf, | 887 uint8* rgb_buf, |
1201 int width, | 888 int source_width, |
1202 int dx) { | 889 int dx) { |
1203 for (int x = 0; x < width * dx; x += dx) { | 890 int x = 0; |
| 891 for (int i = 0; i < source_width; i += 2) { |
1204 int y0 = y_buf[x >> 16]; | 892 int y0 = y_buf[x >> 16]; |
1205 int y1 = y_buf[(x >> 16) + 1]; | 893 int y1 = y_buf[(x >> 16) + 1]; |
1206 int u0 = u_buf[(x >> 17)]; | 894 int u0 = u_buf[(x >> 17)]; |
1207 int u1 = u_buf[(x >> 17) + 1]; | 895 int u1 = u_buf[(x >> 17) + 1]; |
1208 int v0 = v_buf[(x >> 17)]; | 896 int v0 = v_buf[(x >> 17)]; |
1209 int v1 = v_buf[(x >> 17) + 1]; | 897 int v1 = v_buf[(x >> 17) + 1]; |
1210 int y = ((x & 65535) * y1 + ((x & 65535) ^ 65535) * y0) >> 16; | 898 int y_frac = (x & 65535); |
1211 int u = ((x & 65535) * u1 + ((x & 65535) ^ 65535) * u0) >> 16; | 899 int uv_frac = ((x >> 1) & 65535); |
1212 int v = ((x & 65535) * v1 + ((x & 65535) ^ 65535) * v0) >> 16; | 900 int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; |
| 901 int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16; |
| 902 int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16; |
1213 YuvPixel(y, u, v, rgb_buf); | 903 YuvPixel(y, u, v, rgb_buf); |
1214 rgb_buf += 4; | 904 x += dx; |
| 905 if ((i + 1) < source_width) { |
| 906 y0 = y_buf[x >> 16]; |
| 907 y1 = y_buf[(x >> 16) + 1]; |
| 908 y_frac = (x & 65535); |
| 909 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; |
| 910 YuvPixel(y, u, v, rgb_buf+4); |
| 911 x += dx; |
| 912 } |
| 913 rgb_buf += 8; |
1215 } | 914 } |
1216 } | 915 } |
1217 | 915 |
1218 #endif // USE_MMX | 916 #endif // USE_MMX |
1219 } // extern "C" | 917 } // extern "C" |
| 918 |
OLD | NEW |