Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(388)

Side by Side Diff: media/base/yuv_row_posix.cc

Issue 1638008: yuv use a shared table for windows, posix and C... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 10 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « media/base/yuv_row.h ('k') | media/base/yuv_row_table.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/base/yuv_row.h" 5 #include "media/base/yuv_row.h"
6 6
7 #ifdef _DEBUG 7 #ifdef _DEBUG
8 #include "base/logging.h" 8 #include "base/logging.h"
9 #else 9 #else
10 #define DCHECK(a) 10 #define DCHECK(a)
11 #endif 11 #endif
12 12
13 extern "C" { 13 extern "C" {
14 #if USE_MMX
15 14
16 #define RGBY(i) { \ 15 #if USE_SSE2 && defined(ARCH_CPU_X86_64)
17 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
18 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
19 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
20 0 \
21 }
22
23 #define RGBU(i) { \
24 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
25 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
26 0, \
27 static_cast<int16>(256 * 64 - 1) \
28 }
29
30 #define RGBV(i) { \
31 0, \
32 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
33 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
34 0 \
35 }
36
37 #define MMX_ALIGNED(var) var __attribute__((aligned(16)))
38
39
40 MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = {
41 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
42 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
43 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
44 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
45 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
46 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
47 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
48 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
49 RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
50 RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
51 RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
52 RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
53 RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
54 RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
55 RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
56 RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
57 RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
58 RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
59 RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
60 RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
61 RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
62 RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
63 RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
64 RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
65 RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
66 RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
67 RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
68 RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
69 RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
70 RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
71 RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
72 RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
73 RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
74 RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
75 RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
76 RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
77 RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
78 RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
79 RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
80 RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
81 RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
82 RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
83 RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
84 RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
85 RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
86 RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
87 RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
88 RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
89 RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
90 RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
91 RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
92 RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
93 RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
94 RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
95 RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
96 RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
97 RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
98 RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
99 RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
100 RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
101 RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
102 RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
103 RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
104 RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
105
106 // Chroma U table.
107 RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
108 RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
109 RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
110 RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
111 RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
112 RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
113 RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
114 RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
115 RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
116 RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
117 RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
118 RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
119 RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
120 RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
121 RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
122 RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
123 RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
124 RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
125 RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
126 RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
127 RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
128 RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
129 RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
130 RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
131 RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
132 RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
133 RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
134 RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
135 RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
136 RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
137 RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
138 RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
139 RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
140 RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
141 RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
142 RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
143 RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
144 RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
145 RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
146 RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
147 RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
148 RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
149 RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
150 RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
151 RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
152 RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
153 RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
154 RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
155 RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
156 RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
157 RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
158 RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
159 RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
160 RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
161 RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
162 RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
163 RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
164 RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
165 RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
166 RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
167 RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
168 RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
169 RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
170 RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
171
172 // Chroma V table.
173 RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
174 RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
175 RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
176 RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
177 RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
178 RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
179 RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
180 RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
181 RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
182 RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
183 RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
184 RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
185 RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
186 RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
187 RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
188 RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
189 RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
190 RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
191 RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
192 RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
193 RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
194 RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
195 RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
196 RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
197 RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
198 RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
199 RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
200 RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
201 RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
202 RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
203 RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
204 RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
205 RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
206 RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
207 RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
208 RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
209 RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
210 RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
211 RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
212 RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
213 RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
214 RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
215 RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
216 RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
217 RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
218 RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
219 RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
220 RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
221 RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
222 RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
223 RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
224 RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
225 RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
226 RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
227 RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
228 RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
229 RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
230 RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
231 RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
232 RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
233 RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
234 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
235 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
236 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
237 };
238
239 #undef RGBY
240 #undef RGBU
241 #undef RGBV
242 #undef MMX_ALIGNED
243
244 #if defined(ARCH_CPU_X86_64)
245 16
246 // AMD64 ABI uses register parameters. 17 // AMD64 ABI uses register parameters.
247 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi 18 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
248 const uint8* u_buf, // rsi 19 const uint8* u_buf, // rsi
249 const uint8* v_buf, // rdx 20 const uint8* v_buf, // rdx
250 uint8* rgb_buf, // rcx 21 uint8* rgb_buf, // rcx
251 int width) { // r8 22 int source_width) { // r8
252 asm( 23 asm(
253 "jmp convertend\n" 24 "jmp convertend\n"
254 "convertloop:" 25 "convertloop:"
255 "movzb (%1),%%r10\n" 26 "movzb (%1),%%r10\n"
256 "add $0x1,%1\n" 27 "add $0x1,%1\n"
257 "movzb (%2),%%r11\n" 28 "movzb (%2),%%r11\n"
258 "add $0x1,%2\n" 29 "add $0x1,%2\n"
259 "movq 2048(%5,%%r10,8),%%xmm0\n" 30 "movq 2048(%5,%%r10,8),%%xmm0\n"
260 "movzb (%0),%%r10\n" 31 "movzb (%0),%%r10\n"
261 "movq 4096(%5,%%r11,8),%%xmm1\n" 32 "movq 4096(%5,%%r11,8),%%xmm1\n"
(...skipping 27 matching lines...) Expand all
289 "paddsw %%xmm0,%%xmm1\n" 60 "paddsw %%xmm0,%%xmm1\n"
290 "psraw $0x6,%%xmm1\n" 61 "psraw $0x6,%%xmm1\n"
291 "packuswb %%xmm1,%%xmm1\n" 62 "packuswb %%xmm1,%%xmm1\n"
292 "movd %%xmm1,0x0(%3)\n" 63 "movd %%xmm1,0x0(%3)\n"
293 "convertdone:" 64 "convertdone:"
294 : 65 :
295 : "r"(y_buf), // %0 66 : "r"(y_buf), // %0
296 "r"(u_buf), // %1 67 "r"(u_buf), // %1
297 "r"(v_buf), // %2 68 "r"(v_buf), // %2
298 "r"(rgb_buf), // %3 69 "r"(rgb_buf), // %3
299 "r"(width), // %4 70 "r"(source_width), // %4
300 "r" (kCoefficientsRgbY) // %5 71 "r" (kCoefficientsRgbY) // %5
301 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" 72 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
302 ); 73 );
303 } 74 }
304 75
305 void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi 76 void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
306 const uint8* u_buf, // rsi 77 const uint8* u_buf, // rsi
307 const uint8* v_buf, // rdx 78 const uint8* v_buf, // rdx
308 uint8* rgb_buf, // rcx 79 uint8* rgb_buf, // rcx
309 int width, // r8 80 int source_width, // r8
310 int scaled_dx) { // r9 81 int source_dx) { // r9
311 asm( 82 asm(
312 "xor %%r11,%%r11\n" 83 "xor %%r11,%%r11\n"
313 "sub $0x2,%4\n" 84 "sub $0x2,%4\n"
314 "js scalenext\n" 85 "js scalenext\n"
315 86
316 "scaleloop:" 87 "scaleloop:"
317 "mov %%r11,%%r10\n" 88 "mov %%r11,%%r10\n"
318 "sar $0x11,%%r10\n" 89 "sar $0x11,%%r10\n"
319 "movzb (%1,%%r10,1),%%rax\n" 90 "movzb (%1,%%r10,1),%%rax\n"
320 "movq 2048(%5,%%rax,8),%%xmm0\n" 91 "movq 2048(%5,%%rax,8),%%xmm0\n"
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
357 "psraw $0x6,%%xmm1\n" 128 "psraw $0x6,%%xmm1\n"
358 "packuswb %%xmm1,%%xmm1\n" 129 "packuswb %%xmm1,%%xmm1\n"
359 "movd %%xmm1,0x0(%3)\n" 130 "movd %%xmm1,0x0(%3)\n"
360 131
361 "scaledone:" 132 "scaledone:"
362 : 133 :
363 : "r"(y_buf), // %0 134 : "r"(y_buf), // %0
364 "r"(u_buf), // %1 135 "r"(u_buf), // %1
365 "r"(v_buf), // %2 136 "r"(v_buf), // %2
366 "r"(rgb_buf), // %3 137 "r"(rgb_buf), // %3
367 "r"(width), // %4 138 "r"(source_width), // %4
368 "r" (kCoefficientsRgbY), // %5 139 "r" (kCoefficientsRgbY), // %5
369 "r"(static_cast<long>(scaled_dx)) // %6 140 "r"(static_cast<long>(source_dx)) // %6
370 : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2" 141 : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
371 ); 142 );
372 } 143 }
373 144
374 void LinearScaleYUVToRGB32Row(const uint8* y_buf, 145 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
375 const uint8* u_buf, 146 const uint8* u_buf,
376 const uint8* v_buf, 147 const uint8* v_buf,
377 uint8* rgb_buf, 148 uint8* rgb_buf,
378 int width, 149 int source_width,
379 int scaled_dx) { 150 int source_dx) {
380 asm( 151 asm(
381 "xor %%r11,%%r11\n" 152 "xor %%r11,%%r11\n"
382 "sub $0x2,%4\n" 153 "sub $0x2,%4\n"
383 "js .lscalenext\n" 154 "js .lscalenext\n"
384 155
385 ".lscaleloop:" 156 ".lscaleloop:"
386 "mov %%r11,%%r10\n" 157 "mov %%r11,%%r10\n"
387 "sar $0x11,%%r10\n" 158 "sar $0x11,%%r10\n"
388 159
389 "movzb (%1, %%r10, 1), %%r13 \n" 160 "movzb (%1, %%r10, 1), %%r13 \n"
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
470 "psraw $0x6,%%xmm1\n" 241 "psraw $0x6,%%xmm1\n"
471 "packuswb %%xmm1,%%xmm1\n" 242 "packuswb %%xmm1,%%xmm1\n"
472 "movd %%xmm1,0x0(%3)\n" 243 "movd %%xmm1,0x0(%3)\n"
473 244
474 ".lscaledone:" 245 ".lscaledone:"
475 : 246 :
476 : "r"(y_buf), // %0 247 : "r"(y_buf), // %0
477 "r"(u_buf), // %1 248 "r"(u_buf), // %1
478 "r"(v_buf), // %2 249 "r"(v_buf), // %2
479 "r"(rgb_buf), // %3 250 "r"(rgb_buf), // %3
480 "r"(width), // %4 251 "r"(source_width), // %4
481 "r" (kCoefficientsRgbY), // %5 252 "r" (kCoefficientsRgbY), // %5
482 "r"(static_cast<long>(scaled_dx)) // %6 253 "r"(static_cast<long>(source_dx)) // %6
483 : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2" 254 : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
484 ); 255 );
485 } 256 }
486 257
487 #else // !AMD64 258 #elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__)
488 259
489 // PIC version is slower because fewer registers are available, so 260 // PIC version is slower because fewer registers are available, so
490 // non-PIC is used on platforms where it is possible. 261 // non-PIC is used on platforms where it is possible.
491 262
492 #if !defined(__PIC__)
493
494 void FastConvertYUVToRGB32Row(const uint8* y_buf, 263 void FastConvertYUVToRGB32Row(const uint8* y_buf,
495 const uint8* u_buf, 264 const uint8* u_buf,
496 const uint8* v_buf, 265 const uint8* v_buf,
497 uint8* rgb_buf, 266 uint8* rgb_buf,
498 int width); 267 int source_width);
499 268
500 asm( 269 asm(
501 ".global FastConvertYUVToRGB32Row\n" 270 ".global FastConvertYUVToRGB32Row\n"
502 "FastConvertYUVToRGB32Row:\n" 271 "FastConvertYUVToRGB32Row:\n"
503 "pusha\n" 272 "pusha\n"
504 "mov 0x24(%esp),%edx\n" 273 "mov 0x24(%esp),%edx\n"
505 "mov 0x28(%esp),%edi\n" 274 "mov 0x28(%esp),%edi\n"
506 "mov 0x2c(%esp),%esi\n" 275 "mov 0x2c(%esp),%esi\n"
507 "mov 0x30(%esp),%ebp\n" 276 "mov 0x30(%esp),%ebp\n"
508 "mov 0x34(%esp),%ecx\n" 277 "mov 0x34(%esp),%ecx\n"
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
547 "convertdone:" 316 "convertdone:"
548 "popa\n" 317 "popa\n"
549 "ret\n" 318 "ret\n"
550 ); 319 );
551 320
552 321
553 void ScaleYUVToRGB32Row(const uint8* y_buf, 322 void ScaleYUVToRGB32Row(const uint8* y_buf,
554 const uint8* u_buf, 323 const uint8* u_buf,
555 const uint8* v_buf, 324 const uint8* v_buf,
556 uint8* rgb_buf, 325 uint8* rgb_buf,
557 int width, 326 int source_width,
558 int scaled_dx); 327 int source_dx);
559 328
560 asm( 329 asm(
561 ".global ScaleYUVToRGB32Row\n" 330 ".global ScaleYUVToRGB32Row\n"
562 "ScaleYUVToRGB32Row:\n" 331 "ScaleYUVToRGB32Row:\n"
563 "pusha\n" 332 "pusha\n"
564 "mov 0x24(%esp),%edx\n" 333 "mov 0x24(%esp),%edx\n"
565 "mov 0x28(%esp),%edi\n" 334 "mov 0x28(%esp),%edi\n"
566 "mov 0x2c(%esp),%esi\n" 335 "mov 0x2c(%esp),%esi\n"
567 "mov 0x30(%esp),%ebp\n" 336 "mov 0x30(%esp),%ebp\n"
568 "mov 0x34(%esp),%ecx\n" 337 "mov 0x34(%esp),%ecx\n"
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
621 390
622 "scaledone:" 391 "scaledone:"
623 "popa\n" 392 "popa\n"
624 "ret\n" 393 "ret\n"
625 ); 394 );
626 395
627 void LinearScaleYUVToRGB32Row(const uint8* y_buf, 396 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
628 const uint8* u_buf, 397 const uint8* u_buf,
629 const uint8* v_buf, 398 const uint8* v_buf,
630 uint8* rgb_buf, 399 uint8* rgb_buf,
631 int width, 400 int source_width,
632 int scaled_dx); 401 int source_dx);
633 402
634 asm( 403 asm(
635 ".global LinearScaleYUVToRGB32Row\n" 404 ".global LinearScaleYUVToRGB32Row\n"
636 "LinearScaleYUVToRGB32Row:\n" 405 "LinearScaleYUVToRGB32Row:\n"
637 "pusha\n" 406 "pusha\n"
638 "mov 0x24(%esp),%edx\n" 407 "mov 0x24(%esp),%edx\n"
639 "mov 0x28(%esp),%edi\n" 408 "mov 0x28(%esp),%edi\n"
640 "mov 0x30(%esp),%ebp\n" 409 "mov 0x30(%esp),%ebp\n"
641 "xor %ebx,%ebx\n" 410 "xor %ebx,%ebx\n"
642 411
643 // width = width * scaled_dx + ebx 412 // source_width = source_width * source_dx + ebx
644 "mov 0x34(%esp), %ecx\n" 413 "mov 0x34(%esp), %ecx\n"
645 "imull 0x38(%esp), %ecx\n" 414 "imull 0x38(%esp), %ecx\n"
646 "addl %ebx, %ecx\n" 415 "addl %ebx, %ecx\n"
647 "mov %ecx, 0x34(%esp)\n" 416 "mov %ecx, 0x34(%esp)\n"
648 417
649 "jmp .lscaleend\n" 418 "jmp .lscaleend\n"
650 419
651 ".lscaleloop:" 420 ".lscaleloop:"
652 "mov %ebx,%eax\n" 421 "mov %ebx,%eax\n"
653 "sar $0x11,%eax\n" 422 "sar $0x11,%eax\n"
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
725 494
726 ".lscalelastpixel:" 495 ".lscalelastpixel:"
727 "paddsw %mm0, %mm1\n" 496 "paddsw %mm0, %mm1\n"
728 "psraw $6, %mm1\n" 497 "psraw $6, %mm1\n"
729 "packuswb %mm1, %mm1\n" 498 "packuswb %mm1, %mm1\n"
730 "movd %mm1, (%ebp)\n" 499 "movd %mm1, (%ebp)\n"
731 "popa\n" 500 "popa\n"
732 "ret\n" 501 "ret\n"
733 ); 502 );
734 503
735 #else // __PIC__ 504 #elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__)
736 505
737 extern void PICConvertYUVToRGB32Row(const uint8* y_buf, 506 extern void PICConvertYUVToRGB32Row(const uint8* y_buf,
738 const uint8* u_buf, 507 const uint8* u_buf,
739 const uint8* v_buf, 508 const uint8* v_buf,
740 uint8* rgb_buf, 509 uint8* rgb_buf,
741 int width, 510 int source_width,
742 int16 *kCoefficientsRgbY); 511 int16 *kCoefficientsRgbY);
743 __asm__( 512 __asm__(
744 "_PICConvertYUVToRGB32Row:\n" 513 "_PICConvertYUVToRGB32Row:\n"
745 "pusha\n" 514 "pusha\n"
746 "mov 0x24(%esp),%edx\n" 515 "mov 0x24(%esp),%edx\n"
747 "mov 0x28(%esp),%edi\n" 516 "mov 0x28(%esp),%edi\n"
748 "mov 0x2c(%esp),%esi\n" 517 "mov 0x2c(%esp),%esi\n"
749 "mov 0x30(%esp),%ebp\n" 518 "mov 0x30(%esp),%ebp\n"
750 "mov 0x38(%esp),%ecx\n" 519 "mov 0x38(%esp),%ecx\n"
751 520
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
789 "movd %mm1,0x0(%ebp)\n" 558 "movd %mm1,0x0(%ebp)\n"
790 ".Lconvertdone:\n" 559 ".Lconvertdone:\n"
791 "popa\n" 560 "popa\n"
792 "ret\n" 561 "ret\n"
793 ); 562 );
794 563
795 void FastConvertYUVToRGB32Row(const uint8* y_buf, 564 void FastConvertYUVToRGB32Row(const uint8* y_buf,
796 const uint8* u_buf, 565 const uint8* u_buf,
797 const uint8* v_buf, 566 const uint8* v_buf,
798 uint8* rgb_buf, 567 uint8* rgb_buf,
799 int width) { 568 int source_width) {
800 PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, 569 PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, source_width,
801 &kCoefficientsRgbY[0][0]); 570 &kCoefficientsRgbY[0][0]);
802 } 571 }
803 572
804 extern void PICScaleYUVToRGB32Row(const uint8* y_buf, 573 extern void PICScaleYUVToRGB32Row(const uint8* y_buf,
805 const uint8* u_buf, 574 const uint8* u_buf,
806 const uint8* v_buf, 575 const uint8* v_buf,
807 uint8* rgb_buf, 576 uint8* rgb_buf,
808 int width, 577 int source_width,
809 int scaled_dx, 578 int source_dx,
810 int16 *kCoefficientsRgbY); 579 int16 *kCoefficientsRgbY);
811 580
812 __asm__( 581 __asm__(
813 "_PICScaleYUVToRGB32Row:\n" 582 "_PICScaleYUVToRGB32Row:\n"
814 "pusha\n" 583 "pusha\n"
815 "mov 0x24(%esp),%edx\n" 584 "mov 0x24(%esp),%edx\n"
816 "mov 0x28(%esp),%edi\n" 585 "mov 0x28(%esp),%edi\n"
817 "mov 0x2c(%esp),%esi\n" 586 "mov 0x2c(%esp),%esi\n"
818 "mov 0x30(%esp),%ebp\n" 587 "mov 0x30(%esp),%ebp\n"
819 "mov 0x3c(%esp),%ecx\n" 588 "mov 0x3c(%esp),%ecx\n"
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
873 "Lscaledone:" 642 "Lscaledone:"
874 "popa\n" 643 "popa\n"
875 "ret\n" 644 "ret\n"
876 ); 645 );
877 646
878 647
879 void ScaleYUVToRGB32Row(const uint8* y_buf, 648 void ScaleYUVToRGB32Row(const uint8* y_buf,
880 const uint8* u_buf, 649 const uint8* u_buf,
881 const uint8* v_buf, 650 const uint8* v_buf,
882 uint8* rgb_buf, 651 uint8* rgb_buf,
883 int width, 652 int source_width,
884 int scaled_dx) { 653 int source_dx) {
885 PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 654 PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, source_width, source_dx,
886 &kCoefficientsRgbY[0][0]); 655 &kCoefficientsRgbY[0][0]);
887 } 656 }
888 657
889 void PICLinearScaleYUVToRGB32Row(const uint8* y_buf, 658 void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
890 const uint8* u_buf, 659 const uint8* u_buf,
891 const uint8* v_buf, 660 const uint8* v_buf,
892 uint8* rgb_buf, 661 uint8* rgb_buf,
893 int width, 662 int source_width,
894 int scaled_dx, 663 int source_dx,
895 int16 *kCoefficientsRgbY); 664 int16 *kCoefficientsRgbY);
896 665
897 asm( 666 asm(
898 "_PICLinearScaleYUVToRGB32Row:\n" 667 "_PICLinearScaleYUVToRGB32Row:\n"
899 "pusha\n" 668 "pusha\n"
900 "mov 0x24(%esp),%edx\n" 669 "mov 0x24(%esp),%edx\n"
901 "mov 0x30(%esp),%ebp\n" 670 "mov 0x30(%esp),%ebp\n"
902 "mov 0x34(%esp),%ecx\n" 671 "mov 0x34(%esp),%ecx\n"
903 "mov 0x3c(%esp),%edi\n" 672 "mov 0x3c(%esp),%edi\n"
904 "xor %ebx,%ebx\n" 673 "xor %ebx,%ebx\n"
905 674
906 // width = width * scaled_dx + ebx 675 // source_width = source_width * source_dx + ebx
907 "mov 0x34(%esp), %ecx\n" 676 "mov 0x34(%esp), %ecx\n"
908 "imull 0x38(%esp), %ecx\n" 677 "imull 0x38(%esp), %ecx\n"
909 "addl %ebx, %ecx\n" 678 "addl %ebx, %ecx\n"
910 "mov %ecx, 0x34(%esp)\n" 679 "mov %ecx, 0x34(%esp)\n"
911 680
912 "jmp .lscaleend\n" 681 "jmp .lscaleend\n"
913 682
914 ".lscaleloop:" 683 ".lscaleloop:"
915 "mov 0x28(%esp),%esi\n" 684 "mov 0x28(%esp),%esi\n"
916 "mov %ebx,%eax\n" 685 "mov %ebx,%eax\n"
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
993 "packuswb %mm1, %mm1\n" 762 "packuswb %mm1, %mm1\n"
994 "movd %mm1, (%ebp)\n" 763 "movd %mm1, (%ebp)\n"
995 "popa\n" 764 "popa\n"
996 "ret\n" 765 "ret\n"
997 ); 766 );
998 767
999 void LinearScaleYUVToRGB32Row(const uint8* y_buf, 768 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
1000 const uint8* u_buf, 769 const uint8* u_buf,
1001 const uint8* v_buf, 770 const uint8* v_buf,
1002 uint8* rgb_buf, 771 uint8* rgb_buf,
1003 int width, 772 int source_width,
1004 int scaled_dx) { 773 int source_dx) {
1005 PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 774 PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, source_width, source_dx,
1006 &kCoefficientsRgbY[0][0]); 775 &kCoefficientsRgbY[0][0]);
1007 } 776 }
1008 777
1009 #endif // !__PIC__ 778 #else // Use C code instead of MMX/SSE2.
1010 779
1011 #endif // !AMD64 780 // C reference code that mimics the YUV assembly.
1012 781 #define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
1013 #else // USE_MMX 782 #define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
1014 783 (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
1015 // Reference version of YUV converter.
1016 static const int kClipTableSize = 256;
1017 static const int kClipOverflow = 288; // Cb max is 535.
1018
1019 static uint8 kRgbClipTable[kClipOverflow +
1020 kClipTableSize +
1021 kClipOverflow] = {
1022 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 288 underflow values
1023 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // clipped to 0.
1024 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1025 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1026 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1027 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1028 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1029 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1030 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1031 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1032 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1033 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1034 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1035 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1036 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1037 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1038 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1039 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1040 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1041 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1042 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1043 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1044 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1045 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1046 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1047 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1048 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1049 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1050 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1051 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1052 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1053 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1054 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1055 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1056 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1057 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1058 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // Unclipped values.
1059 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
1060 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1061 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
1062 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
1063 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
1064 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
1065 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
1066 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
1067 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
1068 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
1069 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
1070 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
1071 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
1072 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
1073 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
1074 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
1075 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
1076 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
1077 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
1078 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
1079 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
1080 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
1081 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
1082 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
1083 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
1084 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
1085 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
1086 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
1087 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
1088 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
1089 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
1090 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 288 overflow values
1091 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // clipped to 255.
1092 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1093 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1094 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1095 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1096 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1097 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1098 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1099 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1100 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1101 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1102 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1103 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1104 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1105 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1106 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1107 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1108 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1109 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1110 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1111 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1112 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1113 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1114 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1115 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1116 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1117 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1118 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1119 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1120 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1121 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1122 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1123 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1124 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1125 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1126 };
1127
1128 // Clip an rgb channel value to 0..255 range.
1129 // Source is signed fixed point 8.8.
1130 // Table allows for values to underflow or overflow by 128.
1131 // Therefore source range is -128 to 384.
1132 // Output clips to unsigned 0 to 255.
1133 static inline uint32 clip(int32 value) {
1134 DCHECK(((value >> 8) + kClipOverflow) >= 0);
1135 DCHECK(((value >> 8) + kClipOverflow) <
1136 (kClipOverflow + kClipTableSize + kClipOverflow));
1137 return static_cast<uint32>(kRgbClipTable[((value) >> 8) + kClipOverflow]);
1138 }
1139 784
1140 static inline void YuvPixel(uint8 y, 785 static inline void YuvPixel(uint8 y,
1141 uint8 u, 786 uint8 u,
1142 uint8 v, 787 uint8 v,
1143 uint8* rgb_buf) { 788 uint8* rgb_buf) {
1144 int32 d = static_cast<int32>(u) - 128;
1145 int32 e = static_cast<int32>(v) - 128;
1146 789
1147 int32 cb = (516 * d + 128); 790 int b = kCoefficientsRgbY[256+u][0];
1148 int32 cg = (- 100 * d - 208 * e + 128); 791 int g = kCoefficientsRgbY[256+u][1];
1149 int32 cr = (409 * e + 128); 792 int r = kCoefficientsRgbY[256+u][2];
793 int a = kCoefficientsRgbY[256+u][3];
1150 794
1151 int32 C298a = ((static_cast<int32>(y) - 16) * 298 + 128); 795 b = paddsw(b, kCoefficientsRgbY[512+v][0]);
1152 *reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) | 796 g = paddsw(g, kCoefficientsRgbY[512+v][1]);
1153 (clip(C298a + cg) << 8) | 797 r = paddsw(r, kCoefficientsRgbY[512+v][2]);
1154 (clip(C298a + cr) << 16) | 798 a = paddsw(a, kCoefficientsRgbY[512+v][3]);
1155 (0xff000000); 799
800 b = paddsw(b, kCoefficientsRgbY[y][0]);
801 g = paddsw(g, kCoefficientsRgbY[y][1]);
802 r = paddsw(r, kCoefficientsRgbY[y][2]);
803 a = paddsw(a, kCoefficientsRgbY[y][3]);
804
805 b >>= 6;
806 g >>= 6;
807 r >>= 6;
808 a >>= 6;
809
810 *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
811 (packuswb(g) << 8) |
812 (packuswb(r) << 16) |
813 (packuswb(a) << 24);
1156 } 814 }
1157 815
816 #if TEST_MMX_YUV
817 static inline void YuvPixel(uint8 y,
818 uint8 u,
819 uint8 v,
820 uint8* rgb_buf) {
821
822 __asm {
823 movzx eax, u
824 movq mm0, [kCoefficientsRgbY+2048 + 8 * eax]
825 movzx eax, v
826 paddsw mm0, [kCoefficientsRgbY+4096 + 8 * eax]
827 movzx eax, y
828 movq mm1, [kCoefficientsRgbY + 8 * eax]
829 paddsw mm1, mm0
830 psraw mm1, 6
831 packuswb mm1, mm1
832 mov eax, rgb_buf
833 movd [eax], mm1
834 emms
835 }
836 }
837 #endif
838
1158 void FastConvertYUVToRGB32Row(const uint8* y_buf, 839 void FastConvertYUVToRGB32Row(const uint8* y_buf,
1159 const uint8* u_buf, 840 const uint8* u_buf,
1160 const uint8* v_buf, 841 const uint8* v_buf,
1161 uint8* rgb_buf, 842 uint8* rgb_buf,
1162 int width) { 843 int source_width) {
1163 for (int x = 0; x < width; x += 2) { 844 for (int x = 0; x < source_width; x += 2) {
1164 uint8 u = u_buf[x >> 1]; 845 uint8 u = u_buf[x >> 1];
1165 uint8 v = v_buf[x >> 1]; 846 uint8 v = v_buf[x >> 1];
1166 uint8 y0 = y_buf[x]; 847 uint8 y0 = y_buf[x];
1167 YuvPixel(y0, u, v, rgb_buf); 848 YuvPixel(y0, u, v, rgb_buf);
1168 if ((x + 1) < width) { 849 if ((x + 1) < source_width) {
1169 uint8 y1 = y_buf[x + 1]; 850 uint8 y1 = y_buf[x + 1];
1170 YuvPixel(y1, u, v, rgb_buf + 4); 851 YuvPixel(y1, u, v, rgb_buf + 4);
1171 } 852 }
1172 rgb_buf += 8; // Advance 2 pixels. 853 rgb_buf += 8; // Advance 2 pixels.
1173 } 854 }
1174 } 855 }
1175 856
1176 // 16.16 fixed point is used. A shift by 16 isolates the integer. 857 // 16.16 fixed point is used. A shift by 16 isolates the integer.
1177 // A shift by 17 is used to further subsample the chrominance channels. 858 // A shift by 17 is used to further subsample the chrominance channels.
1178 // & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, 859 // & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits,
1179 // for 1/65536 pixel accurate interpolation. 860 // for 1/65536 pixel accurate interpolation.
1180 void ScaleYUVToRGB32Row(const uint8* y_buf, 861 void ScaleYUVToRGB32Row(const uint8* y_buf,
1181 const uint8* u_buf, 862 const uint8* u_buf,
1182 const uint8* v_buf, 863 const uint8* v_buf,
1183 uint8* rgb_buf, 864 uint8* rgb_buf,
1184 int width, 865 int source_width,
1185 int scaled_dx) { 866 int dx) {
1186 int scaled_x = 0; 867 int x = 0;
1187 for (int x = 0; x < width; ++x) { 868 for (int i = 0; i < source_width; i += 2) {
1188 uint8 u = u_buf[scaled_x >> 17]; 869 int y = y_buf[x >> 16];
1189 uint8 v = v_buf[scaled_x >> 17]; 870 int u = u_buf[(x >> 17)];
1190 uint8 y0 = y_buf[scaled_x >> 16]; 871 int v = v_buf[(x >> 17)];
1191 YuvPixel(y0, u, v, rgb_buf); 872 YuvPixel(y, u, v, rgb_buf);
1192 rgb_buf += 4; 873 x += dx;
1193 scaled_x += scaled_dx; 874 if ((i + 1) < source_width) {
875 y = y_buf[x >> 16];
876 YuvPixel(y, u, v, rgb_buf+4);
877 x += dx;
878 }
879 rgb_buf += 8;
1194 } 880 }
1195 } 881 }
1196 882
1197 void LinearScaleYUVToRGB32Row(const uint8* y_buf, 883 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
1198 const uint8* u_buf, 884 const uint8* u_buf,
1199 const uint8* v_buf, 885 const uint8* v_buf,
1200 uint8* rgb_buf, 886 uint8* rgb_buf,
1201 int width, 887 int source_width,
1202 int dx) { 888 int dx) {
1203 for (int x = 0; x < width * dx; x += dx) { 889 int x = 0;
890 for (int i = 0; i < source_width; i += 2) {
1204 int y0 = y_buf[x >> 16]; 891 int y0 = y_buf[x >> 16];
1205 int y1 = y_buf[(x >> 16) + 1]; 892 int y1 = y_buf[(x >> 16) + 1];
1206 int u0 = u_buf[(x >> 17)]; 893 int u0 = u_buf[(x >> 17)];
1207 int u1 = u_buf[(x >> 17) + 1]; 894 int u1 = u_buf[(x >> 17) + 1];
1208 int v0 = v_buf[(x >> 17)]; 895 int v0 = v_buf[(x >> 17)];
1209 int v1 = v_buf[(x >> 17) + 1]; 896 int v1 = v_buf[(x >> 17) + 1];
1210 int y = ((x & 65535) * y1 + ((x & 65535) ^ 65535) * y0) >> 16; 897 int y_frac = (x & 65535);
1211 int u = ((x & 65535) * u1 + ((x & 65535) ^ 65535) * u0) >> 16; 898 int uv_frac = ((x >> 1) & 65535);
1212 int v = ((x & 65535) * v1 + ((x & 65535) ^ 65535) * v0) >> 16; 899 int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
900 int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16;
901 int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16;
1213 YuvPixel(y, u, v, rgb_buf); 902 YuvPixel(y, u, v, rgb_buf);
1214 rgb_buf += 4; 903 x += dx;
904 if ((i + 1) < source_width) {
905 y0 = y_buf[x >> 16];
906 y1 = y_buf[(x >> 16) + 1];
907 y_frac = (x & 65535);
908 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
909 YuvPixel(y, u, v, rgb_buf+4);
910 x += dx;
911 }
912 rgb_buf += 8;
1215 } 913 }
1216 } 914 }
1217 915
1218 #endif // USE_MMX 916 #endif // USE_MMX
1219 } // extern "C" 917 } // extern "C"
918
OLDNEW
« no previous file with comments | « media/base/yuv_row.h ('k') | media/base/yuv_row_table.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698