OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 | |
12 /**************************************************************************** | |
13 * | |
14 * Module Title : scaleopt.cpp | |
15 * | |
16 * Description : Optimized scaling functions | |
17 * | |
18 ****************************************************************************/ | |
19 #include "pragmas.h" | |
20 | |
21 /**************************************************************************** | |
22 * Module Statics | |
23 ****************************************************************************/ | |
// Rounding term for the fixed-point blends in this file: 128 is added to every
// 16-bit lane before the sum is shifted right by 8.
24 __declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 1
28, 128 }; | |
25 | |
26 #include "vpx_scale/vpx_scale.h" | |
27 #include "vpx_mem/vpx_mem.h" | |
28 | |
// Per-lane weights (in 1/256 units) used by horizontal_line_5_4_scale_mmx:
// const54_1 multiplies source pixels 0..3 of a 5-pixel group and const54_2
// multiplies pixels 1..4 of the same group.
29 __declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128,
192 }; | |
30 __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128,
64 }; | |
31 | |
32 | |
33 /**************************************************************************** | |
34 * | |
35 * ROUTINE : horizontal_line_5_4_scale_mmx | |
36 * | |
37 * INPUTS : const unsigned char *source : Pointer to source data. | |
38 * unsigned int source_width : Number of source bytes in the line. | |
39 * unsigned char *dest : Pointer to destination data. | |
40 * unsigned int dest_width : Stride of destination (NOT US
ED). | |
41 * | |
42 * OUTPUTS : None. | |
43 * | |
44 * RETURNS : void | |
45 * | |
46 * FUNCTION : Copies horizontal line of pixels from source to | |
47 * destination scaling down from 5 pixels to 4. | |
48 * | |
49 * SPECIAL NOTES : None. | |
50 * | |
51 ****************************************************************************/ | |
// MMX (MSVC inline assembly) row scaler: every 5 source bytes produce 4
// destination bytes. The loop runs until the read pointer reaches
// source + source_width; dest_width is not used.
// NOTE(review): each iteration loads 8 source bytes but advances by only 5, so
// the last iteration reads past source + source_width (3 bytes when
// source_width is a multiple of 5) - confirm the source buffer is padded.
// NOTE(review): no emms is executed before returning; presumably the caller
// resets the MMX/x87 state - confirm.
52 static | |
53 void horizontal_line_5_4_scale_mmx | |
54 ( | |
55 const unsigned char *source, | |
56 unsigned int source_width, | |
57 unsigned char *dest, | |
58 unsigned int dest_width | |
59 ) { | |
// The commented-out C below is the scalar reference for the assembly that follows.
60 /* | |
61 unsigned i; | |
62 unsigned int a, b, c, d, e; | |
63 unsigned char *des = dest; | |
64 const unsigned char *src = source; | |
65 | |
66 (void) dest_width; | |
67 | |
68 for ( i=0; i<source_width; i+=5 ) | |
69 { | |
70 a = src[0]; | |
71 b = src[1]; | |
72 c = src[2]; | |
73 d = src[3]; | |
74 e = src[4]; | |
75 | |
76 des[0] = a; | |
77 des[1] = ((b*192 + c* 64 + 128)>>8); | |
78 des[2] = ((c*128 + d*128 + 128)>>8); | |
79 des[3] = ((d* 64 + e*192 + 128)>>8); | |
80 | |
81 src += 5; | |
82 des += 4; | |
83 } | |
84 */ | |
85 (void) dest_width; | |
86 | |
87 __asm { | |
88 | |
// esi = read pointer, edi = write pointer.
89 mov esi, source; | |
90 mov edi, dest; | |
91 | |
// ecx = source byte count; mm5 = weights for pixels 0..3 of the group.
92 mov ecx, source_width; | |
93 movq mm5, const54_1; | |
94 | |
// mm7 = 0 (zero-extension source for punpcklbw and upper half for packuswb);
// mm6 = weights for pixels 1..4 of the group.
95 pxor mm7, mm7; | |
96 movq mm6, const54_2; | |
97 | |
// mm4 = rounding term; edx = source + source_width, the loop end address.
98 movq mm4, round_values; | |
99 lea edx, [esi+ecx]; | |
100 horizontal_line_5_4_loop: | |
101 | |
// Load 8 source bytes; only bytes 0..4 contribute to this group's outputs.
// The bare digit rows below appear to be byte-lane annotations for the
// instruction above them whose comment marker was lost.
102 movq mm0, QWORD PTR [esi]; | |
103 00 01 02 03 04 05 06 07 | |
104 movq mm1, mm0; | |
105 00 01 02 03 04 05 06 07 | |
106 | |
// mm0 <- bytes 1..4 and mm1 <- bytes 0..3, each widened to four 16-bit lanes.
107 psrlq mm0, 8; | |
108 01 02 03 04 05 06 07 xx | |
109 punpcklbw mm1, mm7; | |
110 xx 00 xx 01 xx 02 xx 03 | |
111 | |
112 punpcklbw mm0, mm7; | |
113 xx 01 xx 02 xx 03 xx 04 | |
// Lane k becomes p[k]*const54_1[k] + p[k+1]*const54_2[k].
114 pmullw mm1, mm5 | |
115 | |
116 pmullw mm0, mm6 | |
// Advance 5 source bytes and 4 destination bytes per iteration.
117 add esi, 5 | |
118 | |
119 add edi, 4 | |
120 paddw mm1, mm0 | |
121 | |
// Add the rounding term and drop the 8 fractional bits.
122 paddw mm1, mm4 | |
123 psrlw mm1, 8 | |
124 | |
// Compare the read pointer with the end address; the flags are consumed by jl below.
125 cmp esi, edx | |
126 packuswb mm1, mm7 | |
127 | |
// Store the 4 packed results at the pre-increment destination address.
128 movd DWORD PTR [edi-4], mm1 | |
129 | |
// NOTE(review): jl is a signed comparison although esi/edx hold addresses.
130 jl horizontal_line_5_4_loop | |
131 | |
132 } | |
133 | |
134 } | |
// Uniform per-lane weights (in 1/256 units) used by vertical_band_5_4_scale_mmx:
// 64 = 1/4, 128 = 2/4, 192 = 3/4.
135 __declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64,
64, 64 }; | |
136 __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128,
128, 128 }; | |
137 __declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192,
192, 192 }; | |
138 | |
// MMX (MSVC inline assembly) vertical scaler: blends 5 source rows into 4
// destination rows, 4 pixels (one DWORD) per loop iteration.
//   source, src_pitch - first source row and byte distance between rows;
//                       rows 0..4 are read.
//   dest, dest_pitch  - first destination row and byte distance between rows;
//                       rows 0..3 are written.
//   dest_width        - column count; decremented by 4 per iteration until <= 0.
// With s0..s4 the source rows, the destination rows are:
//   d0 = s0
//   d1 = (192*s1 +  64*s2 + 128) >> 8
//   d2 = (128*s2 + 128*s3 + 128) >> 8
//   d3 = ( 64*s3 + 192*s4 + 128) >> 8
// NOTE(review): no emms is executed before returning; presumably the caller
// resets the MMX/x87 state - confirm.
139 static | |
140 void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch,
unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { | |
141 | |
142 __asm { | |
// ebx is used as the column counter, so it is saved here and restored at the end.
143 push ebx | |
144 | |
145 mov esi, source // Get the source and destinat
ion pointer | |
146 mov ecx, src_pitch // Get the pitch size | |
147 | |
148 mov edi, dest // destination pointer | |
149 pxor mm7, mm7 // clear out mm7 | |
150 | |
151 mov edx, dest_pitch // destination pitch (row stride) | |
// ebx = remaining columns (the actual loop counter).
152 mov ebx, dest_width | |
153 | |
154 vs_5_4_loop: | |
155 | |
156 movd mm0, DWORD ptr [esi] // src[0]; | |
157 movd mm1, DWORD ptr [esi+ecx] // src[1]; | |
158 | |
// mm2 = source row 2; eax = address of source row 2, used to reach rows 3 and 4.
159 movd mm2, DWORD ptr [esi+ecx*2] | |
160 lea eax, [esi+ecx*2] // | |
161 | |
162 punpcklbw mm1, mm7 | |
163 punpcklbw mm2, mm7 | |
164 | |
// Row 2 is needed twice (weight 1/4 for d1, weight 2/4 for d2), so keep a copy in mm3.
165 movq mm3, mm2 | |
166 pmullw mm1, three_fourths | |
167 | |
168 pmullw mm2, one_fourths | |
// mm4 = source row 3.
169 movd mm4, [eax+ecx] | |
170 | |
171 pmullw mm3, two_fourths | |
172 punpcklbw mm4, mm7 | |
173 | |
// Row 3 is also needed twice (2/4 for d2, 1/4 for d3); the copy goes to mm5.
174 movq mm5, mm4 | |
175 pmullw mm4, two_fourths | |
176 | |
177 paddw mm1, mm2 | |
// mm6 = source row 4.
178 movd mm6, [eax+ecx*2] | |
179 | |
180 pmullw mm5, one_fourths | |
181 paddw mm1, round_values; | |
182 | |
183 paddw mm3, mm4 | |
184 psrlw mm1, 8 | |
185 | |
186 punpcklbw mm6, mm7 | |
187 paddw mm3, round_values | |
188 | |
189 pmullw mm6, three_fourths | |
190 psrlw mm3, 8 | |
191 | |
192 packuswb mm1, mm7 | |
193 packuswb mm3, mm7 | |
194 | |
// Destination row 0 is source row 0 unmodified; row 1 is the first blend.
195 movd DWORD PTR [edi], mm0 | |
196 movd DWORD PTR [edi+edx], mm1 | |
197 | |
198 | |
199 paddw mm5, mm6 | |
200 movd DWORD PTR [edi+edx*2], mm3 | |
201 | |
// eax = address of destination row 2, so [eax+edx] below is destination row 3.
202 lea eax, [edi+edx*2] | |
203 paddw mm5, round_values | |
204 | |
205 psrlw mm5, 8 | |
206 add edi, 4 | |
207 | |
208 packuswb mm5, mm7 | |
209 movd DWORD PTR [eax+edx], mm5 | |
210 | |
// Advance 4 columns; jg repeats while the remaining column count is positive.
211 add esi, 4 | |
212 sub ebx, 4 | |
213 | |
214 jg vs_5_4_loop | |
215 | |
216 pop ebx | |
217 } | |
218 } | |
219 | |
220 | |
// Per-lane weights (in 1/256 units) used by horizontal_line_5_3_scale_mmx:
// const53_2 multiplies the even-indexed source bytes (0, 2, 4, 6) and const53_1
// multiplies the odd-indexed bytes after they are moved up one lane (-, 1, 3, 5).
// The last lane of both tables is 0, so the 4th output lane is always 0.
221 __declspec(align(16)) const static unsigned short const53_1[] = { 0, 85, 171,
0 }; | |
222 __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85,
0 }; | |
223 | |
224 | |
// MMX (MSVC inline assembly) row scaler: every 5 source bytes p0..p4 produce
// 3 destination bytes:
//   d0 = p0
//   d1 = (85*p1 + 171*p2 + 128) >> 8
//   d2 = (171*p3 + 85*p4 + 128) >> 8
// dest_width is not used. The main loop stores 4 bytes per group (the 4th is
// a zero lane that the next group overwrites); the final group is handled
// after the loop and stores exactly 3 bytes.
// NOTE(review): the loop body runs once before its exit test and the tail
// always runs, so at least two groups (10 source bytes) are processed even
// when source_width is 5.
// NOTE(review): each group is loaded with an 8-byte read, so the tail reads
// 3 bytes past the 5 it consumes - confirm the source buffer is padded.
// NOTE(review): no emms is executed before returning; presumably the caller
// resets the MMX/x87 state - confirm.
225 static | |
226 void horizontal_line_5_3_scale_mmx | |
227 ( | |
228 const unsigned char *source, | |
229 unsigned int source_width, | |
230 unsigned char *dest, | |
231 unsigned int dest_width | |
232 ) { | |
233 | |
234 (void) dest_width; | |
235 __asm { | |
236 | |
// esi = read pointer, edi = write pointer.
237 mov esi, source; | |
238 mov edi, dest; | |
239 | |
// mm5 = weights for the odd-indexed bytes, mm6 = weights for the even-indexed bytes.
240 mov ecx, source_width; | |
241 movq mm5, const53_1; | |
242 | |
243 pxor mm7, mm7; | |
244 movq mm6, const53_2; | |
245 | |
// mm4 = rounding term; edx = source + source_width - 5, i.e. the start of the
// last group, which is left for the tail code after the loop.
246 movq mm4, round_values; | |
247 lea edx, [esi+ecx-5]; | |
248 horizontal_line_5_3_loop: | |
249 | |
// The bare digit rows below appear to be byte-lane annotations for the
// instruction above them whose comment marker was lost.
250 movq mm0, QWORD PTR [esi]; | |
251 00 01 02 03 04 05 06 07 | |
252 movq mm1, mm0; | |
253 00 01 02 03 04 05 06 07 | |
254 | |
// Shift pair on mm0 clears the odd bytes, leaving bytes 0,2,4,6 as 16-bit lanes;
// mm1 keeps bytes 1,3,5,7 as 16-bit lanes.
255 psllw mm0, 8; | |
256 xx 00 xx 02 xx 04 xx 06 | |
257 psrlw mm1, 8; | |
258 01 xx 03 xx 05 xx 07 xx | |
259 | |
// Move the odd bytes up one lane so byte 1 pairs with byte 2 and byte 3 with byte 4.
260 psrlw mm0, 8; | |
261 00 xx 02 xx 04 xx 06 xx | |
262 psllq mm1, 16; | |
263 xx xx 01 xx 03 xx 05 xx | |
264 | |
265 pmullw mm0, mm6 | |
266 | |
267 pmullw mm1, mm5 | |
// Advance 5 source bytes and 3 destination bytes per iteration.
268 add esi, 5 | |
269 | |
270 add edi, 3 | |
271 paddw mm1, mm0 | |
272 | |
273 paddw mm1, mm4 | |
274 psrlw mm1, 8 | |
275 | |
276 cmp esi, edx | |
277 packuswb mm1, mm7 | |
278 | |
// 4-byte store at the pre-increment address; the 4th byte lands on the next group's d0.
// NOTE(review): jl is a signed comparison although esi/edx hold addresses.
279 movd DWORD PTR [edi-3], mm1 | |
280 jl horizontal_line_5_3_loop | |
281 | |
282 // tail: same filter for the final group, stored as exactly 3 bytes | |
283 movq mm0, QWORD PTR [esi]; | |
284 00 01 02 03 04 05 06 07 | |
285 movq mm1, mm0; | |
286 00 01 02 03 04 05 06 07 | |
287 | |
288 psllw mm0, 8; | |
289 xx 00 xx 02 xx 04 xx 06 | |
290 psrlw mm1, 8; | |
291 01 xx 03 xx 05 xx 07 xx | |
292 | |
293 psrlw mm0, 8; | |
294 00 xx 02 xx 04 xx 06 xx | |
295 psllq mm1, 16; | |
296 xx xx 01 xx 03 xx 05 xx | |
297 | |
298 pmullw mm0, mm6 | |
299 | |
300 pmullw mm1, mm5 | |
301 paddw mm1, mm0 | |
302 | |
303 paddw mm1, mm4 | |
304 psrlw mm1, 8 | |
305 | |
306 packuswb mm1, mm7 | |
// Move the packed result to eax so it can be written as a 2-byte plus a 1-byte store.
307 movd eax, mm1 | |
308 | |
309 mov edx, eax | |
310 shr edx, 16 | |
311 | |
312 mov WORD PTR[edi], ax | |
313 mov BYTE PTR[edi+2], dl | |
314 | |
315 } | |
316 | |
317 } | |
318 | |
// Uniform per-lane weights (in 1/256 units) used by vertical_band_5_3_scale_mmx:
// 85 is roughly 1/3 and 171 roughly 2/3; the pair sums to 256.
319 __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85
, 85 }; | |
320 __declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171
, 171 }; | |
321 | |
// MMX (MSVC inline assembly) vertical scaler: blends 5 source rows into 3
// destination rows, 4 pixels (one DWORD) per loop iteration.
//   source, src_pitch - first source row and byte distance between rows;
//                       rows 0..4 are read.
//   dest, dest_pitch  - first destination row and byte distance between rows;
//                       rows 0..2 are written.
//   dest_width        - column count; decremented by 4 per iteration until <= 0.
// With s0..s4 the source rows, the destination rows are:
//   d0 = s0
//   d1 = ( 85*s1 + 171*s2 + 128) >> 8
//   d2 = (171*s3 +  85*s4 + 128) >> 8
// NOTE(review): no emms is executed before returning; presumably the caller
// resets the MMX/x87 state - confirm.
322 static | |
323 void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch,
unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { | |
324 | |
325 __asm { | |
// ebx is used as the column counter, so it is saved here and restored at the end.
326 push ebx | |
327 | |
328 mov esi, source // Get the source and destinat
ion pointer | |
329 mov ecx, src_pitch // Get the pitch size | |
330 | |
331 mov edi, dest // destination pointer | |
332 pxor mm7, mm7 // clear out mm7 | |
333 | |
334 mov edx, dest_pitch // destination pitch (row stride) | |
// mm5 = 1/3 weights, mm6 = 2/3 weights; ebx = remaining columns (the loop counter).
335 movq mm5, one_thirds | |
336 | |
337 movq mm6, two_thirds | |
338 mov ebx, dest_width; | |
339 | |
340 vs_5_3_loop: | |
341 | |
342 movd mm0, DWORD ptr [esi] // src[0]; | |
343 movd mm1, DWORD ptr [esi+ecx] // src[1]; | |
344 | |
// mm2 = source row 2; eax = address of source row 2, used to reach rows 3 and 4.
345 movd mm2, DWORD ptr [esi+ecx*2] | |
346 lea eax, [esi+ecx*2] // | |
347 | |
348 punpcklbw mm1, mm7 | |
349 punpcklbw mm2, mm7 | |
350 | |
351 pmullw mm1, mm5 | |
352 pmullw mm2, mm6 | |
353 | |
// mm3 = source row 3, mm4 = source row 4.
354 movd mm3, DWORD ptr [eax+ecx] | |
355 movd mm4, DWORD ptr [eax+ecx*2] | |
356 | |
357 punpcklbw mm3, mm7 | |
358 punpcklbw mm4, mm7 | |
359 | |
360 pmullw mm3, mm6 | |
361 pmullw mm4, mm5 | |
362 | |
363 | |
// Destination row 0 is source row 0 unmodified.
364 movd DWORD PTR [edi], mm0 | |
365 paddw mm1, mm2 | |
366 | |
367 paddw mm1, round_values | |
368 psrlw mm1, 8 | |
369 | |
370 packuswb mm1, mm7 | |
371 paddw mm3, mm4 | |
372 | |
373 paddw mm3, round_values | |
374 movd DWORD PTR [edi+edx], mm1 | |
375 | |
376 psrlw mm3, 8 | |
377 packuswb mm3, mm7 | |
378 | |
379 movd DWORD PTR [edi+edx*2], mm3 | |
380 | |
381 | |
// Advance 4 columns; jg repeats while the remaining column count is positive.
382 add edi, 4 | |
383 add esi, 4 | |
384 | |
385 sub ebx, 4 | |
386 jg vs_5_3_loop | |
387 | |
388 pop ebx | |
389 } | |
390 } | |
391 | |
392 | |
393 | |
394 | |
395 /**************************************************************************** | |
396 * | |
397 * ROUTINE : horizontal_line_2_1_scale | |
398 * | |
399 * INPUTS : const unsigned char *source : | |
400 * unsigned int source_width : | |
401 * unsigned char *dest : | |
402 * unsigned int dest_width : | |
403 * | |
404 * OUTPUTS : None. | |
405 * | |
406 * RETURNS : void | |
407 * | |
408 * FUNCTION : 2 to 1 down-scaling of a horizontal line of pixels. | |
409 * | |
410 * SPECIAL NOTES : None. | |
411 * | |
412 ****************************************************************************/ | |
// MMX (MSVC inline assembly) row scaler that keeps every other source byte:
// each iteration loads 8 source bytes and stores the 4 even-indexed ones.
// The loop index runs from 0 in steps of 4 while it is below dest_width;
// source_width is not used.
// NOTE(review): dest_width is cast to void below but is read by the assembly.
// NOTE(review): no emms is executed before returning; presumably the caller
// resets the MMX/x87 state - confirm.
413 static | |
414 void horizontal_line_2_1_scale_mmx | |
415 ( | |
416 const unsigned char *source, | |
417 unsigned int source_width, | |
418 unsigned char *dest, | |
419 unsigned int dest_width | |
420 ) { | |
421 (void) dest_width; | |
422 (void) source_width; | |
423 __asm { | |
424 mov esi, source | |
425 mov edi, dest | |
426 | |
// mm7 = 0 (upper half for packuswb); ecx = number of destination bytes to produce.
427 pxor mm7, mm7 | |
428 mov ecx, dest_width | |
429 | |
// edx = destination byte index; the source is addressed at twice that offset.
430 xor edx, edx | |
431 hs_2_1_loop: | |
432 | |
433 movq mm0, [esi+edx*2] | |
// Shift each 16-bit lane left then right by 8 to clear the odd-indexed byte,
// leaving the even-indexed byte zero-extended in the lane.
434 psllw mm0, 8 | |
435 | |
436 psrlw mm0, 8 | |
437 packuswb mm0, mm7 | |
438 | |
439 movd DWORD Ptr [edi+edx], mm0; | |
440 add edx, 4 | |
441 | |
// NOTE(review): jl is a signed comparison of the index against dest_width.
442 cmp edx, ecx | |
443 jl hs_2_1_loop | |
444 | |
445 } | |
446 } | |
447 | |
448 | |
449 | |
/*
 * Non-interpolating 2:1 vertical band scaler: the output row is a straight
 * copy of dest_width bytes from source to dest. Neither pitch is used.
 */
static void vertical_band_2_1_scale_mmx(unsigned char *source,
                                        unsigned int src_pitch,
                                        unsigned char *dest,
                                        unsigned int dest_pitch,
                                        unsigned int dest_width) {
  (void)src_pitch;
  (void)dest_pitch;

  vpx_memcpy(dest, source, dest_width);
}
456 | |
457 | |
// Per-lane weights (in 1/256 units) for the 3-tap filter in
// vertical_band_2_1_scale_i_mmx: 48 = 3/16 for the rows above and below,
// 160 = 10/16 for the centre row (48 + 160 + 48 = 256).
458 __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4
8, 48, 48 }; | |
459 __declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 16
0, 160, 160 }; | |
460 | |
// MMX (MSVC inline assembly) interpolating 2:1 vertical scaler. Each output
// byte is a 3-tap vertical blend of the row at source and its two neighbours:
//   out = (48*above + 160*centre + 48*below + 128) >> 8
// where above/below are the rows at source - src_pitch and source + src_pitch.
// Four pixels are produced per iteration until dest_width source bytes of the
// row have been consumed; dest_pitch is not used.
// NOTE(review): the row at source - src_pitch is read, so the caller must
// guarantee that memory is valid.
// NOTE(review): no emms is executed before returning; presumably the caller
// resets the MMX/x87 state - confirm.
461 static | |
462 void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch
, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { | |
463 | |
464 (void) dest_pitch; | |
465 __asm { | |
466 mov esi, source | |
467 mov edi, dest | |
468 | |
469 mov eax, src_pitch | |
470 mov edx, dest_width | |
471 | |
472 pxor mm7, mm7 | |
473 sub esi, eax // back one line | |
474 | |
475 | |
// ecx = loop end address for esi (start of the row above + dest_width).
// mm6 is loaded with the rounding term here, but the add inside the loop reads
// round_values from memory, so mm6 is not used afterwards.
476 lea ecx, [esi+edx]; | |
477 movq mm6, round_values; | |
478 | |
479 movq mm5, three_sixteenths; | |
480 movq mm4, ten_sixteenths; | |
481 | |
482 vs_2_1_i_loop: | |
483 movd mm0, [esi] // row above (source - src_pitch) | |
484 movd mm1, [esi+eax] // centre row (source) | |
485 | |
486 movd mm2, [esi+eax*2] // row below (source + src_pitch) | |
487 punpcklbw mm0, mm7 | |
488 | |
489 pmullw mm0, mm5 | |
490 punpcklbw mm1, mm7 | |
491 | |
492 pmullw mm1, mm4 | |
493 punpcklbw mm2, mm7 | |
494 | |
495 pmullw mm2, mm5 | |
496 paddw mm0, round_values | |
497 | |
498 paddw mm1, mm2 | |
499 paddw mm0, mm1 | |
500 | |
501 psrlw mm0, 8 | |
502 packuswb mm0, mm7 | |
503 | |
504 movd DWORD PTR [edi], mm0 | |
505 add esi, 4 | |
506 | |
// NOTE(review): jl is a signed comparison although esi/ecx hold addresses.
507 add edi, 4; | |
508 cmp esi, ecx | |
509 jl vs_2_1_i_loop | |
510 | |
511 } | |
512 } | |
513 | |
514 | |
515 | |
516 void | |
517 register_mmxscalers(void) { | |
518 vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; | |
519 vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; | |
520 vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; | |
521 vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; | |
522 vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; | |
523 vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; | |
524 vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; | |
525 } | |
OLD | NEW |