Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(221)

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_asm_stubs.c

Issue 11555023: libvpx: Add VP9 decoder. (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12 #include "vpx_ports/config.h"
13 #include "vpx_ports/mem.h"
14 #include "vp9/common/vp9_subpixel.h"
15
/*
 * Coefficient tables and filter kernels that are defined outside this file
 * (presumably in the x86 assembly sources -- confirm against the build).
 * The wrappers below index the tables by xoffset / yoffset.
 */
extern const short vp9_six_tap_mmx[16][6 * 8];

extern const short vp9_bilinear_filters_8x_mmx[16][2 * 8];

/* MMX six-tap kernels: horizontal pass into a 16-bit buffer, then a
 * vertical pass from that buffer to 8-bit output. */
extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr,
                                      unsigned short *output_ptr,
                                      unsigned int src_pixels_per_line,
                                      unsigned int pixel_step,
                                      unsigned int output_height,
                                      unsigned int output_width,
                                      const short *vp9_filter);

extern void vp9_filter_block1dc_v6_mmx(unsigned short *src_ptr,
                                       unsigned char *output_ptr,
                                       int output_pitch,
                                       unsigned int pixels_per_line,
                                       unsigned int pixel_step,
                                       unsigned int output_height,
                                       unsigned int output_width,
                                       const short *vp9_filter);

/* SSE2 six-tap kernels used for the two-pass (horizontal + vertical) case. */
extern void vp9_filter_block1d8_h6_sse2(unsigned char *src_ptr,
                                        unsigned short *output_ptr,
                                        unsigned int src_pixels_per_line,
                                        unsigned int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
                                        const short *vp9_filter);

extern void vp9_filter_block1d16_h6_sse2(unsigned char *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned int pixel_step,
                                         unsigned int output_height,
                                         unsigned int output_width,
                                         const short *vp9_filter);

extern void vp9_filter_block1d8_v6_sse2(unsigned short *src_ptr,
                                        unsigned char *output_ptr,
                                        int dst_pitch,
                                        unsigned int pixels_per_line,
                                        unsigned int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
                                        const short *vp9_filter);

extern void vp9_filter_block1d16_v6_sse2(unsigned short *src_ptr,
                                         unsigned char *output_ptr,
                                         int dst_pitch,
                                         unsigned int pixels_per_line,
                                         unsigned int pixel_step,
                                         unsigned int output_height,
                                         unsigned int output_width,
                                         const short *vp9_filter);

/* Fills a 16-bit buffer from 8-bit source without taking a filter argument;
 * used by the 16x16 SSE2 wrapper when only the vertical pass is needed. */
extern void vp9_unpack_block1d16_h6_sse2(unsigned char *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned int output_height,
                                         unsigned int output_width);

/* SSE2 single-pass kernels: 8-bit source straight to 8-bit destination. */
extern void vp9_filter_block1d8_h6_only_sse2(unsigned char *src_ptr,
                                             unsigned int src_pixels_per_line,
                                             unsigned char *output_ptr,
                                             int dst_pitch,
                                             unsigned int output_height,
                                             const short *vp9_filter);

extern void vp9_filter_block1d16_h6_only_sse2(unsigned char *src_ptr,
                                              unsigned int src_pixels_per_line,
                                              unsigned char *output_ptr,
                                              int dst_pitch,
                                              unsigned int output_height,
                                              const short *vp9_filter);

extern void vp9_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
                                             unsigned int src_pixels_per_line,
                                             unsigned char *output_ptr,
                                             int dst_pitch,
                                             unsigned int output_height,
                                             const short *vp9_filter);

/* 8x8 bilinear predictor; vp9_bilinear_predict16x16_mmx below is built on it. */
extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx);
99
100 ///////////////////////////////////////////////////////////////////////////
101 // the mmx function that does the bilinear filtering and var calculation //
102 // int one pass //
103 ///////////////////////////////////////////////////////////////////////////
104 DECLARE_ALIGNED(16, const short, vp9_bilinear_filters_mmx[16][8]) = {
105 { 128, 128, 128, 128, 0, 0, 0, 0 },
106 { 120, 120, 120, 120, 8, 8, 8, 8 },
107 { 112, 112, 112, 112, 16, 16, 16, 16 },
108 { 104, 104, 104, 104, 24, 24, 24, 24 },
109 { 96, 96, 96, 96, 32, 32, 32, 32 },
110 { 88, 88, 88, 88, 40, 40, 40, 40 },
111 { 80, 80, 80, 80, 48, 48, 48, 48 },
112 { 72, 72, 72, 72, 56, 56, 56, 56 },
113 { 64, 64, 64, 64, 64, 64, 64, 64 },
114 { 56, 56, 56, 56, 72, 72, 72, 72 },
115 { 48, 48, 48, 48, 80, 80, 80, 80 },
116 { 40, 40, 40, 40, 88, 88, 88, 88 },
117 { 32, 32, 32, 32, 96, 96, 96, 96 },
118 { 24, 24, 24, 24, 104, 104, 104, 104 },
119 { 16, 16, 16, 16, 112, 112, 112, 112 },
120 { 8, 8, 8, 8, 120, 120, 120, 120 }
121 };
122
123 #if HAVE_MMX
124 void vp9_sixtap_predict4x4_mmx(unsigned char *src_ptr,
125 int src_pixels_per_line,
126 int xoffset,
127 int yoffset,
128 unsigned char *dst_ptr,
129 int dst_pitch) {
130 #ifdef ANNOUNCE_FUNCTION
131 printf("vp9_sixtap_predict4x4_mmx\n");
132 #endif
133 /* Temp data bufffer used in filtering */
134 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 16 * 16);
135 const short *hfilter, *vfilter;
136 hfilter = vp9_six_tap_mmx[xoffset];
137 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), fdata2,
138 src_pixels_per_line, 1, 9, 8, hfilter);
139 vfilter = vp9_six_tap_mmx[yoffset];
140 vp9_filter_block1dc_v6_mmx(fdata2 + 8, dst_ptr, dst_pitch,
141 8, 4, 4, 4, vfilter);
142 }
143
144 void vp9_sixtap_predict16x16_mmx(unsigned char *src_ptr,
145 int src_pixels_per_line,
146 int xoffset,
147 int yoffset,
148 unsigned char *dst_ptr,
149 int dst_pitch) {
150 #ifdef ANNOUNCE_FUNCTION
151 printf("vp9_sixtap_predict16x16_mmx\n");
152 #endif
153 /* Temp data bufffer used in filtering */
154 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24);
155 const short *hfilter, *vfilter;
156
157 hfilter = vp9_six_tap_mmx[xoffset];
158 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),
159 fdata2, src_pixels_per_line, 1, 21, 32,
160 hfilter);
161 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,
162 fdata2 + 4, src_pixels_per_line, 1, 21, 32,
163 hfilter);
164 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8,
165 fdata2 + 8, src_pixels_per_line, 1, 21, 32,
166 hfilter);
167 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12,
168 fdata2 + 12, src_pixels_per_line, 1, 21, 32,
169 hfilter);
170
171 vfilter = vp9_six_tap_mmx[yoffset];
172 vp9_filter_block1dc_v6_mmx(fdata2 + 32, dst_ptr, dst_pitch,
173 32, 16, 16, 16, vfilter);
174 vp9_filter_block1dc_v6_mmx(fdata2 + 36, dst_ptr + 4, dst_pitch,
175 32, 16, 16, 16, vfilter);
176 vp9_filter_block1dc_v6_mmx(fdata2 + 40, dst_ptr + 8, dst_pitch,
177 32, 16, 16, 16, vfilter);
178 vp9_filter_block1dc_v6_mmx(fdata2 + 44, dst_ptr + 12, dst_pitch,
179 32, 16, 16, 16, vfilter);
180 }
181
182 void vp9_sixtap_predict8x8_mmx(unsigned char *src_ptr,
183 int src_pixels_per_line,
184 int xoffset,
185 int yoffset,
186 unsigned char *dst_ptr,
187 int dst_pitch) {
188 #ifdef ANNOUNCE_FUNCTION
189 printf("vp9_sixtap_predict8x8_mmx\n");
190 #endif
191 /* Temp data bufffer used in filtering */
192 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
193 const short *hfilter, *vfilter;
194
195 hfilter = vp9_six_tap_mmx[xoffset];
196 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),
197 fdata2, src_pixels_per_line, 1, 13, 16,
198 hfilter);
199 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,
200 fdata2 + 4, src_pixels_per_line, 1, 13, 16,
201 hfilter);
202
203 vfilter = vp9_six_tap_mmx[yoffset];
204 vp9_filter_block1dc_v6_mmx(fdata2 + 16, dst_ptr, dst_pitch,
205 16, 8, 8, 8, vfilter);
206 vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch,
207 16, 8, 8, 8, vfilter);
208 }
209
210 void vp9_sixtap_predict8x4_mmx(unsigned char *src_ptr,
211 int src_pixels_per_line,
212 int xoffset,
213 int yoffset,
214 unsigned char *dst_ptr,
215 int dst_pitch) {
216 #ifdef ANNOUNCE_FUNCTION
217 printf("vp9_sixtap_predict8x4_mmx\n");
218 #endif
219 /* Temp data bufffer used in filtering */
220 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
221 const short *hfilter, *vfilter;
222
223 hfilter = vp9_six_tap_mmx[xoffset];
224 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),
225 fdata2, src_pixels_per_line, 1, 9, 16, hfilter);
226 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,
227 fdata2 + 4, src_pixels_per_line, 1, 9, 16, hfilter);
228
229 vfilter = vp9_six_tap_mmx[yoffset];
230 vp9_filter_block1dc_v6_mmx(fdata2 + 16, dst_ptr, dst_pitch,
231 16, 8, 4, 8, vfilter);
232 vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch,
233 16, 8, 4, 8, vfilter);
234 }
235
/*
 * Bilinear sub-pixel prediction of a 16x16 block (MMX path).
 *
 * Delegates to vp9_bilinear_predict8x8_mmx once per 8x8 quadrant, in the
 * order top-left, top-right, bottom-left, bottom-right. Every call receives
 * the same xoffset / yoffset and the same strides.
 */
void vp9_bilinear_predict16x16_mmx(unsigned char *src_ptr,
                                   int src_pixels_per_line,
                                   int xoffset,
                                   int yoffset,
                                   unsigned char *dst_ptr,
                                   int dst_pitch) {
  int row, col;

  for (row = 0; row < 16; row += 8) {
    for (col = 0; col < 16; col += 8) {
      vp9_bilinear_predict8x8_mmx(src_ptr + row * src_pixels_per_line + col,
                                  src_pixels_per_line, xoffset, yoffset,
                                  dst_ptr + dst_pitch * row + col, dst_pitch);
    }
  }
}
255 #endif
256
257 #if HAVE_SSE2
258 void vp9_sixtap_predict16x16_sse2(unsigned char *src_ptr,
259 int src_pixels_per_line,
260 int xoffset,
261 int yoffset,
262 unsigned char *dst_ptr,
263 int dst_pitch) {
264 /* Temp data bufffer used in filtering */
265 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24);
266 const short *hfilter, *vfilter;
267 #ifdef ANNOUNCE_FUNCTION
268 printf("vp9_sixtap_predict16x16_sse2\n");
269 #endif
270
271 if (xoffset) {
272 if (yoffset) {
273 hfilter = vp9_six_tap_mmx[xoffset];
274 vp9_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
275 src_pixels_per_line, 1, 21, 32, hfilter);
276 vfilter = vp9_six_tap_mmx[yoffset];
277 vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch,
278 32, 16, 16, dst_pitch, vfilter);
279 } else {
280 /* First-pass only */
281 hfilter = vp9_six_tap_mmx[xoffset];
282 vp9_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line,
283 dst_ptr, dst_pitch, 16, hfilter);
284 }
285 } else {
286 /* Second-pass only */
287 vfilter = vp9_six_tap_mmx[yoffset];
288 vp9_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
289 src_pixels_per_line, 21, 32);
290 vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch,
291 32, 16, 16, dst_pitch, vfilter);
292 }
293 }
294
295 void vp9_sixtap_predict8x8_sse2(unsigned char *src_ptr,
296 int src_pixels_per_line,
297 int xoffset,
298 int yoffset,
299 unsigned char *dst_ptr,
300 int dst_pitch) {
301 /* Temp data bufffer used in filtering */
302 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
303 const short *hfilter, *vfilter;
304 #ifdef ANNOUNCE_FUNCTION
305 printf("vp9_sixtap_predict8x8_sse2\n");
306 #endif
307
308 if (xoffset) {
309 if (yoffset) {
310 hfilter = vp9_six_tap_mmx[xoffset];
311 vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
312 src_pixels_per_line, 1, 13, 16, hfilter);
313 vfilter = vp9_six_tap_mmx[yoffset];
314 vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch,
315 16, 8, 8, dst_pitch, vfilter);
316 } else {
317 /* First-pass only */
318 hfilter = vp9_six_tap_mmx[xoffset];
319 vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line,
320 dst_ptr, dst_pitch, 8, hfilter);
321 }
322 } else {
323 /* Second-pass only */
324 vfilter = vp9_six_tap_mmx[yoffset];
325 vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
326 src_pixels_per_line,
327 dst_ptr, dst_pitch, 8, vfilter);
328 }
329 }
330
331 void vp9_sixtap_predict8x4_sse2(unsigned char *src_ptr,
332 int src_pixels_per_line,
333 int xoffset,
334 int yoffset,
335 unsigned char *dst_ptr,
336 int dst_pitch) {
337 /* Temp data bufffer used in filtering */
338 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
339 const short *hfilter, *vfilter;
340 #ifdef ANNOUNCE_FUNCTION
341 printf("vp9_sixtap_predict8x4_sse2\n");
342 #endif
343
344 if (xoffset) {
345 if (yoffset) {
346 hfilter = vp9_six_tap_mmx[xoffset];
347 vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
348 src_pixels_per_line, 1, 9, 16, hfilter);
349 vfilter = vp9_six_tap_mmx[yoffset];
350 vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch,
351 16, 8, 4, dst_pitch, vfilter);
352 } else {
353 /* First-pass only */
354 hfilter = vp9_six_tap_mmx[xoffset];
355 vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line,
356 dst_ptr, dst_pitch, 4, hfilter);
357 }
358 } else {
359 /* Second-pass only */
360 vfilter = vp9_six_tap_mmx[yoffset];
361 vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
362 src_pixels_per_line,
363 dst_ptr, dst_pitch, 4, vfilter);
364 }
365 }
366 #endif
367
368 #if HAVE_SSSE3
/*
 * SSSE3 six-tap kernels, defined outside this file. Unlike the MMX/SSE2
 * kernels they take a filter index (the wrappers pass xoffset / yoffset)
 * rather than a coefficient pointer, and both source and output are 8-bit.
 */

/* Horizontal kernels for 4-, 8- and 16-wide blocks. */
void vp9_filter_block1d4_h6_ssse3(unsigned char *src_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned char *output_ptr,
                                  unsigned int output_pitch,
                                  unsigned int output_height,
                                  unsigned int vp9_filter_index);
void vp9_filter_block1d8_h6_ssse3(unsigned char *src_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned char *output_ptr,
                                  unsigned int output_pitch,
                                  unsigned int output_height,
                                  unsigned int vp9_filter_index);
void vp9_filter_block1d16_h6_ssse3(unsigned char *src_ptr,
                                   unsigned int src_pixels_per_line,
                                   unsigned char *output_ptr,
                                   unsigned int output_pitch,
                                   unsigned int output_height,
                                   unsigned int vp9_filter_index);

/* Vertical kernels for 4-, 8- and 16-wide blocks. */
void vp9_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
                                  unsigned int src_pitch,
                                  unsigned char *output_ptr,
                                  unsigned int out_pitch,
                                  unsigned int output_height,
                                  unsigned int vp9_filter_index);
void vp9_filter_block1d8_v6_ssse3(unsigned char *src_ptr,
                                  unsigned int src_pitch,
                                  unsigned char *output_ptr,
                                  unsigned int out_pitch,
                                  unsigned int output_height,
                                  unsigned int vp9_filter_index);
void vp9_filter_block1d16_v6_ssse3(unsigned char *src_ptr,
                                   unsigned int src_pitch,
                                   unsigned char *output_ptr,
                                   unsigned int out_pitch,
                                   unsigned int output_height,
                                   unsigned int vp9_filter_index);
410
411 void vp9_sixtap_predict16x16_ssse3(unsigned char *src_ptr,
412 int src_pixels_per_line,
413 int xoffset,
414 int yoffset,
415 unsigned char *dst_ptr,
416 int dst_pitch) {
417 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 24 * 24);
418 #ifdef ANNOUNCE_FUNCTION
419 printf("vp9_sixtap_predict16x16_ssse3\n");
420 #endif
421
422 if (xoffset) {
423 if (yoffset) {
424 vp9_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
425 src_pixels_per_line,
426 fdata2, 16, 21, xoffset);
427 vp9_filter_block1d16_v6_ssse3(fdata2, 16, dst_ptr, dst_pitch,
428 16, yoffset);
429 } else {
430 /* First-pass only */
431 vp9_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
432 dst_ptr, dst_pitch, 16, xoffset);
433 }
434 } else {
435 /* Second-pass only */
436 vp9_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
437 src_pixels_per_line,
438 dst_ptr, dst_pitch, 16, yoffset);
439 }
440 }
441
442 void vp9_sixtap_predict8x8_ssse3(unsigned char *src_ptr,
443 int src_pixels_per_line,
444 int xoffset,
445 int yoffset,
446 unsigned char *dst_ptr,
447 int dst_pitch) {
448 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256);
449 #ifdef ANNOUNCE_FUNCTION
450 printf("vp9_sixtap_predict8x8_ssse3\n");
451 #endif
452
453 if (xoffset) {
454 if (yoffset) {
455 vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
456 src_pixels_per_line, fdata2, 8, 13, xoffset);
457 vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 8, yoffset);
458 } else {
459 vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
460 dst_ptr, dst_pitch, 8, xoffset);
461 }
462 } else {
463 /* Second-pass only */
464 vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
465 src_pixels_per_line,
466 dst_ptr, dst_pitch, 8, yoffset);
467 }
468 }
469
470 void vp9_sixtap_predict8x4_ssse3(unsigned char *src_ptr,
471 int src_pixels_per_line,
472 int xoffset,
473 int yoffset,
474 unsigned char *dst_ptr,
475 int dst_pitch) {
476 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256);
477 #ifdef ANNOUNCE_FUNCTION
478 printf("vp9_sixtap_predict8x4_ssse3\n");
479 #endif
480
481 if (xoffset) {
482 if (yoffset) {
483 vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
484 src_pixels_per_line, fdata2, 8, 9, xoffset);
485 vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 4, yoffset);
486 } else {
487 /* First-pass only */
488 vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
489 dst_ptr, dst_pitch, 4, xoffset);
490 }
491 } else {
492 /* Second-pass only */
493 vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
494 src_pixels_per_line,
495 dst_ptr, dst_pitch, 4, yoffset);
496 }
497 }
498
499 void vp9_sixtap_predict4x4_ssse3(unsigned char *src_ptr,
500 int src_pixels_per_line,
501 int xoffset,
502 int yoffset,
503 unsigned char *dst_ptr,
504 int dst_pitch) {
505 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 4 * 9);
506 #ifdef ANNOUNCE_FUNCTION
507 printf("vp9_sixtap_predict4x4_ssse3\n");
508 #endif
509
510 if (xoffset) {
511 if (yoffset) {
512 vp9_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
513 src_pixels_per_line, fdata2, 4, 9, xoffset);
514 vp9_filter_block1d4_v6_ssse3(fdata2, 4, dst_ptr, dst_pitch, 4, yoffset);
515 } else {
516 vp9_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
517 dst_ptr, dst_pitch, 4, xoffset);
518 }
519 } else {
520 vp9_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
521 src_pixels_per_line,
522 dst_ptr, dst_pitch, 4, yoffset);
523 }
524 }
525
526 void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr,
527 const unsigned int src_pitch,
528 unsigned char *output_ptr,
529 unsigned int out_pitch,
530 unsigned int output_height,
531 const short *filter);
532
533 void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr,
534 const unsigned int src_pitch,
535 unsigned char *output_ptr,
536 unsigned int out_pitch,
537 unsigned int output_height,
538 const short *filter);
539
540 void vp9_filter_block2d_16x16_8_ssse3(const unsigned char *src_ptr,
541 const unsigned int src_stride,
542 const short *hfilter_aligned16,
543 const short *vfilter_aligned16,
544 unsigned char *dst_ptr,
545 unsigned int dst_stride) {
546 if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) {
547 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16);
548
549 vp9_filter_block1d16_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
550 fdata2, 16, 23, hfilter_aligned16);
551 vp9_filter_block1d16_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 16,
552 vfilter_aligned16);
553 } else {
554 if (hfilter_aligned16[3] != 128) {
555 vp9_filter_block1d16_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride,
556 16, hfilter_aligned16);
557 } else {
558 vp9_filter_block1d16_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
559 dst_ptr, dst_stride, 16, vfilter_aligned16);
560 }
561 }
562 }
563
564 void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr,
565 const unsigned int src_pitch,
566 unsigned char *output_ptr,
567 unsigned int out_pitch,
568 unsigned int output_height,
569 const short *filter);
570
571 void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
572 const unsigned int src_pitch,
573 unsigned char *output_ptr,
574 unsigned int out_pitch,
575 unsigned int output_height,
576 const short *filter);
577
578 void vp9_filter_block2d_8x8_8_ssse3(const unsigned char *src_ptr,
579 const unsigned int src_stride,
580 const short *hfilter_aligned16,
581 const short *vfilter_aligned16,
582 unsigned char *dst_ptr,
583 unsigned int dst_stride) {
584 if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) {
585 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16);
586
587 vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
588 fdata2, 16, 15, hfilter_aligned16);
589 vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 8,
590 vfilter_aligned16);
591 } else {
592 if (hfilter_aligned16[3] != 128) {
593 vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 8,
594 hfilter_aligned16);
595 } else {
596 vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
597 dst_ptr, dst_stride, 8, vfilter_aligned16);
598 }
599 }
600 }
601
602 void vp9_filter_block2d_8x4_8_ssse3(const unsigned char *src_ptr,
603 const unsigned int src_stride,
604 const short *hfilter_aligned16,
605 const short *vfilter_aligned16,
606 unsigned char *dst_ptr,
607 unsigned int dst_stride) {
608 if (hfilter_aligned16[3] !=128 && vfilter_aligned16[3] != 128) {
609 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16);
610
611 vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
612 fdata2, 16, 11, hfilter_aligned16);
613 vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 4,
614 vfilter_aligned16);
615 } else {
616 if (hfilter_aligned16[3] != 128) {
617 vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 4,
618 hfilter_aligned16);
619 } else {
620 vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
621 dst_ptr, dst_stride, 4, vfilter_aligned16);
622 }
623 }
624 }
625 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698