Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(104)

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_asm_stubs.c

Issue 54923004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 18 matching lines...) Expand all
29 { 64, 64, 64, 64, 64, 64, 64, 64 }, 29 { 64, 64, 64, 64, 64, 64, 64, 64 },
30 { 56, 56, 56, 56, 72, 72, 72, 72 }, 30 { 56, 56, 56, 56, 72, 72, 72, 72 },
31 { 48, 48, 48, 48, 80, 80, 80, 80 }, 31 { 48, 48, 48, 48, 80, 80, 80, 80 },
32 { 40, 40, 40, 40, 88, 88, 88, 88 }, 32 { 40, 40, 40, 40, 88, 88, 88, 88 },
33 { 32, 32, 32, 32, 96, 96, 96, 96 }, 33 { 32, 32, 32, 32, 96, 96, 96, 96 },
34 { 24, 24, 24, 24, 104, 104, 104, 104 }, 34 { 24, 24, 24, 24, 104, 104, 104, 104 },
35 { 16, 16, 16, 16, 112, 112, 112, 112 }, 35 { 16, 16, 16, 16, 112, 112, 112, 112 },
36 { 8, 8, 8, 8, 120, 120, 120, 120 } 36 { 8, 8, 8, 8, 120, 120, 120, 120 }
37 }; 37 };
38 38
39 typedef void filter8_1dfunction (
40 const unsigned char *src_ptr,
41 const unsigned int src_pitch,
42 unsigned char *output_ptr,
43 unsigned int out_pitch,
44 unsigned int output_height,
45 const short *filter
46 );
47
39 #if HAVE_SSSE3 48 #if HAVE_SSSE3
40 void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr, 49 filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
41 const unsigned int src_pitch, 50 filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
42 unsigned char *output_ptr, 51 filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
43 unsigned int out_pitch, 52 filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
44 unsigned int output_height, 53 filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
45 const short *filter); 54 filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
46 55 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
47 void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr, 56 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
48 const unsigned int src_pitch, 57 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
49 unsigned char *output_ptr, 58 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
50 unsigned int out_pitch, 59 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
51 unsigned int output_height, 60 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
52 const short *filter);
53
54 void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr,
55 const unsigned int src_pitch,
56 unsigned char *output_ptr,
57 unsigned int out_pitch,
58 unsigned int output_height,
59 const short *filter);
60
61 void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
62 const unsigned int src_pitch,
63 unsigned char *output_ptr,
64 unsigned int out_pitch,
65 unsigned int output_height,
66 const short *filter);
67
68 void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr,
69 const unsigned int src_pitch,
70 unsigned char *output_ptr,
71 unsigned int out_pitch,
72 unsigned int output_height,
73 const short *filter);
74
75 void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr,
76 const unsigned int src_pitch,
77 unsigned char *output_ptr,
78 unsigned int out_pitch,
79 unsigned int output_height,
80 const short *filter);
81
82 void vp9_filter_block1d16_v8_avg_ssse3(const unsigned char *src_ptr,
83 const unsigned int src_pitch,
84 unsigned char *output_ptr,
85 unsigned int out_pitch,
86 unsigned int output_height,
87 const short *filter);
88
89 void vp9_filter_block1d16_h8_avg_ssse3(const unsigned char *src_ptr,
90 const unsigned int src_pitch,
91 unsigned char *output_ptr,
92 unsigned int out_pitch,
93 unsigned int output_height,
94 const short *filter);
95
96 void vp9_filter_block1d8_v8_avg_ssse3(const unsigned char *src_ptr,
97 const unsigned int src_pitch,
98 unsigned char *output_ptr,
99 unsigned int out_pitch,
100 unsigned int output_height,
101 const short *filter);
102
103 void vp9_filter_block1d8_h8_avg_ssse3(const unsigned char *src_ptr,
104 const unsigned int src_pitch,
105 unsigned char *output_ptr,
106 unsigned int out_pitch,
107 unsigned int output_height,
108 const short *filter);
109
110 void vp9_filter_block1d4_v8_avg_ssse3(const unsigned char *src_ptr,
111 const unsigned int src_pitch,
112 unsigned char *output_ptr,
113 unsigned int out_pitch,
114 unsigned int output_height,
115 const short *filter);
116
117 void vp9_filter_block1d4_h8_avg_ssse3(const unsigned char *src_ptr,
118 const unsigned int src_pitch,
119 unsigned char *output_ptr,
120 unsigned int out_pitch,
121 unsigned int output_height,
122 const short *filter);
123 61
124 void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, 62 void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
125 uint8_t *dst, ptrdiff_t dst_stride, 63 uint8_t *dst, ptrdiff_t dst_stride,
126 const int16_t *filter_x, int x_step_q4, 64 const int16_t *filter_x, int x_step_q4,
127 const int16_t *filter_y, int y_step_q4, 65 const int16_t *filter_y, int y_step_q4,
128 int w, int h) { 66 int w, int h) {
129 /* Ensure the filter can be compressed to int16_t. */ 67 /* Ensure the filter can be compressed to int16_t. */
130 if (x_step_q4 == 16 && filter_x[3] != 128) { 68 if (x_step_q4 == 16 && filter_x[3] != 128) {
131 while (w >= 16) { 69 while (w >= 16) {
132 vp9_filter_block1d16_h8_ssse3(src, src_stride, 70 vp9_filter_block1d16_h8_ssse3(src, src_stride,
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
272 filter_x, x_step_q4, filter_y, y_step_q4, 210 filter_x, x_step_q4, filter_y, y_step_q4,
273 w, h); 211 w, h);
274 } 212 }
275 } 213 }
276 214
277 void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, 215 void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
278 uint8_t *dst, ptrdiff_t dst_stride, 216 uint8_t *dst, ptrdiff_t dst_stride,
279 const int16_t *filter_x, int x_step_q4, 217 const int16_t *filter_x, int x_step_q4,
280 const int16_t *filter_y, int y_step_q4, 218 const int16_t *filter_y, int y_step_q4,
281 int w, int h) { 219 int w, int h) {
282 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71); 220 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
283 221
284 assert(w <= 64); 222 assert(w <= 64);
285 assert(h <= 64); 223 assert(h <= 64);
286 if (x_step_q4 == 16 && y_step_q4 == 16) { 224 if (x_step_q4 == 16 && y_step_q4 == 16) {
287 vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64, 225 vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
288 filter_x, x_step_q4, filter_y, y_step_q4, 226 filter_x, x_step_q4, filter_y, y_step_q4,
289 w, h + 7); 227 w, h + 7);
290 vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride, 228 vp9_convolve8_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
291 filter_x, x_step_q4, filter_y, y_step_q4, w, h); 229 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
292 } else { 230 } else {
293 vp9_convolve8_c(src, src_stride, dst, dst_stride, 231 vp9_convolve8_c(src, src_stride, dst, dst_stride,
294 filter_x, x_step_q4, filter_y, y_step_q4, w, h); 232 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
295 } 233 }
296 } 234 }
297 235
298 void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, 236 void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
299 uint8_t *dst, ptrdiff_t dst_stride, 237 uint8_t *dst, ptrdiff_t dst_stride,
300 const int16_t *filter_x, int x_step_q4, 238 const int16_t *filter_x, int x_step_q4,
301 const int16_t *filter_y, int y_step_q4, 239 const int16_t *filter_y, int y_step_q4,
302 int w, int h) { 240 int w, int h) {
303 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64*71); 241 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
304 242
305 assert(w <= 64); 243 assert(w <= 64);
306 assert(h <= 64); 244 assert(h <= 64);
307 if (x_step_q4 == 16 && y_step_q4 == 16) { 245 if (x_step_q4 == 16 && y_step_q4 == 16) {
308 vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64, 246 vp9_convolve8_horiz_ssse3(src - 3 * src_stride, src_stride, fdata2, 64,
309 filter_x, x_step_q4, filter_y, y_step_q4, 247 filter_x, x_step_q4, filter_y, y_step_q4,
310 w, h + 7); 248 w, h + 7);
311 vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride, 249 vp9_convolve8_avg_vert_ssse3(fdata2 + 3 * 64, 64, dst, dst_stride,
312 filter_x, x_step_q4, filter_y, y_step_q4, 250 filter_x, x_step_q4, filter_y, y_step_q4,
313 w, h); 251 w, h);
314 } else { 252 } else {
315 vp9_convolve8_avg_c(src, src_stride, dst, dst_stride, 253 vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
316 filter_x, x_step_q4, filter_y, y_step_q4, w, h); 254 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
317 } 255 }
318 } 256 }
319 #endif 257 #endif
258
259 #if HAVE_SSE2
260 filter8_1dfunction vp9_filter_block1d16_v8_sse2;
261 filter8_1dfunction vp9_filter_block1d16_h8_sse2;
262 filter8_1dfunction vp9_filter_block1d8_v8_sse2;
263 filter8_1dfunction vp9_filter_block1d8_h8_sse2;
264 filter8_1dfunction vp9_filter_block1d4_v8_sse2;
265 filter8_1dfunction vp9_filter_block1d4_h8_sse2;
266 filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2;
267 filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2;
268 filter8_1dfunction vp9_filter_block1d8_v8_avg_sse2;
269 filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
270 filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
271 filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
272
273 void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
274 uint8_t *dst, ptrdiff_t dst_stride,
275 const int16_t *filter_x, int x_step_q4,
276 const int16_t *filter_y, int y_step_q4,
277 int w, int h) {
278 /* Ensure the filter can be compressed to int16_t. */
279 if (x_step_q4 == 16 && filter_x[3] != 128) {
280 while (w >= 16) {
281 vp9_filter_block1d16_h8_sse2(src, src_stride,
282 dst, dst_stride,
283 h, filter_x);
284 src += 16;
285 dst += 16;
286 w -= 16;
287 }
288 while (w >= 8) {
289 vp9_filter_block1d8_h8_sse2(src, src_stride,
290 dst, dst_stride,
291 h, filter_x);
292 src += 8;
293 dst += 8;
294 w -= 8;
295 }
296 while (w >= 4) {
297 vp9_filter_block1d4_h8_sse2(src, src_stride,
298 dst, dst_stride,
299 h, filter_x);
300 src += 4;
301 dst += 4;
302 w -= 4;
303 }
304 }
305 if (w) {
306 vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
307 filter_x, x_step_q4, filter_y, y_step_q4,
308 w, h);
309 }
310 }
311
312 void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
313 uint8_t *dst, ptrdiff_t dst_stride,
314 const int16_t *filter_x, int x_step_q4,
315 const int16_t *filter_y, int y_step_q4,
316 int w, int h) {
317 if (y_step_q4 == 16 && filter_y[3] != 128) {
318 while (w >= 16) {
319 vp9_filter_block1d16_v8_sse2(src - src_stride * 3, src_stride,
320 dst, dst_stride,
321 h, filter_y);
322 src += 16;
323 dst += 16;
324 w -= 16;
325 }
326 while (w >= 8) {
327 vp9_filter_block1d8_v8_sse2(src - src_stride * 3, src_stride,
328 dst, dst_stride,
329 h, filter_y);
330 src += 8;
331 dst += 8;
332 w -= 8;
333 }
334 while (w >= 4) {
335 vp9_filter_block1d4_v8_sse2(src - src_stride * 3, src_stride,
336 dst, dst_stride,
337 h, filter_y);
338 src += 4;
339 dst += 4;
340 w -= 4;
341 }
342 }
343 if (w) {
344 vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
345 filter_x, x_step_q4, filter_y, y_step_q4,
346 w, h);
347 }
348 }
349
350 void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
351 uint8_t *dst, ptrdiff_t dst_stride,
352 const int16_t *filter_x, int x_step_q4,
353 const int16_t *filter_y, int y_step_q4,
354 int w, int h) {
355 if (x_step_q4 == 16 && filter_x[3] != 128) {
356 while (w >= 16) {
357 vp9_filter_block1d16_h8_avg_sse2(src, src_stride,
358 dst, dst_stride,
359 h, filter_x);
360 src += 16;
361 dst += 16;
362 w -= 16;
363 }
364 while (w >= 8) {
365 vp9_filter_block1d8_h8_avg_sse2(src, src_stride,
366 dst, dst_stride,
367 h, filter_x);
368 src += 8;
369 dst += 8;
370 w -= 8;
371 }
372 while (w >= 4) {
373 vp9_filter_block1d4_h8_avg_sse2(src, src_stride,
374 dst, dst_stride,
375 h, filter_x);
376 src += 4;
377 dst += 4;
378 w -= 4;
379 }
380 }
381 if (w) {
382 vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
383 filter_x, x_step_q4, filter_y, y_step_q4,
384 w, h);
385 }
386 }
387
388 void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
389 uint8_t *dst, ptrdiff_t dst_stride,
390 const int16_t *filter_x, int x_step_q4,
391 const int16_t *filter_y, int y_step_q4,
392 int w, int h) {
393 if (y_step_q4 == 16 && filter_y[3] != 128) {
394 while (w >= 16) {
395 vp9_filter_block1d16_v8_avg_sse2(src - src_stride * 3, src_stride,
396 dst, dst_stride,
397 h, filter_y);
398 src += 16;
399 dst += 16;
400 w -= 16;
401 }
402 while (w >= 8) {
403 vp9_filter_block1d8_v8_avg_sse2(src - src_stride * 3, src_stride,
404 dst, dst_stride,
405 h, filter_y);
406 src += 8;
407 dst += 8;
408 w -= 8;
409 }
410 while (w >= 4) {
411 vp9_filter_block1d4_v8_avg_sse2(src - src_stride * 3, src_stride,
412 dst, dst_stride,
413 h, filter_y);
414 src += 4;
415 dst += 4;
416 w -= 4;
417 }
418 }
419 if (w) {
420 vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
421 filter_x, x_step_q4, filter_y, y_step_q4,
422 w, h);
423 }
424 }
425
426 void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
427 uint8_t *dst, ptrdiff_t dst_stride,
428 const int16_t *filter_x, int x_step_q4,
429 const int16_t *filter_y, int y_step_q4,
430 int w, int h) {
431 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
432
433 assert(w <= 64);
434 assert(h <= 64);
435 if (x_step_q4 == 16 && y_step_q4 == 16) {
436 vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
437 filter_x, x_step_q4, filter_y, y_step_q4,
438 w, h + 7);
439 vp9_convolve8_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
440 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
441 } else {
442 vp9_convolve8_c(src, src_stride, dst, dst_stride,
443 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
444 }
445 }
446
447 void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
448 uint8_t *dst, ptrdiff_t dst_stride,
449 const int16_t *filter_x, int x_step_q4,
450 const int16_t *filter_y, int y_step_q4,
451 int w, int h) {
452 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71);
453
454 assert(w <= 64);
455 assert(h <= 64);
456 if (x_step_q4 == 16 && y_step_q4 == 16) {
457 vp9_convolve8_horiz_sse2(src - 3 * src_stride, src_stride, fdata2, 64,
458 filter_x, x_step_q4, filter_y, y_step_q4,
459 w, h + 7);
460 vp9_convolve8_avg_vert_sse2(fdata2 + 3 * 64, 64, dst, dst_stride,
461 filter_x, x_step_q4, filter_y, y_step_q4,
462 w, h);
463 } else {
464 vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
465 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
466 }
467 }
468 #endif
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/vp9_treecoder.c ('k') | source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698