Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(690)

Unified Diff: source/libvpx/vp9/common/x86/vp9_asm_stubs.c

Issue 11555023: libvpx: Add VP9 decoder. (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: source/libvpx/vp9/common/x86/vp9_asm_stubs.c
===================================================================
--- source/libvpx/vp9/common/x86/vp9_asm_stubs.c (revision 0)
+++ source/libvpx/vp9/common/x86/vp9_asm_stubs.c (revision 0)
@@ -0,0 +1,625 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/mem.h"
+#include "vp9/common/vp9_subpixel.h"
+
+extern const short vp9_six_tap_mmx[16][6 * 8];
+
+extern const short vp9_bilinear_filters_8x_mmx[16][2 * 8];
+
+extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1dc_v6_mmx(unsigned short *src_ptr,
+ unsigned char *output_ptr,
+ int output_pitch,
+ unsigned int pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d8_h6_sse2(unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d16_h6_sse2(unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d8_v6_sse2(unsigned short *src_ptr,
+ unsigned char *output_ptr,
+ int dst_ptich,
+ unsigned int pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d16_v6_sse2(unsigned short *src_ptr,
+ unsigned char *output_ptr,
+ int dst_ptich,
+ unsigned int pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const short *vp9_filter);
+
+extern void vp9_unpack_block1d16_h6_sse2(unsigned char *src_ptr,
+ unsigned short *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int output_height,
+ unsigned int output_width);
+
+extern void vp9_filter_block1d8_h6_only_sse2(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ int dst_pitch,
+ unsigned int output_height,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d16_h6_only_sse2(unsigned char *src_ptr,
+ unsigned int src_pixels_per_lin,
+ unsigned char *output_ptr,
+ int dst_pitch,
+ unsigned int output_height,
+ const short *vp9_filter);
+
+extern void vp9_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ int dst_pitch,
+ unsigned int output_height,
+ const short *vp9_filter);
+
+extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx);
+
+///////////////////////////////////////////////////////////////////////////
+// the mmx function that does the bilinear filtering and var calculation //
+// in one pass //
+///////////////////////////////////////////////////////////////////////////
+DECLARE_ALIGNED(16, const short, vp9_bilinear_filters_mmx[16][8]) = {
+ { 128, 128, 128, 128, 0, 0, 0, 0 },
+ { 120, 120, 120, 120, 8, 8, 8, 8 },
+ { 112, 112, 112, 112, 16, 16, 16, 16 },
+ { 104, 104, 104, 104, 24, 24, 24, 24 },
+ { 96, 96, 96, 96, 32, 32, 32, 32 },
+ { 88, 88, 88, 88, 40, 40, 40, 40 },
+ { 80, 80, 80, 80, 48, 48, 48, 48 },
+ { 72, 72, 72, 72, 56, 56, 56, 56 },
+ { 64, 64, 64, 64, 64, 64, 64, 64 },
+ { 56, 56, 56, 56, 72, 72, 72, 72 },
+ { 48, 48, 48, 48, 80, 80, 80, 80 },
+ { 40, 40, 40, 40, 88, 88, 88, 88 },
+ { 32, 32, 32, 32, 96, 96, 96, 96 },
+ { 24, 24, 24, 24, 104, 104, 104, 104 },
+ { 16, 16, 16, 16, 112, 112, 112, 112 },
+ { 8, 8, 8, 8, 120, 120, 120, 120 }
+};
+
+#if HAVE_MMX
+void vp9_sixtap_predict4x4_mmx(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict4x4_mmx\n");
+#endif
+ /* Temp data buffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 16 * 16);
+ const short *hfilter, *vfilter;
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), fdata2,
+ src_pixels_per_line, 1, 9, 8, hfilter);
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_filter_block1dc_v6_mmx(fdata2 + 8, dst_ptr, dst_pitch,
+ 8, 4, 4, 4, vfilter);
+}
+
+void vp9_sixtap_predict16x16_mmx(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict16x16_mmx\n");
+#endif
+ /* Temp data buffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24);
+ const short *hfilter, *vfilter;
+
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),
+ fdata2, src_pixels_per_line, 1, 21, 32,
+ hfilter);
+ vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,
+ fdata2 + 4, src_pixels_per_line, 1, 21, 32,
+ hfilter);
+ vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8,
+ fdata2 + 8, src_pixels_per_line, 1, 21, 32,
+ hfilter);
+ vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12,
+ fdata2 + 12, src_pixels_per_line, 1, 21, 32,
+ hfilter);
+
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_filter_block1dc_v6_mmx(fdata2 + 32, dst_ptr, dst_pitch,
+ 32, 16, 16, 16, vfilter);
+ vp9_filter_block1dc_v6_mmx(fdata2 + 36, dst_ptr + 4, dst_pitch,
+ 32, 16, 16, 16, vfilter);
+ vp9_filter_block1dc_v6_mmx(fdata2 + 40, dst_ptr + 8, dst_pitch,
+ 32, 16, 16, 16, vfilter);
+ vp9_filter_block1dc_v6_mmx(fdata2 + 44, dst_ptr + 12, dst_pitch,
+ 32, 16, 16, 16, vfilter);
+}
+
+void vp9_sixtap_predict8x8_mmx(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict8x8_mmx\n");
+#endif
+ /* Temp data buffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
+ const short *hfilter, *vfilter;
+
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),
+ fdata2, src_pixels_per_line, 1, 13, 16,
+ hfilter);
+ vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,
+ fdata2 + 4, src_pixels_per_line, 1, 13, 16,
+ hfilter);
+
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_filter_block1dc_v6_mmx(fdata2 + 16, dst_ptr, dst_pitch,
+ 16, 8, 8, 8, vfilter);
+ vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch,
+ 16, 8, 8, 8, vfilter);
+}
+
+void vp9_sixtap_predict8x4_mmx(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict8x4_mmx\n");
+#endif
+ /* Temp data buffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
+ const short *hfilter, *vfilter;
+
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),
+ fdata2, src_pixels_per_line, 1, 9, 16, hfilter);
+ vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,
+ fdata2 + 4, src_pixels_per_line, 1, 9, 16, hfilter);
+
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_filter_block1dc_v6_mmx(fdata2 + 16, dst_ptr, dst_pitch,
+ 16, 8, 4, 8, vfilter);
+ vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch,
+ 16, 8, 4, 8, vfilter);
+}
+
+void vp9_bilinear_predict16x16_mmx(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ vp9_bilinear_predict8x8_mmx(src_ptr,
+ src_pixels_per_line, xoffset, yoffset,
+ dst_ptr, dst_pitch);
+ vp9_bilinear_predict8x8_mmx(src_ptr + 8,
+ src_pixels_per_line, xoffset, yoffset,
+ dst_ptr + 8, dst_pitch);
+ vp9_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line,
+ src_pixels_per_line, xoffset, yoffset,
+ dst_ptr + dst_pitch * 8, dst_pitch);
+ vp9_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line + 8,
+ src_pixels_per_line, xoffset, yoffset,
+ dst_ptr + dst_pitch * 8 + 8, dst_pitch);
+}
+#endif
+
+#if HAVE_SSE2
+void vp9_sixtap_predict16x16_sse2(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ /* Temp data buffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24);
+ const short *hfilter, *vfilter;
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict16x16_sse2\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) {
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
+ src_pixels_per_line, 1, 21, 32, hfilter);
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch,
+ 32, 16, 16, dst_pitch, vfilter);
+ } else {
+ /* First-pass only */
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 16, hfilter);
+ }
+ } else {
+ /* Second-pass only */
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
+ src_pixels_per_line, 21, 32);
+ vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch,
+ 32, 16, 16, dst_pitch, vfilter);
+ }
+}
+
+void vp9_sixtap_predict8x8_sse2(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ /* Temp data buffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
+ const short *hfilter, *vfilter;
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict8x8_sse2\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) {
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
+ src_pixels_per_line, 1, 13, 16, hfilter);
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch,
+ 16, 8, 8, dst_pitch, vfilter);
+ } else {
+ /* First-pass only */
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 8, hfilter);
+ }
+ } else {
+ /* Second-pass only */
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 8, vfilter);
+ }
+}
+
+void vp9_sixtap_predict8x4_sse2(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ /* Temp data buffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
+ const short *hfilter, *vfilter;
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict8x4_sse2\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) {
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
+ src_pixels_per_line, 1, 9, 16, hfilter);
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch,
+ 16, 8, 4, dst_pitch, vfilter);
+ } else {
+ /* First-pass only */
+ hfilter = vp9_six_tap_mmx[xoffset];
+ vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, hfilter);
+ }
+ } else {
+ /* Second-pass only */
+ vfilter = vp9_six_tap_mmx[yoffset];
+ vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, vfilter);
+ }
+}
+#endif
+
+#if HAVE_SSSE3
+extern void vp9_filter_block1d8_h6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d16_h6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d16_v6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d8_v6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d4_h6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned char *output_ptr,
+ unsigned int output_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+extern void vp9_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
+ unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ unsigned int vp9_filter_index);
+
+void vp9_sixtap_predict16x16_ssse3(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 24 * 24);
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict16x16_ssse3\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) {
+ vp9_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ fdata2, 16, 21, xoffset);
+ vp9_filter_block1d16_v6_ssse3(fdata2, 16, dst_ptr, dst_pitch,
+ 16, yoffset);
+ } else {
+ /* First-pass only */
+ vp9_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 16, xoffset);
+ }
+ } else {
+ /* Second-pass only */
+ vp9_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 16, yoffset);
+ }
+}
+
+void vp9_sixtap_predict8x8_ssse3(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256);
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict8x8_ssse3\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) {
+ vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line, fdata2, 8, 13, xoffset);
+ vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 8, yoffset);
+ } else {
+ vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 8, xoffset);
+ }
+ } else {
+ /* Second-pass only */
+ vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 8, yoffset);
+ }
+}
+
+void vp9_sixtap_predict8x4_ssse3(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256);
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict8x4_ssse3\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) {
+ vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line, fdata2, 8, 9, xoffset);
+ vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 4, yoffset);
+ } else {
+ /* First-pass only */
+ vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, xoffset);
+ }
+ } else {
+ /* Second-pass only */
+ vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, yoffset);
+ }
+}
+
+void vp9_sixtap_predict4x4_ssse3(unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ unsigned char *dst_ptr,
+ int dst_pitch) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 4 * 9);
+#ifdef ANNOUNCE_FUNCTION
+ printf("vp9_sixtap_predict4x4_ssse3\n");
+#endif
+
+ if (xoffset) {
+ if (yoffset) {
+ vp9_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line, fdata2, 4, 9, xoffset);
+ vp9_filter_block1d4_v6_ssse3(fdata2, 4, dst_ptr, dst_pitch, 4, yoffset);
+ } else {
+ vp9_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, xoffset);
+ }
+ } else {
+ vp9_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
+ src_pixels_per_line,
+ dst_ptr, dst_pitch, 4, yoffset);
+ }
+}
+
+void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block2d_16x16_8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *hfilter_aligned16,
+ const short *vfilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) {
+ if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16);
+
+ vp9_filter_block1d16_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ fdata2, 16, 23, hfilter_aligned16);
+ vp9_filter_block1d16_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 16,
+ vfilter_aligned16);
+ } else {
+ if (hfilter_aligned16[3] != 128) {
+ vp9_filter_block1d16_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride,
+ 16, hfilter_aligned16);
+ } else {
+ vp9_filter_block1d16_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ dst_ptr, dst_stride, 16, vfilter_aligned16);
+ }
+ }
+}
+
+void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
+
+void vp9_filter_block2d_8x8_8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *hfilter_aligned16,
+ const short *vfilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) {
+ if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16);
+
+ vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ fdata2, 16, 15, hfilter_aligned16);
+ vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 8,
+ vfilter_aligned16);
+ } else {
+ if (hfilter_aligned16[3] != 128) {
+ vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 8,
+ hfilter_aligned16);
+ } else {
+ vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ dst_ptr, dst_stride, 8, vfilter_aligned16);
+ }
+ }
+}
+
+void vp9_filter_block2d_8x4_8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_stride,
+ const short *hfilter_aligned16,
+ const short *vfilter_aligned16,
+ unsigned char *dst_ptr,
+ unsigned int dst_stride) {
+ if (hfilter_aligned16[3] !=128 && vfilter_aligned16[3] != 128) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16);
+
+ vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ fdata2, 16, 11, hfilter_aligned16);
+ vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 4,
+ vfilter_aligned16);
+ } else {
+ if (hfilter_aligned16[3] != 128) {
+ vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 4,
+ hfilter_aligned16);
+ } else {
+ vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
+ dst_ptr, dst_stride, 4, vfilter_aligned16);
+ }
+ }
+}
+#endif
Property changes on: source/libvpx/vp9/common/x86/vp9_asm_stubs.c
___________________________________________________________________
Added: svn:eol-style
+ LF

Powered by Google App Engine
This is Rietveld 408576698