| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 | 11 |
| 12 #include "vpx_ports/config.h" | 12 #include "vpx_config.h" |
| 13 #include "vpx_ports/x86.h" | 13 #include "vpx_ports/x86.h" |
| 14 #include "vp8/common/g_common.h" | 14 #include "vp8/common/g_common.h" |
| 15 #include "vp8/common/subpixel.h" | 15 #include "vp8/common/subpixel.h" |
| 16 #include "vp8/common/loopfilter.h" | 16 #include "vp8/common/loopfilter.h" |
| 17 #include "vp8/common/recon.h" | 17 #include "vp8/common/recon.h" |
| 18 #include "vp8/common/idct.h" | 18 #include "vp8/common/idct.h" |
| 19 #include "vp8/common/pragmas.h" | 19 #include "vp8/common/pragmas.h" |
| 20 #include "vp8/common/onyxc_int.h" | 20 #include "vp8/common/onyxc_int.h" |
| 21 | 21 |
| 22 void vp8_arch_x86_common_init(VP8_COMMON *ctx) | 22 void vp8_arch_x86_common_init(VP8_COMMON *ctx) |
| 23 { | 23 { |
| 24 #if CONFIG_RUNTIME_CPU_DETECT | 24 #if CONFIG_RUNTIME_CPU_DETECT |
| 25 VP8_COMMON_RTCD *rtcd = &ctx->rtcd; | 25 VP8_COMMON_RTCD *rtcd = &ctx->rtcd; |
| 26 int flags = x86_simd_caps(); | 26 int flags = x86_simd_caps(); |
| 27 int mmx_enabled = flags & HAS_MMX; | |
| 28 int xmm_enabled = flags & HAS_SSE; | |
| 29 int wmt_enabled = flags & HAS_SSE2; | |
| 30 int SSSE3Enabled = flags & HAS_SSSE3; | |
| 31 | 27 |
| 32 /* Note: | 28 /* Note: |
| 33 * | 29 * |
| 34 * This platform can be built without runtime CPU detection as well. If | 30 * This platform can be built without runtime CPU detection as well. If |
| 35 * you modify any of the function mappings present in this file, be sure | 31 * you modify any of the function mappings present in this file, be sure |
| 36 * to also update them in static mappings (<arch>/filename_<arch>.h) | 32 * to also update them in static mappings (<arch>/filename_<arch>.h) |
| 37 */ | 33 */ |
| 38 | 34 |
| 39 /* Override default functions with fastest ones for this CPU. */ | 35 /* Override default functions with fastest ones for this CPU. */ |
| 40 #if HAVE_MMX | 36 #if HAVE_MMX |
| 41 | 37 |
| 42 if (mmx_enabled) | 38 if (flags & HAS_MMX) |
| 43 { | 39 { |
| 44 rtcd->idct.idct1 = vp8_short_idct4x4llm_1_mmx; | 40 rtcd->idct.idct1 = vp8_short_idct4x4llm_1_mmx; |
| 45 rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx; | 41 rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx; |
| 46 rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx; | 42 rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx; |
| 47 rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx; | 43 rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx; |
| 48 rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx; | 44 rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx; |
| 49 | 45 |
| 50 | 46 |
| 51 | 47 |
| 52 rtcd->recon.recon = vp8_recon_b_mmx; | 48 rtcd->recon.recon = vp8_recon_b_mmx; |
| 53 rtcd->recon.copy8x8 = vp8_copy_mem8x8_mmx; | 49 rtcd->recon.copy8x8 = vp8_copy_mem8x8_mmx; |
| 54 rtcd->recon.copy8x4 = vp8_copy_mem8x4_mmx; | 50 rtcd->recon.copy8x4 = vp8_copy_mem8x4_mmx; |
| 55 rtcd->recon.copy16x16 = vp8_copy_mem16x16_mmx; | 51 rtcd->recon.copy16x16 = vp8_copy_mem16x16_mmx; |
| 56 | 52 |
| 57 rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_mmx; | 53 rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_mmx; |
| 58 rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_mmx; | 54 rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_mmx; |
| 59 rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_mmx; | 55 rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_mmx; |
| 60 rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_mmx; | 56 rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_mmx; |
| 61 rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_mmx; | 57 rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_mmx; |
| 62 rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_mmx; | 58 rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_mmx; |
| 63 rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_mmx; | 59 rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_mmx; |
| 64 rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_mmx; | 60 rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_mmx; |
| 65 | 61 |
| 66 rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_mmx; | 62 rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_mmx; |
| 67 rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx; | 63 rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx; |
| 68 rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx; | 64 rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx; |
| 69 rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx; | 65 rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx; |
| 70 rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_mmx; | 66 rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx; |
| 71 rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx; | 67 rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx; |
| 72 rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_mmx; | 68 rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx; |
| 73 rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx; | 69 rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx; |
| 74 | 70 |
| 75 #if CONFIG_POSTPROC | 71 #if CONFIG_POSTPROC |
| 76 rtcd->postproc.down = vp8_mbpost_proc_down_mmx; | 72 rtcd->postproc.down = vp8_mbpost_proc_down_mmx; |
| 77 /*rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;*/ | 73 /*rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;*/ |
| 78 rtcd->postproc.downacross = vp8_post_proc_down_and_across_mmx; | 74 rtcd->postproc.downacross = vp8_post_proc_down_and_across_mmx; |
| 79 rtcd->postproc.addnoise = vp8_plane_add_noise_mmx; | 75 rtcd->postproc.addnoise = vp8_plane_add_noise_mmx; |
| 80 #endif | 76 #endif |
| 81 } | 77 } |
| 82 | 78 |
| 83 #endif | 79 #endif |
| 84 #if HAVE_SSE2 | 80 #if HAVE_SSE2 |
| 85 | 81 |
| 86 if (wmt_enabled) | 82 if (flags & HAS_SSE2) |
| 87 { | 83 { |
| 88 rtcd->recon.recon2 = vp8_recon2b_sse2; | 84 rtcd->recon.recon2 = vp8_recon2b_sse2; |
| 89 rtcd->recon.recon4 = vp8_recon4b_sse2; | 85 rtcd->recon.recon4 = vp8_recon4b_sse2; |
| 90 rtcd->recon.copy16x16 = vp8_copy_mem16x16_sse2; | 86 rtcd->recon.copy16x16 = vp8_copy_mem16x16_sse2; |
| | 87 rtcd->recon.build_intra_predictors_mbuv = |
| | 88 vp8_build_intra_predictors_mbuv_sse2; |
| | 89 rtcd->recon.build_intra_predictors_mbuv_s = |
| | 90 vp8_build_intra_predictors_mbuv_s_sse2; |
| 91 | 91 |
| 92 rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2; | 92 rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2; |
| 93 | 93 |
| 94 rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_sse2; | 94 rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_sse2; |
| 95 rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_sse2; | 95 rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_sse2; |
| 96 rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_sse2; | 96 rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_sse2; |
| 97 rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_sse2; | 97 rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_sse2; |
| 98 rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_sse2; | 98 rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_sse2; |
| 99 | 99 |
| 100 rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_sse2; | 100 rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_sse2; |
| 101 rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2; | 101 rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2; |
| 102 rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2; | 102 rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2; |
| 103 rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2; | 103 rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2; |
| 104 rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_sse2; | 104 rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2; |
| 105 rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2; | 105 rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2; |
| 106 rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_sse2; | 106 rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2; |
| 107 rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2; | 107 rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2; |
| 108 | 108 |
| 109 #if CONFIG_POSTPROC | 109 #if CONFIG_POSTPROC |
| 110 rtcd->postproc.down = vp8_mbpost_proc_down_xmm; | 110 rtcd->postproc.down = vp8_mbpost_proc_down_xmm; |
| 111 rtcd->postproc.across = vp8_mbpost_proc_across_ip_xmm; | 111 rtcd->postproc.across = vp8_mbpost_proc_across_ip_xmm; |
| 112 rtcd->postproc.downacross = vp8_post_proc_down_and_across_xmm; | 112 rtcd->postproc.downacross = vp8_post_proc_down_and_across_xmm; |
| 113 rtcd->postproc.addnoise = vp8_plane_add_noise_wmt; | 113 rtcd->postproc.addnoise = vp8_plane_add_noise_wmt; |
| 114 #endif | 114 #endif |
| 115 } | 115 } |
| 116 | 116 |
| 117 #endif | 117 #endif |
| 118 | 118 |
| 119 #if HAVE_SSSE3 | 119 #if HAVE_SSSE3 |
| 120 | 120 |
| 121 if (SSSE3Enabled) | 121 if (flags & HAS_SSSE3) |
| 122 { | 122 { |
| 123 rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_ssse3; | 123 rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_ssse3; |
| 124 rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_ssse3; | 124 rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_ssse3; |
| 125 rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_ssse3; | 125 rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_ssse3; |
| 126 rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3; | 126 rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3; |
| 127 rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3; | 127 rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3; |
| 128 rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_ssse3; | 128 rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_ssse3; |
| | 129 |
| | 130 rtcd->recon.build_intra_predictors_mbuv = |
| | 131 vp8_build_intra_predictors_mbuv_ssse3; |
| | 132 rtcd->recon.build_intra_predictors_mbuv_s = |
| | 133 vp8_build_intra_predictors_mbuv_s_ssse3; |
| 129 } | 134 } |
| 130 #endif | 135 #endif |
| 131 | 136 |
| 132 #endif | 137 #endif |
| 133 } | 138 } |
| OLD | NEW |