| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 | |
| 11 #include "./vp8_rtcd.h" | |
| 12 #include "vpx_config.h" | |
| 13 #include "vp8/common/variance.h" | |
| 14 #include "vpx_ports/mem.h" | |
| 15 #include "vp8/common/x86/filter_x86.h" | |
| 16 | |
/*
 * Prototype only; the definition lives outside this file.
 * NOTE(review): the name suggests a horizontal 6-tap filter pass writing
 * 16-bit intermediates -- confirm against the implementation.
 * Not referenced by any function visible in this file.
 */
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr,
                                  unsigned short *output_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *filter);
/*
 * Prototype only; the definition lives outside this file.
 * NOTE(review): the name suggests a vertical 6-tap filter pass reading
 * 16-bit intermediates and writing 8-bit pixels -- confirm against the
 * implementation.
 * Not referenced by any function visible in this file.
 */
extern void filter_block1d_v6_mmx(const short *src_ptr,
                                  unsigned char *output_ptr,
                                  unsigned int pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *filter);
| 37 | |
/*
 * Prototype only; the definition lives outside this file.
 * Takes two pixel buffers with their strides plus a horizontal and a
 * vertical filter row, and reports its results through the `sum` and
 * `sumsquared` out-parameters.
 * Used in this file by vp8_sub_pixel_variance4x4_mmx().
 */
extern void vp8_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              const short *HFilter,
                                              const short *VFilter,
                                              int *sum,
                                              unsigned int *sumsquared);
/*
 * Prototype only; the definition lives outside this file.
 * Same shape as the 4x4 variant with an additional `Height` argument.
 * Callers in this file pass Height = 8 or 16 and invoke it once per
 * 8-pixel-wide column (16-wide blocks call it twice, at +0 and +8).
 * Results are reported through the `sum` and `sumsquared` out-parameters.
 */
extern void vp8_filter_block2d_bil_var_mmx(const unsigned char *ref_ptr,
                                           int ref_pixels_per_line,
                                           const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           unsigned int Height,
                                           const short *HFilter,
                                           const short *VFilter,
                                           int *sum,
                                           unsigned int *sumsquared);
| 61 | |
| 62 unsigned int vp8_sub_pixel_variance4x4_mmx | |
| 63 ( | |
| 64 const unsigned char *src_ptr, | |
| 65 int src_pixels_per_line, | |
| 66 int xoffset, | |
| 67 int yoffset, | |
| 68 const unsigned char *dst_ptr, | |
| 69 int dst_pixels_per_line, | |
| 70 unsigned int *sse) | |
| 71 | |
| 72 { | |
| 73 int xsum; | |
| 74 unsigned int xxsum; | |
| 75 vp8_filter_block2d_bil4x4_var_mmx( | |
| 76 src_ptr, src_pixels_per_line, | |
| 77 dst_ptr, dst_pixels_per_line, | |
| 78 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | |
| 79 &xsum, &xxsum | |
| 80 ); | |
| 81 *sse = xxsum; | |
| 82 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); | |
| 83 } | |
| 84 | |
| 85 | |
| 86 unsigned int vp8_sub_pixel_variance8x8_mmx | |
| 87 ( | |
| 88 const unsigned char *src_ptr, | |
| 89 int src_pixels_per_line, | |
| 90 int xoffset, | |
| 91 int yoffset, | |
| 92 const unsigned char *dst_ptr, | |
| 93 int dst_pixels_per_line, | |
| 94 unsigned int *sse | |
| 95 ) | |
| 96 { | |
| 97 | |
| 98 int xsum; | |
| 99 unsigned int xxsum; | |
| 100 vp8_filter_block2d_bil_var_mmx( | |
| 101 src_ptr, src_pixels_per_line, | |
| 102 dst_ptr, dst_pixels_per_line, 8, | |
| 103 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | |
| 104 &xsum, &xxsum | |
| 105 ); | |
| 106 *sse = xxsum; | |
| 107 return (xxsum - (((unsigned int)xsum * xsum) >> 6)); | |
| 108 } | |
| 109 | |
| 110 unsigned int vp8_sub_pixel_variance16x16_mmx | |
| 111 ( | |
| 112 const unsigned char *src_ptr, | |
| 113 int src_pixels_per_line, | |
| 114 int xoffset, | |
| 115 int yoffset, | |
| 116 const unsigned char *dst_ptr, | |
| 117 int dst_pixels_per_line, | |
| 118 unsigned int *sse | |
| 119 ) | |
| 120 { | |
| 121 | |
| 122 int xsum0, xsum1; | |
| 123 unsigned int xxsum0, xxsum1; | |
| 124 | |
| 125 | |
| 126 vp8_filter_block2d_bil_var_mmx( | |
| 127 src_ptr, src_pixels_per_line, | |
| 128 dst_ptr, dst_pixels_per_line, 16, | |
| 129 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | |
| 130 &xsum0, &xxsum0 | |
| 131 ); | |
| 132 | |
| 133 | |
| 134 vp8_filter_block2d_bil_var_mmx( | |
| 135 src_ptr + 8, src_pixels_per_line, | |
| 136 dst_ptr + 8, dst_pixels_per_line, 16, | |
| 137 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | |
| 138 &xsum1, &xxsum1 | |
| 139 ); | |
| 140 | |
| 141 xsum0 += xsum1; | |
| 142 xxsum0 += xxsum1; | |
| 143 | |
| 144 *sse = xxsum0; | |
| 145 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); | |
| 146 | |
| 147 | |
| 148 } | |
| 149 | |
/*
 * Sub-pixel MSE entry point for a 16x16 block (MMX path).
 *
 * Delegates to vp8_sub_pixel_variance16x16_mmx() purely for its side
 * effect of filling in *sse; the variance it returns is discarded and the
 * value stored in *sse is returned instead.
 */
unsigned int vp8_sub_pixel_mse16x16_mmx(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    (void)vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line,
                                          sse);

    return *sse;
}
| 163 | |
| 164 unsigned int vp8_sub_pixel_variance16x8_mmx | |
| 165 ( | |
| 166 const unsigned char *src_ptr, | |
| 167 int src_pixels_per_line, | |
| 168 int xoffset, | |
| 169 int yoffset, | |
| 170 const unsigned char *dst_ptr, | |
| 171 int dst_pixels_per_line, | |
| 172 unsigned int *sse | |
| 173 ) | |
| 174 { | |
| 175 int xsum0, xsum1; | |
| 176 unsigned int xxsum0, xxsum1; | |
| 177 | |
| 178 | |
| 179 vp8_filter_block2d_bil_var_mmx( | |
| 180 src_ptr, src_pixels_per_line, | |
| 181 dst_ptr, dst_pixels_per_line, 8, | |
| 182 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | |
| 183 &xsum0, &xxsum0 | |
| 184 ); | |
| 185 | |
| 186 | |
| 187 vp8_filter_block2d_bil_var_mmx( | |
| 188 src_ptr + 8, src_pixels_per_line, | |
| 189 dst_ptr + 8, dst_pixels_per_line, 8, | |
| 190 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | |
| 191 &xsum1, &xxsum1 | |
| 192 ); | |
| 193 | |
| 194 xsum0 += xsum1; | |
| 195 xxsum0 += xxsum1; | |
| 196 | |
| 197 *sse = xxsum0; | |
| 198 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); | |
| 199 } | |
| 200 | |
| 201 unsigned int vp8_sub_pixel_variance8x16_mmx | |
| 202 ( | |
| 203 const unsigned char *src_ptr, | |
| 204 int src_pixels_per_line, | |
| 205 int xoffset, | |
| 206 int yoffset, | |
| 207 const unsigned char *dst_ptr, | |
| 208 int dst_pixels_per_line, | |
| 209 unsigned int *sse | |
| 210 ) | |
| 211 { | |
| 212 int xsum; | |
| 213 unsigned int xxsum; | |
| 214 vp8_filter_block2d_bil_var_mmx( | |
| 215 src_ptr, src_pixels_per_line, | |
| 216 dst_ptr, dst_pixels_per_line, 16, | |
| 217 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | |
| 218 &xsum, &xxsum | |
| 219 ); | |
| 220 *sse = xxsum; | |
| 221 return (xxsum - (((unsigned int)xsum * xsum) >> 7)); | |
| 222 } | |
| 223 | |
| 224 | |
/*
 * 16x16 variance with a fixed horizontal-only offset.
 *
 * Thin wrapper around vp8_sub_pixel_variance16x16_mmx() with xoffset = 4
 * and yoffset = 0; *sse and the return value come straight from that call.
 * NOTE(review): the name implies offset 4 is the half-pixel position --
 * confirm against the filter table.
 */
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    const int xoffset = 4;
    const int yoffset = 0;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           xoffset, yoffset,
                                           ref_ptr, recon_stride,
                                           sse);
}
| 235 | |
| 236 | |
/*
 * 16x16 variance with a fixed vertical-only offset.
 *
 * Thin wrapper around vp8_sub_pixel_variance16x16_mmx() with xoffset = 0
 * and yoffset = 4; *sse and the return value come straight from that call.
 * NOTE(review): the name implies offset 4 is the half-pixel position --
 * confirm against the filter table.
 */
unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    const int xoffset = 0;
    const int yoffset = 4;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           xoffset, yoffset,
                                           ref_ptr, recon_stride,
                                           sse);
}
| 247 | |
| 248 | |
/*
 * 16x16 variance with a fixed offset in both directions.
 *
 * Thin wrapper around vp8_sub_pixel_variance16x16_mmx() with xoffset = 4
 * and yoffset = 4; *sse and the return value come straight from that call.
 * NOTE(review): the name implies offset 4 is the half-pixel position --
 * confirm against the filter table.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    const int xoffset = 4;
    const int yoffset = 4;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           xoffset, yoffset,
                                           ref_ptr, recon_stride,
                                           sse);
}
| OLD | NEW |