| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 | |
| 11 #include "./vp8_rtcd.h" | |
| 12 #include "vpx_config.h" | |
| 13 #include "vp8/common/variance.h" | |
| 14 #include "vpx_ports/mem.h" | |
| 15 #include "vp8/common/x86/filter_x86.h" | |
| 16 | |
/*
 * Prototypes for the external filter_block1d_* routines. Only the
 * declarations are visible here; the definitions live elsewhere
 * (presumably x86 assembly -- TODO confirm).
 * NOTE(review): nothing in the visible part of this file calls them.
 */
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr,
                                  unsigned short *output_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *filter);
extern void filter_block1d_v6_mmx(const short *src_ptr,
                                  unsigned char *output_ptr,
                                  unsigned int pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *filter);
extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr,
                                    unsigned short *output_ptr,
                                    unsigned int src_pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *filter);
extern void filter_block1d8_v6_sse2(const short *src_ptr,
                                    unsigned char *output_ptr,
                                    unsigned int pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *filter);
| 21 | |
/*
 * External 4x4 routine used by vp8_sub_pixel_variance4x4_wmt().
 *
 * It receives two pixel buffers with their strides plus a horizontal and
 * a vertical filter row, and writes its results through sum and
 * sumsquared. The caller in this file passes its own src_ptr as ref_ptr
 * and its dst_ptr as src_ptr.
 * The definition is not in this file (presumably assembly -- TODO confirm).
 */
extern void vp8_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              const short *HFilter,
                                              const short *VFilter,
                                              int *sum,
                                              unsigned int *sumsquared);
| 33 | |
/*
 * General (non half-pixel) path used by the sub-pixel variance wrappers
 * in this file.
 *
 * Takes two pixel buffers with their strides, a row count (callers here
 * pass 8 or 16) and the x/y sub-pixel offsets, and writes its results
 * through sum and sumsquared. Callers pass their src_ptr as ref_ptr and
 * their dst_ptr as src_ptr.
 * NOTE(review): the 16-wide wrappers call this twice with both pointers
 * advanced by 8, so it presumably covers 8 columns per call -- confirm
 * against the implementation, which is not in this file.
 */
void vp8_filter_block2d_bil_var_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int xoffset,
                                     int yoffset,
                                     int *sum,
                                     unsigned int *sumsquared);
/*
 * Routines the wrappers in this file select when xoffset == 4 and
 * yoffset == 4; the 8x variant serves the 8-wide wrappers and the 16x
 * variant the 16-wide ones.
 *
 * Height is the row count (callers here pass 8 or 16); results are
 * written through sum and sumsquared. Callers pass their src_ptr as
 * ref_ptr and their dst_ptr as src_ptr.
 * Definitions are not in this file (presumably assembly -- TODO confirm).
 */
void vp8_half_horiz_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                           int ref_pixels_per_line,
                                           const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           unsigned int Height,
                                           int *sum,
                                           unsigned int *sumsquared);
void vp8_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                            int ref_pixels_per_line,
                                            const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            unsigned int Height,
                                            int *sum,
                                            unsigned int *sumsquared);
/*
 * Routines the wrappers in this file select when xoffset == 4 and
 * yoffset == 0; the 8x variant serves the 8-wide wrappers and the 16x
 * variant the 16-wide ones.
 *
 * Height is the row count (callers here pass 8 or 16); results are
 * written through sum and sumsquared. Callers pass their src_ptr as
 * ref_ptr and their dst_ptr as src_ptr.
 * Definitions are not in this file (presumably assembly -- TODO confirm).
 */
void vp8_half_horiz_variance8x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);
void vp8_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                       int ref_pixels_per_line,
                                       const unsigned char *src_ptr,
                                       int src_pixels_per_line,
                                       unsigned int Height,
                                       int *sum,
                                       unsigned int *sumsquared);
/*
 * Routines the wrappers in this file select when xoffset == 0 and
 * yoffset == 4; the 8x variant serves the 8-wide wrappers and the 16x
 * variant the 16-wide ones.
 *
 * Height is the row count (callers here pass 8 or 16); results are
 * written through sum and sumsquared. Callers pass their src_ptr as
 * ref_ptr and their dst_ptr as src_ptr.
 * Definitions are not in this file (presumably assembly -- TODO confirm).
 */
void vp8_half_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int *sum,
                                     unsigned int *sumsquared);
void vp8_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);
| 106 | |
| 107 unsigned int vp8_sub_pixel_variance4x4_wmt | |
| 108 ( | |
| 109 const unsigned char *src_ptr, | |
| 110 int src_pixels_per_line, | |
| 111 int xoffset, | |
| 112 int yoffset, | |
| 113 const unsigned char *dst_ptr, | |
| 114 int dst_pixels_per_line, | |
| 115 unsigned int *sse | |
| 116 ) | |
| 117 { | |
| 118 int xsum; | |
| 119 unsigned int xxsum; | |
| 120 vp8_filter_block2d_bil4x4_var_mmx( | |
| 121 src_ptr, src_pixels_per_line, | |
| 122 dst_ptr, dst_pixels_per_line, | |
| 123 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | |
| 124 &xsum, &xxsum | |
| 125 ); | |
| 126 *sse = xxsum; | |
| 127 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); | |
| 128 } | |
| 129 | |
| 130 | |
/*
 * Sub-pixel variance for an 8x8 block.
 *
 * Offsets of exactly (4,0), (0,4) and (4,4) are routed to the dedicated
 * vp8_half_*_variance8x_h_sse2() routines; every other offset pair goes
 * through vp8_filter_block2d_bil_var_sse2(). All of them are asked for
 * 8 rows and report a sum and a sum of squares.
 *
 * The sum of squares is stored in *sse. The return value is that sum of
 * squares minus (sum * sum) >> 6.
 */
unsigned int vp8_sub_pixel_variance8x8_wmt(const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const unsigned char *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int total;
    unsigned int sq_total;
    unsigned int mean_term;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                              dst_ptr, dst_pixels_per_line,
                                              8, &total, &sq_total);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &total, &sq_total);
    }
    else if (xoffset == 0 && y_is_half)
    {
        vp8_half_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line,
                                        8, &total, &sq_total);
    }
    else
    {
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line,
                                        8, xoffset, yoffset,
                                        &total, &sq_total);
    }

    *sse = sq_total;

    /* >> 6 divides by 64, the pixel count of an 8x8 block. */
    mean_term = ((unsigned int)total * total) >> 6;
    return sq_total - mean_term;
}
| 178 | |
/*
 * Sub-pixel variance for a 16x16 block.
 *
 * Offsets of exactly (4,0), (0,4) and (4,4) are routed to the dedicated
 * vp8_half_*_variance16x_h_sse2() routines with 16 rows. Every other
 * offset pair is handled by two calls to
 * vp8_filter_block2d_bil_var_sse2(), the second with both pointers
 * advanced by 8, and the two partial results are added together.
 *
 * The sum of squares is stored in *sse. The return value is that sum of
 * squares minus (sum * sum) >> 8.
 */
unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const unsigned char *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int total, right_total;
    unsigned int sq_total, right_sq_total;
    unsigned int mean_term;

    /* This dispatch would be unnecessary if callers invoked the matching
     * routine themselves. */
    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               16, &total, &sq_total);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          16, &total, &sq_total);
    }
    else if (xoffset == 0 && y_is_half)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &total, &sq_total);
    }
    else
    {
        /* First call at the block origin ... */
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line,
                                        16, xoffset, yoffset,
                                        &total, &sq_total);

        /* ... second call 8 bytes further along both buffers. */
        vp8_filter_block2d_bil_var_sse2(src_ptr + 8, src_pixels_per_line,
                                        dst_ptr + 8, dst_pixels_per_line,
                                        16, xoffset, yoffset,
                                        &right_total, &right_sq_total);

        total += right_total;
        sq_total += right_sq_total;
    }

    *sse = sq_total;

    /* >> 8 divides by 256, the pixel count of a 16x16 block. */
    mean_term = ((unsigned int)total * total) >> 8;
    return sq_total - mean_term;
}
| 240 | |
/*
 * 16x16 sub-pixel variant that returns the value stored in *sse instead
 * of the variance.
 *
 * Forwards every argument to vp8_sub_pixel_variance16x16_wmt(), ignores
 * that function's return value, and returns what it wrote through sse.
 */
unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr,
                                        int src_pixels_per_line,
                                        int xoffset,
                                        int yoffset,
                                        const unsigned char *dst_ptr,
                                        int dst_pixels_per_line,
                                        unsigned int *sse)
{
    /* Called only for its side effect on *sse. */
    (void)vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line,
                                          sse);
    return *sse;
}
| 254 | |
/*
 * Sub-pixel variance for a 16-wide, 8-row block.
 *
 * Offsets of exactly (4,0), (0,4) and (4,4) are routed to the dedicated
 * vp8_half_*_variance16x_h_sse2() routines with 8 rows. Every other
 * offset pair is handled by two calls to
 * vp8_filter_block2d_bil_var_sse2(), the second with both pointers
 * advanced by 8, and the two partial results are added together.
 *
 * The sum of squares is stored in *sse. The return value is that sum of
 * squares minus (sum * sum) >> 7.
 */
unsigned int vp8_sub_pixel_variance16x8_wmt(const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            int xoffset,
                                            int yoffset,
                                            const unsigned char *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse)
{
    const int half_h = (xoffset == 4 && yoffset == 0);
    const int half_v = (xoffset == 0 && yoffset == 4);
    const int half_hv = (xoffset == 4 && yoffset == 4);
    int total, right_total;
    unsigned int sq_total, right_sq_total;
    unsigned int mean_term;

    if (!(half_h || half_v || half_hv))
    {
        /* General offsets: two calls, 8 bytes apart in both buffers. */
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line,
                                        8, xoffset, yoffset,
                                        &total, &sq_total);

        vp8_filter_block2d_bil_var_sse2(src_ptr + 8, src_pixels_per_line,
                                        dst_ptr + 8, dst_pixels_per_line,
                                        8, xoffset, yoffset,
                                        &right_total, &right_sq_total);

        total += right_total;
        sq_total += right_sq_total;
    }
    else if (half_h)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          8, &total, &sq_total);
    }
    else if (half_v)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &total, &sq_total);
    }
    else
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               8, &total, &sq_total);
    }

    *sse = sq_total;

    /* >> 7 divides by 128, the pixel count of a 16x8 block. */
    mean_term = ((unsigned int)total * total) >> 7;
    return sq_total - mean_term;
}
| 311 | |
/*
 * Sub-pixel variance for an 8-wide, 16-row block.
 *
 * Offsets of exactly (4,0), (0,4) and (4,4) are routed to the dedicated
 * vp8_half_*_variance8x_h_sse2() routines; every other offset pair goes
 * through vp8_filter_block2d_bil_var_sse2(). All of them are asked for
 * 16 rows and report a sum and a sum of squares.
 *
 * The sum of squares is stored in *sse. The return value is that sum of
 * squares minus (sum * sum) >> 7.
 */
unsigned int vp8_sub_pixel_variance8x16_wmt(const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            int xoffset,
                                            int yoffset,
                                            const unsigned char *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int total;
    unsigned int sq_total;
    unsigned int mean_term;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                              dst_ptr, dst_pixels_per_line,
                                              16, &total, &sq_total);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &total, &sq_total);
    }
    else if (xoffset == 0 && y_is_half)
    {
        vp8_half_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line,
                                        16, &total, &sq_total);
    }
    else
    {
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line,
                                        16, xoffset, yoffset,
                                        &total, &sq_total);
    }

    *sse = sq_total;

    /* >> 7 divides by 128, the pixel count of an 8x16 block. */
    mean_term = ((unsigned int)total * total) >> 7;
    return sq_total - mean_term;
}
| 359 | |
| 360 | |
/*
 * 16x16 variance computed through vp8_half_horiz_variance16x_h_sse2().
 *
 * Hands both buffers and a row count of 16 to that routine, stores the
 * reported sum of squares in *sse, and returns that sum of squares minus
 * (sum * sum) >> 8.
 */
unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr,
                                                int src_pixels_per_line,
                                                const unsigned char *dst_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse)
{
    int total;
    unsigned int sq_total;
    unsigned int mean_term;

    vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                      dst_ptr, dst_pixels_per_line,
                                      16, &total, &sq_total);

    *sse = sq_total;

    /* >> 8 divides by 256, the pixel count of a 16x16 block. */
    mean_term = ((unsigned int)total * total) >> 8;
    return sq_total - mean_term;
}
| 379 | |
| 380 | |
/*
 * 16x16 variance computed through vp8_half_vert_variance16x_h_sse2().
 *
 * Hands both buffers and a row count of 16 to that routine, stores the
 * reported sum of squares in *sse, and returns that sum of squares minus
 * (sum * sum) >> 8.
 */
unsigned int vp8_variance_halfpixvar16x16_v_wmt(const unsigned char *src_ptr,
                                                int src_pixels_per_line,
                                                const unsigned char *dst_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse)
{
    int total;
    unsigned int sq_total;
    unsigned int mean_term;

    vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                     dst_ptr, dst_pixels_per_line,
                                     16, &total, &sq_total);

    *sse = sq_total;

    /* >> 8 divides by 256, the pixel count of a 16x16 block. */
    mean_term = ((unsigned int)total * total) >> 8;
    return sq_total - mean_term;
}
| 398 | |
| 399 | |
/*
 * 16x16 variance computed through
 * vp8_half_horiz_vert_variance16x_h_sse2().
 *
 * Hands both buffers and a row count of 16 to that routine, stores the
 * reported sum of squares in *sse, and returns that sum of squares minus
 * (sum * sum) >> 8.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_wmt(const unsigned char *src_ptr,
                                                 int src_pixels_per_line,
                                                 const unsigned char *dst_ptr,
                                                 int dst_pixels_per_line,
                                                 unsigned int *sse)
{
    int total;
    unsigned int sq_total;
    unsigned int mean_term;

    vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                           dst_ptr, dst_pixels_per_line,
                                           16, &total, &sq_total);

    *sse = sq_total;

    /* >> 8 divides by 256, the pixel count of a 16x16 block. */
    mean_term = ((unsigned int)total * total) >> 8;
    return sq_total - mean_term;
}
| OLD | NEW |