OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "vpx_config.h" |
| 12 #include "vp9/encoder/vp9_variance.h" |
| 13 #include "vp9/common/vp9_pragmas.h" |
| 14 #include "vpx_ports/mem.h" |
| 15 |
/*
 * Sub-pixel offset value that routes the sub-pixel variance functions in this
 * file to their dedicated half-pixel routines instead of the general
 * bilinear filter.
 */
#define HALFNDX 8
| 17 |
/*
 * Filter routines that are declared here but defined outside this file.
 * None of the functions visible in this file call them.
 */
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr,
                                  unsigned short *output_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *vp7_filter);

extern void filter_block1d_v6_mmx(const short *src_ptr,
                                  unsigned char *output_ptr,
                                  unsigned int pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *vp7_filter);

extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr,
                                    unsigned short *output_ptr,
                                    unsigned int src_pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *vp7_filter);

extern void filter_block1d8_v6_sse2(const short *src_ptr,
                                    unsigned char *output_ptr,
                                    unsigned int pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *vp7_filter);
| 22 |
/*
 * Externally defined routine used by vp9_sub_pixel_variance4x4_wmt().
 * It takes one horizontal and one vertical filter row and reports its
 * results through the `sum` and `sumsquared` out-parameters.
 */
extern void vp9_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              const short *HFilter,
                                              const short *VFilter,
                                              int *sum,
                                              unsigned int *sumsquared);
| 34 |
/*
 * Externally defined routine used by vp9_variance4x4_wmt().
 * Results are delivered through the `SSE` and `Sum` out-parameters; the
 * return value is not used by the caller in this file.
 */
extern unsigned int vp9_get4x4var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);
| 44 |
/*
 * Prototypes for routines defined outside this file.
 * The two get*var routines deliver their results through the `SSE` and `Sum`
 * out-parameters; the wrappers in this file ignore their return values.
 */
unsigned int vp9_get_mb_ss_sse2(const short *src_ptr);

unsigned int vp9_get16x16var_sse2(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *SSE,
                                  int *Sum);

unsigned int vp9_get8x8var_sse2(const unsigned char *src_ptr,
                                int source_stride,
                                const unsigned char *ref_ptr,
                                int recon_stride,
                                unsigned int *SSE,
                                int *Sum);
/*
 * General sub-pixel routine, defined outside this file. The sub-pixel
 * variance wrappers below fall back to it whenever the (xoffset, yoffset)
 * pair does not match one of the half-pixel special cases.
 * Results are delivered through `sum` and `sumsquared`.
 */
void vp9_filter_block2d_bil_var_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int xoffset,
                                     int yoffset,
                                     int *sum,
                                     unsigned int *sumsquared);
/*
 * Half-pixel routines, defined outside this file. All six share the same
 * signature: two (pointer, stride) pairs, a row count `Height`, and the
 * `sum` / `sumsquared` out-parameters that receive the results.
 */

/* Half-pixel offset in both directions. */
void vp9_half_horiz_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                           int ref_pixels_per_line,
                                           const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           unsigned int Height,
                                           int *sum,
                                           unsigned int *sumsquared);

void vp9_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                            int ref_pixels_per_line,
                                            const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            unsigned int Height,
                                            int *sum,
                                            unsigned int *sumsquared);

/* Half-pixel offset horizontally only. */
void vp9_half_horiz_variance8x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);

void vp9_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                       int ref_pixels_per_line,
                                       const unsigned char *src_ptr,
                                       int src_pixels_per_line,
                                       unsigned int Height,
                                       int *sum,
                                       unsigned int *sumsquared);

/* Half-pixel offset vertically only. */
void vp9_half_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int *sum,
                                     unsigned int *sumsquared);

void vp9_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);
| 139 |
| 140 DECLARE_ALIGNED(16, extern const short, vp9_bilinear_filters_mmx[16][8]); |
| 141 |
/*
 * Variance of a 4x4 block.
 *
 * Calls vp9_get4x4var_mmx() to obtain the values it writes through its SSE
 * and Sum out-parameters, stores the SSE in *sse, and returns
 * SSE - (Sum * Sum) / 16 (the right shift by 4 is the division by 16).
 */
unsigned int vp9_variance4x4_wmt(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sum_sq;
  int sum;
  unsigned int mean_term;

  vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                    &sum_sq, &sum);
  *sse = sum_sq;

  /* The cast makes the squaring an unsigned multiplication. */
  mean_term = ((unsigned int)sum * sum) >> 4;
  return sum_sq - mean_term;
}
| 156 |
/*
 * Variance of an 8x8 block.
 *
 * Calls vp9_get8x8var_sse2() to obtain the values it writes through its SSE
 * and Sum out-parameters, stores the SSE in *sse, and returns
 * SSE - (Sum * Sum) / 64 (the right shift by 6 is the division by 64).
 */
unsigned int vp9_variance8x8_wmt(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sum_sq;
  int sum;
  unsigned int mean_term;

  vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                     &sum_sq, &sum);
  *sse = sum_sq;

  /* The cast makes the squaring an unsigned multiplication. */
  mean_term = ((unsigned int)sum * sum) >> 6;
  return sum_sq - mean_term;
}
| 172 |
| 173 |
/*
 * Variance of a 16x16 block.
 *
 * Calls vp9_get16x16var_sse2() to obtain the values it writes through its
 * SSE and Sum out-parameters, stores the SSE in *sse, and returns
 * SSE - (Sum * Sum) / 256 (the right shift by 8 is the division by 256).
 */
unsigned int vp9_variance16x16_wmt(const unsigned char *src_ptr,
                                   int source_stride,
                                   const unsigned char *ref_ptr,
                                   int recon_stride,
                                   unsigned int *sse) {
  unsigned int sum_sq;
  int sum;
  unsigned int mean_term;

  vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sum_sq, &sum);
  *sse = sum_sq;

  /* The cast makes the squaring an unsigned multiplication. */
  mean_term = ((unsigned int)sum * sum) >> 8;
  return sum_sq - mean_term;
}
/*
 * SSE of a 16x16 block, with no mean correction.
 *
 * Calls vp9_get16x16var_sse2(), stores the value it writes through its SSE
 * out-parameter in *sse, and returns that same value.
 */
unsigned int vp9_mse16x16_wmt(const unsigned char *src_ptr,
                              int source_stride,
                              const unsigned char *ref_ptr,
                              int recon_stride,
                              unsigned int *sse) {
  unsigned int sum_sq;
  int ignored_sum; /* the helper requires a Sum destination; unused here */

  vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sum_sq, &ignored_sum);

  *sse = sum_sq;
  return sum_sq;
}
| 203 |
| 204 |
/*
 * Variance of a 16x8 block, assembled from two 8x8 measurements placed side
 * by side (the second one starts 8 bytes to the right in both buffers).
 *
 * Stores the combined SSE in *sse and returns
 * SSE - (Sum * Sum) / 128 (the right shift by 7 is the division by 128).
 */
unsigned int vp9_variance16x8_wmt(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  unsigned int sse_left, sse_right;
  int sum_left, sum_right;
  unsigned int total_sse;
  int total_sum;

  vp9_get8x8var_sse2(src_ptr, source_stride,
                     ref_ptr, recon_stride,
                     &sse_left, &sum_left);
  vp9_get8x8var_sse2(src_ptr + 8, source_stride,
                     ref_ptr + 8, recon_stride,
                     &sse_right, &sum_right);

  total_sse = sse_left + sse_right;
  total_sum = sum_left + sum_right;

  *sse = total_sse;
  /* The cast makes the squaring an unsigned multiplication. */
  return total_sse - (((unsigned int)total_sum * total_sum) >> 7);
}
| 224 |
/*
 * Variance of an 8x16 block, assembled from two 8x8 measurements stacked
 * vertically (the second one starts 8 rows further down in both buffers).
 *
 * Stores the combined SSE in *sse and returns
 * SSE - (Sum * Sum) / 128 (the right shift by 7 is the division by 128).
 */
unsigned int vp9_variance8x16_wmt(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  unsigned int sse_top, sse_bottom;
  int sum_top, sum_bottom;
  unsigned int total_sse;
  int total_sum;

  vp9_get8x8var_sse2(src_ptr, source_stride,
                     ref_ptr, recon_stride,
                     &sse_top, &sum_top);
  vp9_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride,
                     ref_ptr + 8 * recon_stride, recon_stride,
                     &sse_bottom, &sum_bottom);

  total_sse = sse_top + sse_bottom;
  total_sum = sum_top + sum_bottom;

  *sse = total_sse;
  /* The cast makes the squaring an unsigned multiplication. */
  return total_sse - (((unsigned int)total_sum * total_sum) >> 7);
}
| 244 |
| 245 unsigned int vp9_sub_pixel_variance4x4_wmt |
| 246 ( |
| 247 const unsigned char *src_ptr, |
| 248 int src_pixels_per_line, |
| 249 int xoffset, |
| 250 int yoffset, |
| 251 const unsigned char *dst_ptr, |
| 252 int dst_pixels_per_line, |
| 253 unsigned int *sse |
| 254 ) { |
| 255 int xsum; |
| 256 unsigned int xxsum; |
| 257 vp9_filter_block2d_bil4x4_var_mmx( |
| 258 src_ptr, src_pixels_per_line, |
| 259 dst_ptr, dst_pixels_per_line, |
| 260 vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], |
| 261 &xsum, &xxsum |
| 262 ); |
| 263 *sse = xxsum; |
| 264 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); |
| 265 } |
| 266 |
| 267 |
| 268 unsigned int vp9_sub_pixel_variance8x8_wmt |
| 269 ( |
| 270 const unsigned char *src_ptr, |
| 271 int src_pixels_per_line, |
| 272 int xoffset, |
| 273 int yoffset, |
| 274 const unsigned char *dst_ptr, |
| 275 int dst_pixels_per_line, |
| 276 unsigned int *sse |
| 277 ) { |
| 278 int xsum; |
| 279 unsigned int xxsum; |
| 280 |
| 281 if (xoffset == HALFNDX && yoffset == 0) { |
| 282 vp9_half_horiz_variance8x_h_sse2( |
| 283 src_ptr, src_pixels_per_line, |
| 284 dst_ptr, dst_pixels_per_line, 8, |
| 285 &xsum, &xxsum); |
| 286 } else if (xoffset == 0 && yoffset == HALFNDX) { |
| 287 vp9_half_vert_variance8x_h_sse2( |
| 288 src_ptr, src_pixels_per_line, |
| 289 dst_ptr, dst_pixels_per_line, 8, |
| 290 &xsum, &xxsum); |
| 291 } else if (xoffset == HALFNDX && yoffset == HALFNDX) { |
| 292 vp9_half_horiz_vert_variance8x_h_sse2( |
| 293 src_ptr, src_pixels_per_line, |
| 294 dst_ptr, dst_pixels_per_line, 8, |
| 295 &xsum, &xxsum); |
| 296 } else { |
| 297 vp9_filter_block2d_bil_var_sse2( |
| 298 src_ptr, src_pixels_per_line, |
| 299 dst_ptr, dst_pixels_per_line, 8, |
| 300 xoffset, yoffset, |
| 301 &xsum, &xxsum); |
| 302 } |
| 303 |
| 304 *sse = xxsum; |
| 305 return (xxsum - (((unsigned int)xsum * xsum) >> 6)); |
| 306 } |
| 307 |
| 308 unsigned int vp9_sub_pixel_variance16x16_wmt |
| 309 ( |
| 310 const unsigned char *src_ptr, |
| 311 int src_pixels_per_line, |
| 312 int xoffset, |
| 313 int yoffset, |
| 314 const unsigned char *dst_ptr, |
| 315 int dst_pixels_per_line, |
| 316 unsigned int *sse |
| 317 ) { |
| 318 int xsum0, xsum1; |
| 319 unsigned int xxsum0, xxsum1; |
| 320 |
| 321 |
| 322 // note we could avoid these if statements if the calling function |
| 323 // just called the appropriate functions inside. |
| 324 if (xoffset == HALFNDX && yoffset == 0) { |
| 325 vp9_half_horiz_variance16x_h_sse2( |
| 326 src_ptr, src_pixels_per_line, |
| 327 dst_ptr, dst_pixels_per_line, 16, |
| 328 &xsum0, &xxsum0); |
| 329 } else if (xoffset == 0 && yoffset == HALFNDX) { |
| 330 vp9_half_vert_variance16x_h_sse2( |
| 331 src_ptr, src_pixels_per_line, |
| 332 dst_ptr, dst_pixels_per_line, 16, |
| 333 &xsum0, &xxsum0); |
| 334 } else if (xoffset == HALFNDX && yoffset == HALFNDX) { |
| 335 vp9_half_horiz_vert_variance16x_h_sse2( |
| 336 src_ptr, src_pixels_per_line, |
| 337 dst_ptr, dst_pixels_per_line, 16, |
| 338 &xsum0, &xxsum0); |
| 339 } else { |
| 340 vp9_filter_block2d_bil_var_sse2( |
| 341 src_ptr, src_pixels_per_line, |
| 342 dst_ptr, dst_pixels_per_line, 16, |
| 343 xoffset, yoffset, |
| 344 &xsum0, &xxsum0 |
| 345 ); |
| 346 |
| 347 vp9_filter_block2d_bil_var_sse2( |
| 348 src_ptr + 8, src_pixels_per_line, |
| 349 dst_ptr + 8, dst_pixels_per_line, 16, |
| 350 xoffset, yoffset, |
| 351 &xsum1, &xxsum1 |
| 352 ); |
| 353 xsum0 += xsum1; |
| 354 xxsum0 += xxsum1; |
| 355 } |
| 356 |
| 357 *sse = xxsum0; |
| 358 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); |
| 359 } |
| 360 |
/*
 * Sub-pixel SSE of a 16x16 block.
 *
 * Delegates to vp9_sub_pixel_variance16x16_wmt() only for the value it
 * stores in *sse; the variance it returns is discarded and *sse is returned
 * instead.
 */
unsigned int vp9_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr,
                                        int src_pixels_per_line,
                                        int xoffset,
                                        int yoffset,
                                        const unsigned char *dst_ptr,
                                        int dst_pixels_per_line,
                                        unsigned int *sse) {
  (void)vp9_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line,
                                        xoffset, yoffset,
                                        dst_ptr, dst_pixels_per_line,
                                        sse);
  return *sse;
}
| 373 |
| 374 unsigned int vp9_sub_pixel_variance16x8_wmt |
| 375 ( |
| 376 const unsigned char *src_ptr, |
| 377 int src_pixels_per_line, |
| 378 int xoffset, |
| 379 int yoffset, |
| 380 const unsigned char *dst_ptr, |
| 381 int dst_pixels_per_line, |
| 382 unsigned int *sse |
| 383 |
| 384 ) { |
| 385 int xsum0, xsum1; |
| 386 unsigned int xxsum0, xxsum1; |
| 387 |
| 388 if (xoffset == HALFNDX && yoffset == 0) { |
| 389 vp9_half_horiz_variance16x_h_sse2( |
| 390 src_ptr, src_pixels_per_line, |
| 391 dst_ptr, dst_pixels_per_line, 8, |
| 392 &xsum0, &xxsum0); |
| 393 } else if (xoffset == 0 && yoffset == HALFNDX) { |
| 394 vp9_half_vert_variance16x_h_sse2( |
| 395 src_ptr, src_pixels_per_line, |
| 396 dst_ptr, dst_pixels_per_line, 8, |
| 397 &xsum0, &xxsum0); |
| 398 } else if (xoffset == HALFNDX && yoffset == HALFNDX) { |
| 399 vp9_half_horiz_vert_variance16x_h_sse2( |
| 400 src_ptr, src_pixels_per_line, |
| 401 dst_ptr, dst_pixels_per_line, 8, |
| 402 &xsum0, &xxsum0); |
| 403 } else { |
| 404 vp9_filter_block2d_bil_var_sse2( |
| 405 src_ptr, src_pixels_per_line, |
| 406 dst_ptr, dst_pixels_per_line, 8, |
| 407 xoffset, yoffset, |
| 408 &xsum0, &xxsum0); |
| 409 |
| 410 vp9_filter_block2d_bil_var_sse2( |
| 411 src_ptr + 8, src_pixels_per_line, |
| 412 dst_ptr + 8, dst_pixels_per_line, 8, |
| 413 xoffset, yoffset, |
| 414 &xsum1, &xxsum1); |
| 415 xsum0 += xsum1; |
| 416 xxsum0 += xxsum1; |
| 417 } |
| 418 |
| 419 *sse = xxsum0; |
| 420 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); |
| 421 } |
| 422 |
| 423 unsigned int vp9_sub_pixel_variance8x16_wmt |
| 424 ( |
| 425 const unsigned char *src_ptr, |
| 426 int src_pixels_per_line, |
| 427 int xoffset, |
| 428 int yoffset, |
| 429 const unsigned char *dst_ptr, |
| 430 int dst_pixels_per_line, |
| 431 unsigned int *sse |
| 432 ) { |
| 433 int xsum; |
| 434 unsigned int xxsum; |
| 435 |
| 436 if (xoffset == HALFNDX && yoffset == 0) { |
| 437 vp9_half_horiz_variance8x_h_sse2( |
| 438 src_ptr, src_pixels_per_line, |
| 439 dst_ptr, dst_pixels_per_line, 16, |
| 440 &xsum, &xxsum); |
| 441 } else if (xoffset == 0 && yoffset == HALFNDX) { |
| 442 vp9_half_vert_variance8x_h_sse2( |
| 443 src_ptr, src_pixels_per_line, |
| 444 dst_ptr, dst_pixels_per_line, 16, |
| 445 &xsum, &xxsum); |
| 446 } else if (xoffset == HALFNDX && yoffset == HALFNDX) { |
| 447 vp9_half_horiz_vert_variance8x_h_sse2( |
| 448 src_ptr, src_pixels_per_line, |
| 449 dst_ptr, dst_pixels_per_line, 16, |
| 450 &xsum, &xxsum); |
| 451 } else { |
| 452 vp9_filter_block2d_bil_var_sse2( |
| 453 src_ptr, src_pixels_per_line, |
| 454 dst_ptr, dst_pixels_per_line, 16, |
| 455 xoffset, yoffset, |
| 456 &xsum, &xxsum); |
| 457 } |
| 458 |
| 459 *sse = xxsum; |
| 460 return (xxsum - (((unsigned int)xsum * xsum) >> 7)); |
| 461 } |
| 462 |
| 463 |
/*
 * 16x16 variance at a horizontal half-pixel offset.
 *
 * Calls vp9_half_horiz_variance16x_h_sse2() with a height of 16, stores the
 * reported sum of squares in *sse, and returns
 * sumsquared - (sum * sum) / 256.
 */
unsigned int vp9_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr,
                                                int src_pixels_per_line,
                                                const unsigned char *dst_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse) {
  int sum;
  unsigned int sum_sq;
  unsigned int mean_term;

  vp9_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                    dst_ptr, dst_pixels_per_line, 16,
                                    &sum, &sum_sq);
  *sse = sum_sq;

  /* The cast makes the squaring an unsigned multiplication. */
  mean_term = ((unsigned int)sum * sum) >> 8;
  return sum_sq - mean_term;
}
| 481 |
| 482 |
/*
 * 16x16 variance at a vertical half-pixel offset.
 *
 * Calls vp9_half_vert_variance16x_h_sse2() with a height of 16, stores the
 * reported sum of squares in *sse, and returns
 * sumsquared - (sum * sum) / 256.
 */
unsigned int vp9_variance_halfpixvar16x16_v_wmt(const unsigned char *src_ptr,
                                                int src_pixels_per_line,
                                                const unsigned char *dst_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse) {
  int sum;
  unsigned int sum_sq;
  unsigned int mean_term;

  vp9_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                   dst_ptr, dst_pixels_per_line, 16,
                                   &sum, &sum_sq);
  *sse = sum_sq;

  /* The cast makes the squaring an unsigned multiplication. */
  mean_term = ((unsigned int)sum * sum) >> 8;
  return sum_sq - mean_term;
}
| 499 |
| 500 |
/*
 * 16x16 variance at a half-pixel offset in both directions.
 *
 * Calls vp9_half_horiz_vert_variance16x_h_sse2() with a height of 16, stores
 * the reported sum of squares in *sse, and returns
 * sumsquared - (sum * sum) / 256.
 */
unsigned int vp9_variance_halfpixvar16x16_hv_wmt(const unsigned char *src_ptr,
                                                 int src_pixels_per_line,
                                                 const unsigned char *dst_ptr,
                                                 int dst_pixels_per_line,
                                                 unsigned int *sse) {
  int sum;
  unsigned int sum_sq;
  unsigned int mean_term;

  vp9_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line, 16,
                                         &sum, &sum_sq);
  *sse = sum_sq;

  /* The cast makes the squaring an unsigned multiplication. */
  mean_term = ((unsigned int)sum * sum) >> 8;
  return sum_sq - mean_term;
}
OLD | NEW |