OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "vpx_config.h" |
| 12 #include "vp9/encoder/vp9_variance.h" |
| 13 #include "vp9/common/vp9_pragmas.h" |
| 14 #include "vpx_ports/mem.h" |
| 15 |
| 16 extern void filter_block1d_h6_mmx |
| 17 ( |
| 18 const unsigned char *src_ptr, |
| 19 unsigned short *output_ptr, |
| 20 unsigned int src_pixels_per_line, |
| 21 unsigned int pixel_step, |
| 22 unsigned int output_height, |
| 23 unsigned int output_width, |
| 24 short *vp7_filter |
| 25 ); |
| 26 extern void filter_block1d_v6_mmx |
| 27 ( |
| 28 const short *src_ptr, |
| 29 unsigned char *output_ptr, |
| 30 unsigned int pixels_per_line, |
| 31 unsigned int pixel_step, |
| 32 unsigned int output_height, |
| 33 unsigned int output_width, |
| 34 short *vp7_filter |
| 35 ); |
| 36 |
| 37 extern unsigned int vp9_get_mb_ss_mmx(const short *src_ptr); |
| 38 extern unsigned int vp9_get8x8var_mmx |
| 39 ( |
| 40 const unsigned char *src_ptr, |
| 41 int source_stride, |
| 42 const unsigned char *ref_ptr, |
| 43 int recon_stride, |
| 44 unsigned int *SSE, |
| 45 int *Sum |
| 46 ); |
| 47 extern unsigned int vp9_get4x4var_mmx |
| 48 ( |
| 49 const unsigned char *src_ptr, |
| 50 int source_stride, |
| 51 const unsigned char *ref_ptr, |
| 52 int recon_stride, |
| 53 unsigned int *SSE, |
| 54 int *Sum |
| 55 ); |
| 56 extern void vp9_filter_block2d_bil4x4_var_mmx |
| 57 ( |
| 58 const unsigned char *ref_ptr, |
| 59 int ref_pixels_per_line, |
| 60 const unsigned char *src_ptr, |
| 61 int src_pixels_per_line, |
| 62 const short *HFilter, |
| 63 const short *VFilter, |
| 64 int *sum, |
| 65 unsigned int *sumsquared |
| 66 ); |
| 67 extern void vp9_filter_block2d_bil_var_mmx |
| 68 ( |
| 69 const unsigned char *ref_ptr, |
| 70 int ref_pixels_per_line, |
| 71 const unsigned char *src_ptr, |
| 72 int src_pixels_per_line, |
| 73 unsigned int Height, |
| 74 const short *HFilter, |
| 75 const short *VFilter, |
| 76 int *sum, |
| 77 unsigned int *sumsquared |
| 78 ); |
| 79 |
| 80 |
| 81 unsigned int vp9_variance4x4_mmx( |
| 82 const unsigned char *src_ptr, |
| 83 int source_stride, |
| 84 const unsigned char *ref_ptr, |
| 85 int recon_stride, |
| 86 unsigned int *sse) { |
| 87 unsigned int var; |
| 88 int avg; |
| 89 |
| 90 vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); |
| 91 *sse = var; |
| 92 return (var - (((unsigned int)avg * avg) >> 4)); |
| 93 |
| 94 } |
| 95 |
| 96 unsigned int vp9_variance8x8_mmx( |
| 97 const unsigned char *src_ptr, |
| 98 int source_stride, |
| 99 const unsigned char *ref_ptr, |
| 100 int recon_stride, |
| 101 unsigned int *sse) { |
| 102 unsigned int var; |
| 103 int avg; |
| 104 |
| 105 vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); |
| 106 *sse = var; |
| 107 |
| 108 return (var - (((unsigned int)avg * avg) >> 6)); |
| 109 |
| 110 } |
| 111 |
| 112 unsigned int vp9_mse16x16_mmx( |
| 113 const unsigned char *src_ptr, |
| 114 int source_stride, |
| 115 const unsigned char *ref_ptr, |
| 116 int recon_stride, |
| 117 unsigned int *sse) { |
| 118 unsigned int sse0, sse1, sse2, sse3, var; |
| 119 int sum0, sum1, sum2, sum3; |
| 120 |
| 121 |
| 122   vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); |
| 123   vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); |
| 124   vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); |
| 125   vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); |
| 126 |
| 127 var = sse0 + sse1 + sse2 + sse3; |
| 128 *sse = var; |
| 129 return var; |
| 130 } |
| 131 |
| 132 |
| 133 unsigned int vp9_variance16x16_mmx( |
| 134 const unsigned char *src_ptr, |
| 135 int source_stride, |
| 136 const unsigned char *ref_ptr, |
| 137 int recon_stride, |
| 138 unsigned int *sse) { |
| 139 unsigned int sse0, sse1, sse2, sse3, var; |
| 140 int sum0, sum1, sum2, sum3, avg; |
| 141 |
| 142 |
| 143   vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); |
| 144   vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); |
| 145   vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); |
| 146   vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); |
| 147 |
| 148 var = sse0 + sse1 + sse2 + sse3; |
| 149 avg = sum0 + sum1 + sum2 + sum3; |
| 150 *sse = var; |
| 151 return (var - (((unsigned int)avg * avg) >> 8)); |
| 152 } |
| 153 |
| 154 unsigned int vp9_variance16x8_mmx( |
| 155 const unsigned char *src_ptr, |
| 156 int source_stride, |
| 157 const unsigned char *ref_ptr, |
| 158 int recon_stride, |
| 159 unsigned int *sse) { |
| 160 unsigned int sse0, sse1, var; |
| 161 int sum0, sum1, avg; |
| 162 |
| 163   vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); |
| 164   vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); |
| 165 |
| 166 var = sse0 + sse1; |
| 167 avg = sum0 + sum1; |
| 168 *sse = var; |
| 169 return (var - (((unsigned int)avg * avg) >> 7)); |
| 170 |
| 171 } |
| 172 |
| 173 |
| 174 unsigned int vp9_variance8x16_mmx( |
| 175 const unsigned char *src_ptr, |
| 176 int source_stride, |
| 177 const unsigned char *ref_ptr, |
| 178 int recon_stride, |
| 179 unsigned int *sse) { |
| 180 unsigned int sse0, sse1, var; |
| 181 int sum0, sum1, avg; |
| 182 |
| 183   vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0); |
| 184   vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1); |
| 185 |
| 186 var = sse0 + sse1; |
| 187 avg = sum0 + sum1; |
| 188 *sse = var; |
| 189 |
| 190 return (var - (((unsigned int)avg * avg) >> 7)); |
| 191 |
| 192 } |
| 193 |
| 194 DECLARE_ALIGNED(16, extern const short, vp9_bilinear_filters_mmx[16][8]); |
| 195 |
| 196 unsigned int vp9_sub_pixel_variance4x4_mmx |
| 197 ( |
| 198 const unsigned char *src_ptr, |
| 199 int src_pixels_per_line, |
| 200 int xoffset, |
| 201 int yoffset, |
| 202 const unsigned char *dst_ptr, |
| 203 int dst_pixels_per_line, |
| 204 unsigned int *sse) |
| 205 |
| 206 { |
| 207 int xsum; |
| 208 unsigned int xxsum; |
| 209 vp9_filter_block2d_bil4x4_var_mmx( |
| 210 src_ptr, src_pixels_per_line, |
| 211 dst_ptr, dst_pixels_per_line, |
| 212 vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], |
| 213 &xsum, &xxsum |
| 214 ); |
| 215 *sse = xxsum; |
| 216 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); |
| 217 } |
| 218 |
| 219 |
| 220 unsigned int vp9_sub_pixel_variance8x8_mmx |
| 221 ( |
| 222 const unsigned char *src_ptr, |
| 223 int src_pixels_per_line, |
| 224 int xoffset, |
| 225 int yoffset, |
| 226 const unsigned char *dst_ptr, |
| 227 int dst_pixels_per_line, |
| 228 unsigned int *sse |
| 229 ) { |
| 230 |
| 231 int xsum; |
| 232 unsigned int xxsum; |
| 233 vp9_filter_block2d_bil_var_mmx( |
| 234 src_ptr, src_pixels_per_line, |
| 235 dst_ptr, dst_pixels_per_line, 8, |
| 236 vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], |
| 237 &xsum, &xxsum |
| 238 ); |
| 239 *sse = xxsum; |
| 240 return (xxsum - (((unsigned int)xsum * xsum) >> 6)); |
| 241 } |
| 242 |
| 243 unsigned int vp9_sub_pixel_variance16x16_mmx |
| 244 ( |
| 245 const unsigned char *src_ptr, |
| 246 int src_pixels_per_line, |
| 247 int xoffset, |
| 248 int yoffset, |
| 249 const unsigned char *dst_ptr, |
| 250 int dst_pixels_per_line, |
| 251 unsigned int *sse |
| 252 ) { |
| 253 |
| 254 int xsum0, xsum1; |
| 255 unsigned int xxsum0, xxsum1; |
| 256 |
| 257 vp9_filter_block2d_bil_var_mmx( |
| 258 src_ptr, src_pixels_per_line, |
| 259 dst_ptr, dst_pixels_per_line, 16, |
| 260 vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], |
| 261 &xsum0, &xxsum0 |
| 262 ); |
| 263 |
| 264 vp9_filter_block2d_bil_var_mmx( |
| 265 src_ptr + 8, src_pixels_per_line, |
| 266 dst_ptr + 8, dst_pixels_per_line, 16, |
| 267 vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], |
| 268 &xsum1, &xxsum1 |
| 269 ); |
| 270 |
| 271 xsum0 += xsum1; |
| 272 xxsum0 += xxsum1; |
| 273 |
| 274 *sse = xxsum0; |
| 275 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); |
| 276 |
| 277 |
| 278 } |
| 279 |
| 280 unsigned int vp9_sub_pixel_mse16x16_mmx( |
| 281 const unsigned char *src_ptr, |
| 282 int src_pixels_per_line, |
| 283 int xoffset, |
| 284 int yoffset, |
| 285 const unsigned char *dst_ptr, |
| 286 int dst_pixels_per_line, |
| 287 unsigned int *sse |
| 288 ) { |
| 289   vp9_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); |
| 290 return *sse; |
| 291 } |
| 292 |
| 293 unsigned int vp9_sub_pixel_variance16x8_mmx |
| 294 ( |
| 295 const unsigned char *src_ptr, |
| 296 int src_pixels_per_line, |
| 297 int xoffset, |
| 298 int yoffset, |
| 299 const unsigned char *dst_ptr, |
| 300 int dst_pixels_per_line, |
| 301 unsigned int *sse |
| 302 ) { |
| 303 int xsum0, xsum1; |
| 304 unsigned int xxsum0, xxsum1; |
| 305 |
| 306 |
| 307 vp9_filter_block2d_bil_var_mmx( |
| 308 src_ptr, src_pixels_per_line, |
| 309 dst_ptr, dst_pixels_per_line, 8, |
| 310 vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], |
| 311 &xsum0, &xxsum0 |
| 312 ); |
| 313 |
| 314 |
| 315 vp9_filter_block2d_bil_var_mmx( |
| 316 src_ptr + 8, src_pixels_per_line, |
| 317 dst_ptr + 8, dst_pixels_per_line, 8, |
| 318 vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], |
| 319 &xsum1, &xxsum1 |
| 320 ); |
| 321 |
| 322 xsum0 += xsum1; |
| 323 xxsum0 += xxsum1; |
| 324 |
| 325 *sse = xxsum0; |
| 326 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); |
| 327 } |
| 328 |
| 329 unsigned int vp9_sub_pixel_variance8x16_mmx |
| 330 ( |
| 331 const unsigned char *src_ptr, |
| 332 int src_pixels_per_line, |
| 333 int xoffset, |
| 334 int yoffset, |
| 335 const unsigned char *dst_ptr, |
| 336 int dst_pixels_per_line, |
| 337 unsigned int *sse |
| 338 ) { |
| 339 int xsum; |
| 340 unsigned int xxsum; |
| 341 vp9_filter_block2d_bil_var_mmx( |
| 342 src_ptr, src_pixels_per_line, |
| 343 dst_ptr, dst_pixels_per_line, 16, |
| 344 vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset], |
| 345 &xsum, &xxsum |
| 346 ); |
| 347 *sse = xxsum; |
| 348 return (xxsum - (((unsigned int)xsum * xsum) >> 7)); |
| 349 } |
| 350 |
| 351 |
| 352 unsigned int vp9_variance_halfpixvar16x16_h_mmx( |
| 353 const unsigned char *src_ptr, |
| 354 int source_stride, |
| 355 const unsigned char *ref_ptr, |
| 356 int recon_stride, |
| 357 unsigned int *sse) { |
| 358 return vp9_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 0, |
| 359 ref_ptr, recon_stride, sse); |
| 360 } |
| 361 |
| 362 |
| 363 unsigned int vp9_variance_halfpixvar16x16_v_mmx( |
| 364 const unsigned char *src_ptr, |
| 365 int source_stride, |
| 366 const unsigned char *ref_ptr, |
| 367 int recon_stride, |
| 368 unsigned int *sse) { |
| 369 return vp9_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 8, |
| 370 ref_ptr, recon_stride, sse); |
| 371 } |
| 372 |
| 373 |
| 374 unsigned int vp9_variance_halfpixvar16x16_hv_mmx( |
| 375 const unsigned char *src_ptr, |
| 376 int source_stride, |
| 377 const unsigned char *ref_ptr, |
| 378 int recon_stride, |
| 379 unsigned int *sse) { |
| 380 return vp9_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 8, 8, |
| 381 ref_ptr, recon_stride, sse); |
| 382 } |
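
Note: each wrapper above combines the per-8x8 (or per-4x4) SSE and Sum values returned by the MMX kernels as variance = SSE - Sum^2 / N, where N is the number of pixels in the block; the division is done as a right shift by log2(N) (>> 4 for 4x4, >> 6 for 8x8, >> 7 for 8x16 and 16x8, >> 8 for 16x16). The half-pixel variants simply call vp9_sub_pixel_variance16x16_mmx with offset 8, the midpoint of the 16-entry vp9_bilinear_filters_mmx table. Below is a minimal scalar sketch of that combine step; the helper name and the example values are illustrative only and are not part of this file.

#include <assert.h>

/* Scalar reference for the combine step used by the wrappers above:
 * variance = SSE - Sum * Sum / N, with the division done as a shift by
 * log2(N): 4 for 4x4, 6 for 8x8, 7 for 8x16/16x8, 8 for 16x16. */
static unsigned int combine_var(unsigned int sse, int sum, int shift_log2_n) {
  return sse - (((unsigned int)sum * sum) >> shift_log2_n);
}

int main(void) {
  /* Example: an 8x8 block where every pixel differs from the reference
   * by +1 gives Sum = 64 and SSE = 64, so the variance is
   * 64 - (64 * 64) / 64 = 0. */
  assert(combine_var(64, 64, 6) == 0);
  return 0;
}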