OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 |
| 12 #include "vpx_ports/config.h" |
| 13 #include "vpx_ports/mem.h" |
| 14 #include "vp9/common/vp9_subpixel.h" |
| 15 |
| 16 extern const short vp9_six_tap_mmx[16][6 * 8]; |
| 17 |
| 18 extern const short vp9_bilinear_filters_8x_mmx[16][2 * 8]; |
| 19 |
| 20 extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr, |
| 21 unsigned short *output_ptr, |
| 22 unsigned int src_pixels_per_line, |
| 23 unsigned int pixel_step, |
| 24 unsigned int output_height, |
| 25 unsigned int output_width, |
| 26 const short *vp9_filter); |
| 27 |
| 28 extern void vp9_filter_block1dc_v6_mmx(unsigned short *src_ptr, |
| 29 unsigned char *output_ptr, |
| 30 int output_pitch, |
| 31 unsigned int pixels_per_line, |
| 32 unsigned int pixel_step, |
| 33 unsigned int output_height, |
| 34 unsigned int output_width, |
| 35 const short *vp9_filter); |
| 36 |
| 37 extern void vp9_filter_block1d8_h6_sse2(unsigned char *src_ptr, |
| 38 unsigned short *output_ptr, |
| 39 unsigned int src_pixels_per_line, |
| 40 unsigned int pixel_step, |
| 41 unsigned int output_height, |
| 42 unsigned int output_width, |
| 43 const short *vp9_filter); |
| 44 |
| 45 extern void vp9_filter_block1d16_h6_sse2(unsigned char *src_ptr, |
| 46 unsigned short *output_ptr, |
| 47 unsigned int src_pixels_per_line, |
| 48 unsigned int pixel_step, |
| 49 unsigned int output_height, |
| 50 unsigned int output_width, |
| 51 const short *vp9_filter); |
| 52 |
/*
 * Prototype for a routine defined outside this file.  It takes a 16-bit
 * source buffer and writes 8-bit output; the third argument is the
 * destination pitch (the parameter name was previously misspelled as
 * "dst_ptich").  Judging by its name this is the vertical six-tap pass for
 * 8-pixel-wide blocks -- confirm against the implementation.
 */
extern void vp9_filter_block1d8_v6_sse2(unsigned short *src_ptr,
                                        unsigned char *output_ptr,
                                        int dst_pitch,
                                        unsigned int pixels_per_line,
                                        unsigned int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
                                        const short *vp9_filter);
| 61 |
/*
 * Prototype for a routine defined outside this file.  It takes a 16-bit
 * source buffer and writes 8-bit output; the third argument is the
 * destination pitch (the parameter name was previously misspelled as
 * "dst_ptich").  Judging by its name this is the vertical six-tap pass for
 * 16-pixel-wide blocks -- confirm against the implementation.
 */
extern void vp9_filter_block1d16_v6_sse2(unsigned short *src_ptr,
                                         unsigned char *output_ptr,
                                         int dst_pitch,
                                         unsigned int pixels_per_line,
                                         unsigned int pixel_step,
                                         unsigned int output_height,
                                         unsigned int output_width,
                                         const short *vp9_filter);
| 70 |
| 71 extern void vp9_unpack_block1d16_h6_sse2(unsigned char *src_ptr, |
| 72 unsigned short *output_ptr, |
| 73 unsigned int src_pixels_per_line, |
| 74 unsigned int output_height, |
| 75 unsigned int output_width); |
| 76 |
| 77 extern void vp9_filter_block1d8_h6_only_sse2(unsigned char *src_ptr, |
| 78 unsigned int src_pixels_per_line, |
| 79 unsigned char *output_ptr, |
| 80 int dst_pitch, |
| 81 unsigned int output_height, |
| 82 const short *vp9_filter); |
| 83 |
/*
 * Prototype for a routine defined outside this file.  It reads 8-bit
 * source pixels and writes 8-bit output directly.  The second argument is
 * the source stride; its name was previously truncated to
 * "src_pixels_per_lin" and now matches the sibling prototypes.
 */
extern void vp9_filter_block1d16_h6_only_sse2(unsigned char *src_ptr,
                                              unsigned int src_pixels_per_line,
                                              unsigned char *output_ptr,
                                              int dst_pitch,
                                              unsigned int output_height,
                                              const short *vp9_filter);
| 90 |
| 91 extern void vp9_filter_block1d8_v6_only_sse2(unsigned char *src_ptr, |
| 92 unsigned int src_pixels_per_line, |
| 93 unsigned char *output_ptr, |
| 94 int dst_pitch, |
| 95 unsigned int output_height, |
| 96 const short *vp9_filter); |
| 97 |
| 98 extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx); |
| 99 |
| 100 /////////////////////////////////////////////////////////////////////////// |
| 101 // the mmx function that does the bilinear filtering and var calculation // |
| 102 // int one pass // |
| 103 /////////////////////////////////////////////////////////////////////////// |
| 104 DECLARE_ALIGNED(16, const short, vp9_bilinear_filters_mmx[16][8]) = { |
| 105 { 128, 128, 128, 128, 0, 0, 0, 0 }, |
| 106 { 120, 120, 120, 120, 8, 8, 8, 8 }, |
| 107 { 112, 112, 112, 112, 16, 16, 16, 16 }, |
| 108 { 104, 104, 104, 104, 24, 24, 24, 24 }, |
| 109 { 96, 96, 96, 96, 32, 32, 32, 32 }, |
| 110 { 88, 88, 88, 88, 40, 40, 40, 40 }, |
| 111 { 80, 80, 80, 80, 48, 48, 48, 48 }, |
| 112 { 72, 72, 72, 72, 56, 56, 56, 56 }, |
| 113 { 64, 64, 64, 64, 64, 64, 64, 64 }, |
| 114 { 56, 56, 56, 56, 72, 72, 72, 72 }, |
| 115 { 48, 48, 48, 48, 80, 80, 80, 80 }, |
| 116 { 40, 40, 40, 40, 88, 88, 88, 88 }, |
| 117 { 32, 32, 32, 32, 96, 96, 96, 96 }, |
| 118 { 24, 24, 24, 24, 104, 104, 104, 104 }, |
| 119 { 16, 16, 16, 16, 112, 112, 112, 112 }, |
| 120 { 8, 8, 8, 8, 120, 120, 120, 120 } |
| 121 }; |
| 122 |
| 123 #if HAVE_MMX |
| 124 void vp9_sixtap_predict4x4_mmx(unsigned char *src_ptr, |
| 125 int src_pixels_per_line, |
| 126 int xoffset, |
| 127 int yoffset, |
| 128 unsigned char *dst_ptr, |
| 129 int dst_pitch) { |
| 130 #ifdef ANNOUNCE_FUNCTION |
| 131 printf("vp9_sixtap_predict4x4_mmx\n"); |
| 132 #endif |
| 133 /* Temp data bufffer used in filtering */ |
| 134 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 16 * 16); |
| 135 const short *hfilter, *vfilter; |
| 136 hfilter = vp9_six_tap_mmx[xoffset]; |
| 137 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), fdata2, |
| 138 src_pixels_per_line, 1, 9, 8, hfilter); |
| 139 vfilter = vp9_six_tap_mmx[yoffset]; |
| 140 vp9_filter_block1dc_v6_mmx(fdata2 + 8, dst_ptr, dst_pitch, |
| 141 8, 4, 4, 4, vfilter); |
| 142 } |
| 143 |
| 144 void vp9_sixtap_predict16x16_mmx(unsigned char *src_ptr, |
| 145 int src_pixels_per_line, |
| 146 int xoffset, |
| 147 int yoffset, |
| 148 unsigned char *dst_ptr, |
| 149 int dst_pitch) { |
| 150 #ifdef ANNOUNCE_FUNCTION |
| 151 printf("vp9_sixtap_predict16x16_mmx\n"); |
| 152 #endif |
| 153 /* Temp data bufffer used in filtering */ |
| 154 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24); |
| 155 const short *hfilter, *vfilter; |
| 156 |
| 157 hfilter = vp9_six_tap_mmx[xoffset]; |
| 158 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), |
| 159 fdata2, src_pixels_per_line, 1, 21, 32, |
| 160 hfilter); |
| 161 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, |
| 162 fdata2 + 4, src_pixels_per_line, 1, 21, 32, |
| 163 hfilter); |
| 164 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8, |
| 165 fdata2 + 8, src_pixels_per_line, 1, 21, 32, |
| 166 hfilter); |
| 167 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, |
| 168 fdata2 + 12, src_pixels_per_line, 1, 21, 32, |
| 169 hfilter); |
| 170 |
| 171 vfilter = vp9_six_tap_mmx[yoffset]; |
| 172 vp9_filter_block1dc_v6_mmx(fdata2 + 32, dst_ptr, dst_pitch, |
| 173 32, 16, 16, 16, vfilter); |
| 174 vp9_filter_block1dc_v6_mmx(fdata2 + 36, dst_ptr + 4, dst_pitch, |
| 175 32, 16, 16, 16, vfilter); |
| 176 vp9_filter_block1dc_v6_mmx(fdata2 + 40, dst_ptr + 8, dst_pitch, |
| 177 32, 16, 16, 16, vfilter); |
| 178 vp9_filter_block1dc_v6_mmx(fdata2 + 44, dst_ptr + 12, dst_pitch, |
| 179 32, 16, 16, 16, vfilter); |
| 180 } |
| 181 |
| 182 void vp9_sixtap_predict8x8_mmx(unsigned char *src_ptr, |
| 183 int src_pixels_per_line, |
| 184 int xoffset, |
| 185 int yoffset, |
| 186 unsigned char *dst_ptr, |
| 187 int dst_pitch) { |
| 188 #ifdef ANNOUNCE_FUNCTION |
| 189 printf("vp9_sixtap_predict8x8_mmx\n"); |
| 190 #endif |
| 191 /* Temp data bufffer used in filtering */ |
| 192 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256); |
| 193 const short *hfilter, *vfilter; |
| 194 |
| 195 hfilter = vp9_six_tap_mmx[xoffset]; |
| 196 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), |
| 197 fdata2, src_pixels_per_line, 1, 13, 16, |
| 198 hfilter); |
| 199 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, |
| 200 fdata2 + 4, src_pixels_per_line, 1, 13, 16, |
| 201 hfilter); |
| 202 |
| 203 vfilter = vp9_six_tap_mmx[yoffset]; |
| 204 vp9_filter_block1dc_v6_mmx(fdata2 + 16, dst_ptr, dst_pitch, |
| 205 16, 8, 8, 8, vfilter); |
| 206 vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch, |
| 207 16, 8, 8, 8, vfilter); |
| 208 } |
| 209 |
| 210 void vp9_sixtap_predict8x4_mmx(unsigned char *src_ptr, |
| 211 int src_pixels_per_line, |
| 212 int xoffset, |
| 213 int yoffset, |
| 214 unsigned char *dst_ptr, |
| 215 int dst_pitch) { |
| 216 #ifdef ANNOUNCE_FUNCTION |
| 217 printf("vp9_sixtap_predict8x4_mmx\n"); |
| 218 #endif |
| 219 /* Temp data bufffer used in filtering */ |
| 220 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256); |
| 221 const short *hfilter, *vfilter; |
| 222 |
| 223 hfilter = vp9_six_tap_mmx[xoffset]; |
| 224 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), |
| 225 fdata2, src_pixels_per_line, 1, 9, 16, hfilter); |
| 226 vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, |
| 227 fdata2 + 4, src_pixels_per_line, 1, 9, 16, hfilter); |
| 228 |
| 229 vfilter = vp9_six_tap_mmx[yoffset]; |
| 230 vp9_filter_block1dc_v6_mmx(fdata2 + 16, dst_ptr, dst_pitch, |
| 231 16, 8, 4, 8, vfilter); |
| 232 vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch, |
| 233 16, 8, 4, 8, vfilter); |
| 234 } |
| 235 |
/*
 * Bilinear prediction of a 16x16 block, built from four calls to
 * vp9_bilinear_predict8x8_mmx.  The 8x8 quadrants are visited in the order
 * top-left, top-right, bottom-left, bottom-right, each with the same
 * xoffset and yoffset.
 */
void vp9_bilinear_predict16x16_mmx(unsigned char *src_ptr,
                                   int src_pixels_per_line,
                                   int xoffset,
                                   int yoffset,
                                   unsigned char *dst_ptr,
                                   int dst_pitch) {
  int row;
  int col;

  for (row = 0; row < 16; row += 8) {
    for (col = 0; col < 16; col += 8) {
      vp9_bilinear_predict8x8_mmx(src_ptr + row * src_pixels_per_line + col,
                                  src_pixels_per_line, xoffset, yoffset,
                                  dst_ptr + row * dst_pitch + col, dst_pitch);
    }
  }
}
| 255 #endif |
| 256 |
| 257 #if HAVE_SSE2 |
| 258 void vp9_sixtap_predict16x16_sse2(unsigned char *src_ptr, |
| 259 int src_pixels_per_line, |
| 260 int xoffset, |
| 261 int yoffset, |
| 262 unsigned char *dst_ptr, |
| 263 int dst_pitch) { |
| 264 /* Temp data bufffer used in filtering */ |
| 265 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24); |
| 266 const short *hfilter, *vfilter; |
| 267 #ifdef ANNOUNCE_FUNCTION |
| 268 printf("vp9_sixtap_predict16x16_sse2\n"); |
| 269 #endif |
| 270 |
| 271 if (xoffset) { |
| 272 if (yoffset) { |
| 273 hfilter = vp9_six_tap_mmx[xoffset]; |
| 274 vp9_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2, |
| 275 src_pixels_per_line, 1, 21, 32, hfilter); |
| 276 vfilter = vp9_six_tap_mmx[yoffset]; |
| 277 vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch, |
| 278 32, 16, 16, dst_pitch, vfilter); |
| 279 } else { |
| 280 /* First-pass only */ |
| 281 hfilter = vp9_six_tap_mmx[xoffset]; |
| 282 vp9_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, |
| 283 dst_ptr, dst_pitch, 16, hfilter); |
| 284 } |
| 285 } else { |
| 286 /* Second-pass only */ |
| 287 vfilter = vp9_six_tap_mmx[yoffset]; |
| 288 vp9_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2, |
| 289 src_pixels_per_line, 21, 32); |
| 290 vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch, |
| 291 32, 16, 16, dst_pitch, vfilter); |
| 292 } |
| 293 } |
| 294 |
| 295 void vp9_sixtap_predict8x8_sse2(unsigned char *src_ptr, |
| 296 int src_pixels_per_line, |
| 297 int xoffset, |
| 298 int yoffset, |
| 299 unsigned char *dst_ptr, |
| 300 int dst_pitch) { |
| 301 /* Temp data bufffer used in filtering */ |
| 302 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256); |
| 303 const short *hfilter, *vfilter; |
| 304 #ifdef ANNOUNCE_FUNCTION |
| 305 printf("vp9_sixtap_predict8x8_sse2\n"); |
| 306 #endif |
| 307 |
| 308 if (xoffset) { |
| 309 if (yoffset) { |
| 310 hfilter = vp9_six_tap_mmx[xoffset]; |
| 311 vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2, |
| 312 src_pixels_per_line, 1, 13, 16, hfilter); |
| 313 vfilter = vp9_six_tap_mmx[yoffset]; |
| 314 vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch, |
| 315 16, 8, 8, dst_pitch, vfilter); |
| 316 } else { |
| 317 /* First-pass only */ |
| 318 hfilter = vp9_six_tap_mmx[xoffset]; |
| 319 vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, |
| 320 dst_ptr, dst_pitch, 8, hfilter); |
| 321 } |
| 322 } else { |
| 323 /* Second-pass only */ |
| 324 vfilter = vp9_six_tap_mmx[yoffset]; |
| 325 vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), |
| 326 src_pixels_per_line, |
| 327 dst_ptr, dst_pitch, 8, vfilter); |
| 328 } |
| 329 } |
| 330 |
| 331 void vp9_sixtap_predict8x4_sse2(unsigned char *src_ptr, |
| 332 int src_pixels_per_line, |
| 333 int xoffset, |
| 334 int yoffset, |
| 335 unsigned char *dst_ptr, |
| 336 int dst_pitch) { |
| 337 /* Temp data bufffer used in filtering */ |
| 338 DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256); |
| 339 const short *hfilter, *vfilter; |
| 340 #ifdef ANNOUNCE_FUNCTION |
| 341 printf("vp9_sixtap_predict8x4_sse2\n"); |
| 342 #endif |
| 343 |
| 344 if (xoffset) { |
| 345 if (yoffset) { |
| 346 hfilter = vp9_six_tap_mmx[xoffset]; |
| 347 vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2, |
| 348 src_pixels_per_line, 1, 9, 16, hfilter); |
| 349 vfilter = vp9_six_tap_mmx[yoffset]; |
| 350 vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch, |
| 351 16, 8, 4, dst_pitch, vfilter); |
| 352 } else { |
| 353 /* First-pass only */ |
| 354 hfilter = vp9_six_tap_mmx[xoffset]; |
| 355 vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, |
| 356 dst_ptr, dst_pitch, 4, hfilter); |
| 357 } |
| 358 } else { |
| 359 /* Second-pass only */ |
| 360 vfilter = vp9_six_tap_mmx[yoffset]; |
| 361 vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), |
| 362 src_pixels_per_line, |
| 363 dst_ptr, dst_pitch, 4, vfilter); |
| 364 } |
| 365 } |
| 366 #endif |
| 367 |
| 368 #if HAVE_SSSE3 |
| 369 extern void vp9_filter_block1d8_h6_ssse3(unsigned char *src_ptr, |
| 370 unsigned int src_pixels_per_line, |
| 371 unsigned char *output_ptr, |
| 372 unsigned int output_pitch, |
| 373 unsigned int output_height, |
| 374 unsigned int vp9_filter_index); |
| 375 |
| 376 extern void vp9_filter_block1d16_h6_ssse3(unsigned char *src_ptr, |
| 377 unsigned int src_pixels_per_line, |
| 378 unsigned char *output_ptr, |
| 379 unsigned int output_pitch, |
| 380 unsigned int output_height, |
| 381 unsigned int vp9_filter_index); |
| 382 |
| 383 extern void vp9_filter_block1d16_v6_ssse3(unsigned char *src_ptr, |
| 384 unsigned int src_pitch, |
| 385 unsigned char *output_ptr, |
| 386 unsigned int out_pitch, |
| 387 unsigned int output_height, |
| 388 unsigned int vp9_filter_index); |
| 389 |
| 390 extern void vp9_filter_block1d8_v6_ssse3(unsigned char *src_ptr, |
| 391 unsigned int src_pitch, |
| 392 unsigned char *output_ptr, |
| 393 unsigned int out_pitch, |
| 394 unsigned int output_height, |
| 395 unsigned int vp9_filter_index); |
| 396 |
| 397 extern void vp9_filter_block1d4_h6_ssse3(unsigned char *src_ptr, |
| 398 unsigned int src_pixels_per_line, |
| 399 unsigned char *output_ptr, |
| 400 unsigned int output_pitch, |
| 401 unsigned int output_height, |
| 402 unsigned int vp9_filter_index); |
| 403 |
| 404 extern void vp9_filter_block1d4_v6_ssse3(unsigned char *src_ptr, |
| 405 unsigned int src_pitch, |
| 406 unsigned char *output_ptr, |
| 407 unsigned int out_pitch, |
| 408 unsigned int output_height, |
| 409 unsigned int vp9_filter_index); |
| 410 |
| 411 void vp9_sixtap_predict16x16_ssse3(unsigned char *src_ptr, |
| 412 int src_pixels_per_line, |
| 413 int xoffset, |
| 414 int yoffset, |
| 415 unsigned char *dst_ptr, |
| 416 int dst_pitch) { |
| 417 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 24 * 24); |
| 418 #ifdef ANNOUNCE_FUNCTION |
| 419 printf("vp9_sixtap_predict16x16_ssse3\n"); |
| 420 #endif |
| 421 |
| 422 if (xoffset) { |
| 423 if (yoffset) { |
| 424 vp9_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), |
| 425 src_pixels_per_line, |
| 426 fdata2, 16, 21, xoffset); |
| 427 vp9_filter_block1d16_v6_ssse3(fdata2, 16, dst_ptr, dst_pitch, |
| 428 16, yoffset); |
| 429 } else { |
| 430 /* First-pass only */ |
| 431 vp9_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, |
| 432 dst_ptr, dst_pitch, 16, xoffset); |
| 433 } |
| 434 } else { |
| 435 /* Second-pass only */ |
| 436 vp9_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), |
| 437 src_pixels_per_line, |
| 438 dst_ptr, dst_pitch, 16, yoffset); |
| 439 } |
| 440 } |
| 441 |
| 442 void vp9_sixtap_predict8x8_ssse3(unsigned char *src_ptr, |
| 443 int src_pixels_per_line, |
| 444 int xoffset, |
| 445 int yoffset, |
| 446 unsigned char *dst_ptr, |
| 447 int dst_pitch) { |
| 448 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256); |
| 449 #ifdef ANNOUNCE_FUNCTION |
| 450 printf("vp9_sixtap_predict8x8_ssse3\n"); |
| 451 #endif |
| 452 |
| 453 if (xoffset) { |
| 454 if (yoffset) { |
| 455 vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), |
| 456 src_pixels_per_line, fdata2, 8, 13, xoffset); |
| 457 vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 8, yoffset); |
| 458 } else { |
| 459 vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, |
| 460 dst_ptr, dst_pitch, 8, xoffset); |
| 461 } |
| 462 } else { |
| 463 /* Second-pass only */ |
| 464 vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), |
| 465 src_pixels_per_line, |
| 466 dst_ptr, dst_pitch, 8, yoffset); |
| 467 } |
| 468 } |
| 469 |
| 470 void vp9_sixtap_predict8x4_ssse3(unsigned char *src_ptr, |
| 471 int src_pixels_per_line, |
| 472 int xoffset, |
| 473 int yoffset, |
| 474 unsigned char *dst_ptr, |
| 475 int dst_pitch) { |
| 476 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256); |
| 477 #ifdef ANNOUNCE_FUNCTION |
| 478 printf("vp9_sixtap_predict8x4_ssse3\n"); |
| 479 #endif |
| 480 |
| 481 if (xoffset) { |
| 482 if (yoffset) { |
| 483 vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), |
| 484 src_pixels_per_line, fdata2, 8, 9, xoffset); |
| 485 vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 4, yoffset); |
| 486 } else { |
| 487 /* First-pass only */ |
| 488 vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, |
| 489 dst_ptr, dst_pitch, 4, xoffset); |
| 490 } |
| 491 } else { |
| 492 /* Second-pass only */ |
| 493 vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), |
| 494 src_pixels_per_line, |
| 495 dst_ptr, dst_pitch, 4, yoffset); |
| 496 } |
| 497 } |
| 498 |
| 499 void vp9_sixtap_predict4x4_ssse3(unsigned char *src_ptr, |
| 500 int src_pixels_per_line, |
| 501 int xoffset, |
| 502 int yoffset, |
| 503 unsigned char *dst_ptr, |
| 504 int dst_pitch) { |
| 505 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 4 * 9); |
| 506 #ifdef ANNOUNCE_FUNCTION |
| 507 printf("vp9_sixtap_predict4x4_ssse3\n"); |
| 508 #endif |
| 509 |
| 510 if (xoffset) { |
| 511 if (yoffset) { |
| 512 vp9_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), |
| 513 src_pixels_per_line, fdata2, 4, 9, xoffset); |
| 514 vp9_filter_block1d4_v6_ssse3(fdata2, 4, dst_ptr, dst_pitch, 4, yoffset); |
| 515 } else { |
| 516 vp9_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, |
| 517 dst_ptr, dst_pitch, 4, xoffset); |
| 518 } |
| 519 } else { |
| 520 vp9_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), |
| 521 src_pixels_per_line, |
| 522 dst_ptr, dst_pitch, 4, yoffset); |
| 523 } |
| 524 } |
| 525 |
| 526 void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr, |
| 527 const unsigned int src_pitch, |
| 528 unsigned char *output_ptr, |
| 529 unsigned int out_pitch, |
| 530 unsigned int output_height, |
| 531 const short *filter); |
| 532 |
| 533 void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr, |
| 534 const unsigned int src_pitch, |
| 535 unsigned char *output_ptr, |
| 536 unsigned int out_pitch, |
| 537 unsigned int output_height, |
| 538 const short *filter); |
| 539 |
| 540 void vp9_filter_block2d_16x16_8_ssse3(const unsigned char *src_ptr, |
| 541 const unsigned int src_stride, |
| 542 const short *hfilter_aligned16, |
| 543 const short *vfilter_aligned16, |
| 544 unsigned char *dst_ptr, |
| 545 unsigned int dst_stride) { |
| 546 if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) { |
| 547 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16); |
| 548 |
| 549 vp9_filter_block1d16_h8_ssse3(src_ptr - (3 * src_stride), src_stride, |
| 550 fdata2, 16, 23, hfilter_aligned16); |
| 551 vp9_filter_block1d16_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 16, |
| 552 vfilter_aligned16); |
| 553 } else { |
| 554 if (hfilter_aligned16[3] != 128) { |
| 555 vp9_filter_block1d16_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, |
| 556 16, hfilter_aligned16); |
| 557 } else { |
| 558 vp9_filter_block1d16_v8_ssse3(src_ptr - (3 * src_stride), src_stride, |
| 559 dst_ptr, dst_stride, 16, vfilter_aligned16); |
| 560 } |
| 561 } |
| 562 } |
| 563 |
| 564 void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr, |
| 565 const unsigned int src_pitch, |
| 566 unsigned char *output_ptr, |
| 567 unsigned int out_pitch, |
| 568 unsigned int output_height, |
| 569 const short *filter); |
| 570 |
| 571 void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr, |
| 572 const unsigned int src_pitch, |
| 573 unsigned char *output_ptr, |
| 574 unsigned int out_pitch, |
| 575 unsigned int output_height, |
| 576 const short *filter); |
| 577 |
| 578 void vp9_filter_block2d_8x8_8_ssse3(const unsigned char *src_ptr, |
| 579 const unsigned int src_stride, |
| 580 const short *hfilter_aligned16, |
| 581 const short *vfilter_aligned16, |
| 582 unsigned char *dst_ptr, |
| 583 unsigned int dst_stride) { |
| 584 if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) { |
| 585 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16); |
| 586 |
| 587 vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride, |
| 588 fdata2, 16, 15, hfilter_aligned16); |
| 589 vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 8, |
| 590 vfilter_aligned16); |
| 591 } else { |
| 592 if (hfilter_aligned16[3] != 128) { |
| 593 vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 8, |
| 594 hfilter_aligned16); |
| 595 } else { |
| 596 vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride, |
| 597 dst_ptr, dst_stride, 8, vfilter_aligned16); |
| 598 } |
| 599 } |
| 600 } |
| 601 |
| 602 void vp9_filter_block2d_8x4_8_ssse3(const unsigned char *src_ptr, |
| 603 const unsigned int src_stride, |
| 604 const short *hfilter_aligned16, |
| 605 const short *vfilter_aligned16, |
| 606 unsigned char *dst_ptr, |
| 607 unsigned int dst_stride) { |
| 608 if (hfilter_aligned16[3] !=128 && vfilter_aligned16[3] != 128) { |
| 609 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16); |
| 610 |
| 611 vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride, |
| 612 fdata2, 16, 11, hfilter_aligned16); |
| 613 vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 4, |
| 614 vfilter_aligned16); |
| 615 } else { |
| 616 if (hfilter_aligned16[3] != 128) { |
| 617 vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 4, |
| 618 hfilter_aligned16); |
| 619 } else { |
| 620 vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride, |
| 621 dst_ptr, dst_stride, 4, vfilter_aligned16); |
| 622 } |
| 623 } |
| 624 } |
| 625 #endif |
OLD | NEW |