| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <assert.h> | 11 #include "./vp9_rtcd.h" |
| 12 | |
| 13 #include "./vpx_config.h" | 12 #include "./vpx_config.h" |
| 14 #include "./vp9_rtcd.h" | 13 #include "vp9/common/x86/convolve.h" |
| 15 #include "vpx_ports/mem.h" | |
| 16 | |
| 17 typedef void filter8_1dfunction ( | |
| 18 const unsigned char *src_ptr, | |
| 19 const ptrdiff_t src_pitch, | |
| 20 unsigned char *output_ptr, | |
| 21 ptrdiff_t out_pitch, | |
| 22 unsigned int output_height, | |
| 23 const short *filter | |
| 24 ); | |
| 25 | |
| 26 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ | |
| 27 void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ | |
| 28 uint8_t *dst, ptrdiff_t dst_stride, \ | |
| 29 const int16_t *filter_x, int x_step_q4, \ | |
| 30 const int16_t *filter_y, int y_step_q4, \ | |
| 31 int w, int h) { \ | |
| 32 if (step_q4 == 16 && filter[3] != 128) { \ | |
| 33 if (filter[0] || filter[1] || filter[2]) { \ | |
| 34 while (w >= 16) { \ | |
| 35 vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ | |
| 36 src_stride, \ | |
| 37 dst, \ | |
| 38 dst_stride, \ | |
| 39 h, \ | |
| 40 filter); \ | |
| 41 src += 16; \ | |
| 42 dst += 16; \ | |
| 43 w -= 16; \ | |
| 44 } \ | |
| 45 while (w >= 8) { \ | |
| 46 vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ | |
| 47 src_stride, \ | |
| 48 dst, \ | |
| 49 dst_stride, \ | |
| 50 h, \ | |
| 51 filter); \ | |
| 52 src += 8; \ | |
| 53 dst += 8; \ | |
| 54 w -= 8; \ | |
| 55 } \ | |
| 56 while (w >= 4) { \ | |
| 57 vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ | |
| 58 src_stride, \ | |
| 59 dst, \ | |
| 60 dst_stride, \ | |
| 61 h, \ | |
| 62 filter); \ | |
| 63 src += 4; \ | |
| 64 dst += 4; \ | |
| 65 w -= 4; \ | |
| 66 } \ | |
| 67 } else { \ | |
| 68 while (w >= 16) { \ | |
| 69 vp9_filter_block1d16_##dir##2_##avg##opt(src, \ | |
| 70 src_stride, \ | |
| 71 dst, \ | |
| 72 dst_stride, \ | |
| 73 h, \ | |
| 74 filter); \ | |
| 75 src += 16; \ | |
| 76 dst += 16; \ | |
| 77 w -= 16; \ | |
| 78 } \ | |
| 79 while (w >= 8) { \ | |
| 80 vp9_filter_block1d8_##dir##2_##avg##opt(src, \ | |
| 81 src_stride, \ | |
| 82 dst, \ | |
| 83 dst_stride, \ | |
| 84 h, \ | |
| 85 filter); \ | |
| 86 src += 8; \ | |
| 87 dst += 8; \ | |
| 88 w -= 8; \ | |
| 89 } \ | |
| 90 while (w >= 4) { \ | |
| 91 vp9_filter_block1d4_##dir##2_##avg##opt(src, \ | |
| 92 src_stride, \ | |
| 93 dst, \ | |
| 94 dst_stride, \ | |
| 95 h, \ | |
| 96 filter); \ | |
| 97 src += 4; \ | |
| 98 dst += 4; \ | |
| 99 w -= 4; \ | |
| 100 } \ | |
| 101 } \ | |
| 102 } \ | |
| 103 if (w) { \ | |
| 104 vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ | |
| 105 filter_x, x_step_q4, filter_y, y_step_q4, \ | |
| 106 w, h); \ | |
| 107 } \ | |
| 108 } | |
| 109 | |
| 110 #define FUN_CONV_2D(avg, opt) \ | |
| 111 void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ | |
| 112 uint8_t *dst, ptrdiff_t dst_stride, \ | |
| 113 const int16_t *filter_x, int x_step_q4, \ | |
| 114 const int16_t *filter_y, int y_step_q4, \ | |
| 115 int w, int h) { \ | |
| 116 assert(w <= 64); \ | |
| 117 assert(h <= 64); \ | |
| 118 if (x_step_q4 == 16 && y_step_q4 == 16) { \ | |
| 119 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ | |
| 120 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ | |
| 121 DECLARE_ALIGNED(16, unsigned char, fdata2[64 * 71]); \ | |
| 122 vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ | |
| 123 filter_x, x_step_q4, filter_y, y_step_q4, \ | |
| 124 w, h + 7); \ | |
| 125 vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ | |
| 126 filter_x, x_step_q4, filter_y, \ | |
| 127 y_step_q4, w, h); \ | |
| 128 } else { \ | |
| 129 DECLARE_ALIGNED(16, unsigned char, fdata2[64 * 65]); \ | |
| 130 vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ | |
| 131 filter_x, x_step_q4, filter_y, y_step_q4, \ | |
| 132 w, h + 1); \ | |
| 133 vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ | |
| 134 filter_x, x_step_q4, filter_y, \ | |
| 135 y_step_q4, w, h); \ | |
| 136 } \ | |
| 137 } else { \ | |
| 138 vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ | |
| 139 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ | |
| 140 } \ | |
| 141 } | |
| 142 | |
| 143 #if CONFIG_VP9_HIGHBITDEPTH | |
| 144 | |
| 145 typedef void highbd_filter8_1dfunction ( | |
| 146 const uint16_t *src_ptr, | |
| 147 const ptrdiff_t src_pitch, | |
| 148 uint16_t *output_ptr, | |
| 149 ptrdiff_t out_pitch, | |
| 150 unsigned int output_height, | |
| 151 const int16_t *filter, | |
| 152 int bd | |
| 153 ); | |
| 154 | |
| 155 #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ | |
| 156 void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ | |
| 157 ptrdiff_t src_stride, \ | |
| 158 uint8_t *dst8, \ | |
| 159 ptrdiff_t dst_stride, \ | |
| 160 const int16_t *filter_x, \ | |
| 161 int x_step_q4, \ | |
| 162 const int16_t *filter_y, \ | |
| 163 int y_step_q4, \ | |
| 164 int w, int h, int bd) { \ | |
| 165 if (step_q4 == 16 && filter[3] != 128) { \ | |
| 166 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ | |
| 167 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ | |
| 168 if (filter[0] || filter[1] || filter[2]) { \ | |
| 169 while (w >= 16) { \ | |
| 170 vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ | |
| 171 src_stride, \ | |
| 172 dst, \ | |
| 173 dst_stride, \ | |
| 174 h, \ | |
| 175 filter, \ | |
| 176 bd); \ | |
| 177 src += 16; \ | |
| 178 dst += 16; \ | |
| 179 w -= 16; \ | |
| 180 } \ | |
| 181 while (w >= 8) { \ | |
| 182 vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ | |
| 183 src_stride, \ | |
| 184 dst, \ | |
| 185 dst_stride, \ | |
| 186 h, \ | |
| 187 filter, \ | |
| 188 bd); \ | |
| 189 src += 8; \ | |
| 190 dst += 8; \ | |
| 191 w -= 8; \ | |
| 192 } \ | |
| 193 while (w >= 4) { \ | |
| 194 vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ | |
| 195 src_stride, \ | |
| 196 dst, \ | |
| 197 dst_stride, \ | |
| 198 h, \ | |
| 199 filter, \ | |
| 200 bd); \ | |
| 201 src += 4; \ | |
| 202 dst += 4; \ | |
| 203 w -= 4; \ | |
| 204 } \ | |
| 205 } else { \ | |
| 206 while (w >= 16) { \ | |
| 207 vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ | |
| 208 src_stride, \ | |
| 209 dst, \ | |
| 210 dst_stride, \ | |
| 211 h, \ | |
| 212 filter, \ | |
| 213 bd); \ | |
| 214 src += 16; \ | |
| 215 dst += 16; \ | |
| 216 w -= 16; \ | |
| 217 } \ | |
| 218 while (w >= 8) { \ | |
| 219 vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ | |
| 220 src_stride, \ | |
| 221 dst, \ | |
| 222 dst_stride, \ | |
| 223 h, \ | |
| 224 filter, \ | |
| 225 bd); \ | |
| 226 src += 8; \ | |
| 227 dst += 8; \ | |
| 228 w -= 8; \ | |
| 229 } \ | |
| 230 while (w >= 4) { \ | |
| 231 vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ | |
| 232 src_stride, \ | |
| 233 dst, \ | |
| 234 dst_stride, \ | |
| 235 h, \ | |
| 236 filter, \ | |
| 237 bd); \ | |
| 238 src += 4; \ | |
| 239 dst += 4; \ | |
| 240 w -= 4; \ | |
| 241 } \ | |
| 242 } \ | |
| 243 } \ | |
| 244 if (w) { \ | |
| 245 vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ | |
| 246 filter_x, x_step_q4, filter_y, y_step_q4, \ | |
| 247 w, h, bd); \ | |
| 248 } \ | |
| 249 } | |
| 250 | |
| 251 #define HIGH_FUN_CONV_2D(avg, opt) \ | |
| 252 void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ | |
| 253 uint8_t *dst, ptrdiff_t dst_stride, \ | |
| 254 const int16_t *filter_x, int x_step_q4, \ | |
| 255 const int16_t *filter_y, int y_step_q4, \ | |
| 256 int w, int h, int bd) { \ | |
| 257 assert(w <= 64); \ | |
| 258 assert(h <= 64); \ | |
| 259 if (x_step_q4 == 16 && y_step_q4 == 16) { \ | |
| 260 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ | |
| 261 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ | |
| 262 DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ | |
| 263 vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ | |
| 264 CONVERT_TO_BYTEPTR(fdata2), 64, \ | |
| 265 filter_x, x_step_q4, \ | |
| 266 filter_y, y_step_q4, \ | |
| 267 w, h + 7, bd); \ | |
| 268 vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ | |
| 269 64, dst, dst_stride, \ | |
| 270 filter_x, x_step_q4, \ | |
| 271 filter_y, y_step_q4, \ | |
| 272 w, h, bd); \ | |
| 273 } else { \ | |
| 274 DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ | |
| 275 vp9_highbd_convolve8_horiz_##opt(src, src_stride, \ | |
| 276 CONVERT_TO_BYTEPTR(fdata2), 64, \ | |
| 277 filter_x, x_step_q4, \ | |
| 278 filter_y, y_step_q4, \ | |
| 279 w, h + 1, bd); \ | |
| 280 vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ | |
| 281 dst, dst_stride, \ | |
| 282 filter_x, x_step_q4, \ | |
| 283 filter_y, y_step_q4, \ | |
| 284 w, h, bd); \ | |
| 285 } \ | |
| 286 } else { \ | |
| 287 vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ | |
| 288 filter_x, x_step_q4, filter_y, y_step_q4, w, \ | |
| 289 h, bd); \ | |
| 290 } \ | |
| 291 } | |
| 292 #endif // CONFIG_VP9_HIGHBITDEPTH | |
| 293 | |
| 294 #if HAVE_AVX2 && HAVE_SSSE3 | |
| 295 filter8_1dfunction vp9_filter_block1d16_v8_avx2; | |
| 296 filter8_1dfunction vp9_filter_block1d16_h8_avx2; | |
| 297 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | |
| 298 #if ARCH_X86_64 | |
| 299 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; | |
| 300 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; | |
| 301 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; | |
| 302 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 | |
| 303 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 | |
| 304 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 | |
| 305 #else // ARCH_X86 | |
| 306 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; | |
| 307 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; | |
| 308 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; | |
| 309 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 | |
| 310 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 | |
| 311 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 | |
| 312 #endif // ARCH_X86_64 / ARCH_X86 | |
| 313 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; | |
| 314 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; | |
| 315 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; | |
| 316 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; | |
| 317 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; | |
| 318 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; | |
| 319 #define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 | |
| 320 #define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 | |
| 321 #define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 | |
| 322 #define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 | |
| 323 #define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 | |
| 324 #define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 | |
| 325 #define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 | |
| 326 // void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, | |
| 327 // uint8_t *dst, ptrdiff_t dst_stride, | |
| 328 // const int16_t *filter_x, int x_step_q4, | |
| 329 // const int16_t *filter_y, int y_step_q4, | |
| 330 // int w, int h); | |
| 331 // void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, | |
| 332 // uint8_t *dst, ptrdiff_t dst_stride, | |
| 333 // const int16_t *filter_x, int x_step_q4, | |
| 334 // const int16_t *filter_y, int y_step_q4, | |
| 335 // int w, int h); | |
| 336 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); | |
| 337 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); | |
| 338 | |
| 339 // void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, | |
| 340 // uint8_t *dst, ptrdiff_t dst_stride, | |
| 341 // const int16_t *filter_x, int x_step_q4, | |
| 342 // const int16_t *filter_y, int y_step_q4, | |
| 343 // int w, int h); | |
| 344 FUN_CONV_2D(, avx2); | |
| 345 #endif // HAVE_AVX2 && HAVE_SSSE3 | |
| 346 #if HAVE_SSSE3 | |
| 347 #if ARCH_X86_64 | |
| 348 filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; | |
| 349 filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; | |
| 350 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; | |
| 351 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; | |
| 352 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | |
| 353 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; | |
| 354 #define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 | |
| 355 #define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 | |
| 356 #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 | |
| 357 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 | |
| 358 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 | |
| 359 #else // ARCH_X86 | |
| 360 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; | |
| 361 filter8_1dfunction vp9_filter_block1d16_h8_ssse3; | |
| 362 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; | |
| 363 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; | |
| 364 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | |
| 365 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; | |
| 366 #endif // ARCH_X86_64 / ARCH_X86 | |
| 367 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; | |
| 368 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; | |
| 369 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; | |
| 370 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; | |
| 371 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; | |
| 372 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; | |
| 373 | |
| 374 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; | |
| 375 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; | |
| 376 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; | |
| 377 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; | |
| 378 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; | |
| 379 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; | |
| 380 filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3; | |
| 381 filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3; | |
| 382 filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3; | |
| 383 filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3; | |
| 384 filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3; | |
| 385 filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3; | |
| 386 | |
| 387 // void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, | |
| 388 // uint8_t *dst, ptrdiff_t dst_stride, | |
| 389 // const int16_t *filter_x, int x_step_q4, | |
| 390 // const int16_t *filter_y, int y_step_q4, | |
| 391 // int w, int h); | |
| 392 // void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, | |
| 393 // uint8_t *dst, ptrdiff_t dst_stride, | |
| 394 // const int16_t *filter_x, int x_step_q4, | |
| 395 // const int16_t *filter_y, int y_step_q4, | |
| 396 // int w, int h); | |
| 397 // void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, | |
| 398 // uint8_t *dst, ptrdiff_t dst_stride, | |
| 399 // const int16_t *filter_x, int x_step_q4, | |
| 400 // const int16_t *filter_y, int y_step_q4, | |
| 401 // int w, int h); | |
| 402 // void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, | |
| 403 // uint8_t *dst, ptrdiff_t dst_stride, | |
| 404 // const int16_t *filter_x, int x_step_q4, | |
| 405 // const int16_t *filter_y, int y_step_q4, | |
| 406 // int w, int h); | |
| 407 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); | |
| 408 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); | |
| 409 FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); | |
| 410 FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, | |
| 411 ssse3); | |
| 412 | |
| 413 // void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, | |
| 414 // uint8_t *dst, ptrdiff_t dst_stride, | |
| 415 // const int16_t *filter_x, int x_step_q4, | |
| 416 // const int16_t *filter_y, int y_step_q4, | |
| 417 // int w, int h); | |
| 418 // void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, | |
| 419 // uint8_t *dst, ptrdiff_t dst_stride, | |
| 420 // const int16_t *filter_x, int x_step_q4, | |
| 421 // const int16_t *filter_y, int y_step_q4, | |
| 422 // int w, int h); | |
| 423 FUN_CONV_2D(, ssse3); | |
| 424 FUN_CONV_2D(avg_ , ssse3); | |
| 425 #endif // HAVE_SSSE3 | |
| 426 | 14 |
| 427 #if HAVE_SSE2 | 15 #if HAVE_SSE2 |
| 428 filter8_1dfunction vp9_filter_block1d16_v8_sse2; | 16 filter8_1dfunction vp9_filter_block1d16_v8_sse2; |
| 429 filter8_1dfunction vp9_filter_block1d16_h8_sse2; | 17 filter8_1dfunction vp9_filter_block1d16_h8_sse2; |
| 430 filter8_1dfunction vp9_filter_block1d8_v8_sse2; | 18 filter8_1dfunction vp9_filter_block1d8_v8_sse2; |
| 431 filter8_1dfunction vp9_filter_block1d8_h8_sse2; | 19 filter8_1dfunction vp9_filter_block1d8_h8_sse2; |
| 432 filter8_1dfunction vp9_filter_block1d4_v8_sse2; | 20 filter8_1dfunction vp9_filter_block1d4_v8_sse2; |
| 433 filter8_1dfunction vp9_filter_block1d4_h8_sse2; | 21 filter8_1dfunction vp9_filter_block1d4_h8_sse2; |
| 434 filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2; | 22 filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2; |
| 435 filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2; | 23 filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2; |
| (...skipping 129 matching lines...) |
| 565 // int w, int h, int bd); | 153 // int w, int h, int bd); |
| 566 // void vp9_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, | 154 // void vp9_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 567 // uint8_t *dst, ptrdiff_t dst_stride, | 155 // uint8_t *dst, ptrdiff_t dst_stride, |
| 568 // const int16_t *filter_x, int x_step_q4, | 156 // const int16_t *filter_x, int x_step_q4, |
| 569 // const int16_t *filter_y, int y_step_q4, | 157 // const int16_t *filter_y, int y_step_q4, |
| 570 // int w, int h, int bd); | 158 // int w, int h, int bd); |
| 571 HIGH_FUN_CONV_2D(, sse2); | 159 HIGH_FUN_CONV_2D(, sse2); |
| 572 HIGH_FUN_CONV_2D(avg_ , sse2); | 160 HIGH_FUN_CONV_2D(avg_ , sse2); |
| 573 #endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 | 161 #endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 |
| 574 #endif // HAVE_SSE2 | 162 #endif // HAVE_SSE2 |
| OLD | NEW |