| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 121 matching lines...) |
| 132 w, h + 1); \ | 132 w, h + 1); \ |
| 133 vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ | 133 vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ |
| 134 filter_x, x_step_q4, filter_y, \ | 134 filter_x, x_step_q4, filter_y, \ |
| 135 y_step_q4, w, h); \ | 135 y_step_q4, w, h); \ |
| 136 } \ | 136 } \ |
| 137 } else { \ | 137 } else { \ |
| 138 vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ | 138 vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ |
| 139 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ | 139 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ |
| 140 } \ | 140 } \ |
| 141 } | 141 } |
| 142 #if HAVE_AVX2 | 142 |
| 143 #if CONFIG_VP9_HIGHBITDEPTH |
| 144 |
| 145 typedef void high_filter8_1dfunction ( |
| 146 const uint16_t *src_ptr, |
| 147 const ptrdiff_t src_pitch, |
| 148 uint16_t *output_ptr, |
| 149 ptrdiff_t out_pitch, |
| 150 unsigned int output_height, |
| 151 const int16_t *filter, |
| 152 int bd |
| 153 ); |
| 154 |
| 155 #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ |
| 156 void vp9_high_convolve8_##name##_##opt(const uint8_t *src8, \ |
| 157 ptrdiff_t src_stride, \ |
| 158 uint8_t *dst8, ptrdiff_t dst_stride, \ |
| 159 const int16_t *filter_x, \ |
| 160 int x_step_q4, \ |
| 161 const int16_t *filter_y, \ |
| 162 int y_step_q4, \ |
| 163 int w, int h, int bd) { \ |
| 164 if (step_q4 == 16 && filter[3] != 128) { \ |
| 165 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ |
| 166 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ |
| 167 if (filter[0] || filter[1] || filter[2]) { \ |
| 168 while (w >= 16) { \ |
| 169 vp9_high_filter_block1d16_##dir##8_##avg##opt(src_start, \ |
| 170 src_stride, \ |
| 171 dst, \ |
| 172 dst_stride, \ |
| 173 h, \ |
| 174 filter, \ |
| 175 bd); \ |
| 176 src += 16; \ |
| 177 dst += 16; \ |
| 178 w -= 16; \ |
| 179 } \ |
| 180 while (w >= 8) { \ |
| 181 vp9_high_filter_block1d8_##dir##8_##avg##opt(src_start, \ |
| 182 src_stride, \ |
| 183 dst, \ |
| 184 dst_stride, \ |
| 185 h, \ |
| 186 filter, \ |
| 187 bd); \ |
| 188 src += 8; \ |
| 189 dst += 8; \ |
| 190 w -= 8; \ |
| 191 } \ |
| 192 while (w >= 4) { \ |
| 193 vp9_high_filter_block1d4_##dir##8_##avg##opt(src_start, \ |
| 194 src_stride, \ |
| 195 dst, \ |
| 196 dst_stride, \ |
| 197 h, \ |
| 198 filter, \ |
| 199 bd); \ |
| 200 src += 4; \ |
| 201 dst += 4; \ |
| 202 w -= 4; \ |
| 203 } \ |
| 204 } else { \ |
| 205 while (w >= 16) { \ |
| 206 vp9_high_filter_block1d16_##dir##2_##avg##opt(src, \ |
| 207 src_stride, \ |
| 208 dst, \ |
| 209 dst_stride, \ |
| 210 h, \ |
| 211 filter, \ |
| 212 bd); \ |
| 213 src += 16; \ |
| 214 dst += 16; \ |
| 215 w -= 16; \ |
| 216 } \ |
| 217 while (w >= 8) { \ |
| 218 vp9_high_filter_block1d8_##dir##2_##avg##opt(src, \ |
| 219 src_stride, \ |
| 220 dst, \ |
| 221 dst_stride, \ |
| 222 h, \ |
| 223 filter, \ |
| 224 bd); \ |
| 225 src += 8; \ |
| 226 dst += 8; \ |
| 227 w -= 8; \ |
| 228 } \ |
| 229 while (w >= 4) { \ |
| 230 vp9_high_filter_block1d4_##dir##2_##avg##opt(src, \ |
| 231 src_stride, \ |
| 232 dst, \ |
| 233 dst_stride, \ |
| 234 h, \ |
| 235 filter, \ |
| 236 bd); \ |
| 237 src += 4; \ |
| 238 dst += 4; \ |
| 239 w -= 4; \ |
| 240 } \ |
| 241 } \ |
| 242 } \ |
| 243 if (w) { \ |
| 244 vp9_high_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ |
| 245 filter_x, x_step_q4, filter_y, y_step_q4, \ |
| 246 w, h, bd); \ |
| 247 } \ |
| 248 } |
| 249 |
| 250 #define HIGH_FUN_CONV_2D(avg, opt) \ |
| 251 void vp9_high_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ |
| 252 uint8_t *dst, ptrdiff_t dst_stride, \ |
| 253 const int16_t *filter_x, int x_step_q4, \ |
| 254 const int16_t *filter_y, int y_step_q4, \ |
| 255 int w, int h, int bd) { \ |
| 256 assert(w <= 64); \ |
| 257 assert(h <= 64); \ |
| 258 if (x_step_q4 == 16 && y_step_q4 == 16) { \ |
| 259 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ |
| 260 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ |
| 261 DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 71); \ |
| 262 vp9_high_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ |
| 263 CONVERT_TO_BYTEPTR(fdata2), 64, \ |
| 264 filter_x, x_step_q4, filter_y, y_step_q4, \ |
| 265 w, h + 7, bd); \ |
| 266 vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ |
| 267 64, dst, dst_stride, \ |
| 268 filter_x, x_step_q4, filter_y, \ |
| 269 y_step_q4, w, h, bd); \ |
| 270 } else { \ |
| 271 DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 65); \ |
| 272 vp9_high_convolve8_horiz_##opt(src, src_stride, \ |
| 273 CONVERT_TO_BYTEPTR(fdata2), 64, \ |
| 274 filter_x, x_step_q4, filter_y, y_step_q4, \ |
| 275 w, h + 1, bd); \ |
| 276 vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ |
| 277 dst, dst_stride, \ |
| 278 filter_x, x_step_q4, filter_y, \ |
| 279 y_step_q4, w, h, bd); \ |
| 280 } \ |
| 281 } else { \ |
| 282 vp9_high_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ |
| 283 filter_x, x_step_q4, filter_y, y_step_q4, w, \ |
| 284 h, bd); \ |
| 285 } \ |
| 286 } |
| 287 #endif // CONFIG_VP9_HIGHBITDEPTH |
| 288 |
| 289 #if HAVE_AVX2 && HAVE_SSSE3 |
| 143 filter8_1dfunction vp9_filter_block1d16_v8_avx2; | 290 filter8_1dfunction vp9_filter_block1d16_v8_avx2; |
| 144 filter8_1dfunction vp9_filter_block1d16_h8_avx2; | 291 filter8_1dfunction vp9_filter_block1d16_h8_avx2; |
| 145 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | 292 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; |
| 146 #if (ARCH_X86_64) | 293 #if ARCH_X86_64 |
| 147 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; | 294 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; |
| 148 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; | 295 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; |
| 149 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; | 296 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; |
| 150 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 | 297 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 |
| 151 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 | 298 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 |
| 152 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 | 299 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 |
| 153 #else | 300 #else // ARCH_X86 |
| 154 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; | 301 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; |
| 155 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; | 302 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; |
| 156 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; | 303 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; |
| 157 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 | 304 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 |
| 158 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 | 305 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 |
| 159 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 | 306 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 |
| 160 #endif | 307 #endif // ARCH_X86_64 / ARCH_X86 |
| 161 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; | 308 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; |
| 162 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; | 309 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; |
| 163 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; | 310 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; |
| 164 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; | 311 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; |
| 165 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; | 312 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; |
| 166 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; | 313 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; |
| 167 #define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 | 314 #define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 |
| 168 #define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 | 315 #define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 |
| 169 #define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 | 316 #define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 |
| 170 #define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 | 317 #define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 |
| (...skipping 12 matching lines...) |
| 183 // int w, int h); | 330 // int w, int h); |
| 184 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); | 331 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); |
| 185 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); | 332 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); |
| 186 | 333 |
| 187 // void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, | 334 // void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, |
| 188 // uint8_t *dst, ptrdiff_t dst_stride, | 335 // uint8_t *dst, ptrdiff_t dst_stride, |
| 189 // const int16_t *filter_x, int x_step_q4, | 336 // const int16_t *filter_x, int x_step_q4, |
| 190 // const int16_t *filter_y, int y_step_q4, | 337 // const int16_t *filter_y, int y_step_q4, |
| 191 // int w, int h); | 338 // int w, int h); |
| 192 FUN_CONV_2D(, avx2); | 339 FUN_CONV_2D(, avx2); |
| 193 #endif | 340 #endif // HAVE_AVX2 && HAVE_SSSE3 |
| 194 #if HAVE_SSSE3 | 341 #if HAVE_SSSE3 |
| 195 #if (ARCH_X86_64) | 342 #if ARCH_X86_64 |
| 196 filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; | 343 filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; |
| 197 filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; | 344 filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; |
| 198 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; | 345 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; |
| 199 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; | 346 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; |
| 200 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | 347 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; |
| 201 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; | 348 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; |
| 202 #define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 | 349 #define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 |
| 203 #define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 | 350 #define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 |
| 204 #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 | 351 #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 |
| 205 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 | 352 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 |
| 206 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 | 353 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 |
| 207 #else | 354 #else // ARCH_X86 |
| 208 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; | 355 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; |
| 209 filter8_1dfunction vp9_filter_block1d16_h8_ssse3; | 356 filter8_1dfunction vp9_filter_block1d16_h8_ssse3; |
| 210 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; | 357 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; |
| 211 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; | 358 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; |
| 212 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | 359 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; |
| 213 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; | 360 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; |
| 214 #endif | 361 #endif // ARCH_X86_64 / ARCH_X86 |
| 215 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; | 362 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; |
| 216 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; | 363 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; |
| 217 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; | 364 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; |
| 218 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; | 365 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; |
| 219 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; | 366 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; |
| 220 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; | 367 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; |
| 221 | 368 |
| 222 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; | 369 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; |
| 223 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; | 370 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; |
| 224 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; | 371 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; |
| (...skipping 38 matching lines...) |
| 263 // const int16_t *filter_x, int x_step_q4, | 410 // const int16_t *filter_x, int x_step_q4, |
| 264 // const int16_t *filter_y, int y_step_q4, | 411 // const int16_t *filter_y, int y_step_q4, |
| 265 // int w, int h); | 412 // int w, int h); |
| 266 // void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, | 413 // void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, |
| 267 // uint8_t *dst, ptrdiff_t dst_stride, | 414 // uint8_t *dst, ptrdiff_t dst_stride, |
| 268 // const int16_t *filter_x, int x_step_q4, | 415 // const int16_t *filter_x, int x_step_q4, |
| 269 // const int16_t *filter_y, int y_step_q4, | 416 // const int16_t *filter_y, int y_step_q4, |
| 270 // int w, int h); | 417 // int w, int h); |
| 271 FUN_CONV_2D(, ssse3); | 418 FUN_CONV_2D(, ssse3); |
| 272 FUN_CONV_2D(avg_ , ssse3); | 419 FUN_CONV_2D(avg_ , ssse3); |
| 273 #endif | 420 #endif // HAVE_SSSE3 |
| 274 | 421 |
| 275 #if HAVE_SSE2 | 422 #if HAVE_SSE2 |
| 276 filter8_1dfunction vp9_filter_block1d16_v8_sse2; | 423 filter8_1dfunction vp9_filter_block1d16_v8_sse2; |
| 277 filter8_1dfunction vp9_filter_block1d16_h8_sse2; | 424 filter8_1dfunction vp9_filter_block1d16_h8_sse2; |
| 278 filter8_1dfunction vp9_filter_block1d8_v8_sse2; | 425 filter8_1dfunction vp9_filter_block1d8_v8_sse2; |
| 279 filter8_1dfunction vp9_filter_block1d8_h8_sse2; | 426 filter8_1dfunction vp9_filter_block1d8_h8_sse2; |
| 280 filter8_1dfunction vp9_filter_block1d4_v8_sse2; | 427 filter8_1dfunction vp9_filter_block1d4_v8_sse2; |
| 281 filter8_1dfunction vp9_filter_block1d4_h8_sse2; | 428 filter8_1dfunction vp9_filter_block1d4_h8_sse2; |
| 282 filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2; | 429 filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2; |
| 283 filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2; | 430 filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2; |
| (...skipping 45 matching lines...) |
| 329 // const int16_t *filter_x, int x_step_q4, | 476 // const int16_t *filter_x, int x_step_q4, |
| 330 // const int16_t *filter_y, int y_step_q4, | 477 // const int16_t *filter_y, int y_step_q4, |
| 331 // int w, int h); | 478 // int w, int h); |
| 332 // void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, | 479 // void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 333 // uint8_t *dst, ptrdiff_t dst_stride, | 480 // uint8_t *dst, ptrdiff_t dst_stride, |
| 334 // const int16_t *filter_x, int x_step_q4, | 481 // const int16_t *filter_x, int x_step_q4, |
| 335 // const int16_t *filter_y, int y_step_q4, | 482 // const int16_t *filter_y, int y_step_q4, |
| 336 // int w, int h); | 483 // int w, int h); |
| 337 FUN_CONV_2D(, sse2); | 484 FUN_CONV_2D(, sse2); |
| 338 FUN_CONV_2D(avg_ , sse2); | 485 FUN_CONV_2D(avg_ , sse2); |
| 339 #endif | 486 |
| 487 #if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 |
| 488 high_filter8_1dfunction vp9_high_filter_block1d16_v8_sse2; |
| 489 high_filter8_1dfunction vp9_high_filter_block1d16_h8_sse2; |
| 490 high_filter8_1dfunction vp9_high_filter_block1d8_v8_sse2; |
| 491 high_filter8_1dfunction vp9_high_filter_block1d8_h8_sse2; |
| 492 high_filter8_1dfunction vp9_high_filter_block1d4_v8_sse2; |
| 493 high_filter8_1dfunction vp9_high_filter_block1d4_h8_sse2; |
| 494 high_filter8_1dfunction vp9_high_filter_block1d16_v8_avg_sse2; |
| 495 high_filter8_1dfunction vp9_high_filter_block1d16_h8_avg_sse2; |
| 496 high_filter8_1dfunction vp9_high_filter_block1d8_v8_avg_sse2; |
| 497 high_filter8_1dfunction vp9_high_filter_block1d8_h8_avg_sse2; |
| 498 high_filter8_1dfunction vp9_high_filter_block1d4_v8_avg_sse2; |
| 499 high_filter8_1dfunction vp9_high_filter_block1d4_h8_avg_sse2; |
| 500 |
| 501 high_filter8_1dfunction vp9_high_filter_block1d16_v2_sse2; |
| 502 high_filter8_1dfunction vp9_high_filter_block1d16_h2_sse2; |
| 503 high_filter8_1dfunction vp9_high_filter_block1d8_v2_sse2; |
| 504 high_filter8_1dfunction vp9_high_filter_block1d8_h2_sse2; |
| 505 high_filter8_1dfunction vp9_high_filter_block1d4_v2_sse2; |
| 506 high_filter8_1dfunction vp9_high_filter_block1d4_h2_sse2; |
| 507 high_filter8_1dfunction vp9_high_filter_block1d16_v2_avg_sse2; |
| 508 high_filter8_1dfunction vp9_high_filter_block1d16_h2_avg_sse2; |
| 509 high_filter8_1dfunction vp9_high_filter_block1d8_v2_avg_sse2; |
| 510 high_filter8_1dfunction vp9_high_filter_block1d8_h2_avg_sse2; |
| 511 high_filter8_1dfunction vp9_high_filter_block1d4_v2_avg_sse2; |
| 512 high_filter8_1dfunction vp9_high_filter_block1d4_h2_avg_sse2; |
| 513 |
| 514 // void vp9_high_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 515 // uint8_t *dst, ptrdiff_t dst_stride, |
| 516 // const int16_t *filter_x, int x_step_q4, |
| 517 // const int16_t *filter_y, int y_step_q4, |
| 518 // int w, int h, int bd); |
| 519 // void vp9_high_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 520 // uint8_t *dst, ptrdiff_t dst_stride, |
| 521 // const int16_t *filter_x, int x_step_q4, |
| 522 // const int16_t *filter_y, int y_step_q4, |
| 523 // int w, int h, int bd); |
| 524 // void vp9_high_convolve8_avg_horiz_sse2(const uint8_t *src, |
| 525 // ptrdiff_t src_stride, |
| 526 // uint8_t *dst, ptrdiff_t dst_stride, |
| 527 // const int16_t *filter_x, |
| 528 // int x_step_q4, |
| 529 // const int16_t *filter_y, |
| 530 // int y_step_q4, |
| 531 // int w, int h, int bd); |
| 532 // void vp9_high_convolve8_avg_vert_sse2(const uint8_t *src, |
| 533 // ptrdiff_t src_stride, |
| 534 // uint8_t *dst, ptrdiff_t dst_stride, |
| 535 // const int16_t *filter_x, int x_step_q4, |
| 536 // const int16_t *filter_y, int y_step_q4, |
| 537 // int w, int h, int bd); |
| 538 HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); |
| 539 HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); |
| 540 HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); |
| 541 HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, |
| 542 sse2); |
| 543 |
| 544 // void vp9_high_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 545 // uint8_t *dst, ptrdiff_t dst_stride, |
| 546 // const int16_t *filter_x, int x_step_q4, |
| 547 // const int16_t *filter_y, int y_step_q4, |
| 548 // int w, int h, int bd); |
| 549 // void vp9_high_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 550 // uint8_t *dst, ptrdiff_t dst_stride, |
| 551 // const int16_t *filter_x, int x_step_q4, |
| 552 // const int16_t *filter_y, int y_step_q4, |
| 553 // int w, int h, int bd); |
| 554 HIGH_FUN_CONV_2D(, sse2); |
| 555 HIGH_FUN_CONV_2D(avg_ , sse2); |
| 556 #endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 |
| 557 #endif // HAVE_SSE2 |
| OLD | NEW |