| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <immintrin.h> // AVX2 | 11 #include <immintrin.h> // AVX2 |
| 12 | 12 |
| 13 #include "./vp9_rtcd.h" | 13 #include "./vp9_rtcd.h" |
| 14 #include "vpx_ports/mem.h" | 14 #include "vpx_ports/mem.h" |
| 15 #include "vp9/encoder/vp9_variance.h" | 15 #include "vp9/encoder/vp9_variance.h" |
| 16 | 16 |
| 17 DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { | 17 DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { |
| 18 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, | 18 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, |
| 19 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, | 19 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, |
| 20 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, | |
| 21 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, | |
| 22 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, | 20 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, |
| 23 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, | 21 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, |
| 24 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, | |
| 25 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, | |
| 26 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, | 22 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, |
| 27 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, | 23 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, |
| 28 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, | |
| 29 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, | |
| 30 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, | 24 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, |
| 31 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, | 25 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, |
| 32 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, | |
| 33 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, | |
| 34 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, | 26 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
| 35 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, | 27 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
| 36 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, | |
| 37 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, | |
| 38 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, | 28 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, |
| 39 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, | 29 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, |
| 40 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, | |
| 41 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, | |
| 42 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, | 30 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, |
| 43 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, | 31 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, |
| 44 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, | |
| 45 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, | |
| 46 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, | 32 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, |
| 47 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, | 33 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, |
| 48 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, | |
| 49 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15 | |
| 50 }; | 34 }; |
| 51 | 35 |
| 52 #define FILTER_SRC(filter) \ | 36 #define FILTER_SRC(filter) \ |
| 53 /* filter the source */ \ | 37 /* filter the source */ \ |
| 54 exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \ | 38 exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \ |
| 55 exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter); \ | 39 exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter); \ |
| 56 \ | 40 \ |
| 57 /* add 8 to source */ \ | 41 /* add 8 to source */ \ |
| 58 exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8); \ | 42 exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8); \ |
| 59 exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8); \ | 43 exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8); \ |
| (...skipping 472 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 532 src_pack = src_reg; | 516 src_pack = src_reg; |
| 533 sec+= sec_stride; | 517 sec+= sec_stride; |
| 534 CALC_SUM_SSE_INSIDE_LOOP | 518 CALC_SUM_SSE_INSIDE_LOOP |
| 535 dst+= dst_stride; | 519 dst+= dst_stride; |
| 536 } | 520 } |
| 537 } | 521 } |
| 538 } | 522 } |
| 539 CALC_SUM_AND_SSE | 523 CALC_SUM_AND_SSE |
| 540 return sum; | 524 return sum; |
| 541 } | 525 } |
| OLD | NEW |