| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <immintrin.h> | 11 #include <immintrin.h> |
| 12 #include "vpx_ports/mem.h" | 12 #include "vpx_ports/mem.h" |
| 13 | 13 |
| 14 // filters for 16_h8 and 16_v8 | 14 // filters for 16_h8 and 16_v8 |
| 15 DECLARE_ALIGNED(32, const unsigned char, filt1_global_avx2[32])= { | 15 DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = { |
| 16 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, | 16 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, |
| 17 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8}; | 17 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 |
| 18 }; |
| 18 | 19 |
| 19 DECLARE_ALIGNED(32, const unsigned char, filt2_global_avx2[32])= { | 20 DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = { |
| 20 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, | 21 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, |
| 21 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10}; | 22 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 |
| 23 }; |
| 22 | 24 |
| 23 DECLARE_ALIGNED(32, const unsigned char, filt3_global_avx2[32])= { | 25 DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = { |
| 24 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, | 26 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, |
| 25 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12}; | 27 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 |
| 28 }; |
| 26 | 29 |
| 27 DECLARE_ALIGNED(32, const unsigned char, filt4_global_avx2[32])= { | 30 DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { |
| 28 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, | 31 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, |
| 29 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14}; | 32 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 |
| 30 | 33 }; |
| 31 | 34 |
| 32 void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, | 35 void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, |
| 33 unsigned int src_pixels_per_line, | 36 unsigned int src_pixels_per_line, |
| 34 unsigned char *output_ptr, | 37 unsigned char *output_ptr, |
| 35 unsigned int output_pitch, | 38 unsigned int output_pitch, |
| 36 unsigned int output_height, | 39 unsigned int output_height, |
| 37 int16_t *filter) { | 40 int16_t *filter) { |
| 38 __m128i filtersReg; | 41 __m128i filtersReg; |
| 39 __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg; | 42 __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg; |
| 40 __m256i firstFilters, secondFilters, thirdFilters, forthFilters; | 43 __m256i firstFilters, secondFilters, thirdFilters, forthFilters; |
| (...skipping 492 matching lines...) |
| 533 | 536 |
| 534 // shrink each 16-bit value to 8 bits; the first lane contains the | 537 // shrink each 16-bit value to 8 bits; the first lane contains the |
| 535 // first convolve result and the second lane contains the second | 538 // first convolve result and the second lane contains the second |
| 536 // convolve result | 539 // convolve result |
| 537 srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3); | 540 srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3); |
| 538 | 541 |
| 539 // save 16 bytes | 542 // save 16 bytes |
| 540 _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); | 543 _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); |
| 541 } | 544 } |
| 542 } | 545 } |
| OLD | NEW |