source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c - Issue 181493009: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c

Issue 181493009: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include <immintrin.h>	11 #include <immintrin.h>

12 #include "vpx_ports/mem.h"	12 #include "vpx_ports/mem.h"

13	13

14 // filters for 16_h8 and 16_v8	14 // filters for 16_h8 and 16_v8

15 DECLARE_ALIGNED(32, const unsigned char, filt1_global_avx2[32])= {	15 DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = {

16 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,	16 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,

17 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8};	17 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8

	18 };

18	19

19 DECLARE_ALIGNED(32, const unsigned char, filt2_global_avx2[32])= {	20 DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = {

20 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,	21 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,

21 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10};	22 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10

	23 };

22	24

23 DECLARE_ALIGNED(32, const unsigned char, filt3_global_avx2[32])= {	25 DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = {

24 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12,	26 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12,

25 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12};	27 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12

	28 };

26	29

27 DECLARE_ALIGNED(32, const unsigned char, filt4_global_avx2[32])= {	30 DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {

28 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14,	31 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14,

29 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14};	32 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14

30	33 };

31	34

32 void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,	35 void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,

33 unsigned int src_pixels_per_line,	36 unsigned int src_pixels_per_line,

34 unsigned char *output_ptr,	37 unsigned char *output_ptr,

35 unsigned int output_pitch,	38 unsigned int output_pitch,

36 unsigned int output_height,	39 unsigned int output_height,

37 int16_t *filter) {	40 int16_t *filter) {

38 __m128i filtersReg;	41 __m128i filtersReg;

39 __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg;	42 __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg;

40 __m256i firstFilters, secondFilters, thirdFilters, forthFilters;	43 __m256i firstFilters, secondFilters, thirdFilters, forthFilters;

(...skipping 492 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
533	536

534 // shrink to 8 bit each 16 bits, the first lane contain the first	537 // shrink to 8 bit each 16 bits, the first lane contain the first

535 // convolve result and the second lane contain the second convolve	538 // convolve result and the second lane contain the second convolve

536 // result	539 // result

537 srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3);	540 srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3);

538	541

539 // save 16 bytes	542 // save 16 bytes

540 _mm_store_si128((__m128i*)output_ptr, srcRegFilt1);	543 _mm_store_si128((__m128i*)output_ptr, srcRegFilt1);

541 }	544 }

542 }	545 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/x86/vp9_loopfilter_mmx.asm ('k') | source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c » ('j') | no next file with comments »