/*
 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef __MACROS_MSA_H__
#define __MACROS_MSA_H__
fbarchard1  2016/09/14 01:48:04
Could you add:
#if !defined(LIBYUV_DISABLE_MSA) &

manojkumar.bhosale  2016/09/14 12:45:29
Done.
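
The condition in the comment above is cut off after the '&'. For context, a minimal sketch of how such a guard could wrap the header body is shown here; the second half of the condition, defined(__mips_msa), and the placement of the closing #endif are assumptions for illustration, not part of the reviewed change.

    /* Sketch only: the defined(__mips_msa) half of the condition and the
       #endif placement are assumed, since the comment above is truncated. */
    #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
    #include <stdint.h>
    #include <msa.h>
    /* ... MSA load/store/shuffle macros ... */
    #endif  /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */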

#include <stdint.h>
#include <msa.h>

#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc))
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)

#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in)
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)

/* Description : Load two vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
   Return Type - as per RTYPE
   Details     : Load 16 byte elements in 'out0' from (psrc)
                 Load 16 byte elements in 'out1' from (psrc + stride)
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) \
{ \
  out0 = LD_B(RTYPE, (psrc)); \
  out1 = LD_B(RTYPE, (psrc) + stride); \
}
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)

#define LD_B3(RTYPE, psrc, stride, out0, out1, out2) \
{ \
  LD_B2(RTYPE, (psrc), stride, out0, out1); \
  out2 = LD_B(RTYPE, (psrc) + 2 * stride); \
}
#define LD_UB3(...) LD_B3(v16u8, __VA_ARGS__)
#define LD_SB3(...) LD_B3(v16i8, __VA_ARGS__)

#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
{ \
  LD_B2(RTYPE, (psrc), stride, out0, out1); \
  LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
}
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)

#define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \
{ \
  LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
  out4 = LD_B(RTYPE, (psrc) + 4 * stride); \
}
#define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__)
#define LD_SB5(...) LD_B5(v16i8, __VA_ARGS__)

#define LD_B6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \
{ \
  LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
  LD_B2(RTYPE, (psrc) + 4 * stride, stride, out4, out5); \
}
#define LD_UB6(...) LD_B6(v16u8, __VA_ARGS__)
#define LD_SB6(...) LD_B6(v16i8, __VA_ARGS__)

#define LD_B7(RTYPE, psrc, stride, \
              out0, out1, out2, out3, out4, out5, out6) \
{ \
  LD_B5(RTYPE, (psrc), stride, out0, out1, out2, out3, out4); \
  LD_B2(RTYPE, (psrc) + 5 * stride, stride, out5, out6); \
}
#define LD_UB7(...) LD_B7(v16u8, __VA_ARGS__)
#define LD_SB7(...) LD_B7(v16i8, __VA_ARGS__)

#define LD_B8(RTYPE, psrc, stride, \
              out0, out1, out2, out3, out4, out5, out6, out7) \
{ \
  LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
  LD_B4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \
}
#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__)
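
For illustration, a small hypothetical helper showing how the LD_UB* loaders above are typically invoked on rows of 8-bit pixels; the function SumTwoRows and its parameters are invented for this sketch and are not part of the change.

    #include <stdint.h>
    #include <msa.h>
    #include "macros_msa.h"  /* assumed include path for this header */

    /* Load two 16-pixel rows that are 'stride' bytes apart and return their
       saturating per-byte sum (ADDS_U.B). */
    static v16u8 SumTwoRows(uint8_t* src, int stride) {
      v16u8 row0, row1;
      LD_UB2(src, stride, row0, row1);  /* row0 from src, row1 from src + stride */
      return __msa_adds_u_b(row0, row1);
    }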

/* Description : Store two vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 16 byte elements from 'in0' to (pdst)
                 Store 16 byte elements from 'in1' to (pdst + stride)
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) \
{ \
  ST_B(RTYPE, in0, (pdst)); \
  ST_B(RTYPE, in1, (pdst) + stride); \
}
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
#define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__)

#define ST_B3(RTYPE, in0, in1, in2, pdst, stride) \
{ \
  ST_B2(RTYPE, in0, in1, (pdst), stride); \
  ST_B(RTYPE, in2, (pdst) + 2 * stride); \
}
#define ST_UB3(...) ST_B3(v16u8, __VA_ARGS__)

#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
{ \
  ST_B2(RTYPE, in0, in1, (pdst), stride); \
  ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
}
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
#define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__)

#define ST_B5(RTYPE, in0, in1, in2, in3, in4, pdst, stride) \
{ \
  ST_B2(RTYPE, in0, in1, (pdst), stride); \
  ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
  ST_B(RTYPE, in4, (pdst) + 4 * stride); \
}
#define ST_UB5(...) ST_B5(v16u8, __VA_ARGS__)

#define ST_B6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \
{ \
  ST_B2(RTYPE, in0, in1, (pdst), stride); \
  ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
  ST_B2(RTYPE, in4, in5, (pdst) + 4 * stride, stride); \
}
#define ST_UB6(...) ST_B6(v16u8, __VA_ARGS__)

#define ST_B7(RTYPE, in0, in1, in2, in3, in4, in5, in6, pdst, stride) \
{ \
  ST_B2(RTYPE, in0, in1, (pdst), stride); \
  ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
  ST_B2(RTYPE, in4, in5, (pdst) + 4 * stride, stride); \
  ST_B(RTYPE, in6, (pdst) + 6 * stride); \
}
#define ST_UB7(...) ST_B7(v16u8, __VA_ARGS__)

#define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
              pdst, stride) \
{ \
  ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride); \
  ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \
}
#define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__)
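
As with the loads, a hypothetical usage sketch (the helper CopyTwoRows is invented for illustration): two rows are read with LD_UB2 and written back with ST_UB2, each side using its own stride.

    #include <stdint.h>
    #include <msa.h>
    #include "macros_msa.h"  /* assumed include path for this header */

    /* Copy two 16-byte rows from 'src' to 'dst'. */
    static void CopyTwoRows(uint8_t* src, int src_stride,
                            uint8_t* dst, int dst_stride) {
      v16u8 row0, row1;
      LD_UB2(src, src_stride, row0, row1);  /* load source rows 0 and 1 */
      ST_UB2(row0, row1, dst, dst_stride);  /* store them as destination rows 0 and 1 */
    }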

/* Description : Shuffle byte vector elements as per mask vector
   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
                 Outputs - out0, out1
   Return Type - as per RTYPE
   Details     : Byte elements from 'in0' & 'in1' are copied selectively to
                 'out0' as per control vector 'mask0'
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
{ \
  out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
  out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
}
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
#endif /* __MACROS_MSA_H__ */
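
To make the mask convention concrete, a hypothetical sketch that byte-reverses two vectors with VSHF_B2_UB. Passing the same vector as both data operands keeps the example independent of which operand the control indices 0-15 versus 16-31 select from.

    #include <msa.h>
    #include "macros_msa.h"  /* assumed include path for this header */

    /* Reverse the byte order of *v0 and *v1 in place. */
    static void ReverseBytes(v16u8* v0, v16u8* v1) {
      /* Control vector: destination byte i takes source byte 15 - i. */
      v16i8 mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
      VSHF_B2_UB(*v0, *v0, *v1, *v1, mask, mask, *v0, *v1);
    }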