Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(711)

Side by Side Diff: include/libyuv/macros_msa.h

Issue 2559683002: Add MSA optimized remaining scale row functions (Closed)
Patch Set: Changes as per review comments Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | include/libyuv/scale_row.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
44 uint64 val_m = 0; \ 44 uint64 val_m = 0; \
45 val0_m = LW(psrc_ld_m); \ 45 val0_m = LW(psrc_ld_m); \
46 val1_m = LW(psrc_ld_m + 4); \ 46 val1_m = LW(psrc_ld_m + 4); \
47 val_m = (uint64)(val1_m); /* NOLINT */ \ 47 val_m = (uint64)(val1_m); /* NOLINT */ \
48 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ 48 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
49 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ 49 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \
50 val_m; \ 50 val_m; \
51 }) 51 })
52 #endif // (__mips == 64) 52 #endif // (__mips == 64)
53 53
54 #define SW(val, pdst) \ 54 #define SW(val, pdst) \
55 ({ \ 55 ({ \
56 uint8_t* pdst_sw_m = (uint8_t*)(pdst); \ 56 uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
57 uint32_t val_m = (val); \ 57 uint32_t val_m = (val); \
58 asm volatile("sw %[val_m], %[pdst_sw_m] \n" \ 58 asm volatile("sw %[val_m], %[pdst_sw_m] \n" \
59 \ 59 : [pdst_sw_m] "=m"(*pdst_sw_m) \
60 : [pdst_sw_m] "=m"(*pdst_sw_m) \ 60 : [val_m] "r"(val_m)); \
61 : [val_m] "r"(val_m)); \
62 }) 61 })
63 62
64 #if (__mips == 64) 63 #if (__mips == 64)
65 #define SD(val, pdst) \ 64 #define SD(val, pdst) \
66 ({ \ 65 ({ \
67 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ 66 uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
68 uint64_t val_m = (val); \ 67 uint64_t val_m = (val); \
69 asm volatile("sd %[val_m], %[pdst_sd_m] \n" \ 68 asm volatile("sd %[val_m], %[pdst_sd_m] \n" \
70 \ 69 : [pdst_sd_m] "=m"(*pdst_sd_m) \
71 : [pdst_sd_m] "=m"(*pdst_sd_m) \ 70 : [val_m] "r"(val_m)); \
72 : [val_m] "r"(val_m)); \
73 }) 71 })
74 #else // !(__mips == 64) 72 #else // !(__mips == 64)
75 #define SD(val, pdst) \ 73 #define SD(val, pdst) \
76 ({ \ 74 ({ \
77 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ 75 uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
78 uint32_t val0_m, val1_m; \ 76 uint32_t val0_m, val1_m; \
79 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ 77 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
80 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ 78 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
81 SW(val0_m, pdst_sd_m); \ 79 SW(val0_m, pdst_sd_m); \
82 SW(val1_m, pdst_sd_m + 4); \ 80 SW(val1_m, pdst_sd_m + 4); \
83 }) 81 })
84 #endif // !(__mips == 64) 82 #endif // !(__mips == 64)
85 #else // !(__mips_isa_rev >= 6) 83 #else // !(__mips_isa_rev >= 6)
86 #define LW(psrc) \ 84 #define LW(psrc) \
87 ({ \ 85 ({ \
(...skipping 23 matching lines...) Expand all
111 uint64 val_m = 0; \ 109 uint64 val_m = 0; \
112 val0_m = LW(psrc_ld_m); \ 110 val0_m = LW(psrc_ld_m); \
113 val1_m = LW(psrc_ld_m + 4); \ 111 val1_m = LW(psrc_ld_m + 4); \
114 val_m = (uint64)(val1_m); /* NOLINT */ \ 112 val_m = (uint64)(val1_m); /* NOLINT */ \
115 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ 113 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
116 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ 114 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \
117 val_m; \ 115 val_m; \
118 }) 116 })
119 #endif // (__mips == 64) 117 #endif // (__mips == 64)
120 118
121 #define SW(val, pdst) \ 119 #define SW(val, pdst) \
122 ({ \ 120 ({ \
123 uint8_t* pdst_sw_m = (uint8_t*)(pdst); \ 121 uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
124 uint32_t val_m = (val); \ 122 uint32_t val_m = (val); \
125 asm volatile("usw %[val_m], %[pdst_sw_m] \n" \ 123 asm volatile("usw %[val_m], %[pdst_sw_m] \n" \
126 : [pdst_sw_m] "=m"(*pdst_sw_m) \ 124 : [pdst_sw_m] "=m"(*pdst_sw_m) \
127 : [val_m] "r"(val_m)); \ 125 : [val_m] "r"(val_m)); \
128 }) 126 })
129 127
130 #define SD(val, pdst) \ 128 #define SD(val, pdst) \
131 ({ \ 129 ({ \
132 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ 130 uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
133 uint32_t val0_m, val1_m; \ 131 uint32_t val0_m, val1_m; \
134 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ 132 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
135 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ 133 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
136 SW(val0_m, pdst_sd_m); \ 134 SW(val0_m, pdst_sd_m); \
137 SW(val1_m, pdst_sd_m + 4); \ 135 SW(val1_m, pdst_sd_m + 4); \
138 }) 136 })
139 #endif // (__mips_isa_rev >= 6) 137 #endif // (__mips_isa_rev >= 6)
140 138
141 // TODO(fbarchard): Consider removing __VAR_ARGS versions. 139 // TODO(fbarchard): Consider removing __VAR_ARGS versions.
142 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ 140 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
143 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) 141 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
144 142
145 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ 143 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
146 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) 144 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
147 145
146 #define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
147 #define ST_UH(...) ST_H(v8u16, __VA_ARGS__)
148
148 /* Description : Load two vectors with 16 'byte' sized elements 149 /* Description : Load two vectors with 16 'byte' sized elements
149 Arguments : Inputs - psrc, stride 150 Arguments : Inputs - psrc, stride
150 Outputs - out0, out1 151 Outputs - out0, out1
151 Return Type - as per RTYPE 152 Return Type - as per RTYPE
152 Details : Load 16 byte elements in 'out0' from (psrc) 153 Details : Load 16 byte elements in 'out0' from (psrc)
153 Load 16 byte elements in 'out1' from (psrc + stride) 154 Load 16 byte elements in 'out1' from (psrc + stride)
154 */ 155 */
155 #define LD_B2(RTYPE, psrc, stride, out0, out1) \ 156 #define LD_B2(RTYPE, psrc, stride, out0, out1) \
156 { \ 157 { \
157 out0 = LD_B(RTYPE, (psrc)); \ 158 out0 = LD_B(RTYPE, (psrc)); \
(...skipping 21 matching lines...) Expand all
179 } 180 }
180 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) 181 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
181 182
182 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ 183 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
183 { \ 184 { \
184 ST_B2(RTYPE, in0, in1, (pdst), stride); \ 185 ST_B2(RTYPE, in0, in1, (pdst), stride); \
185 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ 186 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
186 } 187 }
187 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) 188 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
188 189
190 /* Description : Store vectors of 8 halfword elements with stride
191 Arguments : Inputs - in0, in1, pdst, stride
192 Details : Store 8 halfword elements from 'in0' to (pdst)
193 Store 8 halfword elements from 'in1' to (pdst + stride)
194 */
195 #define ST_H2(RTYPE, in0, in1, pdst, stride) \
fbarchard1 2016/12/16 18:54:06 is this the correct style for macros? try clang-format
manojkumar.bhosale 2016/12/20 09:25:56 Already did the clang-format as above. Tried again
196 { \
197 ST_H(RTYPE, in0, (pdst)); \
198 ST_H(RTYPE, in1, (pdst) + stride); \
199 }
200 #define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__)
201
189 // TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly. 202 // TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
190 /* Description : Shuffle byte vector elements as per mask vector 203 /* Description : Shuffle byte vector elements as per mask vector
191 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 204 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
192 Outputs - out0, out1 205 Outputs - out0, out1
193 Return Type - as per RTYPE 206 Return Type - as per RTYPE
194 Details : Byte elements from 'in0' & 'in1' are copied selectively to 207 Details : Byte elements from 'in0' & 'in1' are copied selectively to
195 'out0' as per control vector 'mask0' 208 'out0' as per control vector 'mask0'
196 */ 209 */
197 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ 210 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
198 { \ 211 { \
(...skipping 12 matching lines...) Expand all
211 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ 224 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
212 { \ 225 { \
213 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ 226 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
214 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ 227 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
215 } 228 }
216 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) 229 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
217 230
218 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ 231 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
219 232
220 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ 233 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_
OLDNEW
« no previous file with comments | « no previous file | include/libyuv/scale_row.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698