// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CommonMacrosMSA_h
#define CommonMacrosMSA_h

#include <msa.h>
#include <stdint.h>

#if defined(__clang__)
#define CLANG_BUILD
#endif

#ifdef CLANG_BUILD
#define SRLI_H(a, b) __msa_srli_h((v8i16)a, b)
#define SLLI_H(a, b) __msa_slli_h((v8i16)a, b)
#define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b)
#else
#define SRLI_H(a, b) ((v8u16)a >> b)
#define SLLI_H(a, b) ((v8i16)a << b)
#define CEQI_H(a, b) (a == b)
#endif
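
/* Illustrative usage of the wrappers above (variable names are hypothetical):
 * clang is routed through the MSA builtins, while GCC's generic vector
 * extensions accept the plain operator forms.
 *
 *   v8u16 px = LD_UH(src);             // 'src' points at 8 halfwords
 *   v8u16 hi = (v8u16)SRLI_H(px, 8);   // each lane shifted right by 8
 *   v8i16 eq = CEQI_H((v8i16)px, 0);   // per-lane compare with immediate 0
 */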

#define LD_V(RTYPE, psrc) *((RTYPE*)(psrc))
#define LD_UB(...) LD_V(v16u8, __VA_ARGS__)
#define LD_UH(...) LD_V(v8u16, __VA_ARGS__)
#define LD_SP(...) LD_V(v4f32, __VA_ARGS__)
#define LD_DP(...) LD_V(v2f64, __VA_ARGS__)

#define ST_V(RTYPE, in, pdst) *((RTYPE*)(pdst)) = in
#define ST_UB(...) ST_V(v16u8, __VA_ARGS__)
#define ST_UH(...) ST_V(v8u16, __VA_ARGS__)
#define ST_SP(...) ST_V(v4f32, __VA_ARGS__)
#define ST_DP(...) ST_V(v2f64, __VA_ARGS__)
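
/* Illustrative round trip (buffer names are hypothetical): LD_V/ST_V simply
 * dereference the pointer as the requested vector type.
 *
 *   uint8_t src[16] = {0}, dst[16];
 *   v16u8 v = LD_UB(src); // load 16 bytes as one vector
 *   ST_UB(v, dst);        // store the 16 bytes back out
 */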

#ifdef CLANG_BUILD
#define COPY_DOUBLE_TO_VECTOR(a) ({ \
  v2f64 out; \
  out = (v2f64) __msa_fill_d(*(int64_t *)(&a)); \
  out; \
})
#else
#define COPY_DOUBLE_TO_VECTOR(a) ({ \
  v2f64 out; \
  out = __msa_cast_to_vector_double(a); \
  out = (v2f64) __msa_splati_d((v2i64) out, 0); \
  out; \
})
#endif
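
/* Illustrative usage (variable names are hypothetical): both variants
 * broadcast one double across the two v2f64 lanes; the clang path type-puns
 * through int64_t for __msa_fill_d, the GCC path casts and splats lane 0.
 *
 *   double d = 0.5;
 *   v2f64 vd = COPY_DOUBLE_TO_VECTOR(d); // vd = { 0.5, 0.5 }
 */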

#define MSA_STORE_FUNC(TYPE, INSTR, FUNCNAME) \
  static inline void FUNCNAME(TYPE val, void* const pdst) \
  { \
    uint8_t* const pdstm = (uint8_t*)pdst; \
    TYPE valm = val; \
    asm volatile( \
      " " #INSTR " %[valm], %[pdstm] \n\t" \
      : [pdstm] "=m" (*pdstm) \
      : [valm] "r" (valm)); \
  }

#define MSA_STORE(val, pdst, FUNCNAME) FUNCNAME(val, pdst)

#ifdef CLANG_BUILD
MSA_STORE_FUNC(uint32_t, sw, msa_sw);
#define SW(val, pdst) MSA_STORE(val, pdst, msa_sw)
#if (__mips == 64)
MSA_STORE_FUNC(uint64_t, sd, msa_sd);
#define SD(val, pdst) MSA_STORE(val, pdst, msa_sd)
#else
#define SD(val, pdst) \
  { \
    uint8_t* const pdstsd = (uint8_t*)(pdst); \
    const uint32_t val0m = (uint32_t)(val & 0x00000000FFFFFFFF); \
    const uint32_t val1m = (uint32_t)((val >> 32) & 0x00000000FFFFFFFF); \
    SW(val0m, pdstsd); \
    SW(val1m, pdstsd + 4); \
  }
#endif
#else
#if (__mips_isa_rev >= 6)
MSA_STORE_FUNC(uint32_t, sw, msa_sw);
#define SW(val, pdst) MSA_STORE(val, pdst, msa_sw)
MSA_STORE_FUNC(uint64_t, sd, msa_sd);
#define SD(val, pdst) MSA_STORE(val, pdst, msa_sd)
#else // !(__mips_isa_rev >= 6)
MSA_STORE_FUNC(uint32_t, usw, msa_usw);
#define SW(val, pdst) MSA_STORE(val, pdst, msa_usw)
#define SD(val, pdst) \
  { \
    uint8_t* const pdstsd = (uint8_t*)(pdst); \
    const uint32_t val0m = (uint32_t)(val & 0x00000000FFFFFFFF); \
    const uint32_t val1m = (uint32_t)((val >> 32) & 0x00000000FFFFFFFF); \
    SW(val0m, pdstsd); \
    SW(val1m, pdstsd + 4); \
  }
#endif // (__mips_isa_rev >= 6)
#endif
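
/* Illustrative usage (buffer name is hypothetical): SW/SD emit the named
 * store instruction through inline asm. Note the 32-bit SD fallback writes
 * the low word first, matching a little-endian 64-bit store.
 *
 *   uint8_t buf[8];
 *   SW(0x01020304u, buf);           // store one 32-bit word
 *   SD(0x0102030405060708ull, buf); // store one 64-bit doubleword
 */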

/* Description : Load vectors of elements with stride
 * Arguments   : Inputs  - psrc, stride
 *               Outputs - out0, out1
 *               Return Type - as per RTYPE
 * Details     : Load elements in 'out0' from (psrc)
 *               Load elements in 'out1' from (psrc + stride)
 */
#define LD_V2(RTYPE, psrc, stride, out0, out1) \
  { \
    out0 = LD_V(RTYPE, psrc); \
    psrc += stride; \
    out1 = LD_V(RTYPE, psrc); \
    psrc += stride; \
  }
#define LD_UB2(...) LD_V2(v16u8, __VA_ARGS__)
#define LD_UH2(...) LD_V2(v8u16, __VA_ARGS__)
#define LD_SP2(...) LD_V2(v4f32, __VA_ARGS__)

#define LD_V3(RTYPE, psrc, stride, out0, out1, out2) \
  { \
    LD_V2(RTYPE, psrc, stride, out0, out1); \
    out2 = LD_V(RTYPE, psrc); \
    psrc += stride; \
  }
#define LD_UB3(...) LD_V3(v16u8, __VA_ARGS__)
#define LD_UH3(...) LD_V3(v8u16, __VA_ARGS__)

#define LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \
  { \
    LD_V2(RTYPE, psrc, stride, out0, out1); \
    LD_V2(RTYPE, psrc, stride, out2, out3); \
  }
#define LD_UB4(...) LD_V4(v16u8, __VA_ARGS__)
#define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__)
#define LD_SP4(...) LD_V4(v4f32, __VA_ARGS__)
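
/* Illustrative usage (names are hypothetical): load four 16-byte rows that
 * sit 'stride' bytes apart. The macros advance the pointer argument, so pass
 * a copy if the caller still needs the original.
 *
 *   const uint8_t* p = src; // copy; LD_UB4 leaves p at src + 4 * stride
 *   v16u8 r0, r1, r2, r3;
 *   LD_UB4(p, stride, r0, r1, r2, r3);
 */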

/* Description : Store vectors of elements with stride
 * Arguments   : Inputs - in0, in1, pdst, stride
 * Details     : Store elements from 'in0' to (pdst)
 *               Store elements from 'in1' to (pdst + stride)
 */
#define ST_V2(RTYPE, in0, in1, pdst, stride) \
  { \
    ST_V(RTYPE, in0, pdst); \
    pdst += stride; \
    ST_V(RTYPE, in1, pdst); \
    pdst += stride; \
  }
#define ST_UB2(...) ST_V2(v16u8, __VA_ARGS__)
#define ST_UH2(...) ST_V2(v8u16, __VA_ARGS__)
#define ST_SP2(...) ST_V2(v4f32, __VA_ARGS__)

#define ST_V3(RTYPE, in0, in1, in2, pdst, stride) \
  { \
    ST_V2(RTYPE, in0, in1, pdst, stride); \
    ST_V(RTYPE, in2, pdst); \
    pdst += stride; \
  }
#define ST_UB3(...) ST_V3(v16u8, __VA_ARGS__)
#define ST_UH3(...) ST_V3(v8u16, __VA_ARGS__)

#define ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \
  { \
    ST_V2(RTYPE, in0, in1, pdst, stride); \
    ST_V2(RTYPE, in2, in3, pdst, stride); \
  }
#define ST_UB4(...) ST_V4(v16u8, __VA_ARGS__)
#define ST_UH4(...) ST_V4(v8u16, __VA_ARGS__)
#define ST_SP4(...) ST_V4(v4f32, __VA_ARGS__)

#define ST_V6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \
  { \
    ST_V3(RTYPE, in0, in1, in2, pdst, stride); \
    ST_V3(RTYPE, in3, in4, in5, pdst, stride); \
  }
#define ST_UB6(...) ST_V6(v16u8, __VA_ARGS__)
#define ST_SP6(...) ST_V6(v4f32, __VA_ARGS__)

#define ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
  { \
    ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride); \
    ST_V4(RTYPE, in4, in5, in6, in7, pdst, stride); \
  }
#define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__)
#define ST_SP8(...) ST_V8(v4f32, __VA_ARGS__)
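
/* Illustrative usage (names are hypothetical), mirroring the loads above:
 * store two rows 'stride' bytes apart. The destination pointer is advanced
 * as well, so pass a copy if the original is still needed.
 *
 *   uint8_t* q = dst; // copy; ST_UB2 leaves q at dst + 2 * stride
 *   ST_UB2(r0, r1, q, stride);
 */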

/* Description : Interleave even halfword elements from vectors
 * Arguments   : Inputs  - in0, in1, in2, in3
 *               Outputs - out0, out1
 *               Return Type - as per RTYPE
 * Details     : Even halfword elements of 'in0' and 'in1' are interleaved
 *               and written to 'out0'
 *               Even halfword elements of 'in2' and 'in3' are interleaved
 *               and written to 'out1'
 */
#define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \
    out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \
  }
#define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)
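
/* Lane sketch for 'out0' (assuming the usual ILVEV operand order, with 8
 * halfword lanes per vector; 'out1' is the same with in2/in3):
 *   out0 = { in0[0], in1[0], in0[2], in1[2], in0[4], in1[4], in0[6], in1[6] }
 */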

/* Description : Interleave both left and right half of input vectors
 * Arguments   : Inputs  - in0, in1
 *               Outputs - out0, out1
 *               Return Type - as per RTYPE
 * Details     : Right half of byte elements from 'in0' and 'in1' are
 *               interleaved and written to 'out0'
 *               Left half of byte elements from 'in0' and 'in1' are
 *               interleaved and written to 'out1'
 */
#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
    out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
  }
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
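
/* Lane sketch (assuming the usual ILVR/ILVL operand order, with 16 byte
 * lanes per vector):
 *   out0 = { in1[0], in0[0], in1[1], in0[1], ..., in1[7],  in0[7]  }
 *   out1 = { in1[8], in0[8], in1[9], in0[9], ..., in1[15], in0[15] }
 */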

#endif // CommonMacrosMSA_h