Chromium Code Reviews

Unified Diff: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h

Issue 2289693002: Add MIPS SIMD Arch (MSA) optimized WebGL image conversion function (Closed)
Patch Set: Fixed merge conflicts Created 4 years, 3 months ago
Index: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
diff --git a/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h b/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
new file mode 100644
index 0000000000000000000000000000000000000000..2363c658e3f74b958924e028c4a07863540575fe
--- /dev/null
+++ b/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
@@ -0,0 +1,214 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CommonMacrosMSA_h
+#define CommonMacrosMSA_h
+
+#include <msa.h>
+#include <stdint.h>
+
+#if defined(__clang__)
+#define CLANG_BUILD
+#endif
+
+#ifdef CLANG_BUILD
+#define SRLI_H(a, b) __msa_srli_h((v8i16)a, b)
+#define SLLI_H(a, b) __msa_slli_h((v8i16)a, b)
+#define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b)
+#else
+#define SRLI_H(a, b) ((v8u16)a >> b)
+#define SLLI_H(a, b) ((v8i16)a << b)
+#define CEQI_H(a, b) (a == b)
+#endif
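+
+/* The wrappers above hide a toolchain difference: Clang only accepts the
+ * __msa_* builtins, while GCC can use plain vector-extension operators.
+ * A minimal usage sketch (variable names are illustrative only):
+ *
+ *   v8u16 pix = ...;                    // eight 16-bit pixels
+ *   v8u16 hi5 = (v8u16)SRLI_H(pix, 11); // top 5 bits of each element
+ *   v8i16 msk = (v8i16)CEQI_H(pix, 0);  // lanes equal to 0 become all-ones
+ */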
+
+#define LD_V(RTYPE, psrc) *((RTYPE*)(psrc))
+#define LD_UB(...) LD_V(v16u8, __VA_ARGS__)
+#define LD_UH(...) LD_V(v8u16, __VA_ARGS__)
+#define LD_SP(...) LD_V(v4f32, __VA_ARGS__)
+#define LD_DP(...) LD_V(v2f64, __VA_ARGS__)
+
+#define ST_V(RTYPE, in, pdst) *((RTYPE*)(pdst)) = in
+#define ST_UB(...) ST_V(v16u8, __VA_ARGS__)
+#define ST_UH(...) ST_V(v8u16, __VA_ARGS__)
+#define ST_SP(...) ST_V(v4f32, __VA_ARGS__)
+#define ST_DP(...) ST_V(v2f64, __VA_ARGS__)
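+
+/* LD_V/ST_V just reinterpret the pointer and dereference it, letting the
+ * compiler emit the MSA vector load/store. Illustrative 16-byte copy
+ * (pointer names are examples only):
+ *
+ *   v16u8 row = LD_UB(srcPtr);
+ *   ST_UB(row, dstPtr);
+ */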
+
+#ifdef CLANG_BUILD
+#define COPY_DOUBLE_TO_VECTOR(a) ({ \
+ v2f64 out; \
+ out = (v2f64) __msa_fill_d(*(int64_t *)(&a)); \
+ out; \
+})
+#else
+#define COPY_DOUBLE_TO_VECTOR(a) ({ \
+ v2f64 out; \
+ out = __msa_cast_to_vector_double(a); \
+ out = (v2f64) __msa_splati_d((v2i64) out, 0); \
+ out; \
+})
+#endif
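+
+/* Both variants broadcast a single double into both lanes of a v2f64; they
+ * differ only because __msa_cast_to_vector_double is not available under
+ * Clang. Sketch (the 'scale' name is illustrative):
+ *
+ *   double scale = 1.0 / 255.0;
+ *   v2f64 vscale = COPY_DOUBLE_TO_VECTOR(scale);
+ */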
+
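+/* MSA_STORE_FUNC generates a small helper that stores a scalar through the
+ * given MIPS store instruction via inline asm. The SW/SD definitions below
+ * then pick an encoding the toolchain supports: usw for unaligned stores on
+ * pre-R6 GCC, and a pair of SW stores where no 64-bit GPR store exists.
+ */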
+#define MSA_STORE_FUNC(TYPE, INSTR, FUNCNAME) \
+ static inline void FUNCNAME(TYPE val, void* const pdst) \
+ { \
+ uint8_t* const pdstm = (uint8_t*)pdst; \
+ TYPE valm = val; \
+ asm volatile( \
+ " " #INSTR " %[valm], %[pdstm] \n\t" \
+ : [pdstm] "=m" (*pdstm) \
+ : [valm] "r" (valm)); \
+ }
+
+#define MSA_STORE(val, pdst, FUNCNAME) FUNCNAME(val, pdst)
+
+#ifdef CLANG_BUILD
+MSA_STORE_FUNC(uint32_t, sw, msa_sw);
+#define SW(val, pdst) MSA_STORE(val, pdst, msa_sw)
+#if (__mips == 64)
+MSA_STORE_FUNC(uint64_t, sd, msa_sd);
+#define SD(val, pdst) MSA_STORE(val, pdst, msa_sd)
+#else
+#define SD(val, pdst) \
+{ \
+ uint8_t* const pdstsd = (uint8_t*)(pdst); \
+ const uint32_t val0m = (uint32_t)(val & 0x00000000FFFFFFFF); \
+ const uint32_t val1m = (uint32_t)((val >> 32) & 0x00000000FFFFFFFF); \
+ SW(val0m, pdstsd); \
+ SW(val1m, pdstsd + 4); \
+}
+#endif
+#else
+#if (__mips_isa_rev >= 6)
+MSA_STORE_FUNC(uint32_t, sw, msa_sw);
+#define SW(val, pdst) MSA_STORE(val, pdst, msa_sw)
+MSA_STORE_FUNC(uint64_t, sd, msa_sd);
+#define SD(val, pdst) MSA_STORE(val, pdst, msa_sd)
+#else // !(__mips_isa_rev >= 6)
+MSA_STORE_FUNC(uint32_t, usw, msa_usw);
+#define SW(val, pdst) MSA_STORE(val, pdst, msa_usw)
+#define SD(val, pdst) \
+{ \
+ uint8_t* const pdstsd = (uint8_t*)(pdst); \
+ const uint32_t val0m = (uint32_t)(val & 0x00000000FFFFFFFF); \
+ const uint32_t val1m = (uint32_t)((val >> 32) & 0x00000000FFFFFFFF); \
+ SW(val0m, pdstsd); \
+ SW(val1m, pdstsd + 4); \
+}
+#endif // (__mips_isa_rev >= 6)
+#endif
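+
+/* Typical use is spilling part of a vector through a general register, e.g.
+ * writing the low 8 bytes of a result row (names are illustrative; the
+ * copy_s.d builtin is MIPS64-only):
+ *
+ *   uint64_t lo = __msa_copy_s_d((v2i64)res, 0);
+ *   SD(lo, dst);
+ */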
+
+/* Description : Load vectors of elements with stride
+ * Arguments : Inputs - psrc, stride
+ * Outputs - out0, out1
+ * Return Type - as per RTYPE
+ * Details : Load elements in 'out0' from (psrc)
+ * Load elements in 'out1' from (psrc + stride)
+ */
+#define LD_V2(RTYPE, psrc, stride, out0, out1) \
+{ \
+ out0 = LD_V(RTYPE, psrc); \
+ psrc += stride; \
+ out1 = LD_V(RTYPE, psrc); \
+ psrc += stride; \
+}
+#define LD_UB2(...) LD_V2(v16u8, __VA_ARGS__)
+#define LD_UH2(...) LD_V2(v8u16, __VA_ARGS__)
+#define LD_SP2(...) LD_V2(v4f32, __VA_ARGS__)
+
+#define LD_V3(RTYPE, psrc, stride, out0, out1, out2) \
+{ \
+ LD_V2(RTYPE, psrc, stride, out0, out1); \
+ out2 = LD_V(RTYPE, psrc); \
+ psrc += stride; \
+}
+#define LD_UB3(...) LD_V3(v16u8, __VA_ARGS__)
+#define LD_UH3(...) LD_V3(v8u16, __VA_ARGS__)
+
+#define LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \
+{ \
+ LD_V2(RTYPE, psrc, stride, out0, out1); \
+ LD_V2(RTYPE, psrc, stride, out2, out3); \
+}
+#define LD_UB4(...) LD_V4(v16u8, __VA_ARGS__)
+#define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__)
+#define LD_SP4(...) LD_V4(v4f32, __VA_ARGS__)
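+
+/* Example: gather four rows of 16 bytes each. Note LD_V2 advances 'psrc' as
+ * a side effect, so pass a local pointer copy (names are illustrative):
+ *
+ *   v16u8 r0, r1, r2, r3;
+ *   const uint8_t* p = src;
+ *   LD_UB4(p, stride, r0, r1, r2, r3);  // p ends up at src + 4 * stride
+ */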
+
+/* Description : Store vectors of elements with stride
+ * Arguments : Inputs - in0, in1, pdst, stride
+ * Details : Store elements from 'in0' to (pdst)
+ * Store elements from 'in1' to (pdst + stride)
+ */
+#define ST_V2(RTYPE, in0, in1, pdst, stride) \
+{ \
+ ST_V(RTYPE, in0, pdst); \
+ pdst += stride; \
+ ST_V(RTYPE, in1, pdst); \
+ pdst += stride; \
+}
+#define ST_UB2(...) ST_V2(v16u8, __VA_ARGS__)
+#define ST_UH2(...) ST_V2(v8u16, __VA_ARGS__)
+#define ST_SP2(...) ST_V2(v4f32, __VA_ARGS__)
+
+#define ST_V3(RTYPE, in0, in1, in2, pdst, stride) \
+{ \
+ ST_V2(RTYPE, in0, in1, pdst, stride); \
+ ST_V(RTYPE, in2, pdst); \
+ pdst += stride; \
+}
+#define ST_UB3(...) ST_V3(v16u8, __VA_ARGS__)
+#define ST_UH3(...) ST_V3(v8u16, __VA_ARGS__)
+
+#define ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \
+{ \
+ ST_V2(RTYPE, in0, in1, pdst, stride); \
+ ST_V2(RTYPE, in2, in3, pdst, stride); \
+}
+#define ST_UB4(...) ST_V4(v16u8, __VA_ARGS__)
+#define ST_UH4(...) ST_V4(v8u16, __VA_ARGS__)
+#define ST_SP4(...) ST_V4(v4f32, __VA_ARGS__)
+#define ST_V6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \
+{ \
+ ST_V3(RTYPE, in0, in1, in2, pdst, stride); \
+ ST_V3(RTYPE, in3, in4, in5, pdst, stride); \
+}
+#define ST_UB6(...) ST_V6(v16u8, __VA_ARGS__)
+#define ST_SP6(...) ST_V6(v4f32, __VA_ARGS__)
+
+#define ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
+{ \
+ ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride); \
+ ST_V4(RTYPE, in4, in5, in6, in7, pdst, stride); \
+}
+#define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__)
+#define ST_SP8(...) ST_V8(v4f32, __VA_ARGS__)
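+
+/* Mirror image of the LD_V* helpers; 'pdst' is likewise advanced as a side
+ * effect. Sketch (names are illustrative):
+ *
+ *   uint8_t* q = dst;
+ *   ST_UB4(r0, r1, r2, r3, q, stride);  // q ends up at dst + 4 * stride
+ */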
+
+/* Description : Interleave even halfword elements from vectors
+ * Arguments : Inputs - in0, in1, in2, in3
+ * Outputs - out0, out1
+ * Return Type - as per RTYPE
+ * Details : Even halfword elements of 'in0' and 'in1' are interleaved
+ * and written to 'out0'; even elements of 'in2' and 'in3' to 'out1'
+ */
+#define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \
+ out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \
+}
+#define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)
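+
+/* Sketch: pack the even 16-bit lanes of two vector pairs into two outputs
+ * (names are illustrative; inputs are reinterpreted as v8i16 internally):
+ *
+ *   v16u8 e0, e1;
+ *   ILVEV_H2_UB(a0, a1, a2, a3, e0, e1);
+ */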
+
+/* Description : Interleave both left and right halves of input vectors
+ * Arguments : Inputs - in0, in1
+ * Outputs - out0, out1
+ * Return Type - as per RTYPE
+ * Details : Right-half byte elements of 'in0' and 'in1' are interleaved
+ * and written to 'out0'; left-half elements go to 'out1'
+ */
+#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
+ out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
+}
+#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
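+
+/* A common use is widening bytes to halfwords by interleaving with a zero
+ * vector (a sketch; 'pix' is illustrative):
+ *
+ *   v16u8 zero = (v16u8)__msa_ldi_b(0);
+ *   v16u8 lo, hi;
+ *   ILVRL_B2_UB(zero, pix, lo, hi);  // zero-extends each byte (little-endian)
+ */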
+
+#endif // CommonMacrosMSA_h
