third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h - Issue 2340583003: Add MSA (MIPS SIMD Arch) optimized VectorMath functions

Side by Side Diff: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h

Issue 2340583003: Add MSA (MIPS SIMD Arch) optimized VectorMath functions (Closed)

Patch Set: formatting changes Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CommonMacrosMSA_h	5 #ifndef CommonMacrosMSA_h

6 #define CommonMacrosMSA_h	6 #define CommonMacrosMSA_h

7	7

8 #include <msa.h>	8 #include <msa.h>

9 #include <stdint.h>	9 #include <stdint.h>

10	10

11 #if defined(__clang__)	11 #if defined(__clang__)

12 #define CLANG_BUILD	12 #define CLANG_BUILD

13 #endif	13 #endif

14	14

	15 typedef union {

	16 int32_t intVal;

	17 float floatVal;

	18 } FloatInt;

	19

15 #ifdef CLANG_BUILD	20 #ifdef CLANG_BUILD

16 #define SRLI_B(a, b) __msa_srli_b((v16i8)a, b)	21 #define SRLI_B(a, b) __msa_srli_b((v16i8)a, b)

17 #define SRLI_H(a, b) __msa_srli_h((v8i16)a, b)	22 #define SRLI_H(a, b) __msa_srli_h((v8i16)a, b)

18 #define SLLI_B(a, b) __msa_slli_b((v16i8)a, b)	23 #define SLLI_B(a, b) __msa_slli_b((v16i8)a, b)

19 #define SLLI_H(a, b) __msa_slli_h((v8i16)a, b)	24 #define SLLI_H(a, b) __msa_slli_h((v8i16)a, b)

20 #define CEQI_B(a, b) __msa_ceqi_b((v16i8)a, b)	25 #define CEQI_B(a, b) __msa_ceqi_b((v16i8)a, b)

21 #define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b)	26 #define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b)

22 #define ANDI_B(a, b) __msa_andi_b((v16u8)a, b)	27 #define ANDI_B(a, b) __msa_andi_b((v16u8)a, b)

23 #else	28 #else

24 #define SRLI_B(a, b) ((v16u8)a >> b)	29 #define SRLI_B(a, b) ((v16u8)a >> b)

(...skipping 632 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
657 { \	662 { \

658 out0 = in0 / in1; \	663 out0 = in0 / in1; \

659 out1 = in2 / in3; \	664 out1 = in2 / in3; \

660 }	665 }

661 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \	666 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \

662 { \	667 { \

663 DIV2(in0, in1, in2, in3, out0, out1); \	668 DIV2(in0, in1, in2, in3, out0, out1); \

664 DIV2(in4, in5, in6, in7, out2, out3); \	669 DIV2(in4, in5, in6, in7, out2, out3); \

665 }	670 }

666	671

	672 /* Description : Logical AND of 4 vectors with a common mask

	673 Arguments : Inputs - in0, in1, in2, in3, mask

	674 Outputs - in0, in1, in2, in3

	675 Details : Each element in 'in0' is logically AND'ed with mask

	676 Each element in 'in1' is logically AND'ed with mask

	677 Each element in 'in2' is logically AND'ed with mask

	678 Each element in 'in3' is logically AND'ed with mask

	679 */

	680 #define AND_W4(RTYPE, in0, in1, in2, in3, mask) \

	681 { \

	682 in0 = (RTYPE)((v16i8)in0 & (v16i8)mask); \

	683 in1 = (RTYPE)((v16i8)in1 & (v16i8)mask); \

	684 in2 = (RTYPE)((v16i8)in2 & (v16i8)mask); \

	685 in3 = (RTYPE)((v16i8)in3 & (v16i8)mask); \

	686 }

	687 #define AND_W4_SP(...) AND_W4(v4f32, __VA_ARGS__)

	688

	689 /* Description : Addition of 2 pairs of vectors

	690 Arguments : Inputs - in0, in1, in2, in3

	691 Outputs - out0, out1

	692 Details : Each element in 'in0' is added to 'in1' and result is written

	693 to 'out0'

	694 Each element in 'in2' is added to 'in3' and result is written

	695 to 'out1'

	696 */

	697 #define ADD2(in0, in1, in2, in3, out0, out1) \

	698 { \

	699 out0 = in0 + in1; \

	700 out1 = in2 + in3; \

	701 }

	702

	703 /* Description : Addition of 4 pairs of vectors

	704 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7

	705 Outputs - out0, out1, out2, out3

	706 Details : Each element in 'in0' is added to 'in1' and result is written

	707 to 'out0'

	708 Each element in 'in2' is added to 'in3' and result is written

	709 to 'out1'

	710 Each element in 'in4' is added to 'in5' and result is written

	711 to 'out2'

	712 Each element in 'in6' is added to 'in7' and result is written

	713 to 'out3'

	714 */

	715 #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \

	716 { \

	717 ADD2(in0, in1, in2, in3, out0, out1); \

	718 ADD2(in4, in5, in6, in7, out2, out3); \

	719 }

	720

667 /* Description : Vector Floating-Point Convert from Unsigned Integer	721 /* Description : Vector Floating-Point Convert from Unsigned Integer

668 Arguments : Inputs - in0, in1	722 Arguments : Inputs - in0, in1

669 Outputs - out0, out1	723 Outputs - out0, out1

670 Details :

671 */	724 */

672 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \	725 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \

673 { \	726 { \

674 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \	727 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \

675 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \	728 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \

676 }	729 }

677 #define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__)	730 #define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__)

678	731

	732 /* Description : Vector Floating-Point Convert from Unsigned Integer

	733 Arguments : Inputs - in0, in1, in2, in3

	734 Outputs - out0, out1, out2, out3

	735 */

679 #define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \	736 #define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \

680 { \	737 { \

681 FFINTU_W2(RTYPE, in0, in1, out0, out1); \	738 FFINTU_W2(RTYPE, in0, in1, out0, out1); \

682 FFINTU_W2(RTYPE, in2, in3, out2, out3); \	739 FFINTU_W2(RTYPE, in2, in3, out2, out3); \

683 }	740 }

684 #define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__)	741 #define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__)

685	742

686 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer	743 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer

687 Arguments : Inputs - in0, in1	744 Arguments : Inputs - in0, in1

688 Outputs - out0, out1	745 Outputs - out0, out1

689 Details :

690 */	746 */

691 #define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \	747 #define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \

692 { \	748 { \

693 out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \	749 out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \

694 out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \	750 out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \

695 }	751 }

696 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__)	752 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__)

697	753

	754 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer

	755 Arguments : Inputs - in0, in1, in2, in3

	756 Outputs - out0, out1, out2, out3

	757 */

698 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \	758 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \

699 { \	759 { \

700 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \	760 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \

701 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \	761 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \

702 }	762 }

703 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__)	763 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__)

704	764

	765 /* Description : Vector Floating-Point multiply with scale and accumulate

	766 Arguments : Inputs - in0, in1, in2, in3, out0, out1, out2, out3, scale

	767 Outputs - out0, out1, out2, out3

	768 */

	769 #define VSMA4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \

	770 { \

	771 out0 += in0 * scale; \

	772 out1 += in1 * scale; \

	773 out2 += in2 * scale; \

	774 out3 += in3 * scale; \

	775 }

	776

	777 /* Description : Vector Floating-Point multiply with scale

	778 Arguments : Inputs - in0, in1, in2, in3, scale

	779 Outputs - out0, out1, out2, out3

	780 */

	781 #define VSMUL4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \

	782 { \

	783 out0 = in0 * scale; \

	784 out1 = in1 * scale; \

	785 out2 = in2 * scale; \

	786 out3 = in3 * scale; \

	787 }

	788

	789 /* Description : Vector Floating-Point max value

	790 Arguments : Inputs - in0, in1, in2, in3, max

	791 Output - max

	792 */

	793 #define VMAX_W4(RTYPE, in0, in1, in2, in3, max) \

	794 { \

	795 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in0); \

	796 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in1); \

	797 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in2); \

	798 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in3); \

	799 }

	800 #define VMAX_W4_SP(...) VMAX_W4(v4f32, __VA_ARGS__)

	801

	802 /* Description : Vector Floating-Point clip to min max

	803 Arguments : Inputs - in0, in1, in2, in3, min, max

	804 Outputs - out0, out1, out2, out3

	805 */

	806 #define VCLIP4(in0, in1, in2, in3, min, max, out0, out1, out2, out3) \

	807 { \

	808 out0 = __msa_fmax_w(__msa_fmin_w(in0, max), min); \

	809 out1 = __msa_fmax_w(__msa_fmin_w(in1, max), min); \

	810 out2 = __msa_fmax_w(__msa_fmin_w(in2, max), min); \

	811 out3 = __msa_fmax_w(__msa_fmin_w(in3, max), min); \

	812 }

	813

705 #endif // CommonMacrosMSA_h	814 #endif // CommonMacrosMSA_h

OLD	NEW

« no previous file with comments | « third_party/WebKit/Source/platform/audio/VectorMath.cpp ('k') | no next file » | no next file with comments »