third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h - Issue 2340583003: Add MSA (MIPS SIMD Arch) optimized VectorMath functions

Side by Side Diff: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h

Issue 2340583003: Add MSA (MIPS SIMD Arch) optimized VectorMath functions (Closed)

Patch Set: coding consistency changes Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CommonMacrosMSA_h	5 #ifndef CommonMacrosMSA_h

6 #define CommonMacrosMSA_h	6 #define CommonMacrosMSA_h

7	7

8 #include <msa.h>	8 #include <msa.h>

9 #include <stdint.h>	9 #include <stdint.h>

10	10

11 #if defined(__clang__)	11 #if defined(__clang__)

12 #define CLANG_BUILD	12 #define CLANG_BUILD

13 #endif	13 #endif

14	14

	15 typedef union

	16 {

	17 int32_t intVal;

	18 float floatVal;

	19 }FloatInt;
	Raymond Toy 2016/10/06 15:42:48: Nit: "}FloatInt" -> "} FloatInt"
	Prashant.Patil 2016/10/07 08:08:06: Done.
	20

15 #ifdef CLANG_BUILD	21 #ifdef CLANG_BUILD

16 #define SRLI_B(a, b) __msa_srli_b((v16i8)a, b)	22 #define SRLI_B(a, b) __msa_srli_b((v16i8)a, b)

17 #define SRLI_H(a, b) __msa_srli_h((v8i16)a, b)	23 #define SRLI_H(a, b) __msa_srli_h((v8i16)a, b)

18 #define SLLI_B(a, b) __msa_slli_b((v16i8)a, b)	24 #define SLLI_B(a, b) __msa_slli_b((v16i8)a, b)

19 #define SLLI_H(a, b) __msa_slli_h((v8i16)a, b)	25 #define SLLI_H(a, b) __msa_slli_h((v8i16)a, b)

20 #define CEQI_B(a, b) __msa_ceqi_b((v16i8)a, b)	26 #define CEQI_B(a, b) __msa_ceqi_b((v16i8)a, b)

21 #define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b)	27 #define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b)

22 #define ANDI_B(a, b) __msa_andi_b((v16u8)a, b)	28 #define ANDI_B(a, b) __msa_andi_b((v16u8)a, b)

23 #else	29 #else

24 #define SRLI_B(a, b) ((v16u8)a >> b)	30 #define SRLI_B(a, b) ((v16u8)a >> b)

(...skipping 632 matching lines...)
657 { \	663 { \

658 out0 = in0 / in1; \	664 out0 = in0 / in1; \

659 out1 = in2 / in3; \	665 out1 = in2 / in3; \

660 }	666 }

661 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \	667 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \

662 { \	668 { \

663 DIV2(in0, in1, in2, in3, out0, out1); \	669 DIV2(in0, in1, in2, in3, out0, out1); \

664 DIV2(in4, in5, in6, in7, out2, out3); \	670 DIV2(in4, in5, in6, in7, out2, out3); \

665 }	671 }

666	672

	673 /* Description : Logical AND of 4 vectors with a common mask

	674 Arguments : Inputs - in0, in1, in2, in3, mask

	675 Outputs - in0, in1, in2, in3

	676 Details : Each element in 'in0' is logically AND'ed with mask

	677 Each element in 'in1' is logically AND'ed with mask

	678 Each element in 'in2' is logically AND'ed with mask

	679 Each element in 'in3' is logically AND'ed with mask

	680 */

	681 #define AND_W4(RTYPE, in0, in1, in2, in3, mask) \

	682 { \

	683 in0 = (RTYPE)((v16i8)in0 & (v16i8)mask); \

	684 in1 = (RTYPE)((v16i8)in1 & (v16i8)mask); \

	685 in2 = (RTYPE)((v16i8)in2 & (v16i8)mask); \

	686 in3 = (RTYPE)((v16i8)in3 & (v16i8)mask); \

	687 }

	688 #define AND_W4_SP(...) AND_W4(v4f32, __VA_ARGS__)

	689

	690 /* Description : Addition of 2 pairs of vectors

	691 Arguments : Inputs - in0, in1, in2, in3

	692 Outputs - out0, out1

	693 Details : Each element in 'in0' is added to 'in1' and result is written

	694 to 'out0'

	695 Each element in 'in2' is added to 'in3' and result is written

	696 to 'out1'

	697 */

	698 #define ADD2(in0, in1, in2, in3, out0, out1) \

	699 { \

	700 out0 = in0 + in1; \

	701 out1 = in2 + in3; \

	702 }

	703

	704 /* Description : Addition of 4 pairs of vectors

	705 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7

	706 Outputs - out0, out1, out2, out3

	707 Details : Each element in 'in0' is added to 'in1' and result is written

	708 to 'out0'

	709 Each element in 'in2' is added to 'in3' and result is written

	710 to 'out1'

	711 Each element in 'in4' is added to 'in5' and result is written

	712 to 'out2'

	713 Each element in 'in6' is added to 'in7' and result is written

	714 to 'out3'

	715 */

	716 #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \

	717 { \

	718 ADD2(in0, in1, in2, in3, out0, out1); \

	719 ADD2(in4, in5, in6, in7, out2, out3); \

	720 }

	721

667 /* Description : Vector Floating-Point Convert from Unsigned Integer	722 /* Description : Vector Floating-Point Convert from Unsigned Integer

668 Arguments : Inputs - in0, in1	723 Arguments : Inputs - in0, in1

669 Outputs - out0, out1	724 Outputs - out0, out1

670 Details :

671 */	725 */

672 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \	726 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \

673 { \	727 { \

674 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \	728 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \

675 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \	729 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \

676 }	730 }

677 #define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__)	731 #define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__)

678	732

	733 /* Description : Vector Floating-Point Convert from Unsigned Integer

	734 Arguments : Inputs - in0, in1, in2, in3

	735 Outputs - out0, out1, out2, out3

	736 */

679 #define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \	737 #define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \

680 { \	738 { \

681 FFINTU_W2(RTYPE, in0, in1, out0, out1); \	739 FFINTU_W2(RTYPE, in0, in1, out0, out1); \

682 FFINTU_W2(RTYPE, in2, in3, out2, out3); \	740 FFINTU_W2(RTYPE, in2, in3, out2, out3); \

683 }	741 }

684 #define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__)	742 #define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__)

685	743

686 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer	744 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer

687 Arguments : Inputs - in0, in1	745 Arguments : Inputs - in0, in1

688 Outputs - out0, out1	746 Outputs - out0, out1

689 Details :

690 */	747 */

691 #define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \	748 #define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \

692 { \	749 { \

693 out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \	750 out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \

694 out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \	751 out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \

695 }	752 }

696 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__)	753 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__)

697	754

	755 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer

	756 Arguments : Inputs - in0, in1, in2, in3

	757 Outputs - out0, out1, out2, out3

	758 */

698 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \	759 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \

699 { \	760 { \

700 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \	761 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \

701 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \	762 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \

702 }	763 }

703 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__)	764 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__)

704	765

	766 /* Description : Vector Floating-Point multiply with scale and accumulate

	767 Arguments : Inputs - in0, in1, in2, in3, out0, out1, out2, out3, scale

	768 Outputs - out0, out1, out2, out3

	769 */

	770 #define VSMA4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \

	771 { \

	772 out0 += in0 * scale; \

	773 out1 += in1 * scale; \

	774 out2 += in2 * scale; \

	775 out3 += in3 * scale; \

	776 }

	777

	778 /* Description : Vector Floating-Point multiply with scale

	779 Arguments : Inputs - in0, in1, in2, in3, scale

	780 Outputs - out0, out1, out2, out3

	781 */

	782 #define VSMUL4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \

	783 { \

	784 out0 = in0 * scale; \

	785 out1 = in1 * scale; \

	786 out2 = in2 * scale; \

	787 out3 = in3 * scale; \

	788 }

	789

	790 /* Description : Vector Floating-Point max value

	791 Arguments : Inputs - in0, in1, in2, in3, max

	792 Output - max

	793 */

	794 #define VMAX_W4(RTYPE, in0, in1, in2, in3, max) \

	795 { \

	796 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in0); \

	797 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in1); \

	798 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in2); \

	799 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in3); \

	800 }

	801 #define VMAX_W4_SP(...) VMAX_W4(v4f32, __VA_ARGS__)

	802

	803 /* Description : Vector Floating-Point clip to min max

	804 Arguments : Inputs - in0, in1, in2, in3, min, max

	805 Outputs - out0, out1, out2, out3

	806 */

	807 #define VCLIP4(in0, in1, in2, in3, min, max, out0, out1, out2, out3) \

	808 { \

	809 out0 = __msa_fmax_w(__msa_fmin_w(in0, max), min); \

	810 out1 = __msa_fmax_w(__msa_fmin_w(in1, max), min); \

	811 out2 = __msa_fmax_w(__msa_fmin_w(in2, max), min); \

	812 out3 = __msa_fmax_w(__msa_fmin_w(in3, max), min); \

	813 }

	814

705 #endif // CommonMacrosMSA_h	815 #endif // CommonMacrosMSA_h

OLD	NEW

« no previous file with comments | « third_party/WebKit/Source/platform/audio/VectorMath.cpp ('k') | no next file » | no next file with comments »