Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CommonMacrosMSA_h | 5 #ifndef CommonMacrosMSA_h |
| 6 #define CommonMacrosMSA_h | 6 #define CommonMacrosMSA_h |
| 7 | 7 |
| 8 #include <msa.h> | 8 #include <msa.h> |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| 11 #if defined(__clang__) | 11 #if defined(__clang__) |
| 12 #define CLANG_BUILD | 12 #define CLANG_BUILD |
| 13 #endif | 13 #endif |
| 14 | 14 |
| 15 typedef union | |
| 16 { | |
| 17 int32_t intVal; | |
| 18 float floatVal; | |
| 19 }FloatInt; | |
|
Raymond Toy
2016/10/06 15:42:48
Nit: "}FloatInt" -> "} FloatInt"
Prashant.Patil
2016/10/07 08:08:06
Done.
| |
| 20 | |
| 15 #ifdef CLANG_BUILD | 21 #ifdef CLANG_BUILD |
| 16 #define SRLI_B(a, b) __msa_srli_b((v16i8)a, b) | 22 #define SRLI_B(a, b) __msa_srli_b((v16i8)a, b) |
| 17 #define SRLI_H(a, b) __msa_srli_h((v8i16)a, b) | 23 #define SRLI_H(a, b) __msa_srli_h((v8i16)a, b) |
| 18 #define SLLI_B(a, b) __msa_slli_b((v16i8)a, b) | 24 #define SLLI_B(a, b) __msa_slli_b((v16i8)a, b) |
| 19 #define SLLI_H(a, b) __msa_slli_h((v8i16)a, b) | 25 #define SLLI_H(a, b) __msa_slli_h((v8i16)a, b) |
| 20 #define CEQI_B(a, b) __msa_ceqi_b((v16i8)a, b) | 26 #define CEQI_B(a, b) __msa_ceqi_b((v16i8)a, b) |
| 21 #define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b) | 27 #define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b) |
| 22 #define ANDI_B(a, b) __msa_andi_b((v16u8)a, b) | 28 #define ANDI_B(a, b) __msa_andi_b((v16u8)a, b) |
| 23 #else | 29 #else |
| 24 #define SRLI_B(a, b) ((v16u8)a >> b) | 30 #define SRLI_B(a, b) ((v16u8)a >> b) |
| (...skipping 632 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 657 { \ | 663 { \ |
| 658 out0 = in0 / in1; \ | 664 out0 = in0 / in1; \ |
| 659 out1 = in2 / in3; \ | 665 out1 = in2 / in3; \ |
| 660 } | 666 } |
| 661 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ | 667 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ |
| 662 { \ | 668 { \ |
| 663 DIV2(in0, in1, in2, in3, out0, out1); \ | 669 DIV2(in0, in1, in2, in3, out0, out1); \ |
| 664 DIV2(in4, in5, in6, in7, out2, out3); \ | 670 DIV2(in4, in5, in6, in7, out2, out3); \ |
| 665 } | 671 } |
| 666 | 672 |
| 673 /* Description : Logical AND of 4 vectors with a common mask | |
| 674 Arguments : Inputs - in0, in1, in2, in3, mask | |
| 675 Outputs - in0, in1, in2, in3 | |
| 676 Details : Each element in 'in0' is logically AND'ed with mask | |
| 677 Each element in 'in1' is logically AND'ed with mask | |
| 678 Each element in 'in2' is logically AND'ed with mask | |
| 679 Each element in 'in3' is logically AND'ed with mask | |
| 680 */ | |
| 681 #define AND_W4(RTYPE, in0, in1, in2, in3, mask) \ | |
| 682 { \ | |
| 683 in0 = (RTYPE)((v16i8)in0 & (v16i8)mask); \ | |
| 684 in1 = (RTYPE)((v16i8)in1 & (v16i8)mask); \ | |
| 685 in2 = (RTYPE)((v16i8)in2 & (v16i8)mask); \ | |
| 686 in3 = (RTYPE)((v16i8)in3 & (v16i8)mask); \ | |
| 687 } | |
| 688 #define AND_W4_SP(...) AND_W4(v4f32, __VA_ARGS__) | |
| 689 | |
| 690 /* Description : Addition of 2 pairs of vectors | |
| 691 Arguments : Inputs - in0, in1, in2, in3 | |
| 692 Outputs - out0, out1 | |
| 693 Details : Each element in 'in0' is added to 'in1' and result is written | |
| 694 to 'out0' | |
| 695 Each element in 'in2' is added to 'in3' and result is written | |
| 696 to 'out1' | |
| 697 */ | |
| 698 #define ADD2(in0, in1, in2, in3, out0, out1) \ | |
| 699 { \ | |
| 700 out0 = in0 + in1; \ | |
| 701 out1 = in2 + in3; \ | |
| 702 } | |
| 703 | |
| 704 /* Description : Addition of 4 pairs of vectors | |
| 705 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 | |
| 706 Outputs - out0, out1, out2, out3 | |
| 707 Details : Each element in 'in0' is added to 'in1' and result is written | |
| 708 to 'out0' | |
| 709 Each element in 'in2' is added to 'in3' and result is written | |
| 710 to 'out1' | |
| 711 Each element in 'in4' is added to 'in5' and result is written | |
| 712 to 'out2' | |
| 713 Each element in 'in6' is added to 'in7' and result is written | |
| 714 to 'out3' | |
| 715 */ | |
| 716 #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ | |
| 717 { \ | |
| 718 ADD2(in0, in1, in2, in3, out0, out1); \ | |
| 719 ADD2(in4, in5, in6, in7, out2, out3); \ | |
| 720 } | |
| 721 | |
| 667 /* Description : Vector Floating-Point Convert from Unsigned Integer | 722 /* Description : Vector Floating-Point Convert from Unsigned Integer |
| 668 Arguments : Inputs - in0, in1 | 723 Arguments : Inputs - in0, in1 |
| 669 Outputs - out0, out1 | 724 Outputs - out0, out1 |
| 670 Details : | |
| 671 */ | 725 */ |
| 672 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \ | 726 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \ |
| 673 { \ | 727 { \ |
| 674 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \ | 728 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \ |
| 675 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \ | 729 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \ |
| 676 } | 730 } |
| 677 #define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__) | 731 #define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__) |
| 678 | 732 |
| 733 /* Description : Vector Floating-Point Convert from Unsigned Integer | |
| 734 Arguments : Inputs - in0, in1, in2, in3 | |
| 735 Outputs - out0, out1, out2, out3 | |
| 736 */ | |
| 679 #define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ | 737 #define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ |
| 680 { \ | 738 { \ |
| 681 FFINTU_W2(RTYPE, in0, in1, out0, out1); \ | 739 FFINTU_W2(RTYPE, in0, in1, out0, out1); \ |
| 682 FFINTU_W2(RTYPE, in2, in3, out2, out3); \ | 740 FFINTU_W2(RTYPE, in2, in3, out2, out3); \ |
| 683 } | 741 } |
| 684 #define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__) | 742 #define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__) |
| 685 | 743 |
| 686 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer | 744 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer |
| 687 Arguments : Inputs - in0, in1 | 745 Arguments : Inputs - in0, in1 |
| 688 Outputs - out0, out1 | 746 Outputs - out0, out1 |
| 689 Details : | |
| 690 */ | 747 */ |
| 691 #define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \ | 748 #define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \ |
| 692 { \ | 749 { \ |
| 693 out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \ | 750 out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \ |
| 694 out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \ | 751 out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \ |
| 695 } | 752 } |
| 696 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__) | 753 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__) |
| 697 | 754 |
| 755 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer | |
| 756 Arguments : Inputs - in0, in1, in2, in3 | |
| 757 Outputs - out0, out1, out2, out3 | |
| 758 */ | |
| 698 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ | 759 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ |
| 699 { \ | 760 { \ |
| 700 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \ | 761 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \ |
| 701 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \ | 762 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \ |
| 702 } | 763 } |
| 703 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__) | 764 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__) |
| 704 | 765 |
| 766 /* Description : Vector Floating-Point multiply with scale and accumulate | |
| 767 Arguments : Inputs - in0, in1, in2, in3, out0, out1, out2, out3, scale | |
| 768 Outputs - out0, out1, out2, out3 | |
| 769 */ | |
| 770 #define VSMA4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \ | |
| 771 { \ | |
| 772 out0 += in0 * scale; \ | |
| 773 out1 += in1 * scale; \ | |
| 774 out2 += in2 * scale; \ | |
| 775 out3 += in3 * scale; \ | |
| 776 } | |
| 777 | |
| 778 /* Description : Vector Floating-Point multiply with scale | |
| 779 Arguments : Inputs - in0, in1, in2, in3, scale | |
| 780 Outputs - out0, out1, out2, out3 | |
| 781 */ | |
| 782 #define VSMUL4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \ | |
| 783 { \ | |
| 784 out0 = in0 * scale; \ | |
| 785 out1 = in1 * scale; \ | |
| 786 out2 = in2 * scale; \ | |
| 787 out3 = in3 * scale; \ | |
| 788 } | |
| 789 | |
| 790 /* Description : Vector Floating-Point max value | |
| 791 Arguments : Inputs - in0, in1, in2, in3, max | |
| 792 Output - max | |
| 793 */ | |
| 794 #define VMAX_W4(RTYPE, in0, in1, in2, in3, max) \ | |
| 795 { \ | |
| 796 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in0); \ | |
| 797 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in1); \ | |
| 798 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in2); \ | |
| 799 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in3); \ | |
| 800 } | |
| 801 #define VMAX_W4_SP(...) VMAX_W4(v4f32, __VA_ARGS__) | |
| 802 | |
| 803 /* Description : Vector Floating-Point clip to min max | |
| 804 Arguments : Inputs - in0, in1, in2, in3, min, max | |
| 805 Outputs - out0, out1, out2, out3 | |
| 806 */ | |
| 807 #define VCLIP4(in0, in1, in2, in3, min, max, out0, out1, out2, out3) \ | |
| 808 { \ | |
| 809 out0 = __msa_fmax_w(__msa_fmin_w(in0, max), min); \ | |
| 810 out1 = __msa_fmax_w(__msa_fmin_w(in1, max), min); \ | |
| 811 out2 = __msa_fmax_w(__msa_fmin_w(in2, max), min); \ | |
| 812 out3 = __msa_fmax_w(__msa_fmin_w(in3, max), min); \ | |
| 813 } | |
| 814 | |
| 705 #endif // CommonMacrosMSA_h | 815 #endif // CommonMacrosMSA_h |
| OLD | NEW |