Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CommonMacrosMSA_h | 5 #ifndef CommonMacrosMSA_h |
| 6 #define CommonMacrosMSA_h | 6 #define CommonMacrosMSA_h |
| 7 | 7 |
| 8 #include <msa.h> | 8 #include <msa.h> |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| (...skipping 646 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 657 { \ | 657 { \ |
| 658 out0 = in0 / in1; \ | 658 out0 = in0 / in1; \ |
| 659 out1 = in2 / in3; \ | 659 out1 = in2 / in3; \ |
| 660 } | 660 } |
| 661 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ | 661 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ |
| 662 { \ | 662 { \ |
| 663 DIV2(in0, in1, in2, in3, out0, out1); \ | 663 DIV2(in0, in1, in2, in3, out0, out1); \ |
| 664 DIV2(in4, in5, in6, in7, out2, out3); \ | 664 DIV2(in4, in5, in6, in7, out2, out3); \ |
| 665 } | 665 } |
| 666 | 666 |
| 667 /* Description : Bitwise AND of 4 vectors with a common mask | |
| 668 Arguments : Inputs - in0, in1, in2, in3, mask | |
| 669 Outputs - in0, in1, in2, in3 | |
| 670 Details : Each element in 'in0' is bitwise AND'ed with mask | |
| 671 Each element in 'in1' is bitwise AND'ed with mask | |
| 672 Each element in 'in2' is bitwise AND'ed with mask | |
| 673 Each element in 'in3' is bitwise AND'ed with mask | |
| 674 */ | |
| 675 #define AND_W4(RTYPE, in0, in1, in2, in3, mask) \ | |
| 676 { \ | |
| 677 in0 = (RTYPE)((v16i8)in0 & (v16i8)mask); \ | |
| 678 in1 = (RTYPE)((v16i8)in1 & (v16i8)mask); \ | |
| 679 in2 = (RTYPE)((v16i8)in2 & (v16i8)mask); \ | |
| 680 in3 = (RTYPE)((v16i8)in3 & (v16i8)mask); \ | |
| 681 } | |
| 682 #define AND_W4_SP(...) AND_W4(v4f32, __VA_ARGS__) | |
| 683 | |
| 684 /* Description : Addition of 2 pairs of vectors | |
| 685 Arguments : Inputs - in0, in1, in2, in3 | |
| 686 Outputs - out0, out1 | |
| 687 Details : Each element in 'in0' is added to 'in1' and result is written | |
| 688 to 'out0' | |
| 689 Each element in 'in2' is added to 'in3' and result is written | |
| 690 to 'out1' | |
| 691 */ | |
| 692 #define ADD2(in0, in1, in2, in3, out0, out1) \ | |
| 693 { \ | |
| 694 out0 = in0 + in1; \ | |
| 695 out1 = in2 + in3; \ | |
| 696 } | |
| 697 | |
| 698 /* Description : Addition of 4 pairs of vectors | |
| 699 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 | |
| 700 Outputs - out0, out1, out2, out3 | |
| 701 Details : Each element in 'in0' is added to 'in1' and result is written | |
| 702 to 'out0' | |
| 703 Each element in 'in2' is added to 'in3' and result is written | |
| 704 to 'out1' | |
| 705 Each element in 'in4' is added to 'in5' and result is written | |
| 706 to 'out2' | |
| 707 Each element in 'in6' is added to 'in7' and result is written | |
| 708 to 'out3' | |
| 709 */ | |
| 710 #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ | |
| 711 { \ | |
| 712 ADD2(in0, in1, in2, in3, out0, out1); \ | |
| 713 ADD2(in4, in5, in6, in7, out2, out3); \ | |
| 714 } | |
| 715 | |
| 667 /* Description : Vector Floating-Point Convert from Unsigned Integer | 716 /* Description : Vector Floating-Point Convert from Unsigned Integer |
| 668 Arguments : Inputs - in0, in1 | 717 Arguments : Inputs - in0, in1 |
| 669 Outputs - out0, out1 | 718 Outputs - out0, out1 |
| 670 Details : | |
| 671 */ | 719 */ |
| 672 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \ | 720 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \ |
| 673 { \ | 721 { \ |
| 674 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \ | 722 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \ |
| 675 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \ | 723 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \ |
| 676 } | 724 } |
| 677 #define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__) | 725 #define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__) |
| 678 | 726 |
| 727 /* Description : Vector Floating-Point Convert from Unsigned Integer | |
| 728 Arguments : Inputs - in0, in1, in2, in3 | |
| 729 Outputs - out0, out1, out2, out3 | |
| 730 */ | |
| 679 #define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ | 731 #define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ |
| 680 { \ | 732 { \ |
| 681 FFINTU_W2(RTYPE, in0, in1, out0, out1); \ | 733 FFINTU_W2(RTYPE, in0, in1, out0, out1); \ |
| 682 FFINTU_W2(RTYPE, in2, in3, out2, out3); \ | 734 FFINTU_W2(RTYPE, in2, in3, out2, out3); \ |
| 683 } | 735 } |
| 684 #define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__) | 736 #define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__) |
| 685 | 737 |
| 686 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer | 738 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer |
| 687 Arguments : Inputs - in0, in1 | 739 Arguments : Inputs - in0, in1 |
| 688 Outputs - out0, out1 | 740 Outputs - out0, out1 |
| 689 Details : | |
| 690 */ | 741 */ |
| 691 #define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \ | 742 #define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \ |
| 692 { \ | 743 { \ |
| 693 out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \ | 744 out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \ |
| 694 out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \ | 745 out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \ |
| 695 } | 746 } |
| 696 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__) | 747 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__) |
| 697 | 748 |
| 749 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer | |
| 750 Arguments : Inputs - in0, in1, in2, in3 | |
| 751 Outputs - out0, out1, out2, out3 | |
| 752 */ | |
| 698 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ | 753 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ |
| 699 { \ | 754 { \ |
| 700 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \ | 755 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \ |
| 701 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \ | 756 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \ |
| 702 } | 757 } |
| 703 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__) | 758 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__) |
| 704 | 759 |
| 760 /* Description : Vector Floating-Point multiply with scale and accumulate | |
| 761 Arguments : Inputs - in0, in1, in2, in3, out0, out1, out2, out3, scale | |
| 762 Outputs - out0, out1, out2, out3 | |
| 763 */ | |
| 764 #define VSMA4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \ | |
| 765 { \ | |
| 766 out0 += in0 * scale; \ | |
| 767 out1 += in1 * scale; \ | |
| 768 out2 += in2 * scale; \ | |
| 769 out3 += in3 * scale; \ | |
| 770 } | |
| 771 | |
| 772 /* Description : Vector Floating-Point multiply with scale | |
| 773 Arguments : Inputs - in0, in1, in2, in3, scale | |
| 774 Outputs - out0, out1, out2, out3 | |
| 775 */ | |
| 776 #define VSMUL4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \ | |
| 777 { \ | |
| 778 out0 = in0 * scale; \ | |
| 779 out1 = in1 * scale; \ | |
| 780 out2 = in2 * scale; \ | |
| 781 out3 = in3 * scale; \ | |
| 782 } | |
| 783 | |
| 784 /* Description : Vector Floating-Point max value | |
| 785 Arguments : Inputs - in0, in1, in2, in3, max | |
|
Raymond Toy
2016/10/05 17:26:53
Nit: the dash following Inputs and Output doesn't
Prashant.Patil
2016/10/06 08:27:35
Done.
| |
| 786 Output - max | |
| 787 */ | |
| 788 #define VMAX_W4(RTYPE, in0, in1, in2, in3, max) \ | |
| 789 { \ | |
| 790 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in0); \ | |
| 791 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in1); \ | |
| 792 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in2); \ | |
| 793 max = (RTYPE)__msa_fmax_w((v4f32)max, (v4f32)in3); \ | |
| 794 } | |
| 795 #define VMAX_W4_SP(...) VMAX_W4(v4f32, __VA_ARGS__) | |
| 796 | |
| 797 /* Description : Vector Floating-Point clip to min max | |
| 798 Arguments : Inputs - in0, in1, in2, in3, min, max | |
| 799 Outputs - out0, out1, out2, out3 | |
| 800 */ | |
| 801 #define VCLIP4(in0, in1, in2, in3, min, max, out0, out1, out2, out3) \ | |
| 802 { \ | |
| 803 out0 = __msa_fmax_w(__msa_fmin_w(in0, max), min); \ | |
| 804 out1 = __msa_fmax_w(__msa_fmin_w(in1, max), min); \ | |
| 805 out2 = __msa_fmax_w(__msa_fmin_w(in2, max), min); \ | |
| 806 out3 = __msa_fmax_w(__msa_fmin_w(in3, max), min); \ | |
| 807 } | |
| 808 | |
| 809 /* Description : Floating-Point to integer conversion | |
| 810 Arguments : Input - in | |
| 811 Details : It gets float value 'in' as input and returns corresponding | |
| 812 integer value 'out' (the float's bits reinterpreted via a union) | |
| 813 */ | |
| 814 typedef union | |
| 815 { | |
| 816 int32_t intVal; | |
| 817 float floatVal; | |
| 818 }FloatInt; | |
| 819 | |
| 820 #define FLOAT2INT(in) ( { \ | |
| 821 int32_t out; \ | |
| 822 FloatInt val; \ | |
| 823 val.floatVal = in; \ | |
| 824 out = val.intVal; \ | |
| 825 out; \ | |
| 826 } ) | |
| 827 | |
| 705 #endif // CommonMacrosMSA_h | 828 #endif // CommonMacrosMSA_h |
| OLD | NEW |