Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(224)

Side by Side Diff: runtime/vm/assembler_ia32_test.cc

Issue 12223115: SSE Assembler + Linux build fixes (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: -msse2 Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/assembler_ia32.cc ('k') | runtime/vm/assembler_x64.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" 5 #include "vm/globals.h"
6 #if defined(TARGET_ARCH_IA32) 6 #if defined(TARGET_ARCH_IA32)
7 7
8 #include "vm/assembler.h" 8 #include "vm/assembler.h"
9 #include "vm/os.h" 9 #include "vm/os.h"
10 #include "vm/unit_test.h" 10 #include "vm/unit_test.h"
(...skipping 816 matching lines...) Expand 10 before | Expand all | Expand 10 after
827 } 827 }
828 828
829 829
830 ASSEMBLER_TEST_RUN(SingleFPOperations, entry) { 830 ASSEMBLER_TEST_RUN(SingleFPOperations, entry) {
831 typedef float (*SingleFPOperationsCode)(); 831 typedef float (*SingleFPOperationsCode)();
832 float res = reinterpret_cast<SingleFPOperationsCode>(entry)(); 832 float res = reinterpret_cast<SingleFPOperationsCode>(entry)();
833 EXPECT_FLOAT_EQ(14.7f, res, 0.001f); 833 EXPECT_FLOAT_EQ(14.7f, res, 0.001f);
834 } 834 }
835 835
836 836
837 ASSEMBLER_TEST_GENERATE(PackedFPOperations, assembler) {  // Emits code that chains packed add/mul/sub/div on 12.3f and 3.4f.
838 __ movl(EAX, Immediate(bit_cast<int32_t, float>(12.3f)));  // EAX = raw bits of 12.3f.
839 __ movd(XMM0, EAX);
840 __ shufps(XMM0, XMM0, Immediate(0x0));  // Replicate lane 0 (12.3f) into all 4 lanes.
841 __ movl(EAX, Immediate(bit_cast<int32_t, float>(3.4f)));  // EAX = raw bits of 3.4f.
842 __ movd(XMM1, EAX);
843 __ shufps(XMM1, XMM1, Immediate(0x0));  // Replicate lane 0 (3.4f) into all 4 lanes.
844 __ addps(XMM0, XMM1); // 12.3f + 3.4f = 15.7f
845 __ mulps(XMM0, XMM1); // 15.7f * 3.4f = 53.38f
846 __ subps(XMM0, XMM1); // 53.38f - 3.4f = 49.98f
847 __ divps(XMM0, XMM1); // 49.98f / 3.4f = 14.7f
848 __ shufps(XMM0, XMM0, Immediate(0x55)); // Copy second lane into all 4 lanes, so the value read below comes from a non-low lane.
849 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
850 // Copy the low lane at ESP.
851 __ movss(Address(ESP, 0), XMM0);
852 __ flds(Address(ESP, 0));  // Load the spilled float onto the x87 stack; presumably this is how the float result reaches the runner.
853 __ popl(EAX);  // Release the stack word reserved above.
854 __ ret();
855 }
856
857
858 ASSEMBLER_TEST_RUN(PackedFPOperations, entry) {  // Calls the generated PackedFPOperations code and checks its float result.
859 typedef float (*PackedFPOperationsCode)();  // The generated code is called with no arguments and returns a float.
860 float res = reinterpret_cast<PackedFPOperationsCode>(entry)();
861 EXPECT_FLOAT_EQ(14.7f, res, 0.001f);  // ((12.3 + 3.4) * 3.4 - 3.4) / 3.4 = 14.7, compared with a 0.001 tolerance.
862 }
863
864
865 ASSEMBLER_TEST_GENERATE(PackedFPOperations2, assembler) {  // Emits code comparing sqrt(reciprocal(4.0f)) against rsqrt(4.0f).
866 __ movl(EAX, Immediate(bit_cast<int32_t, float>(4.0f)));  // EAX = raw bits of 4.0f.
867 __ movd(XMM0, EAX);
868 __ shufps(XMM0, XMM0, Immediate(0x0));  // Replicate lane 0 (4.0f) into all 4 lanes.
869
870 __ movaps(XMM1, XMM0); // Copy XMM0
871 __ reciprocalps(XMM1); // 1 / 4.0 = 0.25
872 __ sqrtps(XMM1); // sqrt(0.25) = 0.5
873 __ rsqrtps(XMM0); // 1 / sqrt(4.0) = ~0.5
874 __ subps(XMM0, XMM1); // ~0.5 - 0.5 = ~0.0
875 __ shufps(XMM0, XMM0, Immediate(0x00)); // Copy first (lowest) lane into all 4 lanes.
876 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
877 // Copy the low lane at ESP.
878 __ movss(Address(ESP, 0), XMM0);
879 __ flds(Address(ESP, 0));  // Load the spilled float onto the x87 stack; presumably this is how the float result reaches the runner.
880 __ popl(EAX);  // Release the stack word reserved above.
881 __ ret();
882 }
883
884
885 ASSEMBLER_TEST_RUN(PackedFPOperations2, entry) {  // Calls the generated PackedFPOperations2 code and checks its float result.
886 typedef float (*PackedFPOperations2Code)();  // The generated code is called with no arguments and returns a float.
887 float res = reinterpret_cast<PackedFPOperations2Code>(entry)();
888 EXPECT_FLOAT_EQ(0.0f, res, 0.001f);  // rsqrt(4.0) - sqrt(1/4.0) should be ~0; the 0.001 tolerance absorbs the approximation error.
889 }
890
891
892 ASSEMBLER_TEST_GENERATE(PackedCompareEQ, assembler) {  // Emits code comparing 2.0f against 4.0f with cmppseq and returning one lane of the mask.
893 __ set1ps(XMM0, EAX, Immediate(bit_cast<int32_t, float>(2.0f)));  // Presumably fills every lane of XMM0 with 2.0f via EAX - confirm against set1ps.
894 __ set1ps(XMM1, EAX, Immediate(bit_cast<int32_t, float>(4.0f)));  // Likewise for XMM1 with 4.0f.
895 __ cmppseq(XMM0, XMM1);  // XMM0 receives the per-lane compare mask.
896 // Copy the low lane at ESP.
897 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
898 __ movss(Address(ESP, 0), XMM0);
899 __ flds(Address(ESP, 0));  // NOTE(review): the runner reads a uint32_t result, yet this also pushes the lane onto the x87 stack - confirm this is intended.
900 __ popl(EAX);  // EAX receives the lane bits stored by movss.
901 __ ret();
902 }
903
904
905 ASSEMBLER_TEST_RUN(PackedCompareEQ, entry) {  // Calls the generated PackedCompareEQ code and checks the returned mask lane.
906 typedef uint32_t (*PackedCompareEQCode)();  // The result is read as raw 32-bit mask bits, not as a float.
907 uint32_t res = reinterpret_cast<PackedCompareEQCode>(entry)();
908 EXPECT_EQ(static_cast<uword>(0x0), res);  // 2.0f == 4.0f is false, so the lane is expected to be all zero bits.
909 }
910
911
912 ASSEMBLER_TEST_GENERATE(PackedCompareNEQ, assembler) {  // Emits code comparing 2.0f against 4.0f with cmppsneq and returning one lane of the mask.
913 __ set1ps(XMM0, EAX, Immediate(bit_cast<int32_t, float>(2.0f)));  // Presumably fills every lane of XMM0 with 2.0f via EAX - confirm against set1ps.
914 __ set1ps(XMM1, EAX, Immediate(bit_cast<int32_t, float>(4.0f)));  // Likewise for XMM1 with 4.0f.
915 __ cmppsneq(XMM0, XMM1);  // XMM0 receives the per-lane compare mask.
916 // Copy the low lane at ESP.
917 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
918 __ movss(Address(ESP, 0), XMM0);
919 __ flds(Address(ESP, 0));  // NOTE(review): the runner reads a uint32_t result, yet this also pushes the lane onto the x87 stack - confirm this is intended.
920 __ popl(EAX);  // EAX receives the lane bits stored by movss.
921 __ ret();
922 }
923
924
925 ASSEMBLER_TEST_RUN(PackedCompareNEQ, entry) {  // Calls the generated PackedCompareNEQ code and checks the returned mask lane.
926 typedef uint32_t (*PackedCompareNEQCode)();  // The result is read as raw 32-bit mask bits, not as a float.
927 uint32_t res = reinterpret_cast<PackedCompareNEQCode>(entry)();
928 EXPECT_EQ(static_cast<uword>(0xFFFFFFFF), res);  // 2.0f != 4.0f is true, so the lane is expected to be all one bits.
929 }
930
931
932 ASSEMBLER_TEST_GENERATE(PackedCompareLT, assembler) {  // Emits code comparing 2.0f against 4.0f with cmppslt and returning one lane of the mask.
933 __ set1ps(XMM0, EAX, Immediate(bit_cast<int32_t, float>(2.0f)));  // Presumably fills every lane of XMM0 with 2.0f via EAX - confirm against set1ps.
934 __ set1ps(XMM1, EAX, Immediate(bit_cast<int32_t, float>(4.0f)));  // Likewise for XMM1 with 4.0f.
935 __ cmppslt(XMM0, XMM1);  // XMM0 receives the per-lane compare mask.
936 // Copy the low lane at ESP.
937 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
938 __ movss(Address(ESP, 0), XMM0);
939 __ flds(Address(ESP, 0));  // NOTE(review): the runner reads a uint32_t result, yet this also pushes the lane onto the x87 stack - confirm this is intended.
940 __ popl(EAX);  // EAX receives the lane bits stored by movss.
941 __ ret();
942 }
943
944
945 ASSEMBLER_TEST_RUN(PackedCompareLT, entry) {  // Calls the generated PackedCompareLT code and checks the returned mask lane.
946 typedef uint32_t (*PackedCompareLTCode)();  // The result is read as raw 32-bit mask bits, not as a float.
947 uint32_t res = reinterpret_cast<PackedCompareLTCode>(entry)();
948 EXPECT_EQ(static_cast<uword>(0xFFFFFFFF), res);  // 2.0f < 4.0f is true, so the lane is expected to be all one bits.
949 }
950
951
952 ASSEMBLER_TEST_GENERATE(PackedCompareLE, assembler) {  // Emits code comparing 2.0f against 4.0f with cmppsle and returning one lane of the mask.
953 __ set1ps(XMM0, EAX, Immediate(bit_cast<int32_t, float>(2.0f)));  // Presumably fills every lane of XMM0 with 2.0f via EAX - confirm against set1ps.
954 __ set1ps(XMM1, EAX, Immediate(bit_cast<int32_t, float>(4.0f)));  // Likewise for XMM1 with 4.0f.
955 __ cmppsle(XMM0, XMM1);  // XMM0 receives the per-lane compare mask.
956 // Copy the low lane at ESP.
957 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
958 __ movss(Address(ESP, 0), XMM0);
959 __ flds(Address(ESP, 0));  // NOTE(review): the runner reads a uint32_t result, yet this also pushes the lane onto the x87 stack - confirm this is intended.
960 __ popl(EAX);  // EAX receives the lane bits stored by movss.
961 __ ret();
962 }
963
964
965 ASSEMBLER_TEST_RUN(PackedCompareLE, entry) {  // Calls the generated PackedCompareLE code and checks the returned mask lane.
966 typedef uint32_t (*PackedCompareLECode)();  // The result is read as raw 32-bit mask bits, not as a float.
967 uint32_t res = reinterpret_cast<PackedCompareLECode>(entry)();
968 EXPECT_EQ(static_cast<uword>(0xFFFFFFFF), res);  // 2.0f <= 4.0f is true, so the lane is expected to be all one bits.
969 }
970
971
972 ASSEMBLER_TEST_GENERATE(PackedCompareNLT, assembler) {  // Emits code comparing 2.0f against 4.0f with cmppsnlt and returning one lane of the mask.
973 __ set1ps(XMM0, EAX, Immediate(bit_cast<int32_t, float>(2.0f)));  // Presumably fills every lane of XMM0 with 2.0f via EAX - confirm against set1ps.
974 __ set1ps(XMM1, EAX, Immediate(bit_cast<int32_t, float>(4.0f)));  // Likewise for XMM1 with 4.0f.
975 __ cmppsnlt(XMM0, XMM1);  // XMM0 receives the per-lane compare mask (presumably "not less than").
976 // Copy the low lane at ESP.
977 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
978 __ movss(Address(ESP, 0), XMM0);
979 __ flds(Address(ESP, 0));  // NOTE(review): the runner reads a uint32_t result, yet this also pushes the lane onto the x87 stack - confirm this is intended.
980 __ popl(EAX);  // EAX receives the lane bits stored by movss.
981 __ ret();
982 }
983
984
985 ASSEMBLER_TEST_RUN(PackedCompareNLT, entry) {  // Calls the generated PackedCompareNLT code and checks the returned mask lane.
986 typedef uint32_t (*PackedCompareNLTCode)();  // The result is read as raw 32-bit mask bits, not as a float.
987 uint32_t res = reinterpret_cast<PackedCompareNLTCode>(entry)();
988 EXPECT_EQ(static_cast<uword>(0x0), res);  // 2.0f is less than 4.0f, so "not less than" is expected to yield all zero bits.
989 }
990
991
992 ASSEMBLER_TEST_GENERATE(PackedCompareNLE, assembler) {  // Emits code comparing 2.0f against 4.0f with cmppsnle and returning one lane of the mask.
993 __ set1ps(XMM0, EAX, Immediate(bit_cast<int32_t, float>(2.0f)));  // Presumably fills every lane of XMM0 with 2.0f via EAX - confirm against set1ps.
994 __ set1ps(XMM1, EAX, Immediate(bit_cast<int32_t, float>(4.0f)));  // Likewise for XMM1 with 4.0f.
995 __ cmppsnle(XMM0, XMM1);  // XMM0 receives the per-lane compare mask (presumably "not less than or equal").
996 // Copy the low lane at ESP.
997 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
998 __ movss(Address(ESP, 0), XMM0);
999 __ flds(Address(ESP, 0));  // NOTE(review): the runner reads a uint32_t result, yet this also pushes the lane onto the x87 stack - confirm this is intended.
1000 __ popl(EAX);  // EAX receives the lane bits stored by movss.
1001 __ ret();
1002 }
1003
1004
1005 ASSEMBLER_TEST_RUN(PackedCompareNLE, entry) {  // Calls the generated PackedCompareNLE code and checks the returned mask lane.
1006 typedef uint32_t (*PackedCompareNLECode)();  // The result is read as raw 32-bit mask bits, not as a float.
1007 uint32_t res = reinterpret_cast<PackedCompareNLECode>(entry)();
1008 EXPECT_EQ(static_cast<uword>(0x0), res);  // 2.0f is less than or equal to 4.0f, so the negated compare is expected to yield all zero bits.
1009 }
1010
1011
1012 ASSEMBLER_TEST_GENERATE(PackedNegate, assembler) {  // Emits code applying negateps to a vector of 12.3f and returning one lane.
1013 __ movl(EAX, Immediate(bit_cast<int32_t, float>(12.3f)));  // EAX = raw bits of 12.3f.
1014 __ movd(XMM0, EAX);
1015 __ shufps(XMM0, XMM0, Immediate(0x0));  // Replicate lane 0 (12.3f) into all 4 lanes.
1016 __ negateps(XMM0);
1017 __ shufps(XMM0, XMM0, Immediate(0xAA)); // Copy third lane into all 4 lanes, so the value read below comes from a non-low lane.
1018 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
1019 // Copy the low lane at ESP.
1020 __ movss(Address(ESP, 0), XMM0);
1021 __ flds(Address(ESP, 0));  // Load the spilled float onto the x87 stack; presumably this is how the float result reaches the runner.
1022 __ popl(EAX);  // Release the stack word reserved above.
1023 __ ret();
1024 }
1025
1026
1027 ASSEMBLER_TEST_RUN(PackedNegate, entry) {  // Calls the generated PackedNegate code and checks its float result.
1028 typedef float (*PackedNegateCode)();  // The generated code is called with no arguments and returns a float.
1029 float res = reinterpret_cast<PackedNegateCode>(entry)();
1030 EXPECT_FLOAT_EQ(-12.3f, res, 0.001f);  // The 12.3f input is expected back with its sign flipped.
1031 }
1032
1033
1034 ASSEMBLER_TEST_GENERATE(PackedAbsolute, assembler) {  // Emits code applying absps to a vector of -15.3f and returning one lane.
1035 __ movl(EAX, Immediate(bit_cast<int32_t, float>(-15.3f)));  // EAX = raw bits of -15.3f.
1036 __ movd(XMM0, EAX);
1037 __ shufps(XMM0, XMM0, Immediate(0x0));  // Replicate lane 0 (-15.3f) into all 4 lanes.
1038 __ absps(XMM0);
1039 __ shufps(XMM0, XMM0, Immediate(0xAA)); // Copy third lane into all 4 lanes, so the value read below comes from a non-low lane.
1040 // Copy the low lane at ESP.
1041 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
1042 __ movss(Address(ESP, 0), XMM0);
1043 __ flds(Address(ESP, 0));  // Load the spilled float onto the x87 stack; presumably this is how the float result reaches the runner.
1044 __ popl(EAX);  // Release the stack word reserved above.
1045 __ ret();
1046 }
1047
1048
1049 ASSEMBLER_TEST_RUN(PackedAbsolute, entry) {  // Calls the generated PackedAbsolute code and checks its float result.
1050 typedef float (*PackedAbsoluteCode)();  // The generated code is called with no arguments and returns a float.
1051 float res = reinterpret_cast<PackedAbsoluteCode>(entry)();
1052 EXPECT_FLOAT_EQ(15.3f, res, 0.001f);  // The -15.3f input is expected back as its absolute value.
1053 }
1054
1055
1056 ASSEMBLER_TEST_GENERATE(PackedSetWZero, assembler) {  // Emits code applying zerowps to a vector of 12.3f and returning the W lane.
1057 __ set1ps(XMM0, EAX, Immediate(bit_cast<int32_t, float>(12.3f)));  // Presumably fills every lane of XMM0 with 12.3f via EAX - confirm against set1ps.
1058 __ zerowps(XMM0);
1059 __ shufps(XMM0, XMM0, Immediate(0xFF)); // Copy the W lane (the fourth lane), which is now 0.0, into all 4 lanes.
1060 // Copy the low lane at ESP.
1061 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
1062 __ movss(Address(ESP, 0), XMM0);
1063 __ flds(Address(ESP, 0));  // Load the spilled float onto the x87 stack; presumably this is how the float result reaches the runner.
1064 __ popl(EAX);  // Release the stack word reserved above.
1065 __ ret();
1066 }
1067
1068
1069 ASSEMBLER_TEST_RUN(PackedSetWZero, entry) {  // Calls the generated PackedSetWZero code and checks its float result.
1070 typedef float (*PackedSetWZeroCode)();  // The generated code is called with no arguments and returns a float.
1071 float res = reinterpret_cast<PackedSetWZeroCode>(entry)();
1072 EXPECT_FLOAT_EQ(0.0f, res, 0.001f);  // The W lane is expected to have been cleared from 12.3f to 0.0f.
1073 }
1074
1075
1076 ASSEMBLER_TEST_GENERATE(PackedMin, assembler) {  // Emits code taking minps of 2.0f and 4.0f vectors and returning the low lane.
1077 __ set1ps(XMM0, EAX, Immediate(bit_cast<int32_t, float>(2.0f)));  // Presumably fills every lane of XMM0 with 2.0f via EAX - confirm against set1ps.
1078 __ set1ps(XMM1, EAX, Immediate(bit_cast<int32_t, float>(4.0f)));  // Likewise for XMM1 with 4.0f.
1079 __ minps(XMM0, XMM1);  // The result is left in XMM0.
1080 // Copy the low lane at ESP.
1081 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
1082 __ movss(Address(ESP, 0), XMM0);
1083 __ flds(Address(ESP, 0));  // Load the spilled float onto the x87 stack; presumably this is how the float result reaches the runner.
1084 __ popl(EAX);  // Release the stack word reserved above.
1085 __ ret();
1086 }
1087
1088
1089 ASSEMBLER_TEST_RUN(PackedMin, entry) {  // Calls the generated PackedMin code and checks its float result.
1090 typedef float (*PackedMinCode)();  // The generated code is called with no arguments and returns a float.
1091 float res = reinterpret_cast<PackedMinCode>(entry)();
1092 EXPECT_FLOAT_EQ(2.0f, res, 0.001f);  // min(2.0, 4.0) = 2.0.
1093 }
1094
1095
1096 ASSEMBLER_TEST_GENERATE(PackedMax, assembler) {  // Emits code taking maxps of 2.0f and 4.0f vectors and returning the low lane.
1097 __ set1ps(XMM0, EAX, Immediate(bit_cast<int32_t, float>(2.0f)));  // Presumably fills every lane of XMM0 with 2.0f via EAX - confirm against set1ps.
1098 __ set1ps(XMM1, EAX, Immediate(bit_cast<int32_t, float>(4.0f)));  // Likewise for XMM1 with 4.0f.
1099 __ maxps(XMM0, XMM1);  // The result is left in XMM0.
1100 // Copy the low lane at ESP.
1101 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
1102 __ movss(Address(ESP, 0), XMM0);
1103 __ flds(Address(ESP, 0));  // Load the spilled float onto the x87 stack; presumably this is how the float result reaches the runner.
1104 __ popl(EAX);  // Release the stack word reserved above.
1105 __ ret();
1106 }
1107
1108
1109 ASSEMBLER_TEST_RUN(PackedMax, entry) {  // Calls the generated PackedMax code and checks its float result.
1110 typedef float (*PackedMaxCode)();  // The generated code is called with no arguments and returns a float.
1111 float res = reinterpret_cast<PackedMaxCode>(entry)();
1112 EXPECT_FLOAT_EQ(4.0f, res, 0.001f);  // max(2.0, 4.0) = 4.0.
1113 }
1114
1115
1116 ASSEMBLER_TEST_GENERATE(PackedLogicalOr, assembler) {  // Emits code OR-ing two 128-bit constants with orps and returning the low lane.
1117 static const struct ALIGN16 {  // First operand: four identical 32-bit lanes; ALIGN16 presumably requests 16-byte alignment.
1118 uint32_t a;
1119 uint32_t b;
1120 uint32_t c;
1121 uint32_t d;
1122 } constant1 =
1123 { 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0 };
1124 static const struct ALIGN16 {  // Second operand: the bitwise complement of constant1 in every lane.
1125 uint32_t a;
1126 uint32_t b;
1127 uint32_t c;
1128 uint32_t d;
1129 } constant2 =
1130 { 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F };
1131 __ movups(XMM0, Address::Absolute(reinterpret_cast<uword>(&constant1)));  // Load constant1 from its absolute address.
1132 __ movups(XMM1, Address::Absolute(reinterpret_cast<uword>(&constant2)));  // Load constant2 from its absolute address.
1133 __ orps(XMM0, XMM1);  // 0xF0F0F0F0 | 0x0F0F0F0F = 0xFFFFFFFF per lane.
1134 // Copy the low lane at ESP.
1135 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
1136 __ movss(Address(ESP, 0), XMM0);
1137 __ flds(Address(ESP, 0));  // NOTE(review): the runner reads a uint32_t result, yet this also pushes the lane onto the x87 stack - confirm this is intended.
1138 __ popl(EAX);  // EAX receives the lane bits stored by movss.
1139 __ ret();
1140 }
1141
1142
1143 ASSEMBLER_TEST_RUN(PackedLogicalOr, entry) {  // Calls the generated PackedLogicalOr code and checks the returned lane bits.
1144 typedef uint32_t (*PackedLogicalOrCode)();  // The result is read as raw 32-bit lane bits, not as a float.
1145 uint32_t res = reinterpret_cast<PackedLogicalOrCode>(entry)();
1146 EXPECT_EQ(static_cast<uword>(0xFFFFFFFF), res);  // 0xF0F0F0F0 | 0x0F0F0F0F = 0xFFFFFFFF; cast to uword like the other integer expectations in this file.
1147 }
1148
1149
1150 ASSEMBLER_TEST_GENERATE(PackedLogicalAnd, assembler) {  // Emits code AND-ing a register with a 128-bit memory operand via andps and returning the low lane.
1151 static const struct ALIGN16 {  // First operand: four identical 32-bit lanes; ALIGN16 presumably requests 16-byte alignment.
1152 uint32_t a;
1153 uint32_t b;
1154 uint32_t c;
1155 uint32_t d;
1156 } constant1 =
1157 { 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0 };
1158 static const struct ALIGN16 {  // Second operand: only the first lane (0x0F0FFF0F) shares any set bits with constant1.
1159 uint32_t a;
1160 uint32_t b;
1161 uint32_t c;
1162 uint32_t d;
1163 } constant2 =
1164 { 0x0F0FFF0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F };
1165 __ movups(XMM0, Address::Absolute(reinterpret_cast<uword>(&constant1)));  // Load constant1 from its absolute address.
1166 __ andps(XMM0, Address::Absolute(reinterpret_cast<uword>(&constant2)));  // Memory-operand form: constant2 is read directly from its absolute address.
1167 // Copy the low lane at ESP.
1168 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
1169 __ movss(Address(ESP, 0), XMM0);
1170 __ flds(Address(ESP, 0));  // NOTE(review): the runner reads a uint32_t result, yet this also pushes the lane onto the x87 stack - confirm this is intended.
1171 __ popl(EAX);  // EAX receives the lane bits stored by movss.
1172 __ ret();
1173 }
1174
1175
1176 ASSEMBLER_TEST_RUN(PackedLogicalAnd, entry) {  // Calls the generated PackedLogicalAnd code and checks the returned lane bits.
1177 typedef uint32_t (*PackedLogicalAndCode)();  // The result is read as raw 32-bit lane bits, not as a float.
1178 uint32_t res = reinterpret_cast<PackedLogicalAndCode>(entry)();
1179 EXPECT_EQ(static_cast<uword>(0x0000F000), res);  // 0xF0F0F0F0 & 0x0F0FFF0F = 0x0000F000 in the low lane.
1180 }
1181
1182
1183 ASSEMBLER_TEST_GENERATE(PackedLogicalNot, assembler) {  // Emits code applying notps to an all-ones 128-bit constant and returning the low lane.
1184 static const struct ALIGN16 {  // Operand: every bit set in all four lanes; ALIGN16 presumably requests 16-byte alignment.
1185 uint32_t a;
1186 uint32_t b;
1187 uint32_t c;
1188 uint32_t d;
1189 } constant1 =
1190 { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
1191 __ movups(XMM0, Address::Absolute(reinterpret_cast<uword>(&constant1)));  // Load constant1 from its absolute address.
1192 __ notps(XMM0);
1193 // Copy the low lane at ESP.
1194 __ pushl(EAX);  // Reserve one stack word to spill the lane into.
1195 __ movss(Address(ESP, 0), XMM0);
1196 __ flds(Address(ESP, 0));  // NOTE(review): the runner reads a uint32_t result, yet this also pushes the lane onto the x87 stack - confirm this is intended.
1197 __ popl(EAX);  // EAX receives the lane bits stored by movss.
1198 __ ret();
1199 }
1200
1201
1202 ASSEMBLER_TEST_RUN(PackedLogicalNot, entry) {  // Calls the generated PackedLogicalNot code and checks the returned lane bits.
1203 typedef uint32_t (*PackedLogicalNotCode)();  // The result is read as raw 32-bit lane bits, not as a float.
1204 uint32_t res = reinterpret_cast<PackedLogicalNotCode>(entry)();
1205 EXPECT_EQ(static_cast<uword>(0x0), res);  // Inverting 0xFFFFFFFF is expected to yield all zero bits.
1206 }
1207
1208
837 ASSEMBLER_TEST_GENERATE(SingleFPOperationsStack, assembler) { 1209 ASSEMBLER_TEST_GENERATE(SingleFPOperationsStack, assembler) {
838 __ movl(EAX, Immediate(bit_cast<int32_t, float>(12.3f))); 1210 __ movl(EAX, Immediate(bit_cast<int32_t, float>(12.3f)));
839 __ movd(XMM0, EAX); 1211 __ movd(XMM0, EAX);
840 __ addss(XMM0, Address(ESP, kWordSize)); // 15.7f 1212 __ addss(XMM0, Address(ESP, kWordSize)); // 15.7f
841 __ mulss(XMM0, Address(ESP, kWordSize)); // 53.38f 1213 __ mulss(XMM0, Address(ESP, kWordSize)); // 53.38f
842 __ subss(XMM0, Address(ESP, kWordSize)); // 49.98f 1214 __ subss(XMM0, Address(ESP, kWordSize)); // 49.98f
843 __ divss(XMM0, Address(ESP, kWordSize)); // 14.7f 1215 __ divss(XMM0, Address(ESP, kWordSize)); // 14.7f
844 __ pushl(EAX); 1216 __ pushl(EAX);
845 __ movss(Address(ESP, 0), XMM0); 1217 __ movss(Address(ESP, 0), XMM0);
846 __ flds(Address(ESP, 0)); 1218 __ flds(Address(ESP, 0));
(...skipping 1326 matching lines...) Expand 10 before | Expand all | Expand 10 after
2173 EAX); 2545 EAX);
2174 __ popl(EAX); 2546 __ popl(EAX);
2175 __ popl(CTX); 2547 __ popl(CTX);
2176 __ ret(); 2548 __ ret();
2177 } 2549 }
2178 2550
2179 2551
2180 } // namespace dart 2552 } // namespace dart
2181 2553
2182 #endif // defined TARGET_ARCH_IA32 2554 #endif // defined TARGET_ARCH_IA32
OLDNEW
« no previous file with comments | « runtime/vm/assembler_ia32.cc ('k') | runtime/vm/assembler_x64.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698