| OLD | NEW |
| 1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/runtime/runtime-utils.h" | 5 #include "src/runtime/runtime-utils.h" |
| 6 | 6 |
| 7 #include "src/arguments.h" | 7 #include "src/arguments.h" |
| 8 #include "src/conversions-inl.h" | 8 #include "src/conversions-inl.h" |
| 9 #include "src/isolate-inl.h" | 9 #include "src/isolate-inl.h" |
| 10 #include "src/regexp/jsregexp-inl.h" | 10 #include "src/regexp/jsregexp-inl.h" |
| 11 #include "src/regexp/jsregexp.h" | 11 #include "src/regexp/jsregexp.h" |
| 12 #include "src/string-builder.h" | 12 #include "src/string-builder.h" |
| 13 #include "src/string-search.h" | 13 #include "src/string-search.h" |
| 14 | 14 |
#ifdef V8_I18N_SUPPORT
#include "unicode/locid.h"
#include "unicode/uchar.h"
#include "unicode/unistr.h"
#endif
| 19 |
| 15 namespace v8 { | 20 namespace v8 { |
| 16 namespace internal { | 21 namespace internal { |
| 17 | 22 |
| 18 | 23 |
| 19 // Perform string match of pattern on subject, starting at start index. | 24 // Perform string match of pattern on subject, starting at start index. |
| 20 // Caller must ensure that 0 <= start_index <= sub->length(), | 25 // Caller must ensure that 0 <= start_index <= sub->length(), |
| 21 // and should check that pat->length() + start_index <= sub->length(). | 26 // and should check that pat->length() + start_index <= sub->length(). |
| 22 int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat, | 27 int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat, |
| 23 int start_index) { | 28 int start_index) { |
| 24 DCHECK(0 <= start_index); | 29 DCHECK(0 <= start_index); |
| (...skipping 1047 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1072 isolate, result, isolate->factory()->NewRawOneByteString(length)); | 1077 isolate, result, isolate->factory()->NewRawOneByteString(length)); |
| 1073 } else { | 1078 } else { |
| 1074 if (length < 0) length = -length; | 1079 if (length < 0) length = -length; |
| 1075 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 1080 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 1076 isolate, result, isolate->factory()->NewRawTwoByteString(length)); | 1081 isolate, result, isolate->factory()->NewRawTwoByteString(length)); |
| 1077 } | 1082 } |
| 1078 return ConvertCaseHelper(isolate, *s, *result, length, mapping); | 1083 return ConvertCaseHelper(isolate, *s, *result, length, mapping); |
| 1079 } | 1084 } |
| 1080 | 1085 |
| 1081 | 1086 |
| 1087 #ifdef V8_I18N_SUPPORT |
| 1088 namespace { |
| 1089 |
| 1090 MUST_USE_RESULT static Handle<String> ConvertCaseICU(Handle<String> s, |
| 1091 Isolate* isolate, |
| 1092 bool is_to_upper) { |
| 1093 int32_t length = s->length(); |
| 1094 |
| 1095 // If we already have a UTF-16 string, use that, otherwise build it |
| 1096 base::SmartArrayPointer<uc16> sap; |
| 1097 const UChar* src; |
| 1098 if (StringShape(*s).IsSequentialTwoByte()) { |
| 1099 src = |
| 1100 reinterpret_cast<const UChar*>(SeqTwoByteString::cast(*s)->GetChars()); |
| 1101 } else { |
| 1102 sap = s->ToWideCString(ROBUST_STRING_TRAVERSAL); |
| 1103 src = reinterpret_cast<const UChar*>(sap.get()); |
| 1104 } |
| 1105 |
| 1106 // This UnicodeString ctor has copy-on-write semantics. It starts as a |
| 1107 // read-only alias but the buffer is copied when it's written to. |
| 1108 icu::UnicodeString converted(0, src, length); |
| 1109 if (is_to_upper) |
| 1110 converted.toUpper(); |
| 1111 else |
| 1112 converted.toLower(); |
| 1113 |
| 1114 #if 0 |
| 1115 Handle<String> result; |
| 1116 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 1117 isolate, result, |
| 1118 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
| 1119 reinterpret_cast<const uint16_t*>(converted.getBuffer()), |
| 1120 converted.length()))); |
| 1121 #endif |
| 1122 return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
| 1123 reinterpret_cast<const uint16_t*>(converted.getBuffer()), |
| 1124 converted.length())).ToHandleChecked(); |
| 1125 } |
| 1126 |
| 1127 |
// Returns true iff |ch| is one of the ASCII capital letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  if (ch < 'A') return false;
  return ch <= 'Z';
}
| 1129 |
| 1130 |
// Maps the ASCII capital letters 'A'..'Z' to 'a'..'z' by setting bit 5;
// every other code unit is returned unchanged.
inline uint16_t ToASCIILower(uint16_t ch) {
  const bool is_upper = 'A' <= ch && ch <= 'Z';
  if (!is_upper) return ch;
  return static_cast<uint16_t>(ch | 0x20);
}
| 1134 |
| 1135 |
// Maps the ASCII small letters 'a'..'z' to 'A'..'Z' by clearing bit 5;
// every other code unit is returned unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_lower = 'a' <= ch && ch <= 'z';
  if (!is_lower) return ch;
  return static_cast<uint16_t>(ch & ~0x20);
}
| 1139 |
| 1140 |
| 1141 MUST_USE_RESULT Handle<String> StringToLowerCase(Handle<String> s, |
| 1142 Isolate* isolate) { |
| 1143 // Note: This is a hot function in the Dromaeo benchmark, specifically the |
| 1144 // no-op code path up through the first 'return' statement. |
| 1145 |
| 1146 int length = s->length(); |
| 1147 // First scan the string for uppercase and non-ASCII characters: |
| 1148 if (s->HasOnlyOneByteChars()) { |
| 1149 unsigned first_index_to_lower = length; |
| 1150 for (int index = 0; index < length; ++index) { |
| 1151 // Blink specializes this path for one-byte strings, so it |
| 1152 // does not need to do a generic get, but can do the equivalent |
| 1153 // of SeqOneByteStringGet. |
| 1154 uint16_t ch = s->Get(index); |
| 1155 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { |
| 1156 first_index_to_lower = index; |
| 1157 break; |
| 1158 } |
| 1159 } |
| 1160 |
| 1161 // Nothing to do if the string is all ASCII with no uppercase. |
| 1162 if (first_index_to_lower == length) return s; |
| 1163 |
| 1164 // We depend here on the invariant that the length of a Latin1 |
| 1165 // string is invariant under ToLowerCase, and the result always |
| 1166 // fits in the Latin1 range (untrue for ToUpperCase, and might |
| 1167 // be untrue in some locales, but this is the root locale) |
| 1168 Handle<SeqOneByteString> result = |
| 1169 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
| 1170 // In Blink, this is a simple memcpy, but in V8, the path applies in |
| 1171 // more cases. The optimization here is conditional on whether the |
| 1172 // source is actually a simple 8-bit string (always true in Blink). |
| 1173 // The broader condition lets us eliminate a bunch of duplicate code |
| 1174 // which Blink had in a separate section below. |
| 1175 if (StringShape(*s).IsSequentialOneByte()) { |
| 1176 // In this path, we can use the one-byte-specific Get, and |
| 1177 // memcpy until the first_index_to_lower. |
| 1178 SeqOneByteString* source = SeqOneByteString::cast(*s); |
| 1179 memcpy(result->GetChars(), source->GetChars(), first_index_to_lower); |
| 1180 for (int index = first_index_to_lower; index < length; ++index) { |
| 1181 uint16_t ch = source->SeqOneByteStringGet(index); |
| 1182 result->SeqOneByteStringSet(index, |
| 1183 V8_UNLIKELY(ch & ~0x7F) |
| 1184 ? static_cast<uint16_t>(u_tolower(ch)) |
| 1185 : ToASCIILower(ch)); |
| 1186 } |
| 1187 } else { |
| 1188 // In this path, we start from the beginning of the string, |
| 1189 // since there is nothing to memcpy from, and we have to |
| 1190 // use the generic Get. Another option here would be to create |
| 1191 // a two-byte string as output, and do a memcpy from that, |
| 1192 // as Blink does, but there's also the ConsString case. |
| 1193 for (int index = 0; index < length; ++index) { |
| 1194 uint16_t ch = s->Get(index); |
| 1195 result->SeqOneByteStringSet(index, |
| 1196 V8_UNLIKELY(ch & ~0x7F) |
| 1197 ? static_cast<uint16_t>(u_tolower(ch)) |
| 1198 : ToASCIILower(ch)); |
| 1199 } |
| 1200 } |
| 1201 |
| 1202 return Handle<String>(*result); |
| 1203 } |
| 1204 |
| 1205 // Blink had an additional case here for ASCII 2-byte strings, but |
| 1206 // that is subsumed by the above code (assuming there isn't a false |
| 1207 // negative for HasOnlyOneByteChars). |
| 1208 |
| 1209 // Do a slower implementation for cases that include non-ASCII characters. |
| 1210 return ConvertCaseICU(s, isolate, u_strToLower); |
| 1211 } |
| 1212 |
| 1213 |
// U+00DF LATIN SMALL LETTER SHARP S.
const uint16_t sharp_s = 0x00DF;
| 1215 |
| 1216 MUST_USE_RESULT Handle<String> StringToUpperCase(Handle<String> s, |
| 1217 Isolate* isolate) { |
| 1218 // This function could be optimized for no-op cases the way lower() is, |
| 1219 // but in empirical testing, few actual calls to upper() are no-ops, so |
| 1220 // it wouldn't be worth the extra time for pre-scanning. |
| 1221 |
| 1222 int32_t length = s->length(); |
| 1223 |
| 1224 if (s->HasOnlyOneByteChars()) { |
| 1225 Handle<SeqOneByteString> result = |
| 1226 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
| 1227 |
| 1228 // Do a faster loop for the case where all the characters are ASCII. |
| 1229 uint16_t ored = 0; |
| 1230 for (int index = 0; index < length; ++index) { |
| 1231 uint16_t ch = s->Get(index); |
| 1232 ored |= ch; |
| 1233 result->SeqOneByteStringSet(index, ToASCIIUpper(ch)); |
| 1234 } |
| 1235 if (!(ored & ~0x7F)) return Handle<String>(*result); |
| 1236 |
| 1237 // Do a slower implementation for cases that include non-ASCII Latin-1 |
| 1238 // characters. |
| 1239 int sharp_s_count = 0; |
| 1240 |
| 1241 // There are two special cases. |
| 1242 // 1. latin-1 characters when converted to upper case are 16 bit |
| 1243 // characters. |
| 1244 // 2. Lower case sharp-S converts to "SS" (two characters) |
| 1245 for (int32_t index = 0; index < length; ++index) { |
| 1246 uint16_t ch = s->Get(index); |
| 1247 if (V8_UNLIKELY(ch == sharp_s)) ++sharp_s_count; |
| 1248 uint16_t upper = static_cast<uint16_t>(u_toupper(static_cast<UChar>(ch))); |
| 1249 if (V8_UNLIKELY(upper > 0xff)) { |
| 1250 // Since this upper-cased character does not fit in an 8-bit string, we |
| 1251 // need to take the 16-bit path. |
| 1252 goto upconvert; |
| 1253 } |
| 1254 result->SeqOneByteStringSet(index, upper); |
| 1255 } |
| 1256 |
| 1257 if (sharp_s_count == 0) return Handle<String>(*result); |
| 1258 |
| 1259 // We have sharp_s_count sharp-s characters, but none of the other special |
| 1260 // characters. |
| 1261 result = isolate->factory() |
| 1262 ->NewRawOneByteString(length + sharp_s_count) |
| 1263 .ToHandleChecked(); |
| 1264 for (int32_t index = 0, dest_index = 0; index < length; ++index) { |
| 1265 uint16_t ch = s->Get(index); |
| 1266 if (ch == sharp_s) { |
| 1267 result->SeqOneByteStringSet(dest_index++, 'S'); |
| 1268 result->SeqOneByteStringSet(dest_index++, 'S'); |
| 1269 } else { |
| 1270 uint16_t upper = |
| 1271 static_cast<uint16_t>(u_toupper(static_cast<UChar>(ch))); |
| 1272 result->SeqOneByteStringSet(dest_index++, upper); |
| 1273 } |
| 1274 } |
| 1275 |
| 1276 return Handle<String>(*result); |
| 1277 } |
| 1278 |
| 1279 upconvert: |
| 1280 return ConvertCaseICU(s, isolate, u_strToUpper); |
| 1281 } |
| 1282 |
| 1283 } // namespace |
| 1284 #endif |
| 1285 |
| 1286 |
| 1082 RUNTIME_FUNCTION(Runtime_StringToLowerCase) { | 1287 RUNTIME_FUNCTION(Runtime_StringToLowerCase) { |
| 1083 HandleScope scope(isolate); | 1288 HandleScope scope(isolate); |
| 1084 DCHECK(args.length() == 1); | 1289 DCHECK_EQ(args.length(), 1); |
| 1085 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); | 1290 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
| 1086 return ConvertCase(s, isolate, isolate->runtime_state()->to_lower_mapping()); | 1291 #ifdef V8_I18N_SUPPORT |
| 1292 if (FLAG_icu_case_mapping) |
| 1293 return *StringToLowerCase(s, isolate); |
| 1294 else |
| 1295 #endif |
| 1296 return ConvertCase(s, isolate, |
| 1297 isolate->runtime_state()->to_lower_mapping()); |
| 1087 } | 1298 } |
| 1088 | 1299 |
| 1089 | 1300 |
| 1090 RUNTIME_FUNCTION(Runtime_StringToUpperCase) { | 1301 RUNTIME_FUNCTION(Runtime_StringToUpperCase) { |
| 1091 HandleScope scope(isolate); | 1302 HandleScope scope(isolate); |
| 1092 DCHECK(args.length() == 1); | 1303 DCHECK_EQ(args.length(), 1); |
| 1093 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); | 1304 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
| 1094 return ConvertCase(s, isolate, isolate->runtime_state()->to_upper_mapping()); | 1305 #ifdef V8_I18N_SUPPORT |
| 1095 } | 1306 if (FLAG_icu_case_mapping) |
| 1096 | 1307 return *StringToUpperCase(s, isolate); |
| 1097 | 1308 else |
| 1309 #endif |
| 1310 return ConvertCase(s, isolate, |
| 1311 isolate->runtime_state()->to_upper_mapping()); |
| 1312 } |
| 1313 |
| 1314 |
| 1098 RUNTIME_FUNCTION(Runtime_StringTrim) { | 1315 RUNTIME_FUNCTION(Runtime_StringTrim) { |
| 1099 HandleScope scope(isolate); | 1316 HandleScope scope(isolate); |
| 1100 DCHECK(args.length() == 3); | 1317 DCHECK(args.length() == 3); |
| 1101 | 1318 |
| 1102 CONVERT_ARG_HANDLE_CHECKED(String, string, 0); | 1319 CONVERT_ARG_HANDLE_CHECKED(String, string, 0); |
| 1103 CONVERT_BOOLEAN_ARG_CHECKED(trimLeft, 1); | 1320 CONVERT_BOOLEAN_ARG_CHECKED(trimLeft, 1); |
| 1104 CONVERT_BOOLEAN_ARG_CHECKED(trimRight, 2); | 1321 CONVERT_BOOLEAN_ARG_CHECKED(trimRight, 2); |
| 1105 | 1322 |
| 1106 string = String::Flatten(string); | 1323 string = String::Flatten(string); |
| 1107 int length = string->length(); | 1324 int length = string->length(); |
| (...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1319 SealHandleScope shs(isolate); | 1536 SealHandleScope shs(isolate); |
| 1320 DCHECK(args.length() == 2); | 1537 DCHECK(args.length() == 2); |
| 1321 if (!args[0]->IsString()) return isolate->heap()->undefined_value(); | 1538 if (!args[0]->IsString()) return isolate->heap()->undefined_value(); |
| 1322 if (!args[1]->IsNumber()) return isolate->heap()->undefined_value(); | 1539 if (!args[1]->IsNumber()) return isolate->heap()->undefined_value(); |
| 1323 if (std::isinf(args.number_at(1))) return isolate->heap()->nan_value(); | 1540 if (std::isinf(args.number_at(1))) return isolate->heap()->nan_value(); |
| 1324 return __RT_impl_Runtime_StringCharCodeAtRT(args, isolate); | 1541 return __RT_impl_Runtime_StringCharCodeAtRT(args, isolate); |
| 1325 } | 1542 } |
| 1326 | 1543 |
| 1327 } // namespace internal | 1544 } // namespace internal |
| 1328 } // namespace v8 | 1545 } // namespace v8 |
| OLD | NEW |