OLD | NEW |
---|---|
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/runtime/runtime-utils.h" | 5 #include "src/runtime/runtime-utils.h" |
6 | 6 |
7 #include "src/arguments.h" | 7 #include "src/arguments.h" |
8 #include "src/conversions-inl.h" | 8 #include "src/conversions-inl.h" |
9 #include "src/isolate-inl.h" | 9 #include "src/isolate-inl.h" |
10 #include "src/regexp/jsregexp-inl.h" | 10 #include "src/regexp/jsregexp-inl.h" |
11 #include "src/regexp/jsregexp.h" | 11 #include "src/regexp/jsregexp.h" |
12 #include "src/string-builder.h" | 12 #include "src/string-builder.h" |
13 #include "src/string-search.h" | 13 #include "src/string-search.h" |
14 | 14 |
15 #ifdef V8_I18N_SUPPORT | |
16 #include "unicode/locid.h" | |
17 #include "unicode/uchar.h" | |
18 #include "unicode/unistr.h" | |
19 #endif | |
20 | |
15 namespace v8 { | 21 namespace v8 { |
16 namespace internal { | 22 namespace internal { |
17 | 23 |
18 | 24 |
19 // Perform string match of pattern on subject, starting at start index. | 25 // Perform string match of pattern on subject, starting at start index. |
20 // Caller must ensure that 0 <= start_index <= sub->length(), | 26 // Caller must ensure that 0 <= start_index <= sub->length(), |
21 // and should check that pat->length() + start_index <= sub->length(). | 27 // and should check that pat->length() + start_index <= sub->length(). |
22 int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat, | 28 int StringMatch(Isolate* isolate, Handle<String> sub, Handle<String> pat, |
23 int start_index) { | 29 int start_index) { |
24 DCHECK(0 <= start_index); | 30 DCHECK(0 <= start_index); |
(...skipping 1045 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1070 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 1076 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1071 isolate, result, isolate->factory()->NewRawOneByteString(length)); | 1077 isolate, result, isolate->factory()->NewRawOneByteString(length)); |
1072 } else { | 1078 } else { |
1073 if (length < 0) length = -length; | 1079 if (length < 0) length = -length; |
1074 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 1080 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1075 isolate, result, isolate->factory()->NewRawTwoByteString(length)); | 1081 isolate, result, isolate->factory()->NewRawTwoByteString(length)); |
1076 } | 1082 } |
1077 return ConvertCaseHelper(isolate, *s, *result, length, mapping); | 1083 return ConvertCaseHelper(isolate, *s, *result, length, mapping); |
1078 } | 1084 } |
1079 | 1085 |
1086 #ifdef V8_I18N_SUPPORT | |
1087 namespace { | |
1088 | |
1089 MUST_USE_RESULT static Handle<String> ConvertCaseICU(Handle<String> s, | |
1090 Isolate* isolate, | |
1091 bool is_to_upper) { | |
1092 DCHECK(s->IsFlat()); | |
1093 // Handle<String> flattened = String::Flatten(s); | |
1094 String::FlatContent flat = s->GetFlatContent(); | |
jungshik at Google
2016/04/07 18:57:11
This leads to a assertion failure in objects.cc:
| |
1095 | |
1096 const UChar* src; | |
1097 if (flat.IsOneByte()) { | |
1098 base::SmartArrayPointer<uc16> sap = s->ToWideCString(); | |
1099 src = reinterpret_cast<const UChar*>(sap.get()); | |
1100 } else { | |
1101 src = reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); | |
1102 } | |
1103 | |
1104 int32_t length = s->length(); | |
1105 | |
1106 // This UnicodeString ctor has copy-on-write semantics. It starts as a | |
1107 // read-only alias but the buffer is copied when it's written to. | |
1108 icu::UnicodeString converted(0, src, length); | |
1109 const icu::Locale& root_locale = icu::Locale::getRoot(); | |
1110 if (is_to_upper) | |
1111 converted.toUpper(root_locale); | |
1112 else | |
1113 converted.toLower(root_locale); | |
1114 | |
1115 return isolate->factory() | |
1116 ->NewStringFromTwoByte(Vector<const uint16_t>( | |
1117 reinterpret_cast<const uint16_t*>(converted.getBuffer()), | |
1118 converted.length())) | |
1119 .ToHandleChecked(); | |
1120 } | |
1121 | |
// Returns true iff |ch| is one of the ASCII capital letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  // A single unsigned range check: values below 'A' wrap around to large
  // numbers and therefore fail the comparison as well.
  return static_cast<uint16_t>(ch - 'A') <= static_cast<uint16_t>('Z' - 'A');
}
1123 | |
// Maps ASCII 'A'..'Z' to 'a'..'z'; every other code unit is returned
// unchanged.
inline uint16_t ToASCIILower(uint16_t ch) {
  const bool is_ascii_upper = ch >= 'A' && ch <= 'Z';
  // Upper- and lower-case ASCII letters differ only in bit 5 (0x20).
  return is_ascii_upper ? static_cast<uint16_t>(ch | 0x20) : ch;
}
1127 | |
// Maps ASCII 'a'..'z' to 'A'..'Z'; every other code unit is returned
// unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = ch >= 'a' && ch <= 'z';
  // Upper- and lower-case ASCII letters differ only in bit 5 (0x20).
  return is_ascii_lower ? static_cast<uint16_t>(ch & ~0x20) : ch;
}
1131 | |
1132 MUST_USE_RESULT Handle<String> StringToLowerCase(Handle<String> s, | |
1133 Isolate* isolate) { | |
1134 // Note: This is a hot function in the Dromaeo benchmark, specifically the | |
1135 // no-op code path up through the first 'return' statement. | |
1136 | |
1137 int length = s->length(); | |
1138 s = String::Flatten(s); | |
1139 // First scan the string for uppercase and non-ASCII characters: | |
1140 if (s->HasOnlyOneByteChars()) { | |
1141 unsigned first_index_to_lower = length; | |
1142 for (int index = 0; index < length; ++index) { | |
1143 // Blink specializes this path for one-byte strings, so it | |
1144 // does not need to do a generic get, but can do the equivalent | |
1145 // of SeqOneByteStringGet. | |
1146 uint16_t ch = s->Get(index); | |
1147 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { | |
1148 first_index_to_lower = index; | |
1149 break; | |
1150 } | |
1151 } | |
1152 | |
1153 // Nothing to do if the string is all ASCII with no uppercase. | |
1154 if (first_index_to_lower == length) return s; | |
1155 | |
1156 // We depend here on the invariant that the length of a Latin1 | |
1157 // string is invariant under ToLowerCase, and the result always | |
1158 // fits in the Latin1 range (untrue for ToUpperCase, and might | |
1159 // be untrue in some locales, but this is the root locale) | |
1160 Handle<SeqOneByteString> result = | |
1161 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); | |
1162 if (s->IsSeqOneByteString()) { | |
1163 SeqOneByteString* source = SeqOneByteString::cast(*s); | |
1164 CopyChars(result->GetChars(), source->GetChars(), first_index_to_lower); | |
1165 } else { | |
1166 // Do we have to worry about External{One,Two}ByteString? | |
1167 DCHECK(s->IsSeqTwoByteString()); | |
1168 SeqTwoByteString* source = SeqTwoByteString::cast(*s); | |
1169 CopyChars(result->GetChars(), source->GetChars(), first_index_to_lower); | |
1170 } | |
1171 | |
1172 for (int index = first_index_to_lower; index < length; ++index) { | |
1173 uint16_t ch = s->Get(index); | |
1174 result->SeqOneByteStringSet( | |
1175 index, V8_UNLIKELY(ch & ~0x7F) ? static_cast<uint16_t>(u_tolower(ch)) | |
1176 : ToASCIILower(ch)); | |
1177 } | |
1178 | |
1179 return Handle<String>(*result); | |
1180 } | |
1181 | |
1182 // Blink had an additional case here for ASCII 2-byte strings, but | |
1183 // that is subsumed by the above code (assuming there isn't a false | |
1184 // negative for HasOnlyOneByteChars). | |
1185 | |
1186 // Do a slower implementation for cases that include non-ASCII characters. | |
1187 return ConvertCaseICU(s, isolate, false); | |
1188 } | |
1189 | |
1190 const uint16_t sharp_s = 0x00DFu; | |
1191 | |
1192 MUST_USE_RESULT Handle<String> StringToUpperCase(Handle<String> s, | |
1193 Isolate* isolate) { | |
1194 // This function could be optimized for no-op cases the way lower() is, | |
1195 // but in empirical testing, few actual calls to upper() are no-ops, so | |
1196 // it wouldn't be worth the extra time for pre-scanning. | |
1197 | |
1198 int32_t length = s->length(); | |
1199 s = String::Flatten(s); | |
1200 | |
1201 if (s->HasOnlyOneByteChars()) { | |
1202 Handle<SeqOneByteString> result = | |
1203 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); | |
1204 | |
1205 // Do a faster loop for the case where all the characters are ASCII. | |
1206 uint16_t ored = 0; | |
1207 for (int index = 0; index < length; ++index) { | |
1208 uint16_t ch = s->Get(index); | |
1209 ored |= ch; | |
1210 result->SeqOneByteStringSet(index, ToASCIIUpper(ch)); | |
1211 } | |
1212 if (!(ored & ~0x7F)) return Handle<String>(*result); | |
1213 | |
1214 // Do a slower implementation for cases that include non-ASCII Latin-1 | |
1215 // characters. | |
1216 int sharp_s_count = 0; | |
1217 | |
1218 // There are two special cases. | |
1219 // 1. latin-1 characters when converted to upper case are 16 bit | |
1220 // characters. | |
1221 // 2. Lower case sharp-S converts to "SS" (two characters) | |
1222 for (int32_t index = 0; index < length; ++index) { | |
1223 uint16_t ch = s->Get(index); | |
1224 if (V8_UNLIKELY(ch == sharp_s)) { | |
1225 ++sharp_s_count; | |
1226 continue; | |
1227 } | |
1228 uint16_t upper = static_cast<uint16_t>(u_toupper(static_cast<UChar>(ch))); | |
1229 if (V8_UNLIKELY(upper > 0xff)) { | |
1230 // Since this upper-cased character does not fit in an 8-bit string, we | |
1231 // need to take the 16-bit path. | |
1232 goto upconvert; | |
1233 } | |
1234 result->SeqOneByteStringSet(index, upper); | |
1235 } | |
1236 | |
1237 if (sharp_s_count == 0) return Handle<String>(*result); | |
1238 | |
1239 // We have sharp_s_count sharp-s characters, but none of the other special | |
1240 // characters. | |
1241 result = isolate->factory() | |
1242 ->NewRawOneByteString(length + sharp_s_count) | |
1243 .ToHandleChecked(); | |
1244 for (int32_t index = 0, dest_index = 0; index < length; ++index) { | |
1245 uint16_t ch = s->Get(index); | |
1246 if (ch == sharp_s) { | |
1247 result->SeqOneByteStringSet(dest_index++, 'S'); | |
1248 result->SeqOneByteStringSet(dest_index++, 'S'); | |
1249 } else { | |
1250 uint16_t upper = | |
1251 static_cast<uint16_t>(u_toupper(static_cast<UChar>(ch))); | |
1252 result->SeqOneByteStringSet(dest_index++, upper); | |
1253 } | |
1254 } | |
1255 | |
1256 return Handle<String>(*result); | |
1257 } | |
1258 | |
1259 upconvert: | |
1260 return ConvertCaseICU(s, isolate, true); | |
1261 } | |
1262 | |
1263 } // namespace | |
1264 #endif | |
1080 | 1265 |
1081 RUNTIME_FUNCTION(Runtime_StringToLowerCase) { | 1266 RUNTIME_FUNCTION(Runtime_StringToLowerCase) { |
1082 HandleScope scope(isolate); | 1267 HandleScope scope(isolate); |
1083 DCHECK(args.length() == 1); | 1268 DCHECK_EQ(args.length(), 1); |
1084 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); | 1269 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
1085 return ConvertCase(s, isolate, isolate->runtime_state()->to_lower_mapping()); | 1270 #ifdef V8_I18N_SUPPORT |
1271 if (FLAG_icu_case_mapping) | |
1272 return *StringToLowerCase(s, isolate); | |
1273 else | |
1274 #endif | |
1275 return ConvertCase(s, isolate, | |
1276 isolate->runtime_state()->to_lower_mapping()); | |
1086 } | 1277 } |
1087 | 1278 |
1088 | 1279 |
1089 RUNTIME_FUNCTION(Runtime_StringToUpperCase) { | 1280 RUNTIME_FUNCTION(Runtime_StringToUpperCase) { |
1090 HandleScope scope(isolate); | 1281 HandleScope scope(isolate); |
1091 DCHECK(args.length() == 1); | 1282 DCHECK_EQ(args.length(), 1); |
1092 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); | 1283 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
1093 return ConvertCase(s, isolate, isolate->runtime_state()->to_upper_mapping()); | 1284 #ifdef V8_I18N_SUPPORT |
1094 } | 1285 if (FLAG_icu_case_mapping) |
1095 | 1286 return *StringToUpperCase(s, isolate); |
1096 | 1287 else |
1288 #endif | |
1289 return ConvertCase(s, isolate, | |
1290 isolate->runtime_state()->to_upper_mapping()); | |
1291 } | |
1292 | |
1293 | |
1097 RUNTIME_FUNCTION(Runtime_StringTrim) { | 1294 RUNTIME_FUNCTION(Runtime_StringTrim) { |
1098 HandleScope scope(isolate); | 1295 HandleScope scope(isolate); |
1099 DCHECK(args.length() == 3); | 1296 DCHECK(args.length() == 3); |
1100 | 1297 |
1101 CONVERT_ARG_HANDLE_CHECKED(String, string, 0); | 1298 CONVERT_ARG_HANDLE_CHECKED(String, string, 0); |
1102 CONVERT_BOOLEAN_ARG_CHECKED(trimLeft, 1); | 1299 CONVERT_BOOLEAN_ARG_CHECKED(trimLeft, 1); |
1103 CONVERT_BOOLEAN_ARG_CHECKED(trimRight, 2); | 1300 CONVERT_BOOLEAN_ARG_CHECKED(trimRight, 2); |
1104 | 1301 |
1105 string = String::Flatten(string); | 1302 string = String::Flatten(string); |
1106 int length = string->length(); | 1303 int length = string->length(); |
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1318 SealHandleScope shs(isolate); | 1515 SealHandleScope shs(isolate); |
1319 DCHECK(args.length() == 2); | 1516 DCHECK(args.length() == 2); |
1320 if (!args[0]->IsString()) return isolate->heap()->undefined_value(); | 1517 if (!args[0]->IsString()) return isolate->heap()->undefined_value(); |
1321 if (!args[1]->IsNumber()) return isolate->heap()->undefined_value(); | 1518 if (!args[1]->IsNumber()) return isolate->heap()->undefined_value(); |
1322 if (std::isinf(args.number_at(1))) return isolate->heap()->nan_value(); | 1519 if (std::isinf(args.number_at(1))) return isolate->heap()->nan_value(); |
1323 return __RT_impl_Runtime_StringCharCodeAtRT(args, isolate); | 1520 return __RT_impl_Runtime_StringCharCodeAtRT(args, isolate); |
1324 } | 1521 } |
1325 | 1522 |
1326 } // namespace internal | 1523 } // namespace internal |
1327 } // namespace v8 | 1524 } // namespace v8 |
OLD | NEW |