OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 #ifdef V8_I18N_SUPPORT | 6 #ifdef V8_I18N_SUPPORT |
7 #include "src/runtime/runtime-utils.h" | 7 #include "src/runtime/runtime-utils.h" |
8 | 8 |
9 #include <memory> | 9 #include <memory> |
10 | 10 |
| 11 #include "src/api-natives.h" |
11 #include "src/api.h" | 12 #include "src/api.h" |
12 #include "src/api-natives.h" | |
13 #include "src/arguments.h" | 13 #include "src/arguments.h" |
14 #include "src/factory.h" | 14 #include "src/factory.h" |
15 #include "src/i18n.h" | 15 #include "src/i18n.h" |
16 #include "src/isolate-inl.h" | 16 #include "src/isolate-inl.h" |
17 #include "src/messages.h" | 17 #include "src/messages.h" |
| 18 #include "src/utils.h" |
18 | 19 |
19 #include "unicode/brkiter.h" | 20 #include "unicode/brkiter.h" |
20 #include "unicode/calendar.h" | 21 #include "unicode/calendar.h" |
21 #include "unicode/coll.h" | 22 #include "unicode/coll.h" |
22 #include "unicode/curramt.h" | 23 #include "unicode/curramt.h" |
23 #include "unicode/datefmt.h" | 24 #include "unicode/datefmt.h" |
24 #include "unicode/dcfmtsym.h" | 25 #include "unicode/dcfmtsym.h" |
25 #include "unicode/decimfmt.h" | 26 #include "unicode/decimfmt.h" |
26 #include "unicode/dtfmtsym.h" | 27 #include "unicode/dtfmtsym.h" |
27 #include "unicode/dtptngen.h" | 28 #include "unicode/dtptngen.h" |
(...skipping 1058 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1086 | 1087 |
1087 RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) { | 1088 RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) { |
1088 HandleScope scope(isolate); | 1089 HandleScope scope(isolate); |
1089 DCHECK_EQ(args.length(), 1); | 1090 DCHECK_EQ(args.length(), 1); |
1090 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); | 1091 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
1091 | 1092 |
1092 int length = s->length(); | 1093 int length = s->length(); |
1093 s = String::Flatten(s); | 1094 s = String::Flatten(s); |
1094 // First scan the string for uppercase and non-ASCII characters: | 1095 // First scan the string for uppercase and non-ASCII characters: |
1095 if (s->HasOnlyOneByteChars()) { | 1096 if (s->HasOnlyOneByteChars()) { |
1096 int first_index_to_lower = length; | |
1097 for (int index = 0; index < length; ++index) { | |
1098 // Blink specializes this path for one-byte strings, so it | |
1099 // does not need to do a generic get, but can do the equivalent | |
1100 // of SeqOneByteStringGet. | |
1101 uint16_t ch = s->Get(index); | |
1102 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { | |
1103 first_index_to_lower = index; | |
1104 break; | |
1105 } | |
1106 } | |
1107 | |
1108 // Nothing to do if the string is all ASCII with no uppercase. | |
1109 if (first_index_to_lower == length) return *s; | |
1110 | |
1111 // We depend here on the invariant that the length of a Latin1 | 1097 // We depend here on the invariant that the length of a Latin1 |
1112 // string is invariant under ToLowerCase, and the result always | 1098 // string is invariant under ToLowerCase, and the result always |
1113 // fits in the Latin1 range in the *root locale*. It does not hold | 1099 // fits in the Latin1 range in the *root locale*. It does not hold |
1114 // for ToUpperCase even in the root locale. | 1100 // for ToUpperCase even in the root locale. |
1115 Handle<SeqOneByteString> result; | 1101 Handle<SeqOneByteString> result; |
1116 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 1102 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1117 isolate, result, isolate->factory()->NewRawOneByteString(length)); | 1103 isolate, result, isolate->factory()->NewRawOneByteString(length)); |
1118 | 1104 |
1119 DisallowHeapAllocation no_gc; | 1105 DisallowHeapAllocation no_gc; |
1120 String::FlatContent flat = s->GetFlatContent(); | 1106 String::FlatContent flat = s->GetFlatContent(); |
| 1107 uint8_t* dest = result->GetChars(); |
| 1108 const uint8_t* src = flat.ToOneByteVector().start(); |
| 1109 if (flat.IsOneByte() && static_cast<size_t>(length) >= sizeof(uintptr_t)) { |
| 1110 bool has_changed_character = false; |
| 1111 bool is_ascii = FastAsciiConvert<true>(reinterpret_cast<char*>(dest), |
| 1112 reinterpret_cast<const char*>(src), |
| 1113 length, &has_changed_character); |
| 1114 // If not ASCII, we discard the result and start anew. |
| 1115 if (is_ascii) return has_changed_character ? *result : *s; |
| 1116 } |
| 1117 |
| 1118 int index_to_first_upper = 0; |
| 1119 for (int index = 0; index < length; ++index) { |
| 1120 uint16_t ch = s->Get(index); |
| 1121 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { |
| 1122 index_to_first_upper = index; |
| 1123 break; |
| 1124 } |
| 1125 } |
| 1126 |
| 1127 // An ASCII input without any uppercase characters is already handled by |
| 1128 // FastAsciiConvert as long as the input is a machine-word or longer. |
| 1129 DCHECK(index_to_first_upper < length || |
| 1130 static_cast<size_t>(length) < sizeof(uintptr_t)); |
| 1131 // Nothing to do if the string is all ASCII with no uppercase. |
| 1132 if (index_to_first_upper == length) return *s; |
1121 if (flat.IsOneByte()) { | 1133 if (flat.IsOneByte()) { |
1122 const uint8_t* src = flat.ToOneByteVector().start(); | 1134 CopyChars(dest, src, static_cast<size_t>(index_to_first_upper)); |
1123 CopyChars(result->GetChars(), src, | 1135 for (int index = index_to_first_upper; index < length; ++index) { |
1124 static_cast<size_t>(first_index_to_lower)); | |
1125 for (int index = first_index_to_lower; index < length; ++index) { | |
1126 uint16_t ch = static_cast<uint16_t>(src[index]); | 1136 uint16_t ch = static_cast<uint16_t>(src[index]); |
1127 result->SeqOneByteStringSet(index, ToLatin1Lower(ch)); | 1137 result->SeqOneByteStringSet(index, ToLatin1Lower(ch)); |
1128 } | 1138 } |
1129 } else { | 1139 } else { |
1130 const uint16_t* src = flat.ToUC16Vector().start(); | 1140 const uint16_t* src = flat.ToUC16Vector().start(); |
1131 CopyChars(result->GetChars(), src, | 1141 CopyChars(dest, src, static_cast<size_t>(index_to_first_upper)); |
1132 static_cast<size_t>(first_index_to_lower)); | 1142 for (int index = index_to_first_upper; index < length; ++index) { |
1133 for (int index = first_index_to_lower; index < length; ++index) { | |
1134 uint16_t ch = src[index]; | 1143 uint16_t ch = src[index]; |
1135 result->SeqOneByteStringSet(index, ToLatin1Lower(ch)); | 1144 result->SeqOneByteStringSet(index, ToLatin1Lower(ch)); |
1136 } | 1145 } |
1137 } | 1146 } |
1138 | 1147 |
1139 return *result; | 1148 return *result; |
1140 } | 1149 } |
1141 | 1150 |
1142 // Blink had an additional case here for ASCII 2-byte strings, but | 1151 // Blink had an additional case here for ASCII 2-byte strings, but |
1143 // that is subsumed by the above code (assuming there isn't a false | 1152 // that is subsumed by the above code (assuming there isn't a false |
1144 // negative for HasOnlyOneByteChars). | 1153 // negative for HasOnlyOneByteChars). |
1145 | 1154 |
1146 // Do a slower implementation for cases that include non-ASCII characters. | 1155 // Do a slower implementation for cases that include non-ASCII characters. |
1147 return LocaleConvertCase(s, isolate, false, ""); | 1156 return LocaleConvertCase(s, isolate, false, ""); |
1148 } | 1157 } |
1149 | 1158 |
1150 RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) { | 1159 RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) { |
1151 HandleScope scope(isolate); | 1160 HandleScope scope(isolate); |
1152 DCHECK_EQ(args.length(), 1); | 1161 DCHECK_EQ(args.length(), 1); |
1153 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); | 1162 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
1154 | 1163 |
1155 // This function could be optimized for no-op cases the way lowercase | |
1156 // counterpart is, but in empirical testing, few actual calls to upper() | |
1157 // are no-ops. So, it wouldn't be worth the extra time for pre-scanning. | |
1158 | |
1159 int32_t length = s->length(); | 1164 int32_t length = s->length(); |
1160 s = String::Flatten(s); | 1165 s = String::Flatten(s); |
1161 | 1166 |
1162 if (s->HasOnlyOneByteChars()) { | 1167 if (s->HasOnlyOneByteChars()) { |
1163 Handle<SeqOneByteString> result; | 1168 Handle<SeqOneByteString> result; |
1164 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( | 1169 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
1165 isolate, result, isolate->factory()->NewRawOneByteString(length)); | 1170 isolate, result, isolate->factory()->NewRawOneByteString(length)); |
1166 | 1171 |
1167 int sharp_s_count; | 1172 int sharp_s_count; |
1168 bool is_result_single_byte; | 1173 bool is_result_single_byte; |
1169 { | 1174 { |
1170 DisallowHeapAllocation no_gc; | 1175 DisallowHeapAllocation no_gc; |
1171 String::FlatContent flat = s->GetFlatContent(); | 1176 String::FlatContent flat = s->GetFlatContent(); |
1172 // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII | |
1173 // could be removed because ToUpperOneByte is pretty fast now (it | |
1174 // does not call ICU API any more.). | |
1175 if (flat.IsOneByte()) { | 1177 if (flat.IsOneByte()) { |
1176 Vector<const uint8_t> src = flat.ToOneByteVector(); | 1178 Vector<const uint8_t> src = flat.ToOneByteVector(); |
1177 if (ToUpperFastASCII(src, result)) return *result; | 1179 bool has_changed_character = false; |
| 1180 bool is_ascii = |
| 1181 FastAsciiConvert<false>(reinterpret_cast<char*>(result->GetChars()), |
| 1182 reinterpret_cast<const char*>(src.start()), |
| 1183 length, &has_changed_character); |
| 1184 // If not ASCII, we discard the result and use the table for Latin1. |
| 1185 if (is_ascii) return has_changed_character ? *result : *s; |
1178 is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count); | 1186 is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count); |
1179 } else { | 1187 } else { |
1180 DCHECK(flat.IsTwoByte()); | 1188 DCHECK(flat.IsTwoByte()); |
1181 Vector<const uint16_t> src = flat.ToUC16Vector(); | 1189 Vector<const uint16_t> src = flat.ToUC16Vector(); |
1182 if (ToUpperFastASCII(src, result)) return *result; | 1190 if (ToUpperFastASCII(src, result)) return *result; |
1183 is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count); | 1191 is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count); |
1184 } | 1192 } |
1185 } | 1193 } |
1186 | 1194 |
1187 // Go to the full Unicode path if there are characters whose uppercase | 1195 // Go to the full Unicode path if there are characters whose uppercase |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1247 Handle<FixedArray> date_cache_version = | 1255 Handle<FixedArray> date_cache_version = |
1248 Handle<FixedArray>::cast(isolate->eternal_handles()->GetSingleton( | 1256 Handle<FixedArray>::cast(isolate->eternal_handles()->GetSingleton( |
1249 EternalHandles::DATE_CACHE_VERSION)); | 1257 EternalHandles::DATE_CACHE_VERSION)); |
1250 return date_cache_version->get(0); | 1258 return date_cache_version->get(0); |
1251 } | 1259 } |
1252 | 1260 |
1253 } // namespace internal | 1261 } // namespace internal |
1254 } // namespace v8 | 1262 } // namespace v8 |
1255 | 1263 |
1256 #endif // V8_I18N_SUPPORT | 1264 #endif // V8_I18N_SUPPORT |
OLD | NEW |