OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 1125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1136 for (int i = 0;; i++) { | 1136 for (int i = 0;; i++) { |
1137 const char* long_name = u_getPropertyValueName( | 1137 const char* long_name = u_getPropertyValueName( |
1138 property, property_value, | 1138 property, property_value, |
1139 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); | 1139 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); |
1140 if (long_name == NULL) break; | 1140 if (long_name == NULL) break; |
1141 if (strcmp(property_value_name, long_name) == 0) return true; | 1141 if (strcmp(property_value_name, long_name) == 0) return true; |
1142 } | 1142 } |
1143 return false; | 1143 return false; |
1144 } | 1144 } |
1145 | 1145 |
1146 bool LookupPropertyName(UProperty property, const char* property_value_name, | 1146 bool LookupPropertyValueName(UProperty property, |
1147 bool negate, ZoneList<CharacterRange>* result, | 1147 const char* property_value_name, bool negate, |
1148 Zone* zone) { | 1148 ZoneList<CharacterRange>* result, Zone* zone) { |
1149 UProperty property_for_lookup = property; | 1149 UProperty property_for_lookup = property; |
1150 if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) { | 1150 if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) { |
1151 // For the property Script_Extensions, we have to do the property value | 1151 // For the property Script_Extensions, we have to do the property value |
1152 // name lookup as if the property is Script. | 1152 // name lookup as if the property is Script. |
1153 property_for_lookup = UCHAR_SCRIPT; | 1153 property_for_lookup = UCHAR_SCRIPT; |
1154 } | 1154 } |
1155 int32_t property_value = | 1155 int32_t property_value = |
1156 u_getPropertyValueEnum(property_for_lookup, property_value_name); | 1156 u_getPropertyValueEnum(property_for_lookup, property_value_name); |
1157 if (property_value == UCHAR_INVALID_CODE) return false; | 1157 if (property_value == UCHAR_INVALID_CODE) return false; |
1158 | 1158 |
(...skipping 19 matching lines...) Expand all Loading... |
1178 } | 1178 } |
1179 } | 1179 } |
1180 return success; | 1180 return success; |
1181 } | 1181 } |
1182 | 1182 |
// Returns true iff the NUL-terminated string |name| is exactly equal to the
// string literal |literal|.
//
// N is the size of the literal's character array and therefore already
// includes the terminating NUL. Bounding the comparison at N bytes is enough
// to compare the terminator as well, so a |name| that merely starts with
// |literal| is rejected, and the bound never exceeds the literal's storage.
template <size_t N>
inline bool NameEquals(const char* name, const char (&literal)[N]) {
  return strncmp(name, literal, N) == 0;
}
1187 | 1187 |
1188 bool LookupSpecialPropertyName(const char* name, | 1188 bool LookupSpecialPropertyValueName(const char* name, |
1189 ZoneList<CharacterRange>* result, bool negate, | 1189 ZoneList<CharacterRange>* result, |
1190 Zone* zone) { | 1190 bool negate, Zone* zone) { |
1191 if (NameEquals(name, "Any")) { | 1191 if (NameEquals(name, "Any")) { |
1192 if (!negate) result->Add(CharacterRange::Everything(), zone); | 1192 if (!negate) result->Add(CharacterRange::Everything(), zone); |
1193 } else if (NameEquals(name, "ASCII")) { | 1193 } else if (NameEquals(name, "ASCII")) { |
1194 result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) | 1194 result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) |
1195 : CharacterRange::Range(0x0, 0x7f), | 1195 : CharacterRange::Range(0x0, 0x7f), |
1196 zone); | 1196 zone); |
1197 } else if (NameEquals(name, "Assigned")) { | 1197 } else if (NameEquals(name, "Assigned")) { |
1198 return LookupPropertyName(UCHAR_GENERAL_CATEGORY, "Unassigned", !negate, | 1198 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", |
1199 result, zone); | 1199 !negate, result, zone); |
1200 } else if (NameEquals(name, "Other_ID_Start") || NameEquals(name, "OIDS")) { | |
1201 // From Unicode 9.0.0 PropList.txt | |
1202 // 1885..1886 ; Other_ID_Start | |
1203 // 2118 ; Other_ID_Start | |
1204 // 212E ; Other_ID_Start | |
1205 // 309B..309C ; Other_ID_Start | |
1206 if (negate) { | |
1207 result->Add(CharacterRange::Range(0x0000, 0x1884), zone); | |
1208 result->Add(CharacterRange::Range(0x1887, 0x2117), zone); | |
1209 result->Add(CharacterRange::Range(0x2119, 0x212D), zone); | |
1210 result->Add(CharacterRange::Range(0x212F, 0x309A), zone); | |
1211 result->Add(CharacterRange::Range(0x309D, String::kMaxCodePoint), zone); | |
1212 } else { | |
1213 result->Add(CharacterRange::Range(0x1885, 0x1886), zone); | |
1214 result->Add(CharacterRange::Singleton(0x2118), zone); | |
1215 result->Add(CharacterRange::Singleton(0x212E), zone); | |
1216 result->Add(CharacterRange::Range(0x309B, 0x309C), zone); | |
1217 } | |
1218 } else if (NameEquals(name, "Other_ID_Continue") || | |
1219 NameEquals(name, "OIDC")) { | |
1220 // From Unicode 9.0.0 PropList.txt | |
1221 // 00B7 ; Other_ID_Continue | |
1222 // 0387 ; Other_ID_Continue | |
1223 // 1369..1371 ; Other_ID_Continue | |
1224 // 19DA ; Other_ID_Continue | |
1225 if (negate) { | |
1226 result->Add(CharacterRange::Range(0x0000, 0x00B6), zone); | |
1227 result->Add(CharacterRange::Range(0x00B8, 0x0386), zone); | |
1228 result->Add(CharacterRange::Range(0x0388, 0x1368), zone); | |
1229 result->Add(CharacterRange::Range(0x1372, 0x19D9), zone); | |
1230 result->Add(CharacterRange::Range(0x19DB, String::kMaxCodePoint), zone); | |
1231 } else { | |
1232 result->Add(CharacterRange::Singleton(0x00B7), zone); | |
1233 result->Add(CharacterRange::Singleton(0x0387), zone); | |
1234 result->Add(CharacterRange::Range(0x1369, 0x1371), zone); | |
1235 result->Add(CharacterRange::Singleton(0x19DA), zone); | |
1236 } | |
1237 } else { | 1200 } else { |
1238 return false; | 1201 return false; |
1239 } | 1202 } |
1240 return true; | 1203 return true; |
1241 } | 1204 } |
1242 | 1205 |
1243 } // anonymous namespace | 1206 } // anonymous namespace |
1244 | 1207 |
1245 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, | 1208 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
1246 bool negate) { | 1209 bool negate) { |
(...skipping 22 matching lines...) Expand all Loading... |
1269 } | 1232 } |
1270 } else { | 1233 } else { |
1271 return false; | 1234 return false; |
1272 } | 1235 } |
1273 Advance(); | 1236 Advance(); |
1274 first_part.Add(0); // null-terminate string. | 1237 first_part.Add(0); // null-terminate string. |
1275 | 1238 |
1276 if (second_part.is_empty()) { | 1239 if (second_part.is_empty()) { |
1277 // First attempt to interpret as general category property value name. | 1240 // First attempt to interpret as general category property value name. |
1278 const char* name = first_part.ToConstVector().start(); | 1241 const char* name = first_part.ToConstVector().start(); |
1279 if (LookupPropertyName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, result, | 1242 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, |
1280 zone())) { | 1243 result, zone())) { |
1281 return true; | 1244 return true; |
1282 } | 1245 } |
1283 | 1246 // Interpret "Any", "ASCII", and "Assigned". |
| 1247 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { |
| 1248 return true; |
| 1249 } |
1284 // Then attempt to interpret as binary property name with value name 'Y'. | 1250 // Then attempt to interpret as binary property name with value name 'Y'. |
1285 UProperty property = u_getPropertyEnum(name); | 1251 UProperty property = u_getPropertyEnum(name); |
1286 if (property == UCHAR_INVALID_CODE) { | |
1287 // Interpret "Any", "ASCII", "Assigned", "Other_ID_Start", and | |
1288 // "Other_ID_Continue". | |
1289 return LookupSpecialPropertyName(name, result, negate, zone()); | |
1290 } | |
1291 if (property < UCHAR_BINARY_START) return false; | 1252 if (property < UCHAR_BINARY_START) return false; |
1292 if (property >= UCHAR_BINARY_LIMIT) return false; | 1253 if (property >= UCHAR_BINARY_LIMIT) return false; |
1293 if (!IsExactPropertyAlias(name, property)) return false; | 1254 if (!IsExactPropertyAlias(name, property)) return false; |
1294 return LookupPropertyName(property, negate ? "N" : "Y", false, result, | 1255 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, |
1295 zone()); | 1256 zone()); |
1296 } else { | 1257 } else { |
1297 // Both property name and value name are specified. Attempt to interpret | 1258 // Both property name and value name are specified. Attempt to interpret |
1298 // the property name as enumerated property. | 1259 // the property name as enumerated property. |
1299 const char* property_name = first_part.ToConstVector().start(); | 1260 const char* property_name = first_part.ToConstVector().start(); |
1300 const char* value_name = second_part.ToConstVector().start(); | 1261 const char* value_name = second_part.ToConstVector().start(); |
1301 UProperty property = u_getPropertyEnum(property_name); | 1262 UProperty property = u_getPropertyEnum(property_name); |
1302 if (!IsExactPropertyAlias(property_name, property)) return false; | 1263 if (!IsExactPropertyAlias(property_name, property)) return false; |
1303 if (property == UCHAR_GENERAL_CATEGORY) { | 1264 if (property == UCHAR_GENERAL_CATEGORY) { |
1304 // We want to allow aggregate value names such as "Letter". | 1265 // We want to allow aggregate value names such as "Letter". |
1305 property = UCHAR_GENERAL_CATEGORY_MASK; | 1266 property = UCHAR_GENERAL_CATEGORY_MASK; |
1306 } else if (property != UCHAR_SCRIPT && | 1267 } else if (property != UCHAR_SCRIPT && |
1307 property != UCHAR_SCRIPT_EXTENSIONS) { | 1268 property != UCHAR_SCRIPT_EXTENSIONS) { |
1308 return false; | 1269 return false; |
1309 } | 1270 } |
1310 return LookupPropertyName(property, value_name, negate, result, zone()); | 1271 return LookupPropertyValueName(property, value_name, negate, result, |
| 1272 zone()); |
1311 } | 1273 } |
1312 } | 1274 } |
1313 | 1275 |
1314 #else // V8_I18N_SUPPORT | 1276 #else // V8_I18N_SUPPORT |
1315 | 1277 |
1316 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, | 1278 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
1317 bool negate) { | 1279 bool negate) { |
1318 return false; | 1280 return false; |
1319 } | 1281 } |
1320 | 1282 |
(...skipping 587 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1908 return false; | 1870 return false; |
1909 } | 1871 } |
1910 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1872 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1911 zone()); | 1873 zone()); |
1912 LAST(ADD_TERM); | 1874 LAST(ADD_TERM); |
1913 return true; | 1875 return true; |
1914 } | 1876 } |
1915 | 1877 |
1916 } // namespace internal | 1878 } // namespace internal |
1917 } // namespace v8 | 1879 } // namespace v8 |
OLD | NEW |