OLD | NEW |
---|---|
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 1184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1195 zone); | 1195 zone); |
1196 } else if (NameEquals(name, "Assigned")) { | 1196 } else if (NameEquals(name, "Assigned")) { |
1197 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", | 1197 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", |
1198 !negate, result, zone); | 1198 !negate, result, zone); |
1199 } else { | 1199 } else { |
1200 return false; | 1200 return false; |
1201 } | 1201 } |
1202 return true; | 1202 return true; |
1203 } | 1203 } |
1204 | 1204 |
1205 // Explicitly whitelist supported binary properties. The spec forbids supporting | |
1206 // properties outside of this set to ensure interoperability. | |
// Returns true only for the properties listed in the switch below; any other | |
// UProperty value falls through to the default case and yields false. | |
// "Any", "ASCII", and "Assigned" have no case here: ParsePropertyClass | |
// interprets them through LookupSpecialPropertyValueName before it consults | |
// this function. | |
1207 bool IsSupportedBinaryProperty(UProperty property) { | |
1208 switch (property) { | |
1209 case UCHAR_ALPHABETIC: | |
mathias
2017/04/18 08:29:38
Maybe add a comment here explaining that Any and A
| |
1210 case UCHAR_ASCII_HEX_DIGIT: | |
mathias
2017/04/18 08:29:38
Same here with `Assigned`.
| |
1211 case UCHAR_BIDI_CONTROL: | |
1212 case UCHAR_BIDI_MIRRORED: | |
1213 case UCHAR_CASE_IGNORABLE: | |
1214 case UCHAR_CASED: | |
1215 case UCHAR_CHANGES_WHEN_CASEFOLDED: | |
1216 case UCHAR_CHANGES_WHEN_CASEMAPPED: | |
1217 case UCHAR_CHANGES_WHEN_LOWERCASED: | |
1218 case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED: | |
1219 case UCHAR_CHANGES_WHEN_TITLECASED: | |
1220 case UCHAR_CHANGES_WHEN_UPPERCASED: | |
1221 case UCHAR_DASH: | |
1222 case UCHAR_DEFAULT_IGNORABLE_CODE_POINT: | |
1223 case UCHAR_DEPRECATED: | |
1224 case UCHAR_DIACRITIC: | |
1225 case UCHAR_EMOJI: | |
1226 // TODO(yangguo): Uncomment this once we upgrade to ICU 60. | |
1227 // See https://ssl.icu-project.org/trac/ticket/13062 | |
1228 // case UCHAR_EMOJI_COMPONENT: | |
1229 case UCHAR_EMOJI_MODIFIER_BASE: | |
1230 case UCHAR_EMOJI_MODIFIER: | |
1231 case UCHAR_EMOJI_PRESENTATION: | |
1232 case UCHAR_EXTENDER: | |
1233 case UCHAR_GRAPHEME_BASE: | |
1234 case UCHAR_GRAPHEME_EXTEND: | |
1235 case UCHAR_HEX_DIGIT: | |
1236 case UCHAR_ID_CONTINUE: | |
1237 case UCHAR_ID_START: | |
1238 case UCHAR_IDEOGRAPHIC: | |
1239 case UCHAR_IDS_BINARY_OPERATOR: | |
1240 case UCHAR_IDS_TRINARY_OPERATOR: | |
1241 case UCHAR_JOIN_CONTROL: | |
1242 case UCHAR_LOGICAL_ORDER_EXCEPTION: | |
1243 case UCHAR_LOWERCASE: | |
1244 case UCHAR_MATH: | |
1245 case UCHAR_NONCHARACTER_CODE_POINT: | |
1246 case UCHAR_PATTERN_SYNTAX: | |
1247 case UCHAR_PATTERN_WHITE_SPACE: | |
1248 case UCHAR_QUOTATION_MARK: | |
1249 case UCHAR_RADICAL: | |
1250 case UCHAR_S_TERM: | |
1251 case UCHAR_SOFT_DOTTED: | |
1252 case UCHAR_TERMINAL_PUNCTUATION: | |
1253 case UCHAR_UNIFIED_IDEOGRAPH: | |
1254 case UCHAR_UPPERCASE: | |
1255 case UCHAR_VARIATION_SELECTOR: | |
1256 case UCHAR_WHITE_SPACE: | |
1257 case UCHAR_XID_CONTINUE: | |
1258 case UCHAR_XID_START: | |
1259 return true; | |
1260 default: | |
1261 break; | |
1262 } | |
1263 return false; | |
1264 } | |
1265 | |
1205 } // anonymous namespace | 1266 } // anonymous namespace |
1206 | 1267 |
1207 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, | 1268 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
1208 bool negate) { | 1269 bool negate) { |
1209 // Parse the property class as follows: | 1270 // Parse the property class as follows: |
1210 // - In \p{name}, 'name' is interpreted | 1271 // - In \p{name}, 'name' is interpreted |
1211 // - either as a general category property value name. | 1272 // - either as a general category property value name. |
1212 // - or as a binary property name. | 1273 // - or as a binary property name. |
1213 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, | 1274 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, |
1214 // and 'value' is interpreted as one of the available property value names. | 1275 // and 'value' is interpreted as one of the available property value names. |
(...skipping 26 matching lines...) Expand all Loading... | |
1241 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, | 1302 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, |
1242 result, zone())) { | 1303 result, zone())) { |
1243 return true; | 1304 return true; |
1244 } | 1305 } |
1245 // Interpret "Any", "ASCII", and "Assigned". | 1306 // Interpret "Any", "ASCII", and "Assigned". |
1246 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { | 1307 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { |
1247 return true; | 1308 return true; |
1248 } | 1309 } |
1249 // Then attempt to interpret as binary property name with value name 'Y'. | 1310 // Then attempt to interpret as binary property name with value name 'Y'. |
1250 UProperty property = u_getPropertyEnum(name); | 1311 UProperty property = u_getPropertyEnum(name); |
1251 if (property < UCHAR_BINARY_START) return false; | 1312 if (!IsSupportedBinaryProperty(property)) return false; |
1252 if (property >= UCHAR_BINARY_LIMIT) return false; | |
1253 if (!IsExactPropertyAlias(name, property)) return false; | 1313 if (!IsExactPropertyAlias(name, property)) return false; |
1254 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, | 1314 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, |
1255 zone()); | 1315 zone()); |
1256 } else { | 1316 } else { |
1257 // Both property name and value name are specified. Attempt to interpret | 1317 // Both property name and value name are specified. Attempt to interpret |
1258 // the property name as enumerated property. | 1318 // the property name as enumerated property. |
1259 const char* property_name = first_part.ToConstVector().start(); | 1319 const char* property_name = first_part.ToConstVector().start(); |
1260 const char* value_name = second_part.ToConstVector().start(); | 1320 const char* value_name = second_part.ToConstVector().start(); |
1261 UProperty property = u_getPropertyEnum(property_name); | 1321 UProperty property = u_getPropertyEnum(property_name); |
1262 if (!IsExactPropertyAlias(property_name, property)) return false; | 1322 if (!IsExactPropertyAlias(property_name, property)) return false; |
(...skipping 608 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1871 return false; | 1931 return false; |
1872 } | 1932 } |
1873 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1933 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1874 zone()); | 1934 zone()); |
1875 LAST(ADD_TERM); | 1935 LAST(ADD_TERM); |
1876 return true; | 1936 return true; |
1877 } | 1937 } |
1878 | 1938 |
1879 } // namespace internal | 1939 } // namespace internal |
1880 } // namespace v8 | 1940 } // namespace v8 |
OLD | NEW |