Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| (...skipping 1184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1195 zone); | 1195 zone); |
| 1196 } else if (NameEquals(name, "Assigned")) { | 1196 } else if (NameEquals(name, "Assigned")) { |
| 1197 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", | 1197 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", |
| 1198 !negate, result, zone); | 1198 !negate, result, zone); |
| 1199 } else { | 1199 } else { |
| 1200 return false; | 1200 return false; |
| 1201 } | 1201 } |
| 1202 return true; | 1202 return true; |
| 1203 } | 1203 } |
| 1204 | 1204 |
| | 1205 // Explicitly whitelist supported binary properties. The spec forbids supporting |
| | 1206 // properties outside of this set to ensure interoperability. |
| | 1207 bool IsSupportedBinaryProperty(UProperty property) { |
| | 1208 switch (property) { |
| | 1209 case UCHAR_ALPHABETIC: |
|
mathias
2017/04/18 08:29:38
Maybe add a comment here explaining that Any and A
| |
| | 1210 case UCHAR_ASCII_HEX_DIGIT: |
|
mathias
2017/04/18 08:29:38
Same here with `Assigned`.
| |
| | 1211 case UCHAR_BIDI_CONTROL: |
| | 1212 case UCHAR_BIDI_MIRRORED: |
| | 1213 case UCHAR_CASE_IGNORABLE: |
| | 1214 case UCHAR_CASED: |
| | 1215 case UCHAR_CHANGES_WHEN_CASEFOLDED: |
| | 1216 case UCHAR_CHANGES_WHEN_CASEMAPPED: |
| | 1217 case UCHAR_CHANGES_WHEN_LOWERCASED: |
| | 1218 case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED: |
| | 1219 case UCHAR_CHANGES_WHEN_TITLECASED: |
| | 1220 case UCHAR_CHANGES_WHEN_UPPERCASED: |
| | 1221 case UCHAR_DASH: |
| | 1222 case UCHAR_DEFAULT_IGNORABLE_CODE_POINT: |
| | 1223 case UCHAR_DEPRECATED: |
| | 1224 case UCHAR_DIACRITIC: |
| | 1225 case UCHAR_EMOJI: |
| | 1226 // TODO(yangguo): Uncomment this once we upgrade to ICU 60. |
| | 1227 // See https://ssl.icu-project.org/trac/ticket/13062 |
| | 1228 // case UCHAR_EMOJI_COMPONENT: |
| | 1229 case UCHAR_EMOJI_MODIFIER_BASE: |
| | 1230 case UCHAR_EMOJI_MODIFIER: |
| | 1231 case UCHAR_EMOJI_PRESENTATION: |
| | 1232 case UCHAR_EXTENDER: |
| | 1233 case UCHAR_GRAPHEME_BASE: |
| | 1234 case UCHAR_GRAPHEME_EXTEND: |
| | 1235 case UCHAR_HEX_DIGIT: |
| | 1236 case UCHAR_ID_CONTINUE: |
| | 1237 case UCHAR_ID_START: |
| | 1238 case UCHAR_IDEOGRAPHIC: |
| | 1239 case UCHAR_IDS_BINARY_OPERATOR: |
| | 1240 case UCHAR_IDS_TRINARY_OPERATOR: |
| | 1241 case UCHAR_JOIN_CONTROL: |
| | 1242 case UCHAR_LOGICAL_ORDER_EXCEPTION: |
| | 1243 case UCHAR_LOWERCASE: |
| | 1244 case UCHAR_MATH: |
| | 1245 case UCHAR_NONCHARACTER_CODE_POINT: |
| | 1246 case UCHAR_PATTERN_SYNTAX: |
| | 1247 case UCHAR_PATTERN_WHITE_SPACE: |
| | 1248 case UCHAR_QUOTATION_MARK: |
| | 1249 case UCHAR_RADICAL: |
| | 1250 case UCHAR_S_TERM: |
| | 1251 case UCHAR_SOFT_DOTTED: |
| | 1252 case UCHAR_TERMINAL_PUNCTUATION: |
| | 1253 case UCHAR_UNIFIED_IDEOGRAPH: |
| | 1254 case UCHAR_UPPERCASE: |
| | 1255 case UCHAR_VARIATION_SELECTOR: |
| | 1256 case UCHAR_WHITE_SPACE: |
| | 1257 case UCHAR_XID_CONTINUE: |
| | 1258 case UCHAR_XID_START: |
| | 1259 return true; |
| | 1260 default: |
| | 1261 break; |
| | 1262 } |
| | 1263 return false; |
| | 1264 } |
| | 1265 |
| 1205 } // anonymous namespace | 1266 } // anonymous namespace |
| 1206 | 1267 |
| 1207 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, | 1268 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
| 1208 bool negate) { | 1269 bool negate) { |
| 1209 // Parse the property class as follows: | 1270 // Parse the property class as follows: |
| 1210 // - In \p{name}, 'name' is interpreted | 1271 // - In \p{name}, 'name' is interpreted |
| 1211 // - either as a general category property value name. | 1272 // - either as a general category property value name. |
| 1212 // - or as a binary property name. | 1273 // - or as a binary property name. |
| 1213 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, | 1274 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, |
| 1214 // and 'value' is interpreted as one of the available property value names. | 1275 // and 'value' is interpreted as one of the available property value names. |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 1241 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, | 1302 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, |
| 1242 result, zone())) { | 1303 result, zone())) { |
| 1243 return true; | 1304 return true; |
| 1244 } | 1305 } |
| 1245 // Interpret "Any", "ASCII", and "Assigned". | 1306 // Interpret "Any", "ASCII", and "Assigned". |
| 1246 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { | 1307 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { |
| 1247 return true; | 1308 return true; |
| 1248 } | 1309 } |
| 1249 // Then attempt to interpret as binary property name with value name 'Y'. | 1310 // Then attempt to interpret as binary property name with value name 'Y'. |
| 1250 UProperty property = u_getPropertyEnum(name); | 1311 UProperty property = u_getPropertyEnum(name); |
| 1251 if (property < UCHAR_BINARY_START) return false; | 1312 if (!IsSupportedBinaryProperty(property)) return false; |
| 1252 if (property >= UCHAR_BINARY_LIMIT) return false; | |
| 1253 if (!IsExactPropertyAlias(name, property)) return false; | 1313 if (!IsExactPropertyAlias(name, property)) return false; |
| 1254 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, | 1314 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, |
| 1255 zone()); | 1315 zone()); |
| 1256 } else { | 1316 } else { |
| 1257 // Both property name and value name are specified. Attempt to interpret | 1317 // Both property name and value name are specified. Attempt to interpret |
| 1258 // the property name as enumerated property. | 1318 // the property name as enumerated property. |
| 1259 const char* property_name = first_part.ToConstVector().start(); | 1319 const char* property_name = first_part.ToConstVector().start(); |
| 1260 const char* value_name = second_part.ToConstVector().start(); | 1320 const char* value_name = second_part.ToConstVector().start(); |
| 1261 UProperty property = u_getPropertyEnum(property_name); | 1321 UProperty property = u_getPropertyEnum(property_name); |
| 1262 if (!IsExactPropertyAlias(property_name, property)) return false; | 1322 if (!IsExactPropertyAlias(property_name, property)) return false; |
| (...skipping 608 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1871 return false; | 1931 return false; |
| 1872 } | 1932 } |
| 1873 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1933 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1874 zone()); | 1934 zone()); |
| 1875 LAST(ADD_TERM); | 1935 LAST(ADD_TERM); |
| 1876 return true; | 1936 return true; |
| 1877 } | 1937 } |
| 1878 | 1938 |
| 1879 } // namespace internal | 1939 } // namespace internal |
| 1880 } // namespace v8 | 1940 } // namespace v8 |
| OLD | NEW |