OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 1184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1195 zone); | 1195 zone); |
1196 } else if (NameEquals(name, "Assigned")) { | 1196 } else if (NameEquals(name, "Assigned")) { |
1197 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", | 1197 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", |
1198 !negate, result, zone); | 1198 !negate, result, zone); |
1199 } else { | 1199 } else { |
1200 return false; | 1200 return false; |
1201 } | 1201 } |
1202 return true; | 1202 return true; |
1203 } | 1203 } |
1204 | 1204 |
| 1205 // Explicitly whitelist supported binary properties. The spec forbids supporting |
| 1206 // properties outside of this set to ensure interoperability. |
| 1207 bool IsSupportedBinaryProperty(UProperty property) { |
| 1208 switch (property) { |
| 1209 case UCHAR_ALPHABETIC: |
| 1210 // 'Any' is not supported by ICU. See LookupSpecialPropertyValueName. |
| 1211 // 'ASCII' is not supported by ICU. See LookupSpecialPropertyValueName. |
| 1212 case UCHAR_ASCII_HEX_DIGIT: |
| 1213 // 'Assigned' is not supported by ICU. See LookupSpecialPropertyValueName. |
| 1214 case UCHAR_BIDI_CONTROL: |
| 1215 case UCHAR_BIDI_MIRRORED: |
| 1216 case UCHAR_CASE_IGNORABLE: |
| 1217 case UCHAR_CASED: |
| 1218 case UCHAR_CHANGES_WHEN_CASEFOLDED: |
| 1219 case UCHAR_CHANGES_WHEN_CASEMAPPED: |
| 1220 case UCHAR_CHANGES_WHEN_LOWERCASED: |
| 1221 case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED: |
| 1222 case UCHAR_CHANGES_WHEN_TITLECASED: |
| 1223 case UCHAR_CHANGES_WHEN_UPPERCASED: |
| 1224 case UCHAR_DASH: |
| 1225 case UCHAR_DEFAULT_IGNORABLE_CODE_POINT: |
| 1226 case UCHAR_DEPRECATED: |
| 1227 case UCHAR_DIACRITIC: |
| 1228 case UCHAR_EMOJI: |
| 1229 // TODO(yangguo): Uncomment this once we upgrade to ICU 60. |
| 1230 // See https://ssl.icu-project.org/trac/ticket/13062 |
| 1231 // case UCHAR_EMOJI_COMPONENT: |
| 1232 case UCHAR_EMOJI_MODIFIER_BASE: |
| 1233 case UCHAR_EMOJI_MODIFIER: |
| 1234 case UCHAR_EMOJI_PRESENTATION: |
| 1235 case UCHAR_EXTENDER: |
| 1236 case UCHAR_GRAPHEME_BASE: |
| 1237 case UCHAR_GRAPHEME_EXTEND: |
| 1238 case UCHAR_HEX_DIGIT: |
| 1239 case UCHAR_ID_CONTINUE: |
| 1240 case UCHAR_ID_START: |
| 1241 case UCHAR_IDEOGRAPHIC: |
| 1242 case UCHAR_IDS_BINARY_OPERATOR: |
| 1243 case UCHAR_IDS_TRINARY_OPERATOR: |
| 1244 case UCHAR_JOIN_CONTROL: |
| 1245 case UCHAR_LOGICAL_ORDER_EXCEPTION: |
| 1246 case UCHAR_LOWERCASE: |
| 1247 case UCHAR_MATH: |
| 1248 case UCHAR_NONCHARACTER_CODE_POINT: |
| 1249 case UCHAR_PATTERN_SYNTAX: |
| 1250 case UCHAR_PATTERN_WHITE_SPACE: |
| 1251 case UCHAR_QUOTATION_MARK: |
| 1252 case UCHAR_RADICAL: |
| 1253 case UCHAR_S_TERM: |
| 1254 case UCHAR_SOFT_DOTTED: |
| 1255 case UCHAR_TERMINAL_PUNCTUATION: |
| 1256 case UCHAR_UNIFIED_IDEOGRAPH: |
| 1257 case UCHAR_UPPERCASE: |
| 1258 case UCHAR_VARIATION_SELECTOR: |
| 1259 case UCHAR_WHITE_SPACE: |
| 1260 case UCHAR_XID_CONTINUE: |
| 1261 case UCHAR_XID_START: |
| 1262 return true; |
| 1263 default: |
| 1264 break; |
| 1265 } |
| 1266 return false; |
| 1267 } |
| 1268 |
1205 } // anonymous namespace | 1269 } // anonymous namespace |
1206 | 1270 |
1207 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, | 1271 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
1208 bool negate) { | 1272 bool negate) { |
1209 // Parse the property class as follows: | 1273 // Parse the property class as follows: |
1210 // - In \p{name}, 'name' is interpreted | 1274 // - In \p{name}, 'name' is interpreted |
1211 // - either as a general category property value name. | 1275 // - either as a general category property value name. |
1212 // - or as a binary property name. | 1276 // - or as a binary property name. |
1213 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, | 1277 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, |
1214 // and 'value' is interpreted as one of the available property value names. | 1278 // and 'value' is interpreted as one of the available property value names. |
(...skipping 26 matching lines...) Expand all Loading... |
1241 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, | 1305 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, |
1242 result, zone())) { | 1306 result, zone())) { |
1243 return true; | 1307 return true; |
1244 } | 1308 } |
1245 // Interpret "Any", "ASCII", and "Assigned". | 1309 // Interpret "Any", "ASCII", and "Assigned". |
1246 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { | 1310 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { |
1247 return true; | 1311 return true; |
1248 } | 1312 } |
1249 // Then attempt to interpret as binary property name with value name 'Y'. | 1313 // Then attempt to interpret as binary property name with value name 'Y'. |
1250 UProperty property = u_getPropertyEnum(name); | 1314 UProperty property = u_getPropertyEnum(name); |
1251 if (property < UCHAR_BINARY_START) return false; | 1315 if (!IsSupportedBinaryProperty(property)) return false; |
1252 if (property >= UCHAR_BINARY_LIMIT) return false; | |
1253 if (!IsExactPropertyAlias(name, property)) return false; | 1316 if (!IsExactPropertyAlias(name, property)) return false; |
1254 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, | 1317 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, |
1255 zone()); | 1318 zone()); |
1256 } else { | 1319 } else { |
1257 // Both property name and value name are specified. Attempt to interpret | 1320 // Both property name and value name are specified. Attempt to interpret |
1258 // the property name as enumerated property. | 1321 // the property name as enumerated property. |
1259 const char* property_name = first_part.ToConstVector().start(); | 1322 const char* property_name = first_part.ToConstVector().start(); |
1260 const char* value_name = second_part.ToConstVector().start(); | 1323 const char* value_name = second_part.ToConstVector().start(); |
1261 UProperty property = u_getPropertyEnum(property_name); | 1324 UProperty property = u_getPropertyEnum(property_name); |
1262 if (!IsExactPropertyAlias(property_name, property)) return false; | 1325 if (!IsExactPropertyAlias(property_name, property)) return false; |
(...skipping 608 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1871 return false; | 1934 return false; |
1872 } | 1935 } |
1873 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1936 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1874 zone()); | 1937 zone()); |
1875 LAST(ADD_TERM); | 1938 LAST(ADD_TERM); |
1876 return true; | 1939 return true; |
1877 } | 1940 } |
1878 | 1941 |
1879 } // namespace internal | 1942 } // namespace internal |
1880 } // namespace v8 | 1943 } // namespace v8 |
OLD | NEW |