Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(271)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2827613002: [regexp] explicitly whitelist allowed binary property classes. (Closed)
Patch Set: add comments Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-binary.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 1184 matching lines...) Expand 10 before | Expand all | Expand 10 after
1195 zone); 1195 zone);
1196 } else if (NameEquals(name, "Assigned")) { 1196 } else if (NameEquals(name, "Assigned")) {
1197 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", 1197 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
1198 !negate, result, zone); 1198 !negate, result, zone);
1199 } else { 1199 } else {
1200 return false; 1200 return false;
1201 } 1201 }
1202 return true; 1202 return true;
1203 } 1203 }
1204 1204
1205 // Returns true if |property| is one of the binary properties explicitly whitelisted below, and false for every other UProperty value.
1206 // The spec forbids supporting properties outside of this set to ensure interoperability.
1207 bool IsSupportedBinaryProperty(UProperty property) {
1208 switch (property) {
1209 case UCHAR_ALPHABETIC:
1210 // 'Any' is not supported by ICU, so it has no case here. See LookupSpecialPropertyValueName.
1211 // 'ASCII' is not supported by ICU, so it has no case here. See LookupSpecialPropertyValueName.
1212 case UCHAR_ASCII_HEX_DIGIT:
1213 // 'Assigned' is not supported by ICU, so it has no case here. See LookupSpecialPropertyValueName.
1214 case UCHAR_BIDI_CONTROL:
1215 case UCHAR_BIDI_MIRRORED:
1216 case UCHAR_CASE_IGNORABLE:
1217 case UCHAR_CASED:
1218 case UCHAR_CHANGES_WHEN_CASEFOLDED:
1219 case UCHAR_CHANGES_WHEN_CASEMAPPED:
1220 case UCHAR_CHANGES_WHEN_LOWERCASED:
1221 case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
1222 case UCHAR_CHANGES_WHEN_TITLECASED:
1223 case UCHAR_CHANGES_WHEN_UPPERCASED:
1224 case UCHAR_DASH:
1225 case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
1226 case UCHAR_DEPRECATED:
1227 case UCHAR_DIACRITIC:
1228 case UCHAR_EMOJI:
1229 // TODO(yangguo): Uncomment the UCHAR_EMOJI_COMPONENT case below once we upgrade to ICU 60.
1230 // See https://ssl.icu-project.org/trac/ticket/13062
1231 // case UCHAR_EMOJI_COMPONENT:
1232 case UCHAR_EMOJI_MODIFIER_BASE:
1233 case UCHAR_EMOJI_MODIFIER:
1234 case UCHAR_EMOJI_PRESENTATION:
1235 case UCHAR_EXTENDER:
1236 case UCHAR_GRAPHEME_BASE:
1237 case UCHAR_GRAPHEME_EXTEND:
1238 case UCHAR_HEX_DIGIT:
1239 case UCHAR_ID_CONTINUE:
1240 case UCHAR_ID_START:
1241 case UCHAR_IDEOGRAPHIC:
1242 case UCHAR_IDS_BINARY_OPERATOR:
1243 case UCHAR_IDS_TRINARY_OPERATOR:
1244 case UCHAR_JOIN_CONTROL:
1245 case UCHAR_LOGICAL_ORDER_EXCEPTION:
1246 case UCHAR_LOWERCASE:
1247 case UCHAR_MATH:
1248 case UCHAR_NONCHARACTER_CODE_POINT:
1249 case UCHAR_PATTERN_SYNTAX:
1250 case UCHAR_PATTERN_WHITE_SPACE:
1251 case UCHAR_QUOTATION_MARK:
1252 case UCHAR_RADICAL:
1253 case UCHAR_S_TERM:
1254 case UCHAR_SOFT_DOTTED:
1255 case UCHAR_TERMINAL_PUNCTUATION:
1256 case UCHAR_UNIFIED_IDEOGRAPH:
1257 case UCHAR_UPPERCASE:
1258 case UCHAR_VARIATION_SELECTOR:
1259 case UCHAR_WHITE_SPACE:
1260 case UCHAR_XID_CONTINUE:
1261 case UCHAR_XID_START:
1262 return true;
1263 default:  // Any property not listed above is treated as unsupported.
1264 break;
1265 }
1266 return false;
1267 }
1268
1205 } // anonymous namespace 1269 } // anonymous namespace
1206 1270
1207 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, 1271 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
1208 bool negate) { 1272 bool negate) {
1209 // Parse the property class as follows: 1273 // Parse the property class as follows:
1210 // - In \p{name}, 'name' is interpreted 1274 // - In \p{name}, 'name' is interpreted
1211 // - either as a general category property value name. 1275 // - either as a general category property value name.
1212 // - or as a binary property name. 1276 // - or as a binary property name.
1213 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, 1277 // - In \p{name=value}, 'name' is interpreted as an enumerated property name,
1214 // and 'value' is interpreted as one of the available property value names. 1278 // and 'value' is interpreted as one of the available property value names.
(...skipping 26 matching lines...) Expand all
1241 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, 1305 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
1242 result, zone())) { 1306 result, zone())) {
1243 return true; 1307 return true;
1244 } 1308 }
1245 // Interpret "Any", "ASCII", and "Assigned". 1309 // Interpret "Any", "ASCII", and "Assigned".
1246 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { 1310 if (LookupSpecialPropertyValueName(name, result, negate, zone())) {
1247 return true; 1311 return true;
1248 } 1312 }
1249 // Then attempt to interpret as binary property name with value name 'Y'. 1313 // Then attempt to interpret as binary property name with value name 'Y'.
1250 UProperty property = u_getPropertyEnum(name); 1314 UProperty property = u_getPropertyEnum(name);
1251 if (property < UCHAR_BINARY_START) return false; 1315 if (!IsSupportedBinaryProperty(property)) return false;
1252 if (property >= UCHAR_BINARY_LIMIT) return false;
1253 if (!IsExactPropertyAlias(name, property)) return false; 1316 if (!IsExactPropertyAlias(name, property)) return false;
1254 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, 1317 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result,
1255 zone()); 1318 zone());
1256 } else { 1319 } else {
1257 // Both property name and value name are specified. Attempt to interpret 1320 // Both property name and value name are specified. Attempt to interpret
1258 // the property name as enumerated property. 1321 // the property name as enumerated property.
1259 const char* property_name = first_part.ToConstVector().start(); 1322 const char* property_name = first_part.ToConstVector().start();
1260 const char* value_name = second_part.ToConstVector().start(); 1323 const char* value_name = second_part.ToConstVector().start();
1261 UProperty property = u_getPropertyEnum(property_name); 1324 UProperty property = u_getPropertyEnum(property_name);
1262 if (!IsExactPropertyAlias(property_name, property)) return false; 1325 if (!IsExactPropertyAlias(property_name, property)) return false;
(...skipping 608 matching lines...) Expand 10 before | Expand all | Expand 10 after
1871 return false; 1934 return false;
1872 } 1935 }
1873 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1936 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1874 zone()); 1937 zone());
1875 LAST(ADD_TERM); 1938 LAST(ADD_TERM);
1876 return true; 1939 return true;
1877 } 1940 }
1878 1941
1879 } // namespace internal 1942 } // namespace internal
1880 } // namespace v8 1943 } // namespace v8
OLD | NEW
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-binary.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698