Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2809143003: [regexp] remove \p{Other_ID_Start} and \p{Other_ID_Continue} (Closed)
Patch Set: Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-special.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 1125 matching lines...) Expand 10 before | Expand all | Expand 10 after
1136 for (int i = 0;; i++) { 1136 for (int i = 0;; i++) {
1137 const char* long_name = u_getPropertyValueName( 1137 const char* long_name = u_getPropertyValueName(
1138 property, property_value, 1138 property, property_value,
1139 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); 1139 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
1140 if (long_name == NULL) break; 1140 if (long_name == NULL) break;
1141 if (strcmp(property_value_name, long_name) == 0) return true; 1141 if (strcmp(property_value_name, long_name) == 0) return true;
1142 } 1142 }
1143 return false; 1143 return false;
1144 } 1144 }
1145 1145
1146 bool LookupPropertyName(UProperty property, const char* property_value_name, 1146 bool LookupPropertyValueName(UProperty property,
1147 bool negate, ZoneList<CharacterRange>* result, 1147 const char* property_value_name, bool negate,
1148 Zone* zone) { 1148 ZoneList<CharacterRange>* result, Zone* zone) {
1149 UProperty property_for_lookup = property; 1149 UProperty property_for_lookup = property;
1150 if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) { 1150 if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) {
1151 // For the property Script_Extensions, we have to do the property value 1151 // For the property Script_Extensions, we have to do the property value
1152 // name lookup as if the property is Script. 1152 // name lookup as if the property is Script.
1153 property_for_lookup = UCHAR_SCRIPT; 1153 property_for_lookup = UCHAR_SCRIPT;
1154 } 1154 }
1155 int32_t property_value = 1155 int32_t property_value =
1156 u_getPropertyValueEnum(property_for_lookup, property_value_name); 1156 u_getPropertyValueEnum(property_for_lookup, property_value_name);
1157 if (property_value == UCHAR_INVALID_CODE) return false; 1157 if (property_value == UCHAR_INVALID_CODE) return false;
1158 1158
(...skipping 19 matching lines...) Expand all
1178 } 1178 }
1179 } 1179 }
1180 return success; 1180 return success;
1181 } 1181 }
1182 1182
1183 template <size_t N> 1183 template <size_t N>
1184 inline bool NameEquals(const char* name, const char (&literal)[N]) { 1184 inline bool NameEquals(const char* name, const char (&literal)[N]) {
1185 return strncmp(name, literal, N + 1) == 0; 1185 return strncmp(name, literal, N + 1) == 0;
1186 } 1186 }
1187 1187
1188 bool LookupSpecialPropertyName(const char* name, 1188 bool LookupSpecialPropertyValueName(const char* name,
1189 ZoneList<CharacterRange>* result, bool negate, 1189 ZoneList<CharacterRange>* result,
1190 Zone* zone) { 1190 bool negate, Zone* zone) {
1191 if (NameEquals(name, "Any")) { 1191 if (NameEquals(name, "Any")) {
1192 if (!negate) result->Add(CharacterRange::Everything(), zone); 1192 if (!negate) result->Add(CharacterRange::Everything(), zone);
1193 } else if (NameEquals(name, "ASCII")) { 1193 } else if (NameEquals(name, "ASCII")) {
1194 result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) 1194 result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint)
1195 : CharacterRange::Range(0x0, 0x7f), 1195 : CharacterRange::Range(0x0, 0x7f),
1196 zone); 1196 zone);
1197 } else if (NameEquals(name, "Assigned")) { 1197 } else if (NameEquals(name, "Assigned")) {
1198 return LookupPropertyName(UCHAR_GENERAL_CATEGORY, "Unassigned", !negate, 1198 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
1199 result, zone); 1199 !negate, result, zone);
1200 } else if (NameEquals(name, "Other_ID_Start") || NameEquals(name, "OIDS")) {
1201 // From Unicode 9.0.0 PropList.txt
1202 // 1885..1886 ; Other_ID_Start
1203 // 2118 ; Other_ID_Start
1204 // 212E ; Other_ID_Start
1205 // 309B..309C ; Other_ID_Start
1206 if (negate) {
1207 result->Add(CharacterRange::Range(0x0000, 0x1884), zone);
1208 result->Add(CharacterRange::Range(0x1887, 0x2117), zone);
1209 result->Add(CharacterRange::Range(0x2119, 0x212D), zone);
1210 result->Add(CharacterRange::Range(0x212F, 0x309A), zone);
1211 result->Add(CharacterRange::Range(0x309D, String::kMaxCodePoint), zone);
1212 } else {
1213 result->Add(CharacterRange::Range(0x1885, 0x1886), zone);
1214 result->Add(CharacterRange::Singleton(0x2118), zone);
1215 result->Add(CharacterRange::Singleton(0x212E), zone);
1216 result->Add(CharacterRange::Range(0x309B, 0x309C), zone);
1217 }
1218 } else if (NameEquals(name, "Other_ID_Continue") ||
1219 NameEquals(name, "OIDC")) {
1220 // From Unicode 9.0.0 PropList.txt
1221 // 00B7 ; Other_ID_Continue
1222 // 0387 ; Other_ID_Continue
1223 // 1369..1371 ; Other_ID_Continue
1224 // 19DA ; Other_ID_Continue
1225 if (negate) {
1226 result->Add(CharacterRange::Range(0x0000, 0x00B6), zone);
1227 result->Add(CharacterRange::Range(0x00B8, 0x0386), zone);
1228 result->Add(CharacterRange::Range(0x0388, 0x1368), zone);
1229 result->Add(CharacterRange::Range(0x1372, 0x19D9), zone);
1230 result->Add(CharacterRange::Range(0x19DB, String::kMaxCodePoint), zone);
1231 } else {
1232 result->Add(CharacterRange::Singleton(0x00B7), zone);
1233 result->Add(CharacterRange::Singleton(0x0387), zone);
1234 result->Add(CharacterRange::Range(0x1369, 0x1371), zone);
1235 result->Add(CharacterRange::Singleton(0x19DA), zone);
1236 }
1237 } else { 1200 } else {
1238 return false; 1201 return false;
1239 } 1202 }
1240 return true; 1203 return true;
1241 } 1204 }
1242 1205
1243 } // anonymous namespace 1206 } // anonymous namespace
1244 1207
1245 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, 1208 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
1246 bool negate) { 1209 bool negate) {
(...skipping 22 matching lines...) Expand all
1269 } 1232 }
1270 } else { 1233 } else {
1271 return false; 1234 return false;
1272 } 1235 }
1273 Advance(); 1236 Advance();
1274 first_part.Add(0); // null-terminate string. 1237 first_part.Add(0); // null-terminate string.
1275 1238
1276 if (second_part.is_empty()) { 1239 if (second_part.is_empty()) {
1277 // First attempt to interpret as general category property value name. 1240 // First attempt to interpret as general category property value name.
1278 const char* name = first_part.ToConstVector().start(); 1241 const char* name = first_part.ToConstVector().start();
1279 if (LookupPropertyName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, result, 1242 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
1280 zone())) { 1243 result, zone())) {
1281 return true; 1244 return true;
1282 } 1245 }
1283 1246 // Interpret "Any", "ASCII", and "Assigned".
1247 if (LookupSpecialPropertyValueName(name, result, negate, zone())) {
1248 return true;
1249 }
1284 // Then attempt to interpret as binary property name with value name 'Y'. 1250 // Then attempt to interpret as binary property name with value name 'Y'.
1285 UProperty property = u_getPropertyEnum(name); 1251 UProperty property = u_getPropertyEnum(name);
1286 if (property == UCHAR_INVALID_CODE) {
1287 // Interpret "Any", "ASCII", "Assigned", "Other_ID_Start", and
1288 // "Other_ID_Continue".
1289 return LookupSpecialPropertyName(name, result, negate, zone());
1290 }
1291 if (property < UCHAR_BINARY_START) return false; 1252 if (property < UCHAR_BINARY_START) return false;
1292 if (property >= UCHAR_BINARY_LIMIT) return false; 1253 if (property >= UCHAR_BINARY_LIMIT) return false;
1293 if (!IsExactPropertyAlias(name, property)) return false; 1254 if (!IsExactPropertyAlias(name, property)) return false;
1294 return LookupPropertyName(property, negate ? "N" : "Y", false, result, 1255 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result,
1295 zone()); 1256 zone());
1296 } else { 1257 } else {
1297 // Both property name and value name are specified. Attempt to interpret 1258 // Both property name and value name are specified. Attempt to interpret
1298 // the property name as enumerated property. 1259 // the property name as enumerated property.
1299 const char* property_name = first_part.ToConstVector().start(); 1260 const char* property_name = first_part.ToConstVector().start();
1300 const char* value_name = second_part.ToConstVector().start(); 1261 const char* value_name = second_part.ToConstVector().start();
1301 UProperty property = u_getPropertyEnum(property_name); 1262 UProperty property = u_getPropertyEnum(property_name);
1302 if (!IsExactPropertyAlias(property_name, property)) return false; 1263 if (!IsExactPropertyAlias(property_name, property)) return false;
1303 if (property == UCHAR_GENERAL_CATEGORY) { 1264 if (property == UCHAR_GENERAL_CATEGORY) {
1304 // We want to allow aggregate value names such as "Letter". 1265 // We want to allow aggregate value names such as "Letter".
1305 property = UCHAR_GENERAL_CATEGORY_MASK; 1266 property = UCHAR_GENERAL_CATEGORY_MASK;
1306 } else if (property != UCHAR_SCRIPT && 1267 } else if (property != UCHAR_SCRIPT &&
1307 property != UCHAR_SCRIPT_EXTENSIONS) { 1268 property != UCHAR_SCRIPT_EXTENSIONS) {
1308 return false; 1269 return false;
1309 } 1270 }
1310 return LookupPropertyName(property, value_name, negate, result, zone()); 1271 return LookupPropertyValueName(property, value_name, negate, result,
1272 zone());
1311 } 1273 }
1312 } 1274 }
1313 1275
1314 #else // V8_I18N_SUPPORT 1276 #else // V8_I18N_SUPPORT
1315 1277
1316 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, 1278 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
1317 bool negate) { 1279 bool negate) {
1318 return false; 1280 return false;
1319 } 1281 }
1320 1282
(...skipping 587 matching lines...) Expand 10 before | Expand all | Expand 10 after
1908 return false; 1870 return false;
1909 } 1871 }
1910 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1872 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1911 zone()); 1873 zone());
1912 LAST(ADD_TERM); 1874 LAST(ADD_TERM);
1913 return true; 1875 return true;
1914 } 1876 }
1915 1877
1916 } // namespace internal 1878 } // namespace internal
1917 } // namespace v8 1879 } // namespace v8
OLD | NEW
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-special.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698