Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(185)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2808803002: [regexp] implement \p{Other_ID_Start} and \p{Other_ID_Continue}. (Closed)
Patch Set: address comments Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-special.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 1125 matching lines...) Expand 10 before | Expand all | Expand 10 after
1136 for (int i = 0;; i++) { 1136 for (int i = 0;; i++) {
1137 const char* long_name = u_getPropertyValueName( 1137 const char* long_name = u_getPropertyValueName(
1138 property, property_value, 1138 property, property_value,
1139 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); 1139 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
1140 if (long_name == NULL) break; 1140 if (long_name == NULL) break;
1141 if (strcmp(property_value_name, long_name) == 0) return true; 1141 if (strcmp(property_value_name, long_name) == 0) return true;
1142 } 1142 }
1143 return false; 1143 return false;
1144 } 1144 }
1145 1145
1146 bool LookupPropertyValueName(UProperty property, 1146 bool LookupPropertyName(UProperty property, const char* property_value_name,
1147 const char* property_value_name, bool negate, 1147 bool negate, ZoneList<CharacterRange>* result,
1148 ZoneList<CharacterRange>* result, Zone* zone) { 1148 Zone* zone) {
1149 UProperty property_for_lookup = property; 1149 UProperty property_for_lookup = property;
1150 if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) { 1150 if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) {
1151 // For the property Script_Extensions, we have to do the property value 1151 // For the property Script_Extensions, we have to do the property value
1152 // name lookup as if the property is Script. 1152 // name lookup as if the property is Script.
1153 property_for_lookup = UCHAR_SCRIPT; 1153 property_for_lookup = UCHAR_SCRIPT;
1154 } 1154 }
1155 int32_t property_value = 1155 int32_t property_value =
1156 u_getPropertyValueEnum(property_for_lookup, property_value_name); 1156 u_getPropertyValueEnum(property_for_lookup, property_value_name);
1157 if (property_value == UCHAR_INVALID_CODE) return false; 1157 if (property_value == UCHAR_INVALID_CODE) return false;
1158 1158
(...skipping 19 matching lines...) Expand all
1178 } 1178 }
1179 } 1179 }
1180 return success; 1180 return success;
1181 } 1181 }
1182 1182
1183 template <size_t N> 1183 template <size_t N>
1184 inline bool NameEquals(const char* name, const char (&literal)[N]) { 1184 inline bool NameEquals(const char* name, const char (&literal)[N]) {
1185 return strncmp(name, literal, N + 1) == 0; 1185 return strncmp(name, literal, N + 1) == 0;
1186 } 1186 }
1187 1187
1188 bool LookupSpecialPropertyValueName(const char* name, 1188 bool LookupSpecialPropertyName(const char* name,
1189 ZoneList<CharacterRange>* result, 1189 ZoneList<CharacterRange>* result, bool negate,
1190 bool negate, Zone* zone) { 1190 Zone* zone) {
1191 if (NameEquals(name, "Any")) { 1191 if (NameEquals(name, "Any")) {
1192 if (!negate) result->Add(CharacterRange::Everything(), zone); 1192 if (!negate) result->Add(CharacterRange::Everything(), zone);
1193 } else if (NameEquals(name, "ASCII")) { 1193 } else if (NameEquals(name, "ASCII")) {
1194 result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) 1194 result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint)
1195 : CharacterRange::Range(0x0, 0x7f), 1195 : CharacterRange::Range(0x0, 0x7f),
1196 zone); 1196 zone);
1197 } else if (NameEquals(name, "Assigned")) { 1197 } else if (NameEquals(name, "Assigned")) {
1198 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", 1198 return LookupPropertyName(UCHAR_GENERAL_CATEGORY, "Unassigned", !negate,
1199 !negate, result, zone); 1199 result, zone);
1200 } else if (NameEquals(name, "Other_ID_Start") || NameEquals(name, "OIDS")) {
1201 // From Unicode 9.0.0 PropList.txt
1202 // 1885..1886 ; Other_ID_Start
1203 // 2118 ; Other_ID_Start
1204 // 212E ; Other_ID_Start
1205 // 309B..309C ; Other_ID_Start
1206 if (negate) {
1207 result->Add(CharacterRange::Range(0x0000, 0x1884), zone);
1208 result->Add(CharacterRange::Range(0x1887, 0x2117), zone);
1209 result->Add(CharacterRange::Range(0x2119, 0x212D), zone);
1210 result->Add(CharacterRange::Range(0x212F, 0x309A), zone);
1211 result->Add(CharacterRange::Range(0x309D, String::kMaxCodePoint), zone);
1212 } else {
1213 result->Add(CharacterRange::Range(0x1885, 0x1886), zone);
1214 result->Add(CharacterRange::Singleton(0x2118), zone);
1215 result->Add(CharacterRange::Singleton(0x212E), zone);
1216 result->Add(CharacterRange::Range(0x309B, 0x309C), zone);
1217 }
1218 } else if (NameEquals(name, "Other_ID_Continue") ||
1219 NameEquals(name, "OIDC")) {
1220 // From Unicode 9.0.0 PropList.txt
1221 // 00B7 ; Other_ID_Continue
1222 // 0387 ; Other_ID_Continue
1223 // 1369..1371 ; Other_ID_Continue
1224 // 19DA ; Other_ID_Continue
1225 if (negate) {
1226 result->Add(CharacterRange::Range(0x0000, 0x00B6), zone);
1227 result->Add(CharacterRange::Range(0x00B8, 0x0386), zone);
1228 result->Add(CharacterRange::Range(0x0388, 0x1368), zone);
1229 result->Add(CharacterRange::Range(0x1372, 0x19D9), zone);
1230 result->Add(CharacterRange::Range(0x19DB, String::kMaxCodePoint), zone);
1231 } else {
1232 result->Add(CharacterRange::Singleton(0x00B7), zone);
1233 result->Add(CharacterRange::Singleton(0x0387), zone);
1234 result->Add(CharacterRange::Range(0x1369, 0x1371), zone);
1235 result->Add(CharacterRange::Singleton(0x19DA), zone);
1236 }
1200 } else { 1237 } else {
1201 return false; 1238 return false;
1202 } 1239 }
1203 return true; 1240 return true;
1204 } 1241 }
1205 1242
1206 } // anonymous namespace 1243 } // anonymous namespace
1207 1244
1208 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, 1245 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
1209 bool negate) { 1246 bool negate) {
(...skipping 22 matching lines...) Expand all
1232 } 1269 }
1233 } else { 1270 } else {
1234 return false; 1271 return false;
1235 } 1272 }
1236 Advance(); 1273 Advance();
1237 first_part.Add(0); // null-terminate string. 1274 first_part.Add(0); // null-terminate string.
1238 1275
1239 if (second_part.is_empty()) { 1276 if (second_part.is_empty()) {
1240 // First attempt to interpret as general category property value name. 1277 // First attempt to interpret as general category property value name.
1241 const char* name = first_part.ToConstVector().start(); 1278 const char* name = first_part.ToConstVector().start();
1242 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, 1279 if (LookupPropertyName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, result,
1243 result, zone())) { 1280 zone())) {
1244 return true; 1281 return true;
1245 } 1282 }
1246 // Interpret "Any", "ASCII", and "Assigned". 1283
1247 if (LookupSpecialPropertyValueName(name, result, negate, zone())) {
1248 return true;
1249 }
1250 // Then attempt to interpret as binary property name with value name 'Y'. 1284 // Then attempt to interpret as binary property name with value name 'Y'.
1251 UProperty property = u_getPropertyEnum(name); 1285 UProperty property = u_getPropertyEnum(name);
1286 if (property == UCHAR_INVALID_CODE) {
1287 // Interpret "Any", "ASCII", "Assigned", "Other_ID_Start", and
1288 // "Other_ID_Continue".
1289 return LookupSpecialPropertyName(name, result, negate, zone());
1290 }
1252 if (property < UCHAR_BINARY_START) return false; 1291 if (property < UCHAR_BINARY_START) return false;
1253 if (property >= UCHAR_BINARY_LIMIT) return false; 1292 if (property >= UCHAR_BINARY_LIMIT) return false;
1254 if (!IsExactPropertyAlias(name, property)) return false; 1293 if (!IsExactPropertyAlias(name, property)) return false;
1255 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, 1294 return LookupPropertyName(property, negate ? "N" : "Y", false, result,
1256 zone()); 1295 zone());
1257 } else { 1296 } else {
1258 // Both property name and value name are specified. Attempt to interpret 1297 // Both property name and value name are specified. Attempt to interpret
1259 // the property name as enumerated property. 1298 // the property name as enumerated property.
1260 const char* property_name = first_part.ToConstVector().start(); 1299 const char* property_name = first_part.ToConstVector().start();
1261 const char* value_name = second_part.ToConstVector().start(); 1300 const char* value_name = second_part.ToConstVector().start();
1262 UProperty property = u_getPropertyEnum(property_name); 1301 UProperty property = u_getPropertyEnum(property_name);
1263 if (!IsExactPropertyAlias(property_name, property)) return false; 1302 if (!IsExactPropertyAlias(property_name, property)) return false;
1264 if (property == UCHAR_GENERAL_CATEGORY) { 1303 if (property == UCHAR_GENERAL_CATEGORY) {
1265 // We want to allow aggregate value names such as "Letter". 1304 // We want to allow aggregate value names such as "Letter".
1266 property = UCHAR_GENERAL_CATEGORY_MASK; 1305 property = UCHAR_GENERAL_CATEGORY_MASK;
1267 } else if (property != UCHAR_SCRIPT && 1306 } else if (property != UCHAR_SCRIPT &&
1268 property != UCHAR_SCRIPT_EXTENSIONS) { 1307 property != UCHAR_SCRIPT_EXTENSIONS) {
1269 return false; 1308 return false;
1270 } 1309 }
1271 return LookupPropertyValueName(property, value_name, negate, result, 1310 return LookupPropertyName(property, value_name, negate, result, zone());
1272 zone());
1273 } 1311 }
1274 } 1312 }
1275 1313
1276 #else // V8_I18N_SUPPORT 1314 #else // V8_I18N_SUPPORT
1277 1315
1278 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, 1316 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
1279 bool negate) { 1317 bool negate) {
1280 return false; 1318 return false;
1281 } 1319 }
1282 1320
(...skipping 587 matching lines...) Expand 10 before | Expand all | Expand 10 after
1870 return false; 1908 return false;
1871 } 1909 }
1872 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1910 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1873 zone()); 1911 zone());
1874 LAST(ADD_TERM); 1912 LAST(ADD_TERM);
1875 return true; 1913 return true;
1876 } 1914 }
1877 1915
1878 } // namespace internal 1916 } // namespace internal
1879 } // namespace v8 1917 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-special.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698