Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(134)

Side by Side Diff: src/ast.h

Issue 8188: Some new regexp infrastructure. (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/ast.cc » ('j') | src/ast.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1163 matching lines...) Expand 10 before | Expand all | Expand 10 after
1174 }; 1174 };
1175 1175
1176 1176
1177 class ThisFunction: public Expression { 1177 class ThisFunction: public Expression {
1178 public: 1178 public:
1179 virtual void Accept(Visitor* v); 1179 virtual void Accept(Visitor* v);
1180 }; 1180 };
1181 1181
1182 1182
1183 // ---------------------------------------------------------------------------- 1183 // ----------------------------------------------------------------------------
1184 // Regular expressions
Erik Corry 2008/10/27 14:58:44 full stop
Christian Plesner Hansen 2008/10/27 18:57:02 I copied the banner style from parser.cc which doe…
1185
1186
1187 #define FOR_EACH_REG_EXP_NODE_TYPE(VISIT) \
1188 VISIT(Disjunction) \
1189 VISIT(Alternative) \
1190 VISIT(Assertion) \
1191 VISIT(CharacterClass) \
1192 VISIT(Atom) \
1193 VISIT(Quantifier) \
1194 VISIT(Capture) \
1195 VISIT(Lookahead) \
1196 VISIT(Empty)
1197
1198
1199 class RegExpVisitor;
1200 template <typename Char> class RegExpNode;
1201 #define FORWARD_DECLARE(Name) class RegExp##Name;
1202 FOR_EACH_REG_EXP_NODE_TYPE(FORWARD_DECLARE)
1203 #undef FORWARD_DECLARE
1204
1205
1206 class RegExpTree: public ZoneObject {
1207 public:
1208 virtual ~RegExpTree() { }
1209 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
1210 SmartPointer<char> ToString();
1211 };
1212
1213
1214 class RegExpDisjunction: public RegExpTree {
1215 public:
1216 RegExpDisjunction(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
1217 virtual void* Accept(RegExpVisitor* visitor, void* data);
1218 ZoneList<RegExpTree*>* nodes() { return nodes_; }
1219 private:
1220 ZoneList<RegExpTree*>* nodes_;
1221 };
1222
1223
1224 class RegExpAlternative: public RegExpTree {
1225 public:
1226 RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
1227 virtual void* Accept(RegExpVisitor* visitor, void* data);
1228 ZoneList<RegExpTree*>* nodes() { return nodes_; }
1229 private:
1230 ZoneList<RegExpTree*>* nodes_;
1231 };
1232
1233
1234 class RegExpAssertion: public RegExpTree {
1235 public:
1236 enum Type { START, END, BOUNDARY, NON_BOUNDARY };
Lasse Reichstein 2008/10/27 13:12:58 I suggest having both START_OF_INPUT and START_OF_…
Christian Plesner Hansen 2008/10/27 18:57:02 Good point.
1237 RegExpAssertion(Type type) : type_(type) { }
1238 virtual void* Accept(RegExpVisitor* visitor, void* data);
1239 Type type() { return type_; }
1240 private:
1241 Type type_;
1242 };
1243
1244
1245 class CharacterRange {
1246 public:
1247 // For compatibility with the CHECK_OK macro
1248 CharacterRange(void* null) { ASSERT_EQ(NULL, null); }
1249 CharacterRange(uc32 from, uc32 to, bool is_special)
Lasse Reichstein 2008/10/27 13:12:58 Could you document what "is_special" signifies? (o…
Christian Plesner Hansen 2008/10/27 18:57:02 I've renamed it to is_character_class_.
1250 : from_(from),
1251 to_(to),
1252 is_special_(is_special) {
1253 // Assert that truncating doesn't throw away information.
1254 ASSERT_EQ(from, from_);
1255 ASSERT_EQ(to_, to);
1256 }
1257 static inline CharacterRange Special(uc32 tag) {
1258 return CharacterRange(tag, tag, true);
1259 }
1260 static inline CharacterRange Singleton(uc32 value) {
1261 return CharacterRange(value, value, false);
1262 }
1263 static inline CharacterRange Range(uc32 from, uc32 to) {
1264 return CharacterRange(from, to, false);
1265 }
1266 unsigned from() { return from_; }
Erik Corry 2008/10/27 14:58:44 ASSERT !special?
1267 unsigned to() { return to_; }
1268 bool is_special() { return is_special_; }
1269 bool IsSingleton() { return (from_ == to_) && !is_special_; }
1270 private:
1271 unsigned from_ : 21;
Erik Corry 2008/10/27 14:58:44 JS Regexps don't handle anything outside the basic…
Christian Plesner Hansen 2008/10/27 18:57:02 Yes, and then when we unfold the character classes…
1272 unsigned to_ : 21;
1273 bool is_special_ : 1;
1274 };
1275
1276
1277 STATIC_CHECK(sizeof(CharacterRange) == 2 * sizeof(int));
1278
1279
1280 class RegExpCharacterClass: public RegExpTree {
1281 public:
1282 RegExpCharacterClass(CharacterRange range)
1283 : ranges_(new ZoneList<CharacterRange>(1)),
1284 is_negated_(false) {
1285 ranges_->Add(range);
1286 }
1287 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
1288 : ranges_(ranges),
1289 is_negated_(is_negated) { }
1290 virtual void* Accept(RegExpVisitor* visitor, void* data);
1291 ZoneList<CharacterRange>* ranges() { return ranges_; }
1292 bool is_negated() { return is_negated_; }
1293 private:
1294 ZoneList<CharacterRange>* ranges_;
1295 bool is_negated_;
1296 };
1297
1298
1299 class RegExpAtom: public RegExpTree {
1300 public:
1301 RegExpAtom(Vector<const uc16> data) : data_(data) { }
1302 virtual void* Accept(RegExpVisitor* visitor, void* data);
1303 Vector<const uc16> data() { return data_; }
1304 private:
1305 Vector<const uc16> data_;
1306 };
1307
1308
1309 class RegExpQuantifier: public RegExpTree {
1310 public:
1311 RegExpQuantifier(int min, int max, bool is_greedy, RegExpTree* body)
1312 : min_(min),
1313 max_(max),
1314 is_greedy_(is_greedy),
1315 body_(body) { }
1316 virtual void* Accept(RegExpVisitor* visitor, void* data);
1317 int min() { return min_; }
1318 int max() { return max_; }
1319 bool is_greedy() { return is_greedy_; }
1320 RegExpTree* body() { return body_; }
1321 // We just use a very large integer value as infinity because 2^31
1322 // is infinite in practice.
1323 static const int kInfinity = (1 << 31);
1324 private:
1325 int min_;
1326 int max_;
1327 bool is_greedy_;
1328 RegExpTree* body_;
1329 };
1330
1331
1332 class RegExpCapture: public RegExpTree {
Lasse Reichstein 2008/10/27 13:12:58 I think keeping the index of the capture in the no…
Christian Plesner Hansen 2008/10/27 18:57:02 The ast nodes that I don't yet convert into nodes…
1333 public:
1334 RegExpCapture(RegExpTree* body)
1335 : body_(body) { }
1336 virtual void* Accept(RegExpVisitor* visitor, void* data);
1337 RegExpTree* body() { return body_; }
1338 private:
1339 RegExpTree* body_;
1340 };
1341
1342
1343 class RegExpLookahead: public RegExpTree {
1344 public:
1345 RegExpLookahead(RegExpTree* body, bool is_positive)
1346 : body_(body),
1347 is_positive_(is_positive) { }
1348 virtual void* Accept(RegExpVisitor* visitor, void* data);
1349 RegExpTree* body() { return body_; }
1350 bool is_positive() { return is_positive_; }
1351 private:
1352 RegExpTree* body_;
1353 bool is_positive_;
1354 };
1355
1356
1357 class RegExpEmpty: public RegExpTree {
1358 public:
1359 RegExpEmpty() { }
1360 virtual void* Accept(RegExpVisitor* visitor, void* data);
1361 static RegExpEmpty* GetInstance() { return &kInstance; }
1362 private:
1363 static RegExpEmpty kInstance;
1364 };
1365
1366
1367 class RegExpVisitor BASE_EMBEDDED {
1368 public:
1369 virtual ~RegExpVisitor() { }
1370 #define MAKE_CASE(Name) \
1371 virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
1372 FOR_EACH_REG_EXP_NODE_TYPE(MAKE_CASE)
1373 #undef MAKE_CASE
1374 };
1375
1376
1377 // ----------------------------------------------------------------------------
1184 // Basic visitor 1378 // Basic visitor
1185 // - leaf node visitors are abstract. 1379 // - leaf node visitors are abstract.
1186 1380
1187 class Visitor BASE_EMBEDDED { 1381 class Visitor BASE_EMBEDDED {
1188 public: 1382 public:
1189 Visitor() : stack_overflow_(false) { } 1383 Visitor() : stack_overflow_(false) { }
1190 virtual ~Visitor() { } 1384 virtual ~Visitor() { }
1191 1385
1192 // Dispatch 1386 // Dispatch
1193 void Visit(Node* node) { node->Accept(this); } 1387 void Visit(Node* node) { node->Accept(this); }
(...skipping 24 matching lines...) Expand all
1218 #undef DEF_VISIT 1412 #undef DEF_VISIT
1219 1413
1220 private: 1414 private:
1221 bool stack_overflow_; 1415 bool stack_overflow_;
1222 }; 1416 };
1223 1417
1224 1418
1225 } } // namespace v8::internal 1419 } } // namespace v8::internal
1226 1420
1227 #endif // V8_AST_H_ 1421 #endif // V8_AST_H_
OLDNEW
« no previous file with comments | « no previous file | src/ast.cc » ('j') | src/ast.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698