Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(702)

Unified Diff: src/lexer/lexer.re

Issue 32573003: Experimental parser: add UnicodeCache. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/lexer/lexer.h ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/lexer/lexer.re
diff --git a/src/lexer/lexer.re b/src/lexer/lexer.re
index 1fae29929fa86700f11eda41daa9cc46febadc60..36a0b3f3480fdb300d714ba4dcf6cc747748e475 100644
--- a/src/lexer/lexer.re
+++ b/src/lexer/lexer.re
@@ -31,6 +31,30 @@
#include <stdlib.h>
#include <string.h>
+// FIXME: some of these includes are probably not needed; "ast.h" and "preparser.h" are each listed twice below.
+#include "allocation.h"
+#include "ast.h"
+#include "preparse-data-format.h"
+#include "preparse-data.h"
+#include "scopes.h"
+#include "preparser.h"
+#include "api.h"
+#include "ast.h"
+#include "bootstrapper.h"
+#include "char-predicates-inl.h"
+#include "codegen.h"
+#include "compiler.h"
+#include "func-name-inferrer.h"
+#include "messages.h"
+#include "parser.h"
+#include "platform.h"
+#include "preparser.h"
+#include "runtime.h"
+#include "scanner-character-streams.h"
+#include "scopeinfo.h"
+#include "string-stream.h"
+
+
// TODO:
// - SpiderMonkey compatibility hack: " --> something" is treated
// as a single line comment.
@@ -76,14 +100,29 @@ enum Condition {
using namespace v8::internal;
+namespace {
+
+inline int HexValue(uc32 c) {  // Hex digit -> its value 0..15; anything else -> -1.
+ c -= '0';  // Rebase so that '0'..'9' become 0..9.
+ if (static_cast<unsigned>(c) <= 9) return c;  // Unsigned compare also rejects values below '0'.
+ c = (c | 0x20) - ('a' - '0'); // Fold case: 'A'..'F' (0x11..0x16) and 'a'..'f' (0x31..0x36) both become 0..5.
+ if (static_cast<unsigned>(c) <= 5) return c + 10;
+ return -1;
+}
+
+}
+
// Helpers used by the lexer rule actions. SKIP() is not shown in this diff;
// NOTE(review): confirm what it does before relying on these descriptions.
// Sends token T, then runs SKIP().
#define PUSH_TOKEN(T) { send(T); SKIP(); }
// Like PUSH_TOKEN, but first moves cursor_ back by one so that the end offset
// send() computes from cursor_ excludes the last character consumed.
#define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
// Sends Token::EOS, sets eof_ and returns 1 from the enclosing function.
#define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
// Runs SKIP() without sending any token.
#define PUSH_LINE_TERMINATOR() { SKIP(); }
// Sends Token::ILLEGAL followed by Token::EOS and returns 1 from the enclosing
// function; unlike PUSH_EOF_AND_RETURN it does not set eof_.
#define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; }
-PushScanner::PushScanner(ExperimentalScanner* sink)
-: eof_(false),
+#define YYCTYPE uint8_t  // Plain macro rather than a re2c:define so C++ code (ScanHexNumber) can name the type.
+
+PushScanner::PushScanner(ExperimentalScanner* sink, UnicodeCache* unicode_cache)
+: unicode_cache_(unicode_cache),
+ eof_(false),
state_(-1),
condition_(kConditionNormal),
limit_(NULL),
@@ -102,6 +141,31 @@ PushScanner::PushScanner(ExperimentalScanner* sink)
// Destructor: the body is empty, so no explicit cleanup is performed here.
PushScanner::~PushScanner() {
}
+
+uc32 PushScanner::ScanHexNumber(int length) {  // Hex value of the `length` chars before cursor_.
+ // We have seen \uXXXX; decode the hex digits that end just before cursor_.
+ // FIXME: we never end up in here if only a subset of the 4 chars are valid
+ // hex digits -> handle the case where they're not.
+ uc32 x = 0;
+ for (YYCTYPE* s = cursor_ - length; s != cursor_; ++s) {  // Most significant digit first.
+ int d = HexValue(*s);
+ if (d < 0) {  // Not a hex digit.
+ return -1;  // NOTE(review): -1 is converted to uc32; confirm uc32's signedness.
+ }
+ x = x * 16 + d;
+ }
+ return x;
+}
+
+
+bool PushScanner::ValidIdentifierPart() {  // Checks the \uXXXX escape that ends at cursor_.
+ return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));  // Decode the 4 hex digits, then ask the cache.
+}
+
+bool PushScanner::ValidIdentifierStart() {  // Checks the \uXXXX escape that ends at cursor_.
+ return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));  // Decode the 4 hex digits, then ask the cache.
+}
+
void PushScanner::send(Token::Value token) {
int beg = (start_ - buffer_) + real_start_;
int end = (cursor_ - buffer_) + real_start_;
@@ -188,7 +252,6 @@ start_:
re2c:condenumprefix = kCondition;
re2c:define:YYCONDTYPE = Condition;
re2c:define:YYCURSOR = cursor_;
- re2c:define:YYCTYPE = uint8_t;
re2c:define:YYLIMIT = limit_;
re2c:define:YYMARKER = marker_;
@@ -319,8 +382,7 @@ start_:
<Normal> ['] :=> SingleQuoteString
<Normal> identifier_start_ :=> Identifier
- <Normal> "\\u0000" :=> IdentifierIllegal
- <Normal> "\\u" [0-9a-fA-F]{4} :=> Identifier
+ <Normal> "\\u" [0-9a-fA-F]{4} { if (ValidIdentifierStart()) { YYSETCONDITION(kConditionIdentifier); goto yy0; } YYSETCONDITION(kConditionIdentifierIllegal); send(Token::ILLEGAL); start_ = cursor_; goto yy0; }
<Normal> "\\" { PUSH_TOKEN(Token::ILLEGAL); }
<Normal> eof { PUSH_EOF_AND_RETURN();}
@@ -347,13 +409,12 @@ start_:
<SingleQuoteString> any { goto yy0; }
<Identifier> identifier_char+ { goto yy0; }
- <Identifier> "\\u0000" :=> IdentifierIllegal
- <Identifier> "\\u" [0-9a-fA-F]{4} { goto yy0; }
+ <Identifier> "\\u" [0-9a-fA-F]{4} { if (ValidIdentifierPart()) goto yy0; YYSETCONDITION(kConditionIdentifierIllegal); send(Token::ILLEGAL); }
<Identifier> "\\" { PUSH_TOKEN(Token::ILLEGAL); }
<Identifier> any { PUSH_TOKEN_LOOKAHEAD(Token::IDENTIFIER); }
<IdentifierIllegal> identifier_char+ { goto yy0; }
- <IdentifierIllegal> "\\"+ { goto yy0; }
+ <IdentifierIllegal> "\\"+ { goto yy0; }
<IdentifierIllegal> any { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
<SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
« no previous file with comments | « src/lexer/lexer.h ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698