Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Unified Diff: src/lexer/lexer.re

Issue 27705002: Experimental parser: Refactoring and timing. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Code review (ulan) Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/lexer/lexer.h ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/lexer/lexer.re
diff --git a/src/lexer/lexer.re b/src/lexer/lexer.re
index a63e4183468c499017a8d3b8592543dd867c02ae..cddcda2816128741de0d88e20189c5e3bcab28ca 100644
--- a/src/lexer/lexer.re
+++ b/src/lexer/lexer.re
@@ -1,3 +1,32 @@
+// Portions of this code based on re2c:
+// (re2c/examples/push.re)
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
#include <fcntl.h>
#include <stdio.h>
#include <stddef.h>
@@ -52,116 +81,113 @@ using namespace v8::internal;
#define PUSH_LINE_TERMINATOR() { SKIP(); }
#define TERMINATE_ILLEGAL() { return 1; }
-class PushScanner {
-
-public:
- PushScanner(ExperimentalScanner* sink):
- eof_(false),
- state_(-1),
- condition_(kConditionNormal),
- limit_(NULL),
- start_(NULL),
- cursor_(NULL),
- marker_(NULL),
- real_start_(0),
- buffer_(NULL),
- buffer_end_(NULL),
- yych(0),
- yyaccept(0),
- sink_(sink) {
- }
+PushScanner::PushScanner(ExperimentalScanner* sink)
+: eof_(false),
+ state_(-1),
+ condition_(kConditionNormal),
+ limit_(NULL),
+ start_(NULL),
+ cursor_(NULL),
+ marker_(NULL),
+ real_start_(0),
+ buffer_(NULL),
+ buffer_end_(NULL),
+ yych(0),
+ yyaccept(0),
+ sink_(sink) {
- ~PushScanner() {
- }
+}
- void send(Token::Value token) {
- int beg = (start_ - buffer_) + real_start_;
- int end = (cursor_ - buffer_) + real_start_;
- if (FLAG_trace_lexer) {
- printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
- for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s);
- printf(".\n");
- }
- sink_->Record(token, beg, end);
+PushScanner::~PushScanner() {
+}
+
+void PushScanner::send(Token::Value token) {
+ int beg = (start_ - buffer_) + real_start_;
+ int end = (cursor_ - buffer_) + real_start_;
+ if (FLAG_trace_lexer) {
+ printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
+ for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s);
+ printf(".\n");
}
+ sink_->Record(token, beg, end);
+}
- uint32_t push(const void *input, int input_size) {
- if (FLAG_trace_lexer) {
- printf(
+uint32_t PushScanner::push(const void *input, int input_size) {
+ if (FLAG_trace_lexer) {
+ printf(
"scanner is receiving a new data batch of length %d\n"
"scanner continues with saved state_ = %d\n",
input_size,
- state_
- );
- }
+ state_);
+ }
- // Data source is signaling end of file when batch size
- // is less than max_fill. This is slightly annoying because
- // max_fill is a value that can only be known after re2c does
- // its thing. Practically though, max_fill is never bigger than
- // the longest keyword, so given our grammar, 32 is a safe bet.
-
- uint8_t null[64];
- const int max_fill = 32;
- if (input_size < max_fill) { // FIXME: do something about this!!!
- eof_ = true;
- input = null;
- input_size = sizeof(null);
- memset(null, 0, sizeof(null));
- }
+ // The data source signals end of file by passing a batch smaller
+ // than max_fill. This is slightly annoying because max_fill is a
+ // value that is only known after re2c has generated the scanner.
+ // In practice, though, max_fill is never bigger than the longest
+ // keyword, so given our grammar, 32 is a safe bet.
+
+ uint8_t null[64];
+ const int max_fill = 32;
+ if (input_size < max_fill) { // FIXME: do something about this!!!
+ eof_ = true;
+ input = null;
+ input_size = sizeof(null);
+ memset(null, 0, sizeof(null));
+ }
- // When we get here, we have a partially
- // consumed buffer_ which is in the following state_:
- // last valid char last valid buffer_ spot
- // v v
- // +-------------------+-------------+---------------+-------------+----------------------+
- // ^ ^ ^ ^ ^ ^
- // buffer_ start_ marker_ cursor_ limit_ buffer_end_
- //
- // We need to stretch the buffer_ and concatenate the new chunk of input to it
-
- size_t used = limit_ - buffer_;
- size_t needed = used + input_size;
- size_t allocated = buffer_end_ - buffer_;
- if(allocated < needed) {
- size_t limit__offset = limit_ - buffer_;
- size_t start_offset = start_ - buffer_;
- size_t marker__offset = marker_ - buffer_;
- size_t cursor__offset = cursor_ - buffer_;
-
- buffer_ = (uint8_t*)realloc(buffer_, needed);
- buffer_end_ = needed + buffer_;
-
- marker_ = marker__offset + buffer_;
- cursor_ = cursor__offset + buffer_;
- start_ = buffer_ + start_offset;
- limit_ = limit__offset + buffer_;
- }
- memcpy(limit_, input, input_size);
- limit_ += input_size;
+ // When we get here, we have a partially
+ // consumed buffer_ which is in the following state:
+ // last valid char last valid buffer_ spot
+ // v v
+ // +-------------------+-------------+---------------+-------------+----------------------+
+ // ^ ^ ^ ^ ^ ^
+ // buffer_ start_ marker_ cursor_ limit_ buffer_end_
+ //
+ // We need to stretch the buffer_ and concatenate the new chunk of input to it
+
+ size_t used = limit_ - buffer_;
+ size_t needed = used + input_size;
+ size_t allocated = buffer_end_ - buffer_;
+ if (allocated < needed) {
+ size_t limit__offset = limit_ - buffer_;
+ size_t start_offset = start_ - buffer_;
+ size_t marker__offset = marker_ - buffer_;
+ size_t cursor__offset = cursor_ - buffer_;
+
+ buffer_ = (uint8_t*)realloc(buffer_, needed);
+ buffer_end_ = needed + buffer_;
+
+ marker_ = marker__offset + buffer_;
+ cursor_ = cursor__offset + buffer_;
+ start_ = buffer_ + start_offset;
+ limit_ = limit__offset + buffer_;
+ }
+ memcpy(limit_, input, input_size);
+ limit_ += input_size;
- // The scanner start_s here
- #define YYLIMIT limit_
- #define YYCURSOR cursor_
- #define YYMARKER marker_
- #define YYCTYPE uint8_t
+ // The scanner starts here
+#define YYLIMIT limit_
+#define YYCURSOR cursor_
+#define YYMARKER marker_
+#define YYCTYPE uint8_t
- #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
- #define YYFILL(n) { goto fill; }
+#define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
+#define YYFILL(n) { goto fill; }
- #define YYGETSTATE() state_
- #define YYSETSTATE(x) { state_ = (x); }
+#define YYGETSTATE() state_
+#define YYSETSTATE(x) { state_ = (x); }
- #define YYGETCONDITION() condition_
- #define YYSETCONDITION(x) { condition_ = (x); }
+#define YYGETCONDITION() condition_
+#define YYSETCONDITION(x) { condition_ = (x); }
- start_:
- if (FLAG_trace_lexer) {
- printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_);
- }
+start_:
+ if (FLAG_trace_lexer) {
+ printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_);
+ }
- /*!re2c
+ /*!re2c
re2c:indent:top = 1;
re2c:yych:conversion = 0;
re2c:condenumprefix = kCondition;
@@ -321,103 +347,36 @@ public:
<HtmlComment> any { goto yy0; }
*/
- fill:
- int unfinished_size = cursor_ - start_;
- if (FLAG_trace_lexer) {
- printf(
+fill:
+ int unfinished_size = cursor_ - start_;
+ if (FLAG_trace_lexer) {
+ printf(
"scanner needs a refill. Exiting for now with:\n"
" saved fill state_ = %d\n"
" unfinished token size = %d\n",
state_,
- unfinished_size
- );
- if(0 < unfinished_size && start_ < limit_) {
- printf(" unfinished token is: ");
- fwrite(start_, 1, cursor_ - start_, stdout);
- putchar('\n');
- }
+ unfinished_size);
+ if(0 < unfinished_size && start_ < limit_) {
+ printf(" unfinished token is: ");
+ fwrite(start_, 1, cursor_ - start_, stdout);
putchar('\n');
}
-
- if (eof_) goto start_;
-
- // Once we get here, we can get rid of
- // everything before start_ and after limit_.
-
- if (buffer_ < start_) {
- size_t start_offset = start_ - buffer_;
- memmove(buffer_, start_, limit_ - start_);
- marker_ -= start_offset;
- cursor_ -= start_offset;
- limit_ -= start_offset;
- start_ -= start_offset;
- real_start_ += start_offset;
- }
- return 0;
+ putchar('\n');
}
- private:
- bool eof_;
- int32_t state_;
- int32_t condition_;
-
- uint8_t* limit_;
- uint8_t* start_;
- uint8_t* cursor_;
- uint8_t* marker_;
- int real_start_;
-
- uint8_t* buffer_;
- uint8_t* buffer_end_;
-
- uint8_t yych;
- uint32_t yyaccept;
-
- ExperimentalScanner* sink_;
-};
-
+ if (eof_) goto start_;
-ExperimentalScanner::ExperimentalScanner(const char* fname) :
- current_(0), fetched_(0) {
- file_ = fopen(fname, "rb");
- scanner_ = new PushScanner(this);
-}
+ // Once we get here, we can get rid of
+ // everything before start_ and after limit_.
-
-ExperimentalScanner::~ExperimentalScanner() {
- fclose(file_);
-}
-
-
-void ExperimentalScanner::FillTokens() {
- current_ = 0;
- fetched_ = 0;
- uint8_t chars[BUFFER_SIZE];
- int n = static_cast<int>(fread(&chars, 1, BUFFER_SIZE, file_));
- for (int i = n; i < BUFFER_SIZE; i++) chars[i] = 0;
- scanner_->push(chars, BUFFER_SIZE);
-}
-
-
-Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
- while (current_ == fetched_) {
- FillTokens();
+ if (buffer_ < start_) {
+ size_t start_offset = start_ - buffer_;
+ memmove(buffer_, start_, limit_ - start_);
+ marker_ -= start_offset;
+ cursor_ -= start_offset;
+ limit_ -= start_offset;
+ start_ -= start_offset;
+ real_start_ += start_offset;
}
- *beg_pos = beg_[current_];
- *end_pos = end_[current_];
- Token::Value res = token_[current_];
- if (token_[current_] != Token::Token::EOS &&
- token_[current_] != Token::ILLEGAL) {
- current_++;
- }
- return res;
-}
-
-
-void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
- if (token == Token::EOS) end--;
- token_[fetched_] = token;
- beg_[fetched_] = beg;
- end_[fetched_] = end;
- fetched_++;
+ return 0;
}
« no previous file with comments | « src/lexer/lexer.h ('k') | src/lexer/lexer-shell.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698