| Index: src/lexer/lexer.re
|
| diff --git a/src/lexer/lexer.re b/src/lexer/lexer.re
|
| index a63e4183468c499017a8d3b8592543dd867c02ae..cddcda2816128741de0d88e20189c5e3bcab28ca 100644
|
| --- a/src/lexer/lexer.re
|
| +++ b/src/lexer/lexer.re
|
| @@ -1,3 +1,32 @@
|
| +// Portions of this code based on re2c:
|
| +// (re2c/examples/push.re)
|
| +// Copyright 2013 the V8 project authors. All rights reserved.
|
| +// Redistribution and use in source and binary forms, with or without
|
| +// modification, are permitted provided that the following conditions are
|
| +// met:
|
| +//
|
| +// * Redistributions of source code must retain the above copyright
|
| +// notice, this list of conditions and the following disclaimer.
|
| +// * Redistributions in binary form must reproduce the above
|
| +// copyright notice, this list of conditions and the following
|
| +// disclaimer in the documentation and/or other materials provided
|
| +// with the distribution.
|
| +// * Neither the name of Google Inc. nor the names of its
|
| +// contributors may be used to endorse or promote products derived
|
| +// from this software without specific prior written permission.
|
| +//
|
| +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| +
|
| #include <fcntl.h>
|
| #include <stdio.h>
|
| #include <stddef.h>
|
| @@ -52,116 +81,113 @@ using namespace v8::internal;
|
| #define PUSH_LINE_TERMINATOR() { SKIP(); }
|
| #define TERMINATE_ILLEGAL() { return 1; }
|
|
|
| -class PushScanner {
|
| -
|
| -public:
|
| - PushScanner(ExperimentalScanner* sink):
|
| - eof_(false),
|
| - state_(-1),
|
| - condition_(kConditionNormal),
|
| - limit_(NULL),
|
| - start_(NULL),
|
| - cursor_(NULL),
|
| - marker_(NULL),
|
| - real_start_(0),
|
| - buffer_(NULL),
|
| - buffer_end_(NULL),
|
| - yych(0),
|
| - yyaccept(0),
|
| - sink_(sink) {
|
| - }
|
| +PushScanner::PushScanner(ExperimentalScanner* sink)
|
| +: eof_(false),
|
| + state_(-1),
|
| + condition_(kConditionNormal),
|
| + limit_(NULL),
|
| + start_(NULL),
|
| + cursor_(NULL),
|
| + marker_(NULL),
|
| + real_start_(0),
|
| + buffer_(NULL),
|
| + buffer_end_(NULL),
|
| + yych(0),
|
| + yyaccept(0),
|
| + sink_(sink) {
|
|
|
| - ~PushScanner() {
|
| - }
|
| +}
|
|
|
| - void send(Token::Value token) {
|
| - int beg = (start_ - buffer_) + real_start_;
|
| - int end = (cursor_ - buffer_) + real_start_;
|
| - if (FLAG_trace_lexer) {
|
| - printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
|
| - for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s);
|
| - printf(".\n");
|
| - }
|
| - sink_->Record(token, beg, end);
|
| +PushScanner::~PushScanner() {
|
| +}
|
| +
|
| +void PushScanner::send(Token::Value token) {
|
| + int beg = (start_ - buffer_) + real_start_;
|
| + int end = (cursor_ - buffer_) + real_start_;
|
| + if (FLAG_trace_lexer) {
|
| + printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
|
| + for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s);
|
| + printf(".\n");
|
| }
|
| + sink_->Record(token, beg, end);
|
| +}
|
|
|
| - uint32_t push(const void *input, int input_size) {
|
| - if (FLAG_trace_lexer) {
|
| - printf(
|
| +uint32_t PushScanner::push(const void *input, int input_size) {
|
| + if (FLAG_trace_lexer) {
|
| + printf(
|
| "scanner is receiving a new data batch of length %d\n"
|
| "scanner continues with saved state_ = %d\n",
|
| input_size,
|
| - state_
|
| - );
|
| - }
|
| + state_);
|
| + }
|
|
|
| - // Data source is signaling end of file when batch size
|
| - // is less than max_fill. This is slightly annoying because
|
| - // max_fill is a value that can only be known after re2c does
|
| - // its thing. Practically though, max_fill is never bigger than
|
| - // the longest keyword, so given our grammar, 32 is a safe bet.
|
| -
|
| - uint8_t null[64];
|
| - const int max_fill = 32;
|
| - if (input_size < max_fill) { // FIXME: do something about this!!!
|
| - eof_ = true;
|
| - input = null;
|
| - input_size = sizeof(null);
|
| - memset(null, 0, sizeof(null));
|
| - }
|
| + // Data source is signaling end of file when batch size
|
| + // is less than max_fill. This is slightly annoying because
|
| + // max_fill is a value that can only be known after re2c does
|
| + // its thing. Practically though, max_fill is never bigger than
|
| + // the longest keyword, so given our grammar, 32 is a safe bet.
|
| +
|
| + uint8_t null[64];
|
| + const int max_fill = 32;
|
| + if (input_size < max_fill) { // FIXME: do something about this!!!
|
| + eof_ = true;
|
| + input = null;
|
| + input_size = sizeof(null);
|
| + memset(null, 0, sizeof(null));
|
| + }
|
|
|
|
|
| - // When we get here, we have a partially
|
| - // consumed buffer_ which is in the following state_:
|
| - // last valid char last valid buffer_ spot
|
| - // v v
|
| - // +-------------------+-------------+---------------+-------------+----------------------+
|
| - // ^ ^ ^ ^ ^ ^
|
| - // buffer_ start_ marker_ cursor_ limit_ buffer_end_
|
| - //
|
| - // We need to stretch the buffer_ and concatenate the new chunk of input to it
|
| -
|
| - size_t used = limit_ - buffer_;
|
| - size_t needed = used + input_size;
|
| - size_t allocated = buffer_end_ - buffer_;
|
| - if(allocated < needed) {
|
| - size_t limit__offset = limit_ - buffer_;
|
| - size_t start_offset = start_ - buffer_;
|
| - size_t marker__offset = marker_ - buffer_;
|
| - size_t cursor__offset = cursor_ - buffer_;
|
| -
|
| - buffer_ = (uint8_t*)realloc(buffer_, needed);
|
| - buffer_end_ = needed + buffer_;
|
| -
|
| - marker_ = marker__offset + buffer_;
|
| - cursor_ = cursor__offset + buffer_;
|
| - start_ = buffer_ + start_offset;
|
| - limit_ = limit__offset + buffer_;
|
| - }
|
| - memcpy(limit_, input, input_size);
|
| - limit_ += input_size;
|
| + // When we get here, we have a partially
|
| + // consumed buffer_ which is in the following state_:
|
| + // last valid char last valid buffer_ spot
|
| + // v v
|
| + // +-------------------+-------------+---------------+-------------+----------------------+
|
| + // ^ ^ ^ ^ ^ ^
|
| + // buffer_ start_ marker_ cursor_ limit_ buffer_end_
|
| + //
|
| + // We need to stretch the buffer_ and concatenate the new chunk of input to it
|
| +
|
| + size_t used = limit_ - buffer_;
|
| + size_t needed = used + input_size;
|
| + size_t allocated = buffer_end_ - buffer_;
|
| + if (allocated < needed) {
|
| + size_t limit__offset = limit_ - buffer_;
|
| + size_t start_offset = start_ - buffer_;
|
| + size_t marker__offset = marker_ - buffer_;
|
| + size_t cursor__offset = cursor_ - buffer_;
|
| +
|
| + buffer_ = (uint8_t*)realloc(buffer_, needed);
|
| + buffer_end_ = needed + buffer_;
|
| +
|
| + marker_ = marker__offset + buffer_;
|
| + cursor_ = cursor__offset + buffer_;
|
| + start_ = buffer_ + start_offset;
|
| + limit_ = limit__offset + buffer_;
|
| + }
|
| + memcpy(limit_, input, input_size);
|
| + limit_ += input_size;
|
|
|
| - // The scanner start_s here
|
| - #define YYLIMIT limit_
|
| - #define YYCURSOR cursor_
|
| - #define YYMARKER marker_
|
| - #define YYCTYPE uint8_t
|
| + // The scanner starts here
|
| +#define YYLIMIT limit_
|
| +#define YYCURSOR cursor_
|
| +#define YYMARKER marker_
|
| +#define YYCTYPE uint8_t
|
|
|
| - #define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
|
| - #define YYFILL(n) { goto fill; }
|
| +#define SKIP() { start_ = cursor_; YYSETCONDITION(kConditionNormal); goto yy0; }
|
| +#define YYFILL(n) { goto fill; }
|
|
|
| - #define YYGETSTATE() state_
|
| - #define YYSETSTATE(x) { state_ = (x); }
|
| +#define YYGETSTATE() state_
|
| +#define YYSETSTATE(x) { state_ = (x); }
|
|
|
| - #define YYGETCONDITION() condition_
|
| - #define YYSETCONDITION(x) { condition_ = (x); }
|
| +#define YYGETCONDITION() condition_
|
| +#define YYSETCONDITION(x) { condition_ = (x); }
|
|
|
| - start_:
|
| - if (FLAG_trace_lexer) {
|
| - printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_);
|
| - }
|
| +start_:
|
| + if (FLAG_trace_lexer) {
|
| + printf("Starting a round; state_: %d, condition_: %d\n", state_, condition_);
|
| + }
|
|
|
| - /*!re2c
|
| + /*!re2c
|
| re2c:indent:top = 1;
|
| re2c:yych:conversion = 0;
|
| re2c:condenumprefix = kCondition;
|
| @@ -321,103 +347,36 @@ public:
|
| <HtmlComment> any { goto yy0; }
|
| */
|
|
|
| - fill:
|
| - int unfinished_size = cursor_ - start_;
|
| - if (FLAG_trace_lexer) {
|
| - printf(
|
| +fill:
|
| + int unfinished_size = cursor_ - start_;
|
| + if (FLAG_trace_lexer) {
|
| + printf(
|
| "scanner needs a refill. Exiting for now with:\n"
|
| " saved fill state_ = %d\n"
|
| " unfinished token size = %d\n",
|
| state_,
|
| - unfinished_size
|
| - );
|
| - if(0 < unfinished_size && start_ < limit_) {
|
| - printf(" unfinished token is: ");
|
| - fwrite(start_, 1, cursor_ - start_, stdout);
|
| - putchar('\n');
|
| - }
|
| + unfinished_size);
|
| + if(0 < unfinished_size && start_ < limit_) {
|
| + printf(" unfinished token is: ");
|
| + fwrite(start_, 1, cursor_ - start_, stdout);
|
| putchar('\n');
|
| }
|
| -
|
| - if (eof_) goto start_;
|
| -
|
| - // Once we get here, we can get rid of
|
| - // everything before start_ and after limit_.
|
| -
|
| - if (buffer_ < start_) {
|
| - size_t start_offset = start_ - buffer_;
|
| - memmove(buffer_, start_, limit_ - start_);
|
| - marker_ -= start_offset;
|
| - cursor_ -= start_offset;
|
| - limit_ -= start_offset;
|
| - start_ -= start_offset;
|
| - real_start_ += start_offset;
|
| - }
|
| - return 0;
|
| + putchar('\n');
|
| }
|
|
|
| - private:
|
| - bool eof_;
|
| - int32_t state_;
|
| - int32_t condition_;
|
| -
|
| - uint8_t* limit_;
|
| - uint8_t* start_;
|
| - uint8_t* cursor_;
|
| - uint8_t* marker_;
|
| - int real_start_;
|
| -
|
| - uint8_t* buffer_;
|
| - uint8_t* buffer_end_;
|
| -
|
| - uint8_t yych;
|
| - uint32_t yyaccept;
|
| -
|
| - ExperimentalScanner* sink_;
|
| -};
|
| -
|
| + if (eof_) goto start_;
|
|
|
| -ExperimentalScanner::ExperimentalScanner(const char* fname) :
|
| - current_(0), fetched_(0) {
|
| - file_ = fopen(fname, "rb");
|
| - scanner_ = new PushScanner(this);
|
| -}
|
| + // Once we get here, we can get rid of
|
| + // everything before start_ and after limit_.
|
|
|
| -
|
| -ExperimentalScanner::~ExperimentalScanner() {
|
| - fclose(file_);
|
| -}
|
| -
|
| -
|
| -void ExperimentalScanner::FillTokens() {
|
| - current_ = 0;
|
| - fetched_ = 0;
|
| - uint8_t chars[BUFFER_SIZE];
|
| - int n = static_cast<int>(fread(&chars, 1, BUFFER_SIZE, file_));
|
| - for (int i = n; i < BUFFER_SIZE; i++) chars[i] = 0;
|
| - scanner_->push(chars, BUFFER_SIZE);
|
| -}
|
| -
|
| -
|
| -Token::Value ExperimentalScanner::Next(int* beg_pos, int* end_pos) {
|
| - while (current_ == fetched_) {
|
| - FillTokens();
|
| + if (buffer_ < start_) {
|
| + size_t start_offset = start_ - buffer_;
|
| + memmove(buffer_, start_, limit_ - start_);
|
| + marker_ -= start_offset;
|
| + cursor_ -= start_offset;
|
| + limit_ -= start_offset;
|
| + start_ -= start_offset;
|
| + real_start_ += start_offset;
|
| }
|
| - *beg_pos = beg_[current_];
|
| - *end_pos = end_[current_];
|
| - Token::Value res = token_[current_];
|
| - if (token_[current_] != Token::Token::EOS &&
|
| - token_[current_] != Token::ILLEGAL) {
|
| - current_++;
|
| - }
|
| - return res;
|
| -}
|
| -
|
| -
|
| -void ExperimentalScanner::Record(Token::Value token, int beg, int end) {
|
| - if (token == Token::EOS) end--;
|
| - token_[fetched_] = token;
|
| - beg_[fetched_] = beg;
|
| - end_[fetched_] = end;
|
| - fetched_++;
|
| + return 0;
|
| }
|
|
|