Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(218)

Side by Side Diff: src/lexer/lexer.re

Issue 50573003: Experimental parser: don't hardcode 8-bit char type. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/lexer/lexer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Portions of this code based on re2c: 1 // Portions of this code based on re2c:
2 // (re2c/examples/push.re) 2 // (re2c/examples/push.re)
3 // Copyright 2013 the V8 project authors. All rights reserved. 3 // Copyright 2013 the V8 project authors. All rights reserved.
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are 5 // modification, are permitted provided that the following conditions are
6 // met: 6 // met:
7 // 7 //
8 // * Redistributions of source code must retain the above copyright 8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer. 9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above 10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following 11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided 12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution. 13 // with the distribution.
14 // * Neither the name of Google Inc. nor the names of its 14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived 15 // contributors may be used to endorse or promote products derived
16 // from this software without specific prior written permission. 16 // from this software without specific prior written permission.
17 // 17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 29
30 #include "lexer.h"
31
30 #include <stdio.h> 32 #include <stdio.h>
31 #include <stdlib.h> 33 #include <stdlib.h>
32 #include <string.h> 34 #include <string.h>
33 35
34 // FIXME: some of this is probably not needed. 36 // FIXME: some of this is probably not needed.
35 #include "allocation.h" 37 #include "allocation.h"
36 #include "ast.h" 38 #include "ast.h"
37 #include "preparse-data-format.h" 39 #include "preparse-data-format.h"
38 #include "preparse-data.h" 40 #include "preparse-data.h"
39 #include "scopes.h" 41 #include "scopes.h"
40 #include "preparser.h" 42 #include "preparser.h"
41 #include "api.h" 43 #include "api.h"
42 #include "ast.h" 44 #include "ast.h"
43 #include "bootstrapper.h" 45 #include "bootstrapper.h"
44 #include "char-predicates-inl.h" 46 #include "char-predicates-inl.h"
45 #include "codegen.h" 47 #include "codegen.h"
46 #include "compiler.h" 48 #include "compiler.h"
47 #include "func-name-inferrer.h" 49 #include "func-name-inferrer.h"
48 #include "messages.h" 50 #include "messages.h"
49 #include "parser.h" 51 #include "parser.h"
50 #include "platform.h" 52 #include "platform.h"
51 #include "preparser.h" 53 #include "preparser.h"
52 #include "runtime.h" 54 #include "runtime.h"
53 #include "scanner-character-streams.h" 55 #include "scanner-character-streams.h"
54 #include "scopeinfo.h" 56 #include "scopeinfo.h"
55 #include "string-stream.h" 57 #include "string-stream.h"
56 58
59 #include "experimental-scanner.h"
57 60
58 // TODO: 61 // TODO:
59 // - Run-time lexing modifications: harmony number literals, keywords depending 62 // - Run-time lexing modifications: harmony number literals, keywords depending
60 // on harmony_modules, harmony_scoping 63 // on harmony_modules, harmony_scoping
61 // - Escaping the string literals (like the baseline does) 64 // - Escaping the string literals (like the baseline does)
62 // - Error recovery after illegal tokens. 65 // - Error recovery after illegal tokens.
63 66
64 enum Condition { 67 enum Condition {
65 kConditionNormal, 68 kConditionNormal,
66 kConditionDoubleQuoteString, 69 kConditionDoubleQuoteString,
67 kConditionSingleQuoteString, 70 kConditionSingleQuoteString,
68 kConditionIdentifier, 71 kConditionIdentifier,
69 kConditionIdentifierIllegal, 72 kConditionIdentifierIllegal,
70 kConditionSingleLineComment, 73 kConditionSingleLineComment,
71 kConditionMultiLineComment, 74 kConditionMultiLineComment,
72 kConditionHtmlComment 75 kConditionHtmlComment
73 }; 76 };
74 77
75 #if defined(WIN32)
76
77 typedef signed char int8_t;
78 typedef signed short int16_t;
79 typedef signed int int32_t;
80
81 typedef unsigned char uint8_t;
82 typedef unsigned short uint16_t;
83 typedef unsigned int uint32_t;
84
85 #else
86
87 #include <stdint.h>
88 #include <unistd.h>
89
90 #ifndef O_BINARY
91 #define O_BINARY 0
92 #endif
93
94 #endif // defined(WIN32)
95
96 #include "experimental-scanner.h"
97 #include "lexer.h"
98
99 using namespace v8::internal; 78 using namespace v8::internal;
100 79
101 namespace { 80 namespace {
102 81
103 inline int HexValue(uc32 c) { 82 inline int HexValue(uc32 c) {
104 c -= '0'; 83 c -= '0';
105 if (static_cast<unsigned>(c) <= 9) return c; 84 if (static_cast<unsigned>(c) <= 9) return c;
106 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. 85 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
107 if (static_cast<unsigned>(c) <= 5) return c + 10; 86 if (static_cast<unsigned>(c) <= 5) return c + 10;
108 return -1; 87 return -1;
109 } 88 }
110 89
111 } 90 }
112 91
113 #define PUSH_TOKEN(T) { send(T); SKIP(); } 92 #define PUSH_TOKEN(T) { send(T); SKIP(); }
114 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); } 93 #define PUSH_TOKEN_LOOKAHEAD(T) { --cursor_; send(T); SKIP(); }
115 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;} 94 #define PUSH_EOF_AND_RETURN() { send(Token::EOS); eof_ = true; return 1;}
116 #define PUSH_LINE_TERMINATOR() { just_seen_line_terminator_ = true; SKIP(); } 95 #define PUSH_LINE_TERMINATOR() { just_seen_line_terminator_ = true; SKIP(); }
117 #define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; } 96 #define TERMINATE_ILLEGAL() { send(Token::ILLEGAL); send(Token::EOS); return 1; }
118 97
119 #define YYCTYPE uint8_t
120
121 PushScanner::PushScanner(ExperimentalScanner* sink, UnicodeCache* unicode_cache) 98 PushScanner::PushScanner(ExperimentalScanner* sink, UnicodeCache* unicode_cache)
122 : unicode_cache_(unicode_cache), 99 : unicode_cache_(unicode_cache),
123 eof_(false), 100 eof_(false),
124 state_(-1), 101 state_(-1),
125 condition_(kConditionNormal), 102 condition_(kConditionNormal),
126 limit_(NULL), 103 limit_(NULL),
127 start_(NULL), 104 start_(NULL),
128 cursor_(NULL), 105 cursor_(NULL),
129 marker_(NULL), 106 marker_(NULL),
130 real_start_(0), 107 real_start_(0),
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
163 140
164 bool PushScanner::ValidIdentifierStart() { 141 bool PushScanner::ValidIdentifierStart() {
165 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4)); 142 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));
166 } 143 }
167 144
168 void PushScanner::send(Token::Value token) { 145 void PushScanner::send(Token::Value token) {
169 int beg = (start_ - buffer_) + real_start_; 146 int beg = (start_ - buffer_) + real_start_;
170 int end = (cursor_ - buffer_) + real_start_; 147 int end = (cursor_ - buffer_) + real_start_;
171 if (FLAG_trace_lexer) { 148 if (FLAG_trace_lexer) {
172 printf("got %s at (%d, %d): ", Token::Name(token), beg, end); 149 printf("got %s at (%d, %d): ", Token::Name(token), beg, end);
173 for (uint8_t* s = start_; s != cursor_; s++) printf("%c", (char)*s); 150 for (YYCTYPE* s = start_; s != cursor_; s++) printf("%c", (char)*s);
174 printf(".\n"); 151 printf(".\n");
175 } 152 }
176 just_seen_line_terminator_ = false; 153 just_seen_line_terminator_ = false;
177 sink_->Record(token, beg, end); 154 sink_->Record(token, beg, end);
178 } 155 }
179 156
180 uint32_t PushScanner::push(const void *input, int input_size) { 157 uint32_t PushScanner::push(const void *input, int input_size) {
181 if (FLAG_trace_lexer) { 158 if (FLAG_trace_lexer) {
182 printf( 159 printf(
183 "scanner is receiving a new data batch of length %d\n" 160 "scanner is receiving a new data batch of length %d\n"
184 "scanner continues with saved state_ = %d\n", 161 "scanner continues with saved state_ = %d\n",
185 input_size, 162 input_size,
186 state_); 163 state_);
187 } 164 }
188 165
189 // Data source is signaling end of file when batch size 166 // Data source is signaling end of file when batch size
190 // is less than max_fill. This is slightly annoying because 167 // is less than max_fill. This is slightly annoying because
191 // max_fill is a value that can only be known after re2c does 168 // max_fill is a value that can only be known after re2c does
192 // its thing. Practically though, max_fill is never bigger than 169 // its thing. Practically though, max_fill is never bigger than
193 // the longest keyword, so given our grammar, 32 is a safe bet. 170 // the longest keyword, so given our grammar, 32 is a safe bet.
194 171
195 uint8_t null[64]; 172 YYCTYPE null[64];
196 const int max_fill = 32; 173 const int max_fill = 32;
197 if (input_size < max_fill) { // FIXME: do something about this!!! 174 if (input_size < max_fill) { // FIXME: do something about this!!!
198 eof_ = true; 175 eof_ = true;
199 input = null; 176 input = null;
200 input_size = sizeof(null); 177 input_size = sizeof(null);
201 memset(null, 0, sizeof(null)); 178 memset(null, 0, sizeof(null));
202 } 179 }
203 180
204 181
205 // When we get here, we have a partially 182 // When we get here, we have a partially
206 // consumed buffer_ which is in the following state_: 183 // consumed buffer_ which is in the following state_:
207 // last valid char last valid buffer_ spot 184 // last valid char last valid buffer_ spot
208 // v v 185 // v v
209 // +-------------------+-------------+---------------+-------------+----------------------+ 186 // +-------------------+-------------+---------------+-------------+----------------------+
210 // ^ ^ ^ ^ ^ ^ 187 // ^ ^ ^ ^ ^ ^
211 // buffer_ start_ marker_ cursor_ limit_ buffer_end_ 188 // buffer_ start_ marker_ cursor_ limit_ buffer_end_
212 // 189 //
213 // We need to stretch the buffer_ and concatenate the new chunk of input to it 190 // We need to stretch the buffer_ and concatenate the new chunk of input to it
214 191
215 size_t used = limit_ - buffer_; 192 size_t used = limit_ - buffer_;
216 size_t needed = used + input_size; 193 size_t needed = used + input_size;
217 size_t allocated = buffer_end_ - buffer_; 194 size_t allocated = buffer_end_ - buffer_;
218 if (allocated < needed) { 195 if (allocated < needed) {
219 size_t limit__offset = limit_ - buffer_; 196 size_t limit__offset = limit_ - buffer_;
220 size_t start_offset = start_ - buffer_; 197 size_t start_offset = start_ - buffer_;
221 size_t marker__offset = marker_ - buffer_; 198 size_t marker__offset = marker_ - buffer_;
222 size_t cursor__offset = cursor_ - buffer_; 199 size_t cursor__offset = cursor_ - buffer_;
223 200
224 buffer_ = (uint8_t*)realloc(buffer_, needed); 201 buffer_ = (YYCTYPE*)realloc(buffer_, needed);
225 buffer_end_ = needed + buffer_; 202 buffer_end_ = needed + buffer_;
226 203
227 marker_ = marker__offset + buffer_; 204 marker_ = marker__offset + buffer_;
228 cursor_ = cursor__offset + buffer_; 205 cursor_ = cursor__offset + buffer_;
229 start_ = buffer_ + start_offset; 206 start_ = buffer_ + start_offset;
230 limit_ = limit__offset + buffer_; 207 limit_ = limit__offset + buffer_;
231 } 208 }
232 memcpy(limit_, input, input_size); 209 memcpy(limit_, input, input_size);
233 limit_ += input_size; 210 limit_ += input_size;
234 211
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
463 size_t start_offset = start_ - buffer_; 440 size_t start_offset = start_ - buffer_;
464 memmove(buffer_, start_, limit_ - start_); 441 memmove(buffer_, start_, limit_ - start_);
465 marker_ -= start_offset; 442 marker_ -= start_offset;
466 cursor_ -= start_offset; 443 cursor_ -= start_offset;
467 limit_ -= start_offset; 444 limit_ -= start_offset;
468 start_ -= start_offset; 445 start_ -= start_offset;
469 real_start_ += start_offset; 446 real_start_ += start_offset;
470 } 447 }
471 return 0; 448 return 0;
472 } 449 }
OLDNEW
« no previous file with comments | « src/lexer/lexer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698