Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(562)

Side by Side Diff: src/scanner.h

Issue 6580038: [Isolates] Merge from bleeding_edge, revisions 5934-6100. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/isolates/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/runtime-profiler.cc ('k') | src/scanner.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 17 matching lines...) Expand all
28 #ifndef V8_SCANNER_H_ 28 #ifndef V8_SCANNER_H_
29 #define V8_SCANNER_H_ 29 #define V8_SCANNER_H_
30 30
31 #include "token.h" 31 #include "token.h"
32 #include "char-predicates-inl.h" 32 #include "char-predicates-inl.h"
33 #include "scanner-base.h" 33 #include "scanner-base.h"
34 34
35 namespace v8 { 35 namespace v8 {
36 namespace internal { 36 namespace internal {
37 37
38 // UTF16 buffer to read characters from a character stream. 38 // A buffered character stream based on a random access character
39 class CharacterStreamUTF16Buffer: public UTF16Buffer { 39 // source (ReadBlock can be called with pos_ pointing to any position,
40 // even positions before the current).
41 class BufferedUC16CharacterStream: public UC16CharacterStream {
40 public: 42 public:
41 CharacterStreamUTF16Buffer(); 43 BufferedUC16CharacterStream();
42 virtual ~CharacterStreamUTF16Buffer() {} 44 virtual ~BufferedUC16CharacterStream();
43 void Initialize(Handle<String> data,
44 unibrow::CharacterStream* stream,
45 int start_position,
46 int end_position);
47 virtual void PushBack(uc32 ch);
48 virtual uc32 Advance();
49 virtual void SeekForward(int pos);
50 45
51 private: 46 virtual void PushBack(uc16 character);
52 List<uc32> pushback_buffer_;
53 uc32 last_;
54 unibrow::CharacterStream* stream_;
55 47
56 List<uc32>* pushback_buffer() { return &pushback_buffer_; } 48 protected:
49 static const unsigned kBufferSize = 512;
50 static const unsigned kPushBackStepSize = 16;
51
52 virtual unsigned SlowSeekForward(unsigned delta);
53 virtual bool ReadBlock();
54 virtual void SlowPushBack(uc16 character);
55
56 virtual unsigned BufferSeekForward(unsigned delta) = 0;
57 virtual unsigned FillBuffer(unsigned position, unsigned length) = 0;
58
59 const uc16* pushback_limit_;
60 uc16 buffer_[kBufferSize];
61 };
62
63
64 // Generic string stream.
65 class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream {
66 public:
67 GenericStringUC16CharacterStream(Handle<String> data,
68 unsigned start_position,
69 unsigned end_position);
70 virtual ~GenericStringUC16CharacterStream();
71
72 protected:
73 virtual unsigned BufferSeekForward(unsigned delta);
74 virtual unsigned FillBuffer(unsigned position, unsigned length);
75
76 Handle<String> string_;
77 unsigned start_position_;
78 unsigned length_;
79 };
80
81
82 // UC16 stream based on a literal UTF-8 string.
83 class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream {
84 public:
85 Utf8ToUC16CharacterStream(const byte* data, unsigned length);
86 virtual ~Utf8ToUC16CharacterStream();
87
88 protected:
89 virtual unsigned BufferSeekForward(unsigned delta);
90 virtual unsigned FillBuffer(unsigned char_position, unsigned length);
91 void SetRawPosition(unsigned char_position);
92
93 const byte* raw_data_;
94 unsigned raw_data_length_; // Measured in bytes, not characters.
95 unsigned raw_data_pos_;
96 // The character position of the character at raw_data[raw_data_pos_].
97 // Not necessarily the same as pos_.
98 unsigned raw_character_position_;
57 }; 99 };
58 100
59 101
60 // UTF16 buffer to read characters from an external string. 102 // UTF16 buffer to read characters from an external string.
61 template <typename StringType, typename CharType> 103 class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream {
62 class ExternalStringUTF16Buffer: public UTF16Buffer {
63 public: 104 public:
64 ExternalStringUTF16Buffer(); 105 ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data,
65 virtual ~ExternalStringUTF16Buffer() {} 106 int start_position,
66 void Initialize(Handle<StringType> data, 107 int end_position);
67 int start_position, 108 virtual ~ExternalTwoByteStringUC16CharacterStream();
68 int end_position);
69 virtual void PushBack(uc32 ch);
70 virtual uc32 Advance();
71 virtual void SeekForward(int pos);
72 109
73 private: 110 virtual void PushBack(uc16 character) {
74 const CharType* raw_data_; // Pointer to the actual array of characters. 111 ASSERT(buffer_cursor_ > raw_data_);
112 buffer_cursor_--;
113 pos_--;
114 }
115 protected:
116 virtual unsigned SlowSeekForward(unsigned delta) {
117 // Fast case always handles seeking.
118 return 0;
119 }
120 virtual bool ReadBlock() {
121 // Entire string is read at start.
122 return false;
123 }
124 Handle<ExternalTwoByteString> source_;
125 const uc16* raw_data_; // Pointer to the actual array of characters.
75 }; 126 };
76 127
77 128
78 // Initializes a UTF16Buffer as input stream, using one of a number
79 // of strategies depending on the available character sources.
80 class StreamInitializer {
81 public:
82 UTF16Buffer* Init(Handle<String> source,
83 unibrow::CharacterStream* stream,
84 int start_position,
85 int end_position);
86 private:
87 // Different UTF16 buffers used to pull characters from. Based on input one of
88 // these will be initialized as the actual data source.
89 CharacterStreamUTF16Buffer char_stream_buffer_;
90 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
91 two_byte_string_buffer_;
92 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
93
94 // Used to convert the source string into a character stream when a stream
95 // is not passed to the scanner.
96 SafeStringInputBuffer safe_string_input_buffer_;
97 };
98
99 // ---------------------------------------------------------------------------- 129 // ----------------------------------------------------------------------------
100 // V8JavaScriptScanner 130 // V8JavaScriptScanner
101 // JavaScript scanner getting its input from either a V8 String or a unicode 131 // JavaScript scanner getting its input from either a V8 String or a unicode
102 // CharacterStream. 132 // CharacterStream.
103 133
104 class V8JavaScriptScanner : public JavaScriptScanner { 134 class V8JavaScriptScanner : public JavaScriptScanner {
105 public: 135 public:
106 explicit V8JavaScriptScanner(Isolate* isolate) 136 explicit V8JavaScriptScanner(Isolate* isolate)
107 : JavaScriptScanner(isolate->scanner_constants()) {} 137 : JavaScriptScanner(isolate) {}
108 138
109 // Initialize the Scanner to scan source. 139 void Initialize(UC16CharacterStream* source,
110 void Initialize(Handle<String> source, int literal_flags = kAllLiterals);
111 void Initialize(Handle<String> source,
112 unibrow::CharacterStream* stream,
113 int literal_flags = kAllLiterals); 140 int literal_flags = kAllLiterals);
114 void Initialize(Handle<String> source,
115 int start_position, int end_position,
116 int literal_flags = kAllLiterals);
117
118 protected:
119 StreamInitializer stream_initializer_;
120 }; 141 };
121 142
122 143
123 class JsonScanner : public Scanner { 144 class JsonScanner : public Scanner {
124 public: 145 public:
125 JsonScanner(); 146 JsonScanner(Isolate* isolate);
126 147
127 // Initialize the Scanner to scan source. 148 void Initialize(UC16CharacterStream* source);
128 void Initialize(Handle<String> source);
129 149
130 // Returns the next token. 150 // Returns the next token.
131 Token::Value Next(); 151 Token::Value Next();
132 152
133 protected: 153 protected:
134 // Skip past JSON whitespace (only space, tab, newline and carriage-return). 154 // Skip past JSON whitespace (only space, tab, newline and carriage-return).
135 bool SkipJsonWhiteSpace(); 155 bool SkipJsonWhiteSpace();
136 156
137 // Scan a single JSON token. The JSON lexical grammar is specified in the 157 // Scan a single JSON token. The JSON lexical grammar is specified in the
138 // ECMAScript 5 standard, section 15.12.1.1. 158 // ECMAScript 5 standard, section 15.12.1.1.
139 // Recognizes all of the single-character tokens directly, or calls a function 159 // Recognizes all of the single-character tokens directly, or calls a function
140 // to scan a number, string or identifier literal. 160 // to scan a number, string or identifier literal.
141 // The only allowed whitespace characters between tokens are tab, 161 // The only allowed whitespace characters between tokens are tab,
142 // carrige-return, newline and space. 162 // carriage-return, newline and space.
143 void ScanJson(); 163 void ScanJson();
144 164
145 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 165 // A JSON number (production JSONNumber) is a subset of the valid JavaScript
146 // decimal number literals. 166 // decimal number literals.
147 // It includes an optional minus sign, must have at least one 167 // It includes an optional minus sign, must have at least one
148 // digit before and after a decimal point, may not have prefixed zeros (unless 168 // digit before and after a decimal point, may not have prefixed zeros (unless
149 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 169 // the integer part is zero), and may include an exponent part (e.g., "e-10").
150 // Hexadecimal and octal numbers are not allowed. 170 // Hexadecimal and octal numbers are not allowed.
151 Token::Value ScanJsonNumber(); 171 Token::Value ScanJsonNumber();
152 172
153 // A JSON string (production JSONString) is a subset of valid JavaScript string 173 // A JSON string (production JSONString) is a subset of valid JavaScript string
154 // literals. The string must only be double-quoted (not single-quoted), and 174 // literals. The string must only be double-quoted (not single-quoted), and
155 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 175 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
156 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 176 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
157 Token::Value ScanJsonString(); 177 Token::Value ScanJsonString();
158 178
159 // Used to recognize one of the literals "true", "false", or "null". These 179 // Used to recognize one of the literals "true", "false", or "null". These
160 // are the only valid JSON identifiers (productions JSONBooleanLiteral, 180 // are the only valid JSON identifiers (productions JSONBooleanLiteral,
161 // JSONNullLiteral). 181 // JSONNullLiteral).
162 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); 182 Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
163
164 StreamInitializer stream_initializer_;
165 }; 183 };
166 184
167
168 // ExternalStringUTF16Buffer
169 template <typename StringType, typename CharType>
170 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
171 : raw_data_(NULL) { }
172
173
174 template <typename StringType, typename CharType>
175 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
176 Handle<StringType> data,
177 int start_position,
178 int end_position) {
179 ASSERT(!data.is_null());
180 raw_data_ = data->resource()->data();
181
182 ASSERT(end_position <= data->length());
183 if (start_position > 0) {
184 SeekForward(start_position);
185 }
186 end_ =
187 end_position != kNoEndPosition ? end_position : data->length();
188 }
189
190
191 template <typename StringType, typename CharType>
192 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
193 if (pos_ < end_) {
194 return raw_data_[pos_++];
195 } else {
196 // note: currently the following increment is necessary to avoid a
197 // test-parser problem!
198 pos_++;
199 return static_cast<uc32>(-1);
200 }
201 }
202
203
204 template <typename StringType, typename CharType>
205 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
206 pos_--;
207 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
208 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
209 }
210
211
212 template <typename StringType, typename CharType>
213 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
214 pos_ = pos;
215 }
216
217 } } // namespace v8::internal 185 } } // namespace v8::internal
218 186
219 #endif // V8_SCANNER_H_ 187 #endif // V8_SCANNER_H_
OLDNEW
« no previous file with comments | « src/runtime-profiler.cc ('k') | src/scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698