OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
54 | 54 |
55 enum { | 55 enum { |
56 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE | 56 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE |
57 }; | 57 }; |
58 | 58 |
59 private: | 59 private: |
// Range testing.
// Returns true iff lo <= x <= hi (callers pass lo <= hi).
// The subtraction is done on unsigned operands so that it wraps around
// instead of overflowing a signed int (which would be undefined behaviour)
// when x and lo are far apart, e.g. x == INT_MIN and lo == 1. A single
// unsigned comparison then covers both the lower and the upper bound: any
// x below lo wraps to a value larger than hi - lo.
static inline bool Between(int x, int lo, int hi) {
  const unsigned offset = static_cast<unsigned>(x) - static_cast<unsigned>(lo);
  const unsigned width = static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
  return offset <= width;
}
64 | |
64 // Indicates a missing value. | 65 // Indicates a missing value. |
65 static const int kNone = kMaxInt; | 66 static const int kNone = kMaxInt; |
66 | 67 |
68 // Maximal number of digits used to build the value of a numeral. | |
69 // Remaining digits are ignored. | |
70 static const int kMaxSignificantDigits = 9; | |
71 | |
67 // InputReader provides basic string parsing and character classification. | 72 // InputReader provides basic string parsing and character classification. |
68 template <typename Char> | 73 template <typename Char> |
69 class InputReader BASE_EMBEDDED { | 74 class InputReader BASE_EMBEDDED { |
70 public: | 75 public: |
71 InputReader(UnicodeCache* unicode_cache, Vector<Char> s) | 76 InputReader(UnicodeCache* unicode_cache, Vector<Char> s) |
72 : index_(0), | 77 : index_(0), |
73 buffer_(s), | 78 buffer_(s), |
74 has_read_number_(false), | |
75 unicode_cache_(unicode_cache) { | 79 unicode_cache_(unicode_cache) { |
76 Next(); | 80 Next(); |
77 } | 81 } |
78 | 82 |
83 int position() { return index_; } | |
84 | |
79 // Advance to the next character of the string. | 85 // Advance to the next character of the string. |
80 void Next() { ch_ = (index_ < buffer_.length()) ? buffer_[index_++] : 0; } | 86 void Next() { |
87 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0; | |
88 index_++; | |
89 } | |
81 | 90 |
82 // Read a string of digits as an unsigned number (cap just below kMaxInt). | 91 // Read a string of digits as an unsigned number. Cap value at |
83 int ReadUnsignedNumber() { | 92 // kMaxSignificantDigits, but skip remaining digits if the numeral |
84 has_read_number_ = true; | 93 // is longer. |
85 int n; | 94 int ReadUnsignedNumeral() { |
86 for (n = 0; IsAsciiDigit() && n < kMaxInt / 10 - 1; Next()) { | 95 int n = 0; |
87 n = n * 10 + ch_ - '0'; | 96 int i = 0; |
97 while (IsAsciiDigit()) { | |
98 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0'; | |
99 i++; | |
100 Next(); | |
88 } | 101 } |
89 return n; | 102 return n; |
90 } | 103 } |
91 | |
92 // Read a string of digits, take the first three or fewer as an unsigned | |
93 // number of milliseconds, and ignore any digits after the first three. | |
94 int ReadMilliseconds() { | |
95 has_read_number_ = true; | |
96 int n = 0; | |
97 int power; | |
98 for (power = 100; IsAsciiDigit(); Next(), power = power / 10) { | |
99 n = n + power * (ch_ - '0'); | |
100 } | |
101 return n; | |
102 } | |
103 | 104 |
104 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a | 105 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a |
105 // lower-case prefix, and pad any remainder of the buffer with zeroes. | 106 // lower-case prefix, and pad any remainder of the buffer with zeroes. |
106 // Return word length. | 107 // Return word length. |
107 int ReadWord(uint32_t* prefix, int prefix_size) { | 108 int ReadWord(uint32_t* prefix, int prefix_size) { |
108 int len; | 109 int len; |
109 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) { | 110 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) { |
110 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); | 111 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); |
111 } | 112 } |
112 for (int i = len; i < prefix_size; i++) prefix[i] = 0; | 113 for (int i = len; i < prefix_size; i++) prefix[i] = 0; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
144 // Character testing/classification. Non-ASCII digits are not supported. | 145 // Character testing/classification. Non-ASCII digits are not supported. |
145 bool Is(uint32_t c) const { return ch_ == c; } | 146 bool Is(uint32_t c) const { return ch_ == c; } |
146 bool IsEnd() const { return ch_ == 0; } | 147 bool IsEnd() const { return ch_ == 0; } |
147 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } | 148 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } |
148 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } | 149 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } |
149 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } | 150 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } |
150 | 151 |
151 // Return 1 for '+' and -1 for '-'. | 152 // Return 1 for '+' and -1 for '-'. |
152 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } | 153 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } |
153 | 154 |
154 // Indicates whether any (possibly empty!) numbers have been read. | |
155 bool HasReadNumber() const { return has_read_number_; } | |
156 | |
157 private: | 155 private: |
158 int index_; | 156 int index_; |
159 Vector<Char> buffer_; | 157 Vector<Char> buffer_; |
160 bool has_read_number_; | |
161 uint32_t ch_; | 158 uint32_t ch_; |
162 UnicodeCache* unicode_cache_; | 159 UnicodeCache* unicode_cache_; |
163 }; | 160 }; |
164 | 161 |
165 enum KeywordType { INVALID, MONTH_NAME, TIME_ZONE_NAME, AM_PM }; | 162 enum KeywordType { |
163 INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM | |
164 }; | |
165 | |
166 struct DateToken { | |
167 public: | |
168 bool IsInvalid() { return tag_ == kInvalidTokenTag; } | |
169 bool IsUnknown() { return tag_ == kUnknownTokenTag; } | |
170 bool IsNumber() { return tag_ == kNumberTag; } | |
171 bool IsSymbol() { return tag_ == kSymbolTag; } | |
172 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; } | |
173 bool IsEndOfInput() { return tag_ == kEndOfInputTag; } | |
174 bool IsKeyword() { return tag_ >= kKeywordTagStart; } | |
175 | |
176 int length() { return length_; } | |
177 | |
178 int number() { | |
179 ASSERT(IsNumber()); | |
180 return value_; | |
181 } | |
182 KeywordType keyword_type() { | |
183 ASSERT(IsKeyword()); | |
184 return static_cast<KeywordType>(tag_); | |
185 } | |
186 int keyword_value() { | |
187 ASSERT(IsKeyword()); | |
188 return value_; | |
189 } | |
190 char symbol() { | |
191 ASSERT(IsSymbol()); | |
192 return static_cast<char>(value_); | |
193 } | |
194 bool IsSymbol(char symbol) { | |
195 return IsSymbol() && this->symbol() == symbol; | |
196 } | |
197 bool IsKeywordType(KeywordType tag) { | |
198 return tag_ == tag; | |
199 } | |
200 bool IsFixedLengthNumber(int length) { | |
201 return IsNumber() && length_ == length; | |
202 } | |
203 bool IsAsciiSign() { | |
204 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+'); | |
205 } | |
206 int ascii_sign() { | |
207 ASSERT(IsAsciiSign()); | |
208 return 44 - value_; | |
209 } | |
210 bool IsKeywordZ() { | |
211 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0; | |
212 } | |
213 bool IsUnknown(int character) { | |
214 return IsUnknown() && value_ == character; | |
215 } | |
216 // Factory functions. | |
217 static DateToken Keyword(KeywordType tag, int value, int length) { | |
218 return DateToken(tag, length, value); | |
219 } | |
220 static DateToken Number(int value, int length) { | |
221 return DateToken(kNumberTag, length, value); | |
222 } | |
223 static DateToken Symbol(char symbol) { | |
224 return DateToken(kSymbolTag, 1, symbol); | |
225 } | |
226 static DateToken EndOfInput() { | |
227 return DateToken(kEndOfInputTag, 0, -1); | |
228 } | |
229 static DateToken WhiteSpace(int length) { | |
230 return DateToken(kWhiteSpaceTag, length, -1); | |
231 } | |
232 static DateToken Unknown() { | |
233 return DateToken(kUnknownTokenTag, 1, -1); | |
234 } | |
235 static DateToken Invalid() { | |
236 return DateToken(kInvalidTokenTag, 0, -1); | |
237 } | |
238 private: | |
239 enum TagType { | |
240 kInvalidTokenTag = -5, | |
Erik Corry
2011/07/01 10:22:46
These are deliberately the same?
Lasse Reichstein
2011/07/01 10:49:52
No. Fixed.
| |
241 kUnknownTokenTag = -5, | |
242 kWhiteSpaceTag = -4, | |
243 kNumberTag = -3, | |
244 kSymbolTag = -2, | |
245 kEndOfInputTag = -1, | |
246 kKeywordTagStart = 0 | |
247 }; | |
248 DateToken(int tag, int length, int value) | |
249 : tag_(tag), | |
250 length_(length), | |
251 value_(value) { } | |
252 | |
253 int tag_; | |
254 int length_; // Number of characters. | |
255 int value_; | |
256 }; | |
257 | |
258 template <typename Char> | |
259 class DateStringTokenizer { | |
260 public: | |
261 explicit DateStringTokenizer(InputReader<Char>* in) | |
262 : in_(in), next_(Scan()) { } | |
263 DateToken Next() { | |
264 DateToken result = next_; | |
265 next_ = Scan(); | |
266 return result; | |
267 } | |
268 | |
269 DateToken Peek() { | |
270 return next_; | |
271 } | |
272 bool SkipSymbol(char symbol) { | |
273 if (next_.IsSymbol(symbol)) { | |
274 next_ = Scan(); | |
275 return true; | |
276 } | |
277 return false; | |
278 } | |
279 private: | |
280 DateToken Scan(); | |
281 | |
282 InputReader<Char>* in_; | |
283 DateToken next_; | |
284 }; | |
285 | |
286 static int ReadMilliseconds(DateToken number); | |
166 | 287 |
167 // KeywordTable maps names of months, time zones, am/pm to numbers. | 288 // KeywordTable maps names of months, time zones, am/pm to numbers. |
168 class KeywordTable : public AllStatic { | 289 class KeywordTable : public AllStatic { |
169 public: | 290 public: |
170 // Look up a word in the keyword table and return an index. | 291 // Look up a word in the keyword table and return an index. |
171 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength | 292 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength |
172 // and 'len' is the word length. | 293 // and 'len' is the word length. |
173 static int Lookup(const uint32_t* pre, int len); | 294 static int Lookup(const uint32_t* pre, int len); |
174 // Get the type of the keyword at index i. | 295 // Get the type of the keyword at index i. |
175 static KeywordType GetType(int i) { | 296 static KeywordType GetType(int i) { |
(...skipping 18 matching lines...) Expand all Loading... | |
194 minute_ = 0; | 315 minute_ = 0; |
195 } | 316 } |
196 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } | 317 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } |
197 void SetAbsoluteHour(int hour) { hour_ = hour; } | 318 void SetAbsoluteHour(int hour) { hour_ = hour; } |
198 void SetAbsoluteMinute(int minute) { minute_ = minute; } | 319 void SetAbsoluteMinute(int minute) { minute_ = minute; } |
199 bool IsExpecting(int n) const { | 320 bool IsExpecting(int n) const { |
200 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); | 321 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); |
201 } | 322 } |
202 bool IsUTC() const { return hour_ == 0 && minute_ == 0; } | 323 bool IsUTC() const { return hour_ == 0 && minute_ == 0; } |
203 bool Write(FixedArray* output); | 324 bool Write(FixedArray* output); |
325 bool IsEmpty() { return hour_ == kNone; } | |
204 private: | 326 private: |
205 int sign_; | 327 int sign_; |
206 int hour_; | 328 int hour_; |
207 int minute_; | 329 int minute_; |
208 }; | 330 }; |
209 | 331 |
210 class TimeComposer BASE_EMBEDDED { | 332 class TimeComposer BASE_EMBEDDED { |
211 public: | 333 public: |
212 TimeComposer() : index_(0), hour_offset_(kNone) {} | 334 TimeComposer() : index_(0), hour_offset_(kNone) {} |
213 bool IsEmpty() const { return index_ == 0; } | 335 bool IsEmpty() const { return index_ == 0; } |
214 bool IsExpecting(int n) const { | 336 bool IsExpecting(int n) const { |
215 return (index_ == 1 && IsMinute(n)) || | 337 return (index_ == 1 && IsMinute(n)) || |
216 (index_ == 2 && IsSecond(n)) || | 338 (index_ == 2 && IsSecond(n)) || |
217 (index_ == 3 && IsMillisecond(n)); | 339 (index_ == 3 && IsMillisecond(n)); |
218 } | 340 } |
219 bool Add(int n) { | 341 bool Add(int n) { |
220 return index_ < kSize ? (comp_[index_++] = n, true) : false; | 342 return index_ < kSize ? (comp_[index_++] = n, true) : false; |
221 } | 343 } |
222 bool AddFinal(int n) { | 344 bool AddFinal(int n) { |
223 if (!Add(n)) return false; | 345 if (!Add(n)) return false; |
224 while (index_ < kSize) comp_[index_++] = 0; | 346 while (index_ < kSize) comp_[index_++] = 0; |
225 return true; | 347 return true; |
226 } | 348 } |
227 void SetHourOffset(int n) { hour_offset_ = n; } | 349 void SetHourOffset(int n) { hour_offset_ = n; } |
228 bool Write(FixedArray* output); | 350 bool Write(FixedArray* output); |
229 | 351 |
230 static bool IsMinute(int x) { return Between(x, 0, 59); } | 352 static bool IsMinute(int x) { return Between(x, 0, 59); } |
353 static bool IsHour(int x) { return Between(x, 0, 23); } | |
354 static bool IsSecond(int x) { return Between(x, 0, 59); } | |
231 private: | 355 private: |
232 static bool IsHour(int x) { return Between(x, 0, 23); } | |
233 static bool IsHour12(int x) { return Between(x, 0, 12); } | 356 static bool IsHour12(int x) { return Between(x, 0, 12); } |
234 static bool IsSecond(int x) { return Between(x, 0, 59); } | |
235 static bool IsMillisecond(int x) { return Between(x, 0, 999); } | 357 static bool IsMillisecond(int x) { return Between(x, 0, 999); } |
236 | 358 |
237 static const int kSize = 4; | 359 static const int kSize = 4; |
238 int comp_[kSize]; | 360 int comp_[kSize]; |
239 int index_; | 361 int index_; |
240 int hour_offset_; | 362 int hour_offset_; |
241 }; | 363 }; |
242 | 364 |
243 class DayComposer BASE_EMBEDDED { | 365 class DayComposer BASE_EMBEDDED { |
244 public: | 366 public: |
245 DayComposer() : index_(0), named_month_(kNone) {} | 367 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {} |
246 bool IsEmpty() const { return index_ == 0; } | 368 bool IsEmpty() const { return index_ == 0; } |
247 bool Add(int n) { | 369 bool Add(int n) { |
248 return index_ < kSize ? (comp_[index_++] = n, true) : false; | 370 return index_ < kSize ? (comp_[index_++] = n, true) : false; |
Erik Corry
2011/07/01 10:22:46
Holy moley!
Lasse Reichstein
2011/07/01 10:49:52
Indeed, rewritten.
| |
249 } | 371 } |
250 void SetNamedMonth(int n) { named_month_ = n; } | 372 void SetNamedMonth(int n) { named_month_ = n; } |
251 bool Write(FixedArray* output); | 373 bool Write(FixedArray* output); |
252 private: | 374 void set_iso_date() { is_iso_date_ = true; } |
253 static bool IsMonth(int x) { return Between(x, 1, 12); } | 375 static bool IsMonth(int x) { return Between(x, 1, 12); } |
254 static bool IsDay(int x) { return Between(x, 1, 31); } | 376 static bool IsDay(int x) { return Between(x, 1, 31); } |
377 private: | |
255 | 378 |
Erik Corry
2011/07/01 10:22:46
Blank line above private: rather than below it.
Lasse Reichstein
2011/07/01 10:49:52
Done.
| |
256 static const int kSize = 3; | 379 static const int kSize = 3; |
257 int comp_[kSize]; | 380 int comp_[kSize]; |
258 int index_; | 381 int index_; |
259 int named_month_; | 382 int named_month_; |
383 // If set, ensures that data is always parsed in year-month-date order. | |
384 bool is_iso_date_; | |
260 }; | 385 }; |
386 | |
387 // Tries to parse an ES5 Date Time String. Returns the next token | |
388 // to continue with in the legacy date string parser. If parsing is | |
389 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful, | |
390 // returns DateToken::Invalid(). Otherwise parsing continues in the | |
391 // legacy parser. | |
392 template <typename Char> | |
393 static DateParser::DateToken ParseES5DateTime( | |
394 DateStringTokenizer<Char>* scanner, | |
395 DayComposer* day, | |
396 TimeComposer* time, | |
397 TimeZoneComposer* tz); | |
261 }; | 398 }; |
262 | 399 |
263 | 400 |
264 } } // namespace v8::internal | 401 } } // namespace v8::internal |
265 | 402 |
266 #endif // V8_DATEPARSER_H_ | 403 #endif // V8_DATEPARSER_H_ |
OLD | NEW |