Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(191)

Side by Side Diff: src/dateparser.h

Issue 7291022: Make date parser handle all ES5 Date Time Strings correctly. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Address review comments. Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/date.js ('k') | src/dateparser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
54 54
55 enum { 55 enum {
56 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE 56 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
57 }; 57 };
58 58
59 private: 59 private:
60 // Range testing 60 // Range testing
61 static inline bool Between(int x, int lo, int hi) { 61 static inline bool Between(int x, int lo, int hi) {
62 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo); 62 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
63 } 63 }
64
64 // Indicates a missing value. 65 // Indicates a missing value.
65 static const int kNone = kMaxInt; 66 static const int kNone = kMaxInt;
66 67
68 // Maximal number of digits used to build the value of a numeral.
69 // Remaining digits are ignored.
70 static const int kMaxSignificantDigits = 9;
71
67 // InputReader provides basic string parsing and character classification. 72 // InputReader provides basic string parsing and character classification.
68 template <typename Char> 73 template <typename Char>
69 class InputReader BASE_EMBEDDED { 74 class InputReader BASE_EMBEDDED {
70 public: 75 public:
71 InputReader(UnicodeCache* unicode_cache, Vector<Char> s) 76 InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
72 : index_(0), 77 : index_(0),
73 buffer_(s), 78 buffer_(s),
74 has_read_number_(false),
75 unicode_cache_(unicode_cache) { 79 unicode_cache_(unicode_cache) {
76 Next(); 80 Next();
77 } 81 }
78 82
83 int position() { return index_; }
84
79 // Advance to the next character of the string. 85 // Advance to the next character of the string.
80 void Next() { ch_ = (index_ < buffer_.length()) ? buffer_[index_++] : 0; } 86 void Next() {
87 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
88 index_++;
89 }
81 90
82 // Read a string of digits as an unsigned number (cap just below kMaxInt). 91 // Read a string of digits as an unsigned number. Only the first
83 int ReadUnsignedNumber() { 92 // kMaxSignificantDigits digits contribute to the value; any remaining
84 has_read_number_ = true; 93 // digits are consumed but ignored.
85 int n; 94 int ReadUnsignedNumeral() {
86 for (n = 0; IsAsciiDigit() && n < kMaxInt / 10 - 1; Next()) { 95 int n = 0;
87 n = n * 10 + ch_ - '0'; 96 int i = 0;
97 while (IsAsciiDigit()) {
98 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
99 i++;
100 Next();
88 } 101 }
89 return n; 102 return n;
90 } 103 }
91
92 // Read a string of digits, take the first three or fewer as an unsigned
93 // number of milliseconds, and ignore any digits after the first three.
94 int ReadMilliseconds() {
95 has_read_number_ = true;
96 int n = 0;
97 int power;
98 for (power = 100; IsAsciiDigit(); Next(), power = power / 10) {
99 n = n + power * (ch_ - '0');
100 }
101 return n;
102 }
103 104
104 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a 105 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
105 // lower-case prefix, and pad any remainder of the buffer with zeroes. 106 // lower-case prefix, and pad any remainder of the buffer with zeroes.
106 // Return word length. 107 // Return word length.
107 int ReadWord(uint32_t* prefix, int prefix_size) { 108 int ReadWord(uint32_t* prefix, int prefix_size) {
108 int len; 109 int len;
109 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) { 110 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
110 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); 111 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
111 } 112 }
112 for (int i = len; i < prefix_size; i++) prefix[i] = 0; 113 for (int i = len; i < prefix_size; i++) prefix[i] = 0;
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
144 // Character testing/classification. Non-ASCII digits are not supported. 145 // Character testing/classification. Non-ASCII digits are not supported.
145 bool Is(uint32_t c) const { return ch_ == c; } 146 bool Is(uint32_t c) const { return ch_ == c; }
146 bool IsEnd() const { return ch_ == 0; } 147 bool IsEnd() const { return ch_ == 0; }
147 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } 148 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
148 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } 149 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
149 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } 150 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
150 151
151 // Return 1 for '+' and -1 for '-'. 152 // Return 1 for '+' and -1 for '-'.
152 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } 153 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
153 154
154 // Indicates whether any (possibly empty!) numbers have been read.
155 bool HasReadNumber() const { return has_read_number_; }
156
157 private: 155 private:
158 int index_; 156 int index_;
159 Vector<Char> buffer_; 157 Vector<Char> buffer_;
160 bool has_read_number_;
161 uint32_t ch_; 158 uint32_t ch_;
162 UnicodeCache* unicode_cache_; 159 UnicodeCache* unicode_cache_;
163 }; 160 };
164 161
165 enum KeywordType { INVALID, MONTH_NAME, TIME_ZONE_NAME, AM_PM }; 162 enum KeywordType {
163 INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
164 };
165
166 struct DateToken {
167 public:
168 bool IsInvalid() { return tag_ == kInvalidTokenTag; }
169 bool IsUnknown() { return tag_ == kUnknownTokenTag; }
170 bool IsNumber() { return tag_ == kNumberTag; }
171 bool IsSymbol() { return tag_ == kSymbolTag; }
172 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
173 bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
174 bool IsKeyword() { return tag_ >= kKeywordTagStart; }
175
176 int length() { return length_; }
177
178 int number() {
179 ASSERT(IsNumber());
180 return value_;
181 }
182 KeywordType keyword_type() {
183 ASSERT(IsKeyword());
184 return static_cast<KeywordType>(tag_);
185 }
186 int keyword_value() {
187 ASSERT(IsKeyword());
188 return value_;
189 }
190 char symbol() {
191 ASSERT(IsSymbol());
192 return static_cast<char>(value_);
193 }
194 bool IsSymbol(char symbol) {
195 return IsSymbol() && this->symbol() == symbol;
196 }
197 bool IsKeywordType(KeywordType tag) {
198 return tag_ == tag;
199 }
200 bool IsFixedLengthNumber(int length) {
201 return IsNumber() && length_ == length;
202 }
203 bool IsAsciiSign() {
204 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
205 }
206 int ascii_sign() {
207 ASSERT(IsAsciiSign());
208 return 44 - value_;
209 }
210 bool IsKeywordZ() {
211 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
212 }
213 bool IsUnknown(int character) {
214 return IsUnknown() && value_ == character;
215 }
216 // Factory functions.
217 static DateToken Keyword(KeywordType tag, int value, int length) {
218 return DateToken(tag, length, value);
219 }
220 static DateToken Number(int value, int length) {
221 return DateToken(kNumberTag, length, value);
222 }
223 static DateToken Symbol(char symbol) {
224 return DateToken(kSymbolTag, 1, symbol);
225 }
226 static DateToken EndOfInput() {
227 return DateToken(kEndOfInputTag, 0, -1);
228 }
229 static DateToken WhiteSpace(int length) {
230 return DateToken(kWhiteSpaceTag, length, -1);
231 }
232 static DateToken Unknown() {
233 return DateToken(kUnknownTokenTag, 1, -1);
234 }
235 static DateToken Invalid() {
236 return DateToken(kInvalidTokenTag, 0, -1);
237 }
238 private:
239 enum TagType {
240 kInvalidTokenTag = -6,
241 kUnknownTokenTag = -5,
242 kWhiteSpaceTag = -4,
243 kNumberTag = -3,
244 kSymbolTag = -2,
245 kEndOfInputTag = -1,
246 kKeywordTagStart = 0
247 };
248 DateToken(int tag, int length, int value)
249 : tag_(tag),
250 length_(length),
251 value_(value) { }
252
253 int tag_;
254 int length_; // Number of characters.
255 int value_;
256 };
257
258 template <typename Char>
259 class DateStringTokenizer {
260 public:
261 explicit DateStringTokenizer(InputReader<Char>* in)
262 : in_(in), next_(Scan()) { }
263 DateToken Next() {
264 DateToken result = next_;
265 next_ = Scan();
266 return result;
267 }
268
269 DateToken Peek() {
270 return next_;
271 }
272 bool SkipSymbol(char symbol) {
273 if (next_.IsSymbol(symbol)) {
274 next_ = Scan();
275 return true;
276 }
277 return false;
278 }
279 private:
280 DateToken Scan();
281
282 InputReader<Char>* in_;
283 DateToken next_;
284 };
285
286 static int ReadMilliseconds(DateToken number);
166 287
167 // KeywordTable maps names of months, time zones, am/pm to numbers. 288 // KeywordTable maps names of months, time zones, am/pm to numbers.
168 class KeywordTable : public AllStatic { 289 class KeywordTable : public AllStatic {
169 public: 290 public:
170 // Look up a word in the keyword table and return an index. 291 // Look up a word in the keyword table and return an index.
171 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength 292 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
172 // and 'len' is the word length. 293 // and 'len' is the word length.
173 static int Lookup(const uint32_t* pre, int len); 294 static int Lookup(const uint32_t* pre, int len);
174 // Get the type of the keyword at index i. 295 // Get the type of the keyword at index i.
175 static KeywordType GetType(int i) { 296 static KeywordType GetType(int i) {
(...skipping 18 matching lines...) Expand all
194 minute_ = 0; 315 minute_ = 0;
195 } 316 }
196 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } 317 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
197 void SetAbsoluteHour(int hour) { hour_ = hour; } 318 void SetAbsoluteHour(int hour) { hour_ = hour; }
198 void SetAbsoluteMinute(int minute) { minute_ = minute; } 319 void SetAbsoluteMinute(int minute) { minute_ = minute; }
199 bool IsExpecting(int n) const { 320 bool IsExpecting(int n) const {
200 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); 321 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
201 } 322 }
202 bool IsUTC() const { return hour_ == 0 && minute_ == 0; } 323 bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
203 bool Write(FixedArray* output); 324 bool Write(FixedArray* output);
325 bool IsEmpty() { return hour_ == kNone; }
204 private: 326 private:
205 int sign_; 327 int sign_;
206 int hour_; 328 int hour_;
207 int minute_; 329 int minute_;
208 }; 330 };
209 331
210 class TimeComposer BASE_EMBEDDED { 332 class TimeComposer BASE_EMBEDDED {
211 public: 333 public:
212 TimeComposer() : index_(0), hour_offset_(kNone) {} 334 TimeComposer() : index_(0), hour_offset_(kNone) {}
213 bool IsEmpty() const { return index_ == 0; } 335 bool IsEmpty() const { return index_ == 0; }
214 bool IsExpecting(int n) const { 336 bool IsExpecting(int n) const {
215 return (index_ == 1 && IsMinute(n)) || 337 return (index_ == 1 && IsMinute(n)) ||
216 (index_ == 2 && IsSecond(n)) || 338 (index_ == 2 && IsSecond(n)) ||
217 (index_ == 3 && IsMillisecond(n)); 339 (index_ == 3 && IsMillisecond(n));
218 } 340 }
219 bool Add(int n) { 341 bool Add(int n) {
220 return index_ < kSize ? (comp_[index_++] = n, true) : false; 342 return index_ < kSize ? (comp_[index_++] = n, true) : false;
221 } 343 }
222 bool AddFinal(int n) { 344 bool AddFinal(int n) {
223 if (!Add(n)) return false; 345 if (!Add(n)) return false;
224 while (index_ < kSize) comp_[index_++] = 0; 346 while (index_ < kSize) comp_[index_++] = 0;
225 return true; 347 return true;
226 } 348 }
227 void SetHourOffset(int n) { hour_offset_ = n; } 349 void SetHourOffset(int n) { hour_offset_ = n; }
228 bool Write(FixedArray* output); 350 bool Write(FixedArray* output);
229 351
230 static bool IsMinute(int x) { return Between(x, 0, 59); } 352 static bool IsMinute(int x) { return Between(x, 0, 59); }
353 static bool IsHour(int x) { return Between(x, 0, 23); }
354 static bool IsSecond(int x) { return Between(x, 0, 59); }
231 private: 355 private:
232 static bool IsHour(int x) { return Between(x, 0, 23); }
233 static bool IsHour12(int x) { return Between(x, 0, 12); } 356 static bool IsHour12(int x) { return Between(x, 0, 12); }
234 static bool IsSecond(int x) { return Between(x, 0, 59); }
235 static bool IsMillisecond(int x) { return Between(x, 0, 999); } 357 static bool IsMillisecond(int x) { return Between(x, 0, 999); }
236 358
237 static const int kSize = 4; 359 static const int kSize = 4;
238 int comp_[kSize]; 360 int comp_[kSize];
239 int index_; 361 int index_;
240 int hour_offset_; 362 int hour_offset_;
241 }; 363 };
242 364
243 class DayComposer BASE_EMBEDDED { 365 class DayComposer BASE_EMBEDDED {
244 public: 366 public:
245 DayComposer() : index_(0), named_month_(kNone) {} 367 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
246 bool IsEmpty() const { return index_ == 0; } 368 bool IsEmpty() const { return index_ == 0; }
247 bool Add(int n) { 369 bool Add(int n) {
248 return index_ < kSize ? (comp_[index_++] = n, true) : false; 370 if (index_ < kSize) {
371 comp_[index_] = n;
372 index_++;
373 return true;
374 }
375 return false;
249 } 376 }
250 void SetNamedMonth(int n) { named_month_ = n; } 377 void SetNamedMonth(int n) { named_month_ = n; }
251 bool Write(FixedArray* output); 378 bool Write(FixedArray* output);
252 private: 379 void set_iso_date() { is_iso_date_ = true; }
253 static bool IsMonth(int x) { return Between(x, 1, 12); } 380 static bool IsMonth(int x) { return Between(x, 1, 12); }
254 static bool IsDay(int x) { return Between(x, 1, 31); } 381 static bool IsDay(int x) { return Between(x, 1, 31); }
255 382
383 private:
256 static const int kSize = 3; 384 static const int kSize = 3;
257 int comp_[kSize]; 385 int comp_[kSize];
258 int index_; 386 int index_;
259 int named_month_; 387 int named_month_;
388 // If set, ensures that data is always parsed in year-month-date order.
389 bool is_iso_date_;
260 }; 390 };
391
392 // Tries to parse an ES5 Date Time String. Returns the next token
393 // to continue with in the legacy date string parser. If parsing is
394 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
395 // returns DateToken::Invalid(). Otherwise parsing continues in the
396 // legacy parser.
397 template <typename Char>
398 static DateParser::DateToken ParseES5DateTime(
399 DateStringTokenizer<Char>* scanner,
400 DayComposer* day,
401 TimeComposer* time,
402 TimeZoneComposer* tz);
261 }; 403 };
262 404
263 405
264 } } // namespace v8::internal 406 } } // namespace v8::internal
265 407
266 #endif // V8_DATEPARSER_H_ 408 #endif // V8_DATEPARSER_H_
OLDNEW
« no previous file with comments | « src/date.js ('k') | src/dateparser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698