OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_DATEPARSER_INL_H_ | 5 #ifndef V8_DATEPARSER_INL_H_ |
6 #define V8_DATEPARSER_INL_H_ | 6 #define V8_DATEPARSER_INL_H_ |
7 | 7 |
8 #include "src/char-predicates-inl.h" | 8 #include "src/char-predicates-inl.h" |
9 #include "src/dateparser.h" | 9 #include "src/dateparser.h" |
10 #include "src/unicode-cache-inl.h" | 10 #include "src/unicode-cache-inl.h" |
11 | 11 |
12 namespace v8 { | 12 namespace v8 { |
13 namespace internal { | 13 namespace internal { |
14 | 14 |
15 template <typename Char> | 15 template <typename Char> |
16 bool DateParser::Parse(Vector<Char> str, | 16 bool DateParser::Parse(Vector<Char> str, |
17 FixedArray* out, | 17 FixedArray* out, |
18 UnicodeCache* unicode_cache) { | 18 UnicodeCache* unicode_cache) { |
19 DCHECK(out->length() >= OUTPUT_SIZE); | 19 DCHECK(out->length() >= OUTPUT_SIZE); |
20 InputReader<Char> in(unicode_cache, str); | 20 InputReader<Char> in(unicode_cache, str); |
21 DateStringTokenizer<Char> scanner(&in); | 21 DateStringTokenizer<Char> scanner(&in); |
22 TimeZoneComposer tz; | 22 TimeZoneComposer tz; |
23 TimeComposer time; | 23 TimeComposer time; |
24 DayComposer day; | 24 DayComposer day; |
25 | 25 |
26 // Specification: | 26 // Specification: |
27 // Accept ES6 ISO 8601 date-time-strings or legacy dates compatible | 27 // Accept ES5 ISO 8601 date-time-strings or legacy dates compatible |
28 // with Safari. | 28 // with Safari. |
29 // ES6 ISO 8601 dates: | 29 // ES5 ISO 8601 dates: |
30 // [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]] | 30 // [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]] |
31 // where yyyy is in the range 0000..9999 and | 31 // where yyyy is in the range 0000..9999 and |
32 // +/-yyyyyy is in the range -999999..+999999 - | 32 // +/-yyyyyy is in the range -999999..+999999 - |
33 // but -000000 is invalid (year zero must be positive), | 33 // but -000000 is invalid (year zero must be positive), |
34 // MM is in the range 01..12, | 34 // MM is in the range 01..12, |
35 // DD is in the range 01..31, | 35 // DD is in the range 01..31, |
36 // MM and DD default to 01 if missing, | 36 // MM and DD default to 01 if missing, |
37 // HH is generally in the range 00..23, but can be 24 if mm, ss | 37 // HH is generally in the range 00..23, but can be 24 if mm, ss |
38 // and sss are zero (or missing), representing midnight at the | 38 // and sss are zero (or missing), representing midnight at the |
39 // end of a day, | 39 // end of a day, |
40 // mm and ss are in the range 00..59, | 40 // mm and ss are in the range 00..59, |
41 // sss is in the range 000..999, | 41 // sss is in the range 000..999, |
42 // hh is in the range 00..23, | 42 // hh is in the range 00..23, |
43 // mm, ss, and sss default to 00 if missing, and | 43 // mm, ss, and sss default to 00 if missing, and |
44 // timezone defaults to local time if missing. | 44 // timezone defaults to Z if missing |
| 45 // (following Safari, ISO actually demands local time). |
45 // Extensions: | 46 // Extensions: |
46 // We also allow sss to have more or less than three digits (but at | 47 // We also allow sss to have more or less than three digits (but at |
47 // least one). | 48 // least one). |
48 // We allow hh:mm to be specified as hhmm. | 49 // We allow hh:mm to be specified as hhmm. |
49 // Legacy dates: | 50 // Legacy dates: |
50 // Any unrecognized word before the first number is ignored. | 51 // Any unrecognized word before the first number is ignored. |
51 // Parenthesized text is ignored. | 52 // Parenthesized text is ignored. |
52 // An unsigned number followed by ':' is a time value, and is | 53 // An unsigned number followed by ':' is a time value, and is |
53 // added to the TimeComposer. A number followed by '::' adds a second | 54 // added to the TimeComposer. A number followed by '::' adds a second |
54 // zero as well. A number followed by '.' is also a time and must be | 55 // zero as well. A number followed by '.' is also a time and must be |
55 // followed by milliseconds. | 56 // followed by milliseconds. |
56 // Any other number is a date component and is added to DayComposer. | 57 // Any other number is a date component and is added to DayComposer. |
57 // A month name (or really: any word having the same first three letters | 58 // A month name (or really: any word having the same first three letters |
58 // as a month name) is recorded as a named month in the Day composer. | 59 // as a month name) is recorded as a named month in the Day composer. |
59 // A word recognizable as a time-zone is recorded as such, as is | 60 // A word recognizable as a time-zone is recorded as such, as is |
60 // '(+|-)(hhmm|hh:)'. | 61 // '(+|-)(hhmm|hh:)'. |
61 // Legacy dates don't allow extra signs ('+' or '-') or unmatched ')' | 62 // Legacy dates don't allow extra signs ('+' or '-') or unmatched ')' |
62 // after a number has been read (before the first number, any garbage | 63 // after a number has been read (before the first number, any garbage |
63 // is allowed). | 64 // is allowed). |
64 // Intersection of the two: | 65 // Intersection of the two: |
65 // A string that matches both formats (e.g. 1970-01-01) will be | 66 // A string that matches both formats (e.g. 1970-01-01) will be |
66 // parsed as an ES6 date-time string. | 67 // parsed as an ES5 date-time string - which means it will default |
67 // After a valid "T" has been read while scanning an ES6 datetime string, | 68 // to UTC time-zone. That's unavoidable if following the ES5 |
| 69 // specification. |
| 70 // After a valid "T" has been read while scanning an ES5 datetime string, |
68 // the input can no longer be a valid legacy date, since the "T" is a | 71 // the input can no longer be a valid legacy date, since the "T" is a |
69 // garbage string after a number has been read. | 72 // garbage string after a number has been read. |
70 | 73 |
71 // First try getting as far as possible with an ES6 Date Time String. | 74 // First try getting as far as possible with an ES5 Date Time String. |
72 DateToken next_unhandled_token = ParseES6DateTime(&scanner, &day, &time, &tz); | 75 DateToken next_unhandled_token = ParseES5DateTime(&scanner, &day, &time, &tz); |
73 if (next_unhandled_token.IsInvalid()) return false; | 76 if (next_unhandled_token.IsInvalid()) return false; |
74 bool has_read_number = !day.IsEmpty(); | 77 bool has_read_number = !day.IsEmpty(); |
75 // If there's anything left, continue with the legacy parser. | 78 // If there's anything left, continue with the legacy parser. |
76 for (DateToken token = next_unhandled_token; | 79 for (DateToken token = next_unhandled_token; |
77 !token.IsEndOfInput(); | 80 !token.IsEndOfInput(); |
78 token = scanner.Next()) { | 81 token = scanner.Next()) { |
79 if (token.IsNumber()) { | 82 if (token.IsNumber()) { |
80 has_read_number = true; | 83 has_read_number = true; |
81 int n = token.number(); | 84 int n = token.number(); |
82 if (scanner.SkipSymbol(':')) { | 85 if (scanner.SkipSymbol(':')) { |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
210 do { | 213 do { |
211 if (ch_ == ')') --balance; | 214 if (ch_ == ')') --balance; |
212 else if (ch_ == '(') ++balance; | 215 else if (ch_ == '(') ++balance; |
213 Next(); | 216 Next(); |
214 } while (balance > 0 && ch_); | 217 } while (balance > 0 && ch_); |
215 return true; | 218 return true; |
216 } | 219 } |
217 | 220 |
218 | 221 |
219 template <typename Char> | 222 template <typename Char> |
220 DateParser::DateToken DateParser::ParseES6DateTime( | 223 DateParser::DateToken DateParser::ParseES5DateTime( |
221 DateStringTokenizer<Char>* scanner, | 224 DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time, |
222 DayComposer* day, | |
223 TimeComposer* time, | |
224 TimeZoneComposer* tz) { | 225 TimeZoneComposer* tz) { |
225 DCHECK(day->IsEmpty()); | 226 DCHECK(day->IsEmpty()); |
226 DCHECK(time->IsEmpty()); | 227 DCHECK(time->IsEmpty()); |
227 DCHECK(tz->IsEmpty()); | 228 DCHECK(tz->IsEmpty()); |
228 | 229 |
229 // Parse mandatory date string: [('-'|'+')yy]yyyy['-'MM['-'DD]] | 230 // Parse mandatory date string: [('-'|'+')yy]yyyy['-'MM['-'DD]] |
230 if (scanner->Peek().IsAsciiSign()) { | 231 if (scanner->Peek().IsAsciiSign()) { |
231 // Keep the sign token, so we can pass it back to the legacy | 232 // Keep the sign token, so we can pass it back to the legacy |
232 // parser if we don't use it. | 233 // parser if we don't use it. |
233 DateToken sign_token = scanner->Next(); | 234 DateToken sign_token = scanner->Next(); |
(...skipping 14 matching lines...) Expand all Loading... |
248 if (scanner->SkipSymbol('-')) { | 249 if (scanner->SkipSymbol('-')) { |
249 if (!scanner->Peek().IsFixedLengthNumber(2) || | 250 if (!scanner->Peek().IsFixedLengthNumber(2) || |
250 !DayComposer::IsDay(scanner->Peek().number())) return scanner->Next(); | 251 !DayComposer::IsDay(scanner->Peek().number())) return scanner->Next(); |
251 day->Add(scanner->Next().number()); | 252 day->Add(scanner->Next().number()); |
252 } | 253 } |
253 } | 254 } |
254 // Check for optional time string: 'T'HH':'mm[':'ss['.'sss]]Z | 255 // Check for optional time string: 'T'HH':'mm[':'ss['.'sss]]Z |
255 if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) { | 256 if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) { |
256 if (!scanner->Peek().IsEndOfInput()) return scanner->Next(); | 257 if (!scanner->Peek().IsEndOfInput()) return scanner->Next(); |
257 } else { | 258 } else { |
258 // ES6 Date Time String time part is present. | 259 // ES5 Date Time String time part is present. |
259 scanner->Next(); | 260 scanner->Next(); |
260 if (!scanner->Peek().IsFixedLengthNumber(2) || | 261 if (!scanner->Peek().IsFixedLengthNumber(2) || |
261 !Between(scanner->Peek().number(), 0, 24)) { | 262 !Between(scanner->Peek().number(), 0, 24)) { |
262 return DateToken::Invalid(); | 263 return DateToken::Invalid(); |
263 } | 264 } |
264 // Allow 24:00[:00[.000]], but no other time starting with 24. | 265 // Allow 24:00[:00[.000]], but no other time starting with 24. |
265 bool hour_is_24 = (scanner->Peek().number() == 24); | 266 bool hour_is_24 = (scanner->Peek().number() == 24); |
266 time->Add(scanner->Next().number()); | 267 time->Add(scanner->Next().number()); |
267 if (!scanner->SkipSymbol(':')) return DateToken::Invalid(); | 268 if (!scanner->SkipSymbol(':')) return DateToken::Invalid(); |
268 if (!scanner->Peek().IsFixedLengthNumber(2) || | 269 if (!scanner->Peek().IsFixedLengthNumber(2) || |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
314 if (!scanner->SkipSymbol(':')) return DateToken::Invalid(); | 315 if (!scanner->SkipSymbol(':')) return DateToken::Invalid(); |
315 if (!scanner->Peek().IsFixedLengthNumber(2) || | 316 if (!scanner->Peek().IsFixedLengthNumber(2) || |
316 !TimeComposer::IsMinute(scanner->Peek().number())) { | 317 !TimeComposer::IsMinute(scanner->Peek().number())) { |
317 return DateToken::Invalid(); | 318 return DateToken::Invalid(); |
318 } | 319 } |
319 tz->SetAbsoluteMinute(scanner->Next().number()); | 320 tz->SetAbsoluteMinute(scanner->Next().number()); |
320 } | 321 } |
321 } | 322 } |
322 if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid(); | 323 if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid(); |
323 } | 324 } |
324 // Successfully parsed ES6 Date Time String. | 325 // Successfully parsed ES5 Date Time String. Default to UTC if no TZ given. |
| 326 if (tz->IsEmpty()) tz->Set(0); |
325 day->set_iso_date(); | 327 day->set_iso_date(); |
326 return DateToken::EndOfInput(); | 328 return DateToken::EndOfInput(); |
327 } | 329 } |
328 | 330 |
329 | 331 |
330 } // namespace internal | 332 } // namespace internal |
331 } // namespace v8 | 333 } // namespace v8 |
332 | 334 |
333 #endif // V8_DATEPARSER_INL_H_ | 335 #endif // V8_DATEPARSER_INL_H_ |
OLD | NEW |