Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(868)

Side by Side Diff: src/dateparser-inl.h

Issue 7291022: Make date parser handle all ES5 Date Time Strings correctly. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 21 matching lines...) Expand all
32 32
33 namespace v8 { 33 namespace v8 {
34 namespace internal { 34 namespace internal {
35 35
36 template <typename Char> 36 template <typename Char>
37 bool DateParser::Parse(Vector<Char> str, 37 bool DateParser::Parse(Vector<Char> str,
38 FixedArray* out, 38 FixedArray* out,
39 UnicodeCache* unicode_cache) { 39 UnicodeCache* unicode_cache) {
40 ASSERT(out->length() >= OUTPUT_SIZE); 40 ASSERT(out->length() >= OUTPUT_SIZE);
41 InputReader<Char> in(unicode_cache, str); 41 InputReader<Char> in(unicode_cache, str);
42 DateStringTokenizer<Char> scanner(&in);
42 TimeZoneComposer tz; 43 TimeZoneComposer tz;
43 TimeComposer time; 44 TimeComposer time;
44 DayComposer day; 45 DayComposer day;
45 46
46 while (!in.IsEnd()) { 47 // Specification:
47 if (in.IsAsciiDigit()) { 48 // Accept ES5 ISO 8601 date-time-strings or legacy dates compatible
48 // Parse a number (possibly with 1 or 2 trailing colons). 49 // with Safari.
49 int n = in.ReadUnsignedNumber(); 50 // ES5 ISO 8601 dates:
50 if (in.Skip(':')) { 51 // [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]]
51 if (in.Skip(':')) { 52 // where yyyy is in the range 0000..9999 and
53 // +/-yyyyyy is in the range -999999..+999999 -
54 // but -000000 is invalid (year zero must be positive),
55 // MM is in the range 01..12,
56 // DD is in the range 01..31,
57 // MM and DD default to 01 if missing,
58 // HH is generally in the range 00..23, but can be 24 if mm, ss
59 // and sss are zero (or missing), representing midnight at the
60 // end of a day,
61 // mm and ss are in the range 00..59,
62 // sss is in the range 000..999,
63 // hh is in the range 00..23,
64 // mm, ss, and sss default to 00 if missing, and
65 // timezone defaults to Z if missing.
66 // Extensions:
67 // We also allow sss to have more or less than three digits (but at
68 // least one).
69 // We allow hh:mm to be specified as hhmm.
70 // Legacy dates:
71 // Any unrecognized word before the first number is ignored.
72 // Parenthesized text is ignored.
73 // An unsigned number followed by ':' is a time value, and is
74 // added to the TimeComposer. A number followed by '::' adds a second
75 // zero as well. A number followed by '.' is also a time and must be
76 // followed by milliseconds.
77 // Any other number is a date component and is added to DayComposer.
78 // A month name (or really: any word having the same first three letters
79 // as a month name) is recorded as a named month in the Day composer.
80 // A word recognizable as a time-zone is recorded as such, as is
81 // '(+|-)(hhmm|hh:)'.
82 // Legacy dates don't allow extra signs ('+' or '-') or unmatched ')'
83 // after a number has been read (before the first number, any garbage
84 // is allowed).
85 // Intersection of the two:
86 // A string that matches both formats (e.g. 1970-01-01) will be
87 // parsed as an ES5 date-time string - which means it will default
88 // to UTC time-zone. That's unavoidable if following the ES5
89 // specification.
90 // After a valid "T" has been read while scanning an ES5 datetime string,
91 // the input can no longer be a valid legacy date, since the "T" is a
92 // garbage string after a number has been read.
93
94 // First try getting as far as possible with an ES5 Date Time String.
95 DateToken next_unhandled_token = ParseES5DateTime(&scanner, &day, &time, &tz);
96 if (next_unhandled_token.IsInvalid()) return false;
97 bool has_read_number = !day.IsEmpty();
98 // If there's anything left, continue with the legacy parser.
99 for (DateToken token = next_unhandled_token;
100 !token.IsEndOfInput();
101 token = scanner.Next()) {
102 if (token.IsNumber()) {
103 has_read_number = true;
104 int n = token.number();
105 if (scanner.SkipSymbol(':')) {
106 if (scanner.SkipSymbol(':')) {
52 // n + "::" 107 // n + "::"
53 if (!time.IsEmpty()) return false; 108 if (!time.IsEmpty()) return false;
54 time.Add(n); 109 time.Add(n);
55 time.Add(0); 110 time.Add(0);
56 } else { 111 } else {
57 // n + ":" 112 // n + ":"
58 if (!time.Add(n)) return false; 113 if (!time.Add(n)) return false;
59 in.Skip('.'); 114 if (scanner.Peek().IsSymbol('.')) scanner.Next();
60 } 115 }
61 } else if (in.Skip('.') && time.IsExpecting(n)) { 116 } else if (scanner.SkipSymbol('.') && time.IsExpecting(n)) {
62 time.Add(n); 117 time.Add(n);
63 if (!in.IsAsciiDigit()) return false; 118 if (!scanner.Peek().IsNumber()) return false;
64 int n = in.ReadMilliseconds(); 119 int n = ReadMilliseconds(scanner.Next());
120 if (n < 0) return false;
65 time.AddFinal(n); 121 time.AddFinal(n);
66 } else if (tz.IsExpecting(n)) { 122 } else if (tz.IsExpecting(n)) {
67 tz.SetAbsoluteMinute(n); 123 tz.SetAbsoluteMinute(n);
68 } else if (time.IsExpecting(n)) { 124 } else if (time.IsExpecting(n)) {
69 time.AddFinal(n); 125 time.AddFinal(n);
70 // Require end, white space, "Z", "+" or "-" immediately after 126 // Require end, white space, "Z", "+" or "-" immediately after
71 // finalizing time. 127 // finalizing time.
72 if (!in.IsEnd() && !in.SkipWhiteSpace() && !in.Is('Z') && 128 DateToken peek = scanner.Peek();
73 !in.IsAsciiSign()) return false; 129 if (!peek.IsEndOfInput() &&
130 !peek.IsWhiteSpace() &&
131 !peek.IsKeywordZ() &&
132 !peek.IsAsciiSign()) return false;
74 } else { 133 } else {
75 if (!day.Add(n)) return false; 134 if (!day.Add(n)) return false;
76 in.Skip('-'); // Ignore suffix '-' for year, month, or day. 135 scanner.SkipSymbol('-');
77 // Skip trailing 'T' for ECMAScript 5 date string format but make 136 }
78 // sure that it is followed by a digit (for the time). 137 } else if (token.IsKeyword()) {
79 if (in.Skip('T') && !in.IsAsciiDigit()) return false;
80 }
81 } else if (in.IsAsciiAlphaOrAbove()) {
82 // Parse a "word" (sequence of chars. >= 'A'). 138 // Parse a "word" (sequence of chars. >= 'A').
83 uint32_t pre[KeywordTable::kPrefixLength]; 139 KeywordType type = token.keyword_type();
84 int len = in.ReadWord(pre, KeywordTable::kPrefixLength); 140 int value = token.keyword_value();
85 int index = KeywordTable::Lookup(pre, len);
86 KeywordType type = KeywordTable::GetType(index);
87
88 if (type == AM_PM && !time.IsEmpty()) { 141 if (type == AM_PM && !time.IsEmpty()) {
89 time.SetHourOffset(KeywordTable::GetValue(index)); 142 time.SetHourOffset(value);
90 } else if (type == MONTH_NAME) { 143 } else if (type == MONTH_NAME) {
91 day.SetNamedMonth(KeywordTable::GetValue(index)); 144 day.SetNamedMonth(value);
92 in.Skip('-'); // Ignore suffix '-' for month names 145 scanner.SkipSymbol('-');
93 } else if (type == TIME_ZONE_NAME && in.HasReadNumber()) { 146 } else if (type == TIME_ZONE_NAME && has_read_number) {
94 tz.Set(KeywordTable::GetValue(index)); 147 tz.Set(value);
95 } else { 148 } else {
96 // Garbage words are illegal if a number has been read. 149 // Garbage words are illegal if a number has been read.
97 if (in.HasReadNumber()) return false; 150 if (has_read_number) return false;
98 } 151 }
99 } else if (in.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) { 152 } else if (token.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
100 // Parse UTC offset (only after UTC or time). 153 // Parse UTC offset (only after UTC or time).
101 tz.SetSign(in.GetAsciiSignValue()); 154 tz.SetSign(token.ascii_sign());
102 in.Next(); 155 // The following number may be empty.
103 int n = in.ReadUnsignedNumber(); 156 int n = 0;
104 if (in.Skip(':')) { 157 if (scanner.Peek().IsNumber()) {
158 n = scanner.Next().number();
159 }
160 has_read_number = true;
161
162 if (scanner.Peek().IsSymbol(':')) {
105 tz.SetAbsoluteHour(n); 163 tz.SetAbsoluteHour(n);
106 tz.SetAbsoluteMinute(kNone); 164 tz.SetAbsoluteMinute(kNone);
107 } else { 165 } else {
108 tz.SetAbsoluteHour(n / 100); 166 tz.SetAbsoluteHour(n / 100);
109 tz.SetAbsoluteMinute(n % 100); 167 tz.SetAbsoluteMinute(n % 100);
110 } 168 }
111 } else if (in.Is('(')) { 169 } else if ((token.IsAsciiSign() || token.IsSymbol(')')) &&
112 // Ignore anything from '(' to a matching ')' or end of string. 170 has_read_number) {
113 in.SkipParentheses();
114 } else if ((in.IsAsciiSign() || in.Is(')')) && in.HasReadNumber()) {
115 // Extra sign or ')' is illegal if a number has been read. 171 // Extra sign or ')' is illegal if a number has been read.
116 return false; 172 return false;
117 } else { 173 } else {
118 // Ignore other characters. 174 // Ignore other characters and whitespace.
119 in.Next(); 175 }
120 } 176 }
121 } 177
122 return day.Write(out) && time.Write(out) && tz.Write(out); 178 return day.Write(out) && time.Write(out) && tz.Write(out);
123 } 179 }
124 180
Erik Corry 2011/07/01 10:22:46 Blank line.
181 template<typename CharType>
182 DateParser::DateToken DateParser::DateStringTokenizer<CharType>::Scan() {
183 int pre_pos = in_->position();
184 if (in_->IsEnd()) return DateToken::EndOfInput();
185 if (in_->IsAsciiDigit()) {
186 int n = in_->ReadUnsignedNumeral();
187 int length = in_->position() - pre_pos;
188 return DateToken::Number(n, length);
189 }
190 if (in_->Skip(':')) return DateToken::Symbol(':');
191 if (in_->Skip('-')) return DateToken::Symbol('-');
192 if (in_->Skip('+')) return DateToken::Symbol('+');
193 if (in_->Skip('.')) return DateToken::Symbol('.');
194 if (in_->Skip(')')) return DateToken::Symbol(')');
195 if (in_->IsAsciiAlphaOrAbove()) {
196 ASSERT(KeywordTable::kPrefixLength == 3);
197 uint32_t buffer[3] = {0, 0, 0};
198 int length = in_->ReadWord(buffer, 3);
199 int index = KeywordTable::Lookup(buffer, length);
200 return DateToken::Keyword(KeywordTable::GetType(index),
201 KeywordTable::GetValue(index),
202 length);
203 }
204 if (in_->SkipWhiteSpace()) {
205 return DateToken::WhiteSpace(in_->position() - pre_pos);
206 }
207 if (in_->SkipParentheses()) {
208 return DateToken::Unknown();
209 }
210 in_->Next();
211 return DateToken::Unknown();
212 }
213
214
215 template <typename Char>
216 DateParser::DateToken DateParser::ParseES5DateTime(
217 DateStringTokenizer<Char>* scanner,
218 DayComposer* day,
219 TimeComposer* time,
220 TimeZoneComposer* tz) {
221 ASSERT(day->IsEmpty());
222 ASSERT(time->IsEmpty());
223 ASSERT(tz->IsEmpty());
224
225 // Parse mandatory date string: [('-'|'+')yy]yyyy['-'MM['-'DD]]
226 if (scanner->Peek().IsAsciiSign()) {
227 // Keep the sign token, so we can pass it back to the legacy
228 // parser if we don't use it.
229 DateToken sign_token = scanner->Next();
230 if (!scanner->Peek().IsFixedLengthNumber(6)) return sign_token;
231 int sign = sign_token.ascii_sign();
232 int year = scanner->Next().number();
233 if (sign < 0 && year == 0) return sign_token;
234 day->Add(sign * year);
235 } else if (scanner->Peek().IsFixedLengthNumber(4)) {
236 day->Add(scanner->Next().number());
237 } else {
238 return scanner->Next();
239 }
240 if (scanner->SkipSymbol('-')) {
241 if (!scanner->Peek().IsFixedLengthNumber(2) ||
242 !DayComposer::IsMonth(scanner->Peek().number())) return scanner->Next();
243 day->Add(scanner->Next().number());
244 if (scanner->SkipSymbol('-')) {
245 if (!scanner->Peek().IsFixedLengthNumber(2) ||
246 !DayComposer::IsDay(scanner->Peek().number())) return scanner->Next();
247 day->Add(scanner->Next().number());
248 }
249 }
250 // Check for optional time string: 'T'HH':'mm[':'ss['.'sss]]Z
251 if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) {
252 if (!scanner->Peek().IsEndOfInput()) return scanner->Next();
253 } else {
254 // ES5 Date Time String time part is present.
255 scanner->Next();
256 if (!scanner->Peek().IsFixedLengthNumber(2) ||
257 !Between(scanner->Peek().number(), 0, 24)) {
258 return DateToken::Invalid();
259 }
260 // Allow 24:00[:00[.000]], but no other time starting with 24.
261 bool hour_is_24 = (scanner->Peek().number() == 24);
262 time->Add(scanner->Next().number());
263 if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
264 if (!scanner->Peek().IsFixedLengthNumber(2) ||
265 !TimeComposer::IsMinute(scanner->Peek().number()) ||
266 (hour_is_24 && scanner->Peek().number() > 0)) {
267 return DateToken::Invalid();
268 }
269 time->Add(scanner->Next().number());
270 if (scanner->SkipSymbol(':')) {
271 if (!scanner->Peek().IsFixedLengthNumber(2) ||
272 !TimeComposer::IsSecond(scanner->Peek().number()) ||
273 (hour_is_24 && scanner->Peek().number() > 0)) {
274 return DateToken::Invalid();
275 }
276 time->Add(scanner->Next().number());
277 if (scanner->SkipSymbol('.')) {
278 if (!scanner->Peek().IsNumber() ||
279 (hour_is_24 && scanner->Peek().number() > 0)) {
280 return DateToken::Invalid();
281 }
282 // Allow more or less than the mandated three digits.
283 time->Add(ReadMilliseconds(scanner->Next()));
284 }
285 }
286 // Check for optional timezone designation: 'Z' | ('+'|'-')hh':'mm
287 if (scanner->Peek().IsKeywordZ()) {
288 scanner->Next();
289 tz->Set(0);
290 } else if (scanner->Peek().IsSymbol('+') ||
291 scanner->Peek().IsSymbol('-')) {
292 tz->SetSign(scanner->Next().symbol() == '+' ? 1 : -1);
293 if (scanner->Peek().IsFixedLengthNumber(4)) {
294 // hhmm extension syntax.
295 int hourmin = scanner->Next().number();
296 int hour = hourmin / 100;
297 int min = hourmin % 100;
298 if (!TimeComposer::IsHour(hour) || !TimeComposer::IsMinute(min)) {
299 return DateToken::Invalid();
300 }
301 tz->SetAbsoluteHour(hour);
302 tz->SetAbsoluteMinute(min);
303 } else {
304 // hh:mm standard syntax.
305 if (!scanner->Peek().IsFixedLengthNumber(2) ||
306 !TimeComposer::IsHour(scanner->Peek().number())) {
307 return DateToken::Invalid();
308 }
309 tz->SetAbsoluteHour(scanner->Next().number());
310 if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
311 if (!scanner->Peek().IsFixedLengthNumber(2) ||
312 !TimeComposer::IsMinute(scanner->Peek().number())) {
313 return DateToken::Invalid();
314 }
315 tz->SetAbsoluteMinute(scanner->Next().number());
316 }
317 }
318 if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid();
319 }
320 // Successfully parsed ES5 Date Time String. Default to UTC if no TZ given.
321 if (tz->IsEmpty()) tz->Set(0);
322 day->set_iso_date();
323 return DateToken::EndOfInput();
324 }
325
326
125 } } // namespace v8::internal 327 } } // namespace v8::internal
126 328
127 #endif // V8_DATEPARSER_INL_H_ 329 #endif // V8_DATEPARSER_INL_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698