Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(61)

Side by Side Diff: src/dateparser-inl.h

Issue 7291022: Make date parser handle all ES5 Date Time Strings correctly. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Address review comments. Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/dateparser.cc ('k') | test/mjsunit/date.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 21 matching lines...) Expand all
32 32
33 namespace v8 { 33 namespace v8 {
34 namespace internal { 34 namespace internal {
35 35
36 template <typename Char> 36 template <typename Char>
37 bool DateParser::Parse(Vector<Char> str, 37 bool DateParser::Parse(Vector<Char> str,
38 FixedArray* out, 38 FixedArray* out,
39 UnicodeCache* unicode_cache) { 39 UnicodeCache* unicode_cache) {
40 ASSERT(out->length() >= OUTPUT_SIZE); 40 ASSERT(out->length() >= OUTPUT_SIZE);
41 InputReader<Char> in(unicode_cache, str); 41 InputReader<Char> in(unicode_cache, str);
42 DateStringTokenizer<Char> scanner(&in);
42 TimeZoneComposer tz; 43 TimeZoneComposer tz;
43 TimeComposer time; 44 TimeComposer time;
44 DayComposer day; 45 DayComposer day;
45 46
46 while (!in.IsEnd()) { 47 // Specification:
47 if (in.IsAsciiDigit()) { 48 // Accept ES5 ISO 8601 date-time-strings or legacy dates compatible
48 // Parse a number (possibly with 1 or 2 trailing colons). 49 // with Safari.
49 int n = in.ReadUnsignedNumber(); 50 // ES5 ISO 8601 dates:
50 if (in.Skip(':')) { 51 // [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]]
51 if (in.Skip(':')) { 52 // where yyyy is in the range 0000..9999 and
53 // +/-yyyyyy is in the range -999999..+999999 -
54 // but -000000 is invalid (year zero must be positive),
55 // MM is in the range 01..12,
56 // DD is in the range 01..31,
57 // MM and DD default to 01 if missing,
58 // HH is generally in the range 00..23, but can be 24 if mm, ss
59 // and sss are zero (or missing), representing midnight at the
60 // end of a day,
61 // mm and ss are in the range 00..59,
62 // sss is in the range 000..999,
63 // hh is in the range 00..23,
64 // mm, ss, and sss default to 00 if missing, and
65 // timezone defaults to Z if missing.
66 // Extensions:
67 // We also allow sss to have more or less than three digits (but at
68 // least one).
69 // We allow hh:mm to be specified as hhmm.
70 // Legacy dates:
71 // Any unrecognized word before the first number is ignored.
72 // Parenthesized text is ignored.
73 // An unsigned number followed by ':' is a time value, and is
74 // added to the TimeComposer. A number followed by '::' adds a second
75 // zero as well. A number followed by '.' is also a time and must be
76 // followed by milliseconds.
77 // Any other number is a date component and is added to DayComposer.
78 // A month name (or really: any word having the same first three letters
79 // as a month name) is recorded as a named month in the DayComposer.
80 // A word recognizable as a time-zone is recorded as such, as is
81 // '(+|-)(hhmm|hh:)'.
82 // Legacy dates don't allow extra signs ('+' or '-') or unmatched ')'
83 // after a number has been read (before the first number, any garbage
84 // is allowed).
85 // Intersection of the two:
86 // A string that matches both formats (e.g. 1970-01-01) will be
87 // parsed as an ES5 date-time string - which means it will default
88 // to UTC time-zone. That's unavoidable if following the ES5
89 // specification.
90 // After a valid "T" has been read while scanning an ES5 datetime string,
91 // the input can no longer be a valid legacy date, since the "T" is a
92 // garbage string after a number has been read.
93
94 // First try getting as far as possible with an ES5 Date Time String.
95 DateToken next_unhandled_token = ParseES5DateTime(&scanner, &day, &time, &tz);
96 if (next_unhandled_token.IsInvalid()) return false;
97 bool has_read_number = !day.IsEmpty();
98 // If there's anything left, continue with the legacy parser.
99 for (DateToken token = next_unhandled_token;
100 !token.IsEndOfInput();
101 token = scanner.Next()) {
102 if (token.IsNumber()) {
103 has_read_number = true;
104 int n = token.number();
105 if (scanner.SkipSymbol(':')) {
106 if (scanner.SkipSymbol(':')) {
52 // n + "::" 107 // n + "::"
53 if (!time.IsEmpty()) return false; 108 if (!time.IsEmpty()) return false;
54 time.Add(n); 109 time.Add(n);
55 time.Add(0); 110 time.Add(0);
56 } else { 111 } else {
57 // n + ":" 112 // n + ":"
58 if (!time.Add(n)) return false; 113 if (!time.Add(n)) return false;
59 in.Skip('.'); 114 if (scanner.Peek().IsSymbol('.')) scanner.Next();
60 } 115 }
61 } else if (in.Skip('.') && time.IsExpecting(n)) { 116 } else if (scanner.SkipSymbol('.') && time.IsExpecting(n)) {
62 time.Add(n); 117 time.Add(n);
63 if (!in.IsAsciiDigit()) return false; 118 if (!scanner.Peek().IsNumber()) return false;
64 int n = in.ReadMilliseconds(); 119 int n = ReadMilliseconds(scanner.Next());
120 if (n < 0) return false;
65 time.AddFinal(n); 121 time.AddFinal(n);
66 } else if (tz.IsExpecting(n)) { 122 } else if (tz.IsExpecting(n)) {
67 tz.SetAbsoluteMinute(n); 123 tz.SetAbsoluteMinute(n);
68 } else if (time.IsExpecting(n)) { 124 } else if (time.IsExpecting(n)) {
69 time.AddFinal(n); 125 time.AddFinal(n);
70 // Require end, white space, "Z", "+" or "-" immediately after 126 // Require end, white space, "Z", "+" or "-" immediately after
71 // finalizing time. 127 // finalizing time.
72 if (!in.IsEnd() && !in.SkipWhiteSpace() && !in.Is('Z') && 128 DateToken peek = scanner.Peek();
73 !in.IsAsciiSign()) return false; 129 if (!peek.IsEndOfInput() &&
130 !peek.IsWhiteSpace() &&
131 !peek.IsKeywordZ() &&
132 !peek.IsAsciiSign()) return false;
74 } else { 133 } else {
75 if (!day.Add(n)) return false; 134 if (!day.Add(n)) return false;
76 in.Skip('-'); // Ignore suffix '-' for year, month, or day. 135 scanner.SkipSymbol('-');
77 // Skip trailing 'T' for ECMAScript 5 date string format but make 136 }
78 // sure that it is followed by a digit (for the time). 137 } else if (token.IsKeyword()) {
79 if (in.Skip('T') && !in.IsAsciiDigit()) return false;
80 }
81 } else if (in.IsAsciiAlphaOrAbove()) {
82 // Parse a "word" (sequence of chars. >= 'A'). 138 // Parse a "word" (sequence of chars. >= 'A').
83 uint32_t pre[KeywordTable::kPrefixLength]; 139 KeywordType type = token.keyword_type();
84 int len = in.ReadWord(pre, KeywordTable::kPrefixLength); 140 int value = token.keyword_value();
85 int index = KeywordTable::Lookup(pre, len);
86 KeywordType type = KeywordTable::GetType(index);
87
88 if (type == AM_PM && !time.IsEmpty()) { 141 if (type == AM_PM && !time.IsEmpty()) {
89 time.SetHourOffset(KeywordTable::GetValue(index)); 142 time.SetHourOffset(value);
90 } else if (type == MONTH_NAME) { 143 } else if (type == MONTH_NAME) {
91 day.SetNamedMonth(KeywordTable::GetValue(index)); 144 day.SetNamedMonth(value);
92 in.Skip('-'); // Ignore suffix '-' for month names 145 scanner.SkipSymbol('-');
93 } else if (type == TIME_ZONE_NAME && in.HasReadNumber()) { 146 } else if (type == TIME_ZONE_NAME && has_read_number) {
94 tz.Set(KeywordTable::GetValue(index)); 147 tz.Set(value);
95 } else { 148 } else {
96 // Garbage words are illegal if a number has been read. 149 // Garbage words are illegal if a number has been read.
97 if (in.HasReadNumber()) return false; 150 if (has_read_number) return false;
98 } 151 }
99 } else if (in.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) { 152 } else if (token.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
100 // Parse UTC offset (only after UTC or time). 153 // Parse UTC offset (only after UTC or time).
101 tz.SetSign(in.GetAsciiSignValue()); 154 tz.SetSign(token.ascii_sign());
102 in.Next(); 155 // The following number may be empty.
103 int n = in.ReadUnsignedNumber(); 156 int n = 0;
104 if (in.Skip(':')) { 157 if (scanner.Peek().IsNumber()) {
158 n = scanner.Next().number();
159 }
160 has_read_number = true;
161
162 if (scanner.Peek().IsSymbol(':')) {
105 tz.SetAbsoluteHour(n); 163 tz.SetAbsoluteHour(n);
106 tz.SetAbsoluteMinute(kNone); 164 tz.SetAbsoluteMinute(kNone);
107 } else { 165 } else {
108 tz.SetAbsoluteHour(n / 100); 166 tz.SetAbsoluteHour(n / 100);
109 tz.SetAbsoluteMinute(n % 100); 167 tz.SetAbsoluteMinute(n % 100);
110 } 168 }
111 } else if (in.Is('(')) { 169 } else if ((token.IsAsciiSign() || token.IsSymbol(')')) &&
112 // Ignore anything from '(' to a matching ')' or end of string. 170 has_read_number) {
113 in.SkipParentheses();
114 } else if ((in.IsAsciiSign() || in.Is(')')) && in.HasReadNumber()) {
115 // Extra sign or ')' is illegal if a number has been read. 171 // Extra sign or ')' is illegal if a number has been read.
116 return false; 172 return false;
117 } else { 173 } else {
118 // Ignore other characters. 174 // Ignore other characters and whitespace.
119 in.Next(); 175 }
120 } 176 }
121 } 177
122 return day.Write(out) && time.Write(out) && tz.Write(out); 178 return day.Write(out) && time.Write(out) && tz.Write(out);
123 } 179 }
124 180
181
182 template<typename CharType>
183 DateParser::DateToken DateParser::DateStringTokenizer<CharType>::Scan() {
184 int pre_pos = in_->position();
185 if (in_->IsEnd()) return DateToken::EndOfInput();
186 if (in_->IsAsciiDigit()) {
187 int n = in_->ReadUnsignedNumeral();
188 int length = in_->position() - pre_pos;
189 return DateToken::Number(n, length);
190 }
191 if (in_->Skip(':')) return DateToken::Symbol(':');
192 if (in_->Skip('-')) return DateToken::Symbol('-');
193 if (in_->Skip('+')) return DateToken::Symbol('+');
194 if (in_->Skip('.')) return DateToken::Symbol('.');
195 if (in_->Skip(')')) return DateToken::Symbol(')');
196 if (in_->IsAsciiAlphaOrAbove()) {
197 ASSERT(KeywordTable::kPrefixLength == 3);
198 uint32_t buffer[3] = {0, 0, 0};
199 int length = in_->ReadWord(buffer, 3);
200 int index = KeywordTable::Lookup(buffer, length);
201 return DateToken::Keyword(KeywordTable::GetType(index),
202 KeywordTable::GetValue(index),
203 length);
204 }
205 if (in_->SkipWhiteSpace()) {
206 return DateToken::WhiteSpace(in_->position() - pre_pos);
207 }
208 if (in_->SkipParentheses()) {
209 return DateToken::Unknown();
210 }
211 in_->Next();
212 return DateToken::Unknown();
213 }
214
215
216 template <typename Char>
217 DateParser::DateToken DateParser::ParseES5DateTime(
218 DateStringTokenizer<Char>* scanner,
219 DayComposer* day,
220 TimeComposer* time,
221 TimeZoneComposer* tz) {
222 ASSERT(day->IsEmpty());
223 ASSERT(time->IsEmpty());
224 ASSERT(tz->IsEmpty());
225
226 // Parse mandatory date string: [('-'|'+')yy]yyyy['-'MM['-'DD]]
227 if (scanner->Peek().IsAsciiSign()) {
228 // Keep the sign token, so we can pass it back to the legacy
229 // parser if we don't use it.
230 DateToken sign_token = scanner->Next();
231 if (!scanner->Peek().IsFixedLengthNumber(6)) return sign_token;
232 int sign = sign_token.ascii_sign();
233 int year = scanner->Next().number();
234 if (sign < 0 && year == 0) return sign_token;
235 day->Add(sign * year);
236 } else if (scanner->Peek().IsFixedLengthNumber(4)) {
237 day->Add(scanner->Next().number());
238 } else {
239 return scanner->Next();
240 }
241 if (scanner->SkipSymbol('-')) {
242 if (!scanner->Peek().IsFixedLengthNumber(2) ||
243 !DayComposer::IsMonth(scanner->Peek().number())) return scanner->Next();
244 day->Add(scanner->Next().number());
245 if (scanner->SkipSymbol('-')) {
246 if (!scanner->Peek().IsFixedLengthNumber(2) ||
247 !DayComposer::IsDay(scanner->Peek().number())) return scanner->Next();
248 day->Add(scanner->Next().number());
249 }
250 }
251 // Check for optional time string: 'T'HH':'mm[':'ss['.'sss]]Z
252 if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) {
253 if (!scanner->Peek().IsEndOfInput()) return scanner->Next();
254 } else {
255 // ES5 Date Time String time part is present.
256 scanner->Next();
257 if (!scanner->Peek().IsFixedLengthNumber(2) ||
258 !Between(scanner->Peek().number(), 0, 24)) {
259 return DateToken::Invalid();
260 }
261 // Allow 24:00[:00[.000]], but no other time starting with 24.
262 bool hour_is_24 = (scanner->Peek().number() == 24);
263 time->Add(scanner->Next().number());
264 if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
265 if (!scanner->Peek().IsFixedLengthNumber(2) ||
266 !TimeComposer::IsMinute(scanner->Peek().number()) ||
267 (hour_is_24 && scanner->Peek().number() > 0)) {
268 return DateToken::Invalid();
269 }
270 time->Add(scanner->Next().number());
271 if (scanner->SkipSymbol(':')) {
272 if (!scanner->Peek().IsFixedLengthNumber(2) ||
273 !TimeComposer::IsSecond(scanner->Peek().number()) ||
274 (hour_is_24 && scanner->Peek().number() > 0)) {
275 return DateToken::Invalid();
276 }
277 time->Add(scanner->Next().number());
278 if (scanner->SkipSymbol('.')) {
279 if (!scanner->Peek().IsNumber() ||
280 (hour_is_24 && scanner->Peek().number() > 0)) {
281 return DateToken::Invalid();
282 }
283 // Allow more or less than the mandated three digits.
284 time->Add(ReadMilliseconds(scanner->Next()));
285 }
286 }
287 // Check for optional timezone designation: 'Z' | ('+'|'-')hh':'mm
288 if (scanner->Peek().IsKeywordZ()) {
289 scanner->Next();
290 tz->Set(0);
291 } else if (scanner->Peek().IsSymbol('+') ||
292 scanner->Peek().IsSymbol('-')) {
293 tz->SetSign(scanner->Next().symbol() == '+' ? 1 : -1);
294 if (scanner->Peek().IsFixedLengthNumber(4)) {
295 // hhmm extension syntax.
296 int hourmin = scanner->Next().number();
297 int hour = hourmin / 100;
298 int min = hourmin % 100;
299 if (!TimeComposer::IsHour(hour) || !TimeComposer::IsMinute(min)) {
300 return DateToken::Invalid();
301 }
302 tz->SetAbsoluteHour(hour);
303 tz->SetAbsoluteMinute(min);
304 } else {
305 // hh:mm standard syntax.
306 if (!scanner->Peek().IsFixedLengthNumber(2) ||
307 !TimeComposer::IsHour(scanner->Peek().number())) {
308 return DateToken::Invalid();
309 }
310 tz->SetAbsoluteHour(scanner->Next().number());
311 if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
312 if (!scanner->Peek().IsFixedLengthNumber(2) ||
313 !TimeComposer::IsMinute(scanner->Peek().number())) {
314 return DateToken::Invalid();
315 }
316 tz->SetAbsoluteMinute(scanner->Next().number());
317 }
318 }
319 if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid();
320 }
321 // Successfully parsed ES5 Date Time String. Default to UTC if no TZ given.
322 if (tz->IsEmpty()) tz->Set(0);
323 day->set_iso_date();
324 return DateToken::EndOfInput();
325 }
326
327
125 } } // namespace v8::internal 328 } } // namespace v8::internal
126 329
127 #endif // V8_DATEPARSER_INL_H_ 330 #endif // V8_DATEPARSER_INL_H_
OLDNEW
« no previous file with comments | « src/dateparser.cc ('k') | test/mjsunit/date.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698