| Index: src/dateparser.h
|
| diff --git a/src/dateparser.h b/src/dateparser.h
|
| index 6e87c3418519bb8a322086052f9576fd0ce4b6da..4bd320e901d585c4df3c7763f600dc01505d38cb 100644
|
| --- a/src/dateparser.h
|
| +++ b/src/dateparser.h
|
| @@ -61,9 +61,14 @@ class DateParser : public AllStatic {
|
| static inline bool Between(int x, int lo, int hi) {
|
| return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
|
| }
|
| +
|
| // Indicates a missing value.
|
| static const int kNone = kMaxInt;
|
|
|
| + // Maximal number of digits used to build the value of a numeral.
|
| + // Remaining digits are ignored.
|
| + static const int kMaxSignificantDigits = 9;
|
| +
|
| // InputReader provides basic string parsing and character classification.
|
| template <typename Char>
|
| class InputReader BASE_EMBEDDED {
|
| @@ -71,32 +76,28 @@ class DateParser : public AllStatic {
|
| InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
|
| : index_(0),
|
| buffer_(s),
|
| - has_read_number_(false),
|
| unicode_cache_(unicode_cache) {
|
| Next();
|
| }
|
|
|
| + int position() { return index_; }
|
| +
|
| // Advance to the next character of the string.
|
| - void Next() { ch_ = (index_ < buffer_.length()) ? buffer_[index_++] : 0; }
|
| -
|
| - // Read a string of digits as an unsigned number (cap just below kMaxInt).
|
| - int ReadUnsignedNumber() {
|
| - has_read_number_ = true;
|
| - int n;
|
| - for (n = 0; IsAsciiDigit() && n < kMaxInt / 10 - 1; Next()) {
|
| - n = n * 10 + ch_ - '0';
|
| - }
|
| - return n;
|
| + void Next() {
|
| + ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
|
| + index_++;
|
| }
|
|
|
| - // Read a string of digits, take the first three or fewer as an unsigned
|
| - // number of milliseconds, and ignore any digits after the first three.
|
| - int ReadMilliseconds() {
|
| - has_read_number_ = true;
|
| + // Read a string of digits as an unsigned number. Cap value at
|
| + // kMaxSignificantDigits, but skip remaining digits if the numeral
|
| + // is longer.
|
| + int ReadUnsignedNumeral() {
|
| int n = 0;
|
| - int power;
|
| - for (power = 100; IsAsciiDigit(); Next(), power = power / 10) {
|
| - n = n + power * (ch_ - '0');
|
| + int i = 0;
|
| + while (IsAsciiDigit()) {
|
| + if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
|
| + i++;
|
| + Next();
|
| }
|
| return n;
|
| }
|
| @@ -151,18 +152,138 @@ class DateParser : public AllStatic {
|
| // Return 1 for '+' and -1 for '-'.
|
| int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
|
|
|
| - // Indicates whether any (possibly empty!) numbers have been read.
|
| - bool HasReadNumber() const { return has_read_number_; }
|
| -
|
| private:
|
| int index_;
|
| Vector<Char> buffer_;
|
| - bool has_read_number_;
|
| uint32_t ch_;
|
| UnicodeCache* unicode_cache_;
|
| };
|
|
|
| - enum KeywordType { INVALID, MONTH_NAME, TIME_ZONE_NAME, AM_PM };
|
| + enum KeywordType {
|
| + INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
|
| + };
|
| +
|
| + struct DateToken {
|
| + public:
|
| + bool IsInvalid() { return tag_ == kInvalidTokenTag; }
|
| + bool IsUnknown() { return tag_ == kUnknownTokenTag; }
|
| + bool IsNumber() { return tag_ == kNumberTag; }
|
| + bool IsSymbol() { return tag_ == kSymbolTag; }
|
| + bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
|
| + bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
|
| + bool IsKeyword() { return tag_ >= kKeywordTagStart; }
|
| +
|
| + int length() { return length_; }
|
| +
|
| + int number() {
|
| + ASSERT(IsNumber());
|
| + return value_;
|
| + }
|
| + KeywordType keyword_type() {
|
| + ASSERT(IsKeyword());
|
| + return static_cast<KeywordType>(tag_);
|
| + }
|
| + int keyword_value() {
|
| + ASSERT(IsKeyword());
|
| + return value_;
|
| + }
|
| + char symbol() {
|
| + ASSERT(IsSymbol());
|
| + return static_cast<char>(value_);
|
| + }
|
| + bool IsSymbol(char symbol) {
|
| + return IsSymbol() && this->symbol() == symbol;
|
| + }
|
| + bool IsKeywordType(KeywordType tag) {
|
| + return tag_ == tag;
|
| + }
|
| + bool IsFixedLengthNumber(int length) {
|
| + return IsNumber() && length_ == length;
|
| + }
|
| + bool IsAsciiSign() {
|
| + return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
|
| + }
|
| + int ascii_sign() {
|
| + ASSERT(IsAsciiSign());
|
| + return 44 - value_;
|
| + }
|
| + bool IsKeywordZ() {
|
| + return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
|
| + }
|
| + bool IsUnknown(int character) {
|
| + return IsUnknown() && value_ == character;
|
| + }
|
| + // Factory functions.
|
| + static DateToken Keyword(KeywordType tag, int value, int length) {
|
| + return DateToken(tag, length, value);
|
| + }
|
| + static DateToken Number(int value, int length) {
|
| + return DateToken(kNumberTag, length, value);
|
| + }
|
| + static DateToken Symbol(char symbol) {
|
| + return DateToken(kSymbolTag, 1, symbol);
|
| + }
|
| + static DateToken EndOfInput() {
|
| + return DateToken(kEndOfInputTag, 0, -1);
|
| + }
|
| + static DateToken WhiteSpace(int length) {
|
| + return DateToken(kWhiteSpaceTag, length, -1);
|
| + }
|
| + static DateToken Unknown() {
|
| + return DateToken(kUnknownTokenTag, 1, -1);
|
| + }
|
| + static DateToken Invalid() {
|
| + return DateToken(kInvalidTokenTag, 0, -1);
|
| + }
|
| + private:
|
| + enum TagType {
|
| + kInvalidTokenTag = -6,
|
| + kUnknownTokenTag = -5,
|
| + kWhiteSpaceTag = -4,
|
| + kNumberTag = -3,
|
| + kSymbolTag = -2,
|
| + kEndOfInputTag = -1,
|
| + kKeywordTagStart = 0
|
| + };
|
| + DateToken(int tag, int length, int value)
|
| + : tag_(tag),
|
| + length_(length),
|
| + value_(value) { }
|
| +
|
| + int tag_;
|
| + int length_; // Number of characters.
|
| + int value_;
|
| + };
|
| +
|
| + template <typename Char>
|
| + class DateStringTokenizer {
|
| + public:
|
| + explicit DateStringTokenizer(InputReader<Char>* in)
|
| + : in_(in), next_(Scan()) { }
|
| + DateToken Next() {
|
| + DateToken result = next_;
|
| + next_ = Scan();
|
| + return result;
|
| + }
|
| +
|
| + DateToken Peek() {
|
| + return next_;
|
| + }
|
| + bool SkipSymbol(char symbol) {
|
| + if (next_.IsSymbol(symbol)) {
|
| + next_ = Scan();
|
| + return true;
|
| + }
|
| + return false;
|
| + }
|
| + private:
|
| + DateToken Scan();
|
| +
|
| + InputReader<Char>* in_;
|
| + DateToken next_;
|
| + };
|
| +
|
| + static int ReadMilliseconds(DateToken number);
|
|
|
| // KeywordTable maps names of months, time zones, am/pm to numbers.
|
| class KeywordTable : public AllStatic {
|
| @@ -201,6 +322,7 @@ class DateParser : public AllStatic {
|
| }
|
| bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
|
| bool Write(FixedArray* output);
|
| + bool IsEmpty() { return hour_ == kNone; }
|
| private:
|
| int sign_;
|
| int hour_;
|
| @@ -228,10 +350,10 @@ class DateParser : public AllStatic {
|
| bool Write(FixedArray* output);
|
|
|
| static bool IsMinute(int x) { return Between(x, 0, 59); }
|
| - private:
|
| static bool IsHour(int x) { return Between(x, 0, 23); }
|
| - static bool IsHour12(int x) { return Between(x, 0, 12); }
|
| static bool IsSecond(int x) { return Between(x, 0, 59); }
|
| + private:
|
| + static bool IsHour12(int x) { return Between(x, 0, 12); }
|
| static bool IsMillisecond(int x) { return Between(x, 0, 999); }
|
|
|
| static const int kSize = 4;
|
| @@ -242,22 +364,42 @@ class DateParser : public AllStatic {
|
|
|
| class DayComposer BASE_EMBEDDED {
|
| public:
|
| - DayComposer() : index_(0), named_month_(kNone) {}
|
| + DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
|
| bool IsEmpty() const { return index_ == 0; }
|
| bool Add(int n) {
|
| - return index_ < kSize ? (comp_[index_++] = n, true) : false;
|
| + if (index_ < kSize) {
|
| + comp_[index_] = n;
|
| + index_++;
|
| + return true;
|
| + }
|
| + return false;
|
| }
|
| void SetNamedMonth(int n) { named_month_ = n; }
|
| bool Write(FixedArray* output);
|
| - private:
|
| + void set_iso_date() { is_iso_date_ = true; }
|
| static bool IsMonth(int x) { return Between(x, 1, 12); }
|
| static bool IsDay(int x) { return Between(x, 1, 31); }
|
|
|
| + private:
|
| static const int kSize = 3;
|
| int comp_[kSize];
|
| int index_;
|
| int named_month_;
|
| + // If set, ensures that data is always parsed in year-month-date order.
|
| + bool is_iso_date_;
|
| };
|
| +
|
| + // Tries to parse an ES5 Date Time String. Returns the next token
|
| + // to continue with in the legacy date string parser. If parsing is
|
| + // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
|
| + // returns DateToken::Invalid(). Otherwise parsing continues in the
|
| + // legacy parser.
|
| + template <typename Char>
|
| + static DateParser::DateToken ParseES5DateTime(
|
| + DateStringTokenizer<Char>* scanner,
|
| + DayComposer* day,
|
| + TimeComposer* time,
|
| + TimeZoneComposer* tz);
|
| };
|
|
|
|
|
|
|