Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(141)

Unified Diff: src/conversions.cc

Issue 1529004: StringToInt rewritten. This version doesn't allocate memory for long decimals... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/conversions.h ('k') | src/runtime.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/conversions.cc
===================================================================
--- src/conversions.cc (revision 4332)
+++ src/conversions.cc (working copy)
@@ -48,51 +48,6 @@
return -1;
}
-
-// Provide a common interface to getting a character at a certain
-// index from a char* or a String object.
-static inline int GetChar(const char* str, int index) {
- ASSERT(index >= 0 && index < StrLength(str));
- return str[index];
-}
-
-
-static inline int GetChar(String* str, int index) {
- return str->Get(index);
-}
-
-
-static inline int GetLength(const char* str) {
- return StrLength(str);
-}
-
-
-static inline int GetLength(String* str) {
- return str->length();
-}
-
-
-static inline const char* GetCString(const char* str, int index) {
- return str + index;
-}
-
-
-static inline const char* GetCString(String* str, int index) {
- int length = str->length();
- char* result = NewArray<char>(length + 1);
- for (int i = index; i < length; i++) {
- uc16 c = str->Get(i);
- if (c <= 127) {
- result[i - index] = static_cast<char>(c);
- } else {
- result[i - index] = 127; // Force number parsing to fail.
- }
- }
- result[length - index] = '\0';
- return result;
-}
-
-
namespace {
// C++-style iterator adaptor for StringInputBuffer
@@ -134,15 +89,6 @@
}
-static inline void ReleaseCString(const char* original, const char* str) {
-}
-
-
-static inline void ReleaseCString(String* original, const char* str) {
- DeleteArray(const_cast<char *>(str));
-}
-
-
template <class Iterator, class EndMark>
static bool SubStringEquals(Iterator* current,
EndMark end,
@@ -168,98 +114,7 @@
// we don't need to preserve all the digits.
const int kMaxSignificantDigits = 772;
-// Parse an int from a string starting a given index and in a given
-// radix. The string can be either a char* or a String*.
-template <class S>
-static int InternalStringToInt(S* s, int i, int radix, double* value) {
- int len = GetLength(s);
- // Setup limits for computing the value.
- ASSERT(2 <= radix && radix <= 36);
- int lim_0 = '0' + (radix < 10 ? radix : 10);
- int lim_a = 'a' + (radix - 10);
- int lim_A = 'A' + (radix - 10);
-
- // NOTE: The code for computing the value may seem a bit complex at
- // first glance. It is structured to use 32-bit multiply-and-add
- // loops as long as possible to avoid loosing precision.
-
- double v = 0.0;
- int j;
- for (j = i; j < len;) {
- // Parse the longest part of the string starting at index j
- // possible while keeping the multiplier, and thus the part
- // itself, within 32 bits.
- uint32_t part = 0, multiplier = 1;
- int k;
- for (k = j; k < len; k++) {
- int c = GetChar(s, k);
- if (c >= '0' && c < lim_0) {
- c = c - '0';
- } else if (c >= 'a' && c < lim_a) {
- c = c - 'a' + 10;
- } else if (c >= 'A' && c < lim_A) {
- c = c - 'A' + 10;
- } else {
- break;
- }
-
- // Update the value of the part as long as the multiplier fits
- // in 32 bits. When we can't guarantee that the next iteration
- // will not overflow the multiplier, we stop parsing the part
- // by leaving the loop.
- static const uint32_t kMaximumMultiplier = 0xffffffffU / 36;
- uint32_t m = multiplier * radix;
- if (m > kMaximumMultiplier) break;
- part = part * radix + c;
- multiplier = m;
- ASSERT(multiplier > part);
- }
-
- // Compute the number of part digits. If no digits were parsed;
- // we're done parsing the entire string.
- int digits = k - j;
- if (digits == 0) break;
-
- // Update the value and skip the part in the string.
- ASSERT(multiplier ==
- pow(static_cast<double>(radix), static_cast<double>(digits)));
- v = v * multiplier + part;
- j = k;
- }
-
- // If the resulting value is larger than 2^53 the value does not fit
- // in the mantissa of the double and there is a loss of precision.
- // When the value is larger than 2^53 the rounding depends on the
- // code generation. If the code generator spills the double value
- // it uses 64 bits and if it does not it uses 80 bits.
- //
- // If there is a potential for overflow we resort to strtod for
- // radix 10 numbers to get higher precision. For numbers in another
- // radix we live with the loss of precision.
- static const double kPreciseConversionLimit = 9007199254740992.0;
- if (radix == 10 && v > kPreciseConversionLimit) {
- const char* cstr = GetCString(s, i);
- const char* end;
- v = gay_strtod(cstr, &end);
- ReleaseCString(s, cstr);
- }
-
- *value = v;
- return j;
-}
-
-
-int StringToInt(String* str, int index, int radix, double* value) {
- return InternalStringToInt(str, index, radix, value);
-}
-
-
-int StringToInt(const char* str, int index, int radix, double* value) {
- return InternalStringToInt(const_cast<char*>(str), index, radix, value);
-}
-
-
static const double JUNK_STRING_VALUE = OS::nan_value();
@@ -281,18 +136,23 @@
}
+static double SignedZero(bool sign) {
+ return sign ? -0.0 : 0.0;
+}
+
+
// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
template <int radix_log_2, class Iterator, class EndMark>
- static double InternalStringToIntDouble(Iterator current,
- EndMark end,
- bool sign,
- bool allow_trailing_junk) {
+static double InternalStringToIntDouble(Iterator current,
+ EndMark end,
+ bool sign,
+ bool allow_trailing_junk) {
ASSERT(current != end);
// Skip leading 0s.
while (*current == '0') {
++current;
- if (current == end) return sign ? -0.0 : 0.0;
+ if (current == end) return SignedZero(sign);
}
int64_t number = 0;
@@ -382,6 +242,183 @@
}
+template <class Iterator, class EndMark>
+static double InternalStringToInt(Iterator current, EndMark end, int radix) {
+ const bool allow_trailing_junk = true;
+ const double empty_string_val = JUNK_STRING_VALUE;
+
+ if (!AdvanceToNonspace(&current, end)) return empty_string_val;
+
+ bool sign = false;
+ bool leading_zero = false;
+
+ if (*current == '+') {
+ // Ignore leading sign; skip following spaces.
+ ++current;
+ if (!AdvanceToNonspace(&current, end)) return JUNK_STRING_VALUE;
+ } else if (*current == '-') {
+ ++current;
+ if (!AdvanceToNonspace(&current, end)) return JUNK_STRING_VALUE;
+ sign = true;
+ }
+
+ if (radix == 0) {
+ // Radix detection.
+ if (*current == '0') {
+ ++current;
+ if (current == end) return SignedZero(sign);
+ if (*current == 'x' || *current == 'X') {
+ radix = 16;
+ ++current;
+ if (current == end) return JUNK_STRING_VALUE;
+ } else {
+ radix = 8;
+ leading_zero = true;
+ }
+ } else {
+ radix = 10;
+ }
+ } else if (radix == 16) {
+ if (*current == '0') {
+ // Allow "0x" prefix.
+ ++current;
+ if (current == end) return SignedZero(sign);
+ if (*current == 'x' || *current == 'X') {
+ ++current;
+ if (current == end) return JUNK_STRING_VALUE;
+ } else {
+ leading_zero = true;
+ }
+ }
+ }
+
+ if (radix < 2 || radix > 36) return JUNK_STRING_VALUE;
+
+ // Skip leading zeros.
+ while (*current == '0') {
+ leading_zero = true;
+ ++current;
+ if (current == end) return SignedZero(sign);
+ }
+
+ if (!leading_zero && !isDigit(*current, radix)) {
+ return JUNK_STRING_VALUE;
+ }
+
+ if (IsPowerOf2(radix)) {
+ switch (radix) {
+ case 2:
+ return InternalStringToIntDouble<1>(
+ current, end, sign, allow_trailing_junk);
+ case 4:
+ return InternalStringToIntDouble<2>(
+ current, end, sign, allow_trailing_junk);
+ case 8:
+ return InternalStringToIntDouble<3>(
+ current, end, sign, allow_trailing_junk);
+
+ case 16:
+ return InternalStringToIntDouble<4>(
+ current, end, sign, allow_trailing_junk);
+
+ case 32:
+ return InternalStringToIntDouble<5>(
+ current, end, sign, allow_trailing_junk);
+ default:
+ UNREACHABLE();
+ }
+ }
+
+ if (radix == 10) {
+ // Parsing with strtod.
+ const int kMaxSignificantDigits = 309; // Doubles are less than 1.8e308.
+ // The buffer may contain up to kMaxSignificantDigits + 1 digits and a zero
Florian Loitsch 2010/03/31 16:50:00 Maybe add comment: A number with more than kMaxSig
SeRya 2010/03/31 17:18:08 The comment 7 lines below says the same. I think t
Florian Loitsch 2010/04/03 13:10:59 true. fine with me.
+ // terminator.
+ const int kBufferSize = kMaxSignificantDigits + 2;
+ char buffer[kBufferSize];
+ int buffer_pos = 0;
+ while (*current >= '0' && *current <= '9') {
+ if (buffer_pos <= kMaxSignificantDigits) {
+ // If the number has more than kMaxSignificantDigits digits it will be
+ // parsed as infinity.
+ ASSERT(buffer_pos < kBufferSize);
+ buffer[buffer_pos++] = static_cast<char>(*current);
+ }
+ ++current;
+ if (current == end) break;
+ }
+
+ if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ return JUNK_STRING_VALUE;
+ }
+
+ ASSERT(buffer_pos < kBufferSize);
+ buffer[buffer_pos++] = '\0';
+ return sign ? -gay_strtod(buffer, NULL) : gay_strtod(buffer, NULL);
+ }
+
+ // TODO(serya): The following legacy code accumulates rounding error
+ // for numbers greater than ~2^56. It should be rewritten using long
+ // arithmetic.
+
+ int lim_0 = '0' + (radix < 10 ? radix : 10);
+ int lim_a = 'a' + (radix - 10);
+ int lim_A = 'A' + (radix - 10);
+
+ // NOTE: The code for computing the value may seem a bit complex at
+ // first glance. It is structured to use 32-bit multiply-and-add
+ // loops as long as possible to avoid losing precision.
+
+ double v = 0.0;
+ bool done = false;
+ do {
+ // Parse the longest possible part of the string starting at the
+ // current position while keeping the multiplier, and thus the part
+ // itself, within 32 bits.
+ unsigned int part = 0, multiplier = 1;
+ while (true) {
+ int d;
+ if (*current >= '0' && *current < lim_0) {
+ d = *current - '0';
+ } else if (*current >= 'a' && *current < lim_a) {
+ d = *current - 'a' + 10;
+ } else if (*current >= 'A' && *current < lim_A) {
+ d = *current - 'A' + 10;
+ } else {
+ done = true;
+ break;
+ }
+
+ // Update the value of the part as long as the multiplier fits
+ // in 32 bits. When we can't guarantee that the next iteration
+ // will not overflow the multiplier, we stop parsing the part
+ // by leaving the loop.
+ const unsigned int kMaximumMultiplier = 0xffffffffU / 36;
+ uint32_t m = multiplier * radix;
+ if (m > kMaximumMultiplier) break;
+ part = part * radix + d;
+ multiplier = m;
+ ASSERT(multiplier > part);
+
+ ++current;
+ if (current == end) {
+ done = true;
+ break;
+ }
+ }
+
+ // Update the value and skip the part in the string.
+ v = v * multiplier + part;
+ } while (!done);
+
+ if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ return JUNK_STRING_VALUE;
+ }
+
+ return sign ? -v : v;
+}
+
+
// Converts a string to a double value. Assumes the Iterator supports
// the following operations:
// 1. current == end (other ops are not allowed), current != end.
@@ -417,7 +454,7 @@
bool nonzero_digit_dropped = false;
bool fractional_part = false;
- double signed_zero = 0.0;
+ bool sign = false;
if (*current == '+') {
// Ignore leading sign; skip following spaces.
@@ -427,7 +464,7 @@
buffer[buffer_pos++] = '-';
++current;
if (!AdvanceToNonspace(&current, end)) return JUNK_STRING_VALUE;
- signed_zero = -0.0;
+ sign = true;
}
static const char kInfinitySymbol[] = "Infinity";
@@ -447,14 +484,16 @@
bool leading_zero = false;
if (*current == '0') {
++current;
- if (current == end) return signed_zero;
+ if (current == end) return SignedZero(sign);
leading_zero = true;
-// It could be hexadecimal value.
+ // It could be a hexadecimal value.
if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
++current;
- if (current == end) return JUNK_STRING_VALUE; // "0x".
+ if (current == end || !isDigit(*current, 16)) {
+ return JUNK_STRING_VALUE; // "0x" not followed by a hex digit.
+ }
bool sign = (buffer_pos > 0 && buffer[0] == '-');
return InternalStringToIntDouble<4>(current,
@@ -466,7 +505,7 @@
// Ignore leading zeros in the integer part.
while (*current == '0') {
++current;
- if (current == end) return signed_zero;
+ if (current == end) return SignedZero(sign);
}
}
@@ -508,7 +547,7 @@
// leading zeros (if any).
while (*current == '0') {
++current;
- if (current == end) return signed_zero;
+ if (current == end) return SignedZero(sign);
exponent--; // Move this 0 into the exponent.
}
}
@@ -635,7 +674,7 @@
ASSERT(exponent == 0);
buffer_pos += exp_digits;
} else if (!fractional_part && significant_digits <= kMaxDigitsInInt) {
- if (significant_digits == 0) return signed_zero;
+ if (significant_digits == 0) return SignedZero(sign);
ASSERT(buffer_pos > 0);
int num = 0;
int start_pos = (buffer[0] == '-' ? 1 : 0);
@@ -672,6 +711,25 @@
}
+double StringToInt(String* str, int radix) {
+ StringShape shape(str);
+ if (shape.IsSequentialAscii()) {
+ const char* begin = SeqAsciiString::cast(str)->GetChars();
+ const char* end = begin + str->length();
+ return InternalStringToInt(begin, end, radix);
+ } else if (shape.IsSequentialTwoByte()) {
+ const uc16* begin = SeqTwoByteString::cast(str)->GetChars();
+ const uc16* end = begin + str->length();
+ return InternalStringToInt(begin, end, radix);
+ } else {
+ StringInputBuffer buffer(str);
+ return InternalStringToInt(StringInputBufferIterator(&buffer),
+ StringInputBufferIterator::EndMarker(),
+ radix);
+ }
+}
+
+
double StringToDouble(const char* str, int flags, double empty_string_val) {
const char* end = str + StrLength(str);
« no previous file with comments | « src/conversions.h ('k') | src/runtime.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698