| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/string_number_conversions.h" | |
| 6 | |
| 7 #include <ctype.h> | |
| 8 #include <errno.h> | |
| 9 #include <stdlib.h> | |
| 10 #include <wctype.h> | |
| 11 | |
| 12 #include <limits> | |
| 13 | |
| 14 #include "base/logging.h" | |
| 15 #include "base/third_party/dmg_fp/dmg_fp.h" | |
| 16 #include "base/utf_string_conversions.h" | |
| 17 | |
| 18 namespace base { | |
| 19 | |
| 20 namespace { | |
| 21 | |
// Converts an integer to its base-10 textual form.
//   STR  - the string type to produce (needs a (count, char) constructor, an
//          iterator-range constructor, begin()/end() and value_type).
//   INT  - the integer type of the value being converted.
//   UINT - the unsigned type used to hold the magnitude of the value; it must
//          be at least as wide as INT.
//   NEG  - true when INT is signed, i.e. when the value can be negative.
template <typename STR, typename INT, typename UINT, bool NEG>
struct IntToStringT {
  // This is to avoid a compiler warning about unary minus on unsigned type.
  // For example, say you had the following code:
  //   template <typename INT>
  //   INT abs(INT value) { return value < 0 ? -value : value; }
  // Even though if INT is unsigned, it's impossible for value < 0, so the
  // unary minus will never be taken, the compiler will still generate a
  // warning.  We do a little specialization dance...
  template <typename INT2, typename UINT2, bool NEG2>
  struct ToUnsignedT {};

  // Unsigned input: the value already is its own magnitude.
  template <typename INT2, typename UINT2>
  struct ToUnsignedT<INT2, UINT2, false> {
    static UINT2 ToUnsigned(INT2 value) {
      return static_cast<UINT2>(value);
    }
  };

  // Signed input: returns the magnitude (absolute value) as an unsigned.
  template <typename INT2, typename UINT2>
  struct ToUnsignedT<INT2, UINT2, true> {
    static UINT2 ToUnsigned(INT2 value) {
      // Negate in the unsigned domain.  Writing "-value" would overflow (and
      // so be undefined behaviour) for the most negative value of INT2,
      // whereas unsigned subtraction wraps and yields the right magnitude.
      return value < 0
          ? static_cast<UINT2>(static_cast<UINT2>(0) -
                               static_cast<UINT2>(value))
          : static_cast<UINT2>(value);
    }
  };

  // This set of templates is very similar to the above templates, but
  // for testing whether an integer is negative.
  template <typename INT2, bool NEG2>
  struct TestNegT {};
  template <typename INT2>
  struct TestNegT<INT2, false> {
    static bool TestNeg(INT2 value) {
      // value is unsigned, and can never be negative.
      return false;
    }
  };
  template <typename INT2>
  struct TestNegT<INT2, true> {
    static bool TestNeg(INT2 value) {
      return value < 0;
    }
  };

  // Returns the decimal representation of |value|, prefixed with '-' when the
  // value is negative.
  static STR IntToString(INT value) {
    // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
    // So round up to allocate 3 output characters per byte, plus 1 for '-'.
    const int kOutputBufSize = 3 * sizeof(INT) + 1;

    // Allocate the whole string right away, we will write back to front, and
    // then return the substr of what we ended up using.
    STR outbuf(kOutputBufSize, 0);

    bool is_neg = TestNegT<INT, NEG>::TestNeg(value);
    // Even though is_neg will never be true when INT is parameterized as
    // unsigned, even the presence of the unary operation causes a warning.
    UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);

    // Emit digits from least to most significant, walking backwards from the
    // end of the buffer.  At least one digit is always produced, so that zero
    // is rendered as "0".
    typename STR::iterator it = outbuf.end();
    do {
      --it;
      DCHECK(it != outbuf.begin());
      *it = static_cast<typename STR::value_type>((res % 10) + '0');
      res /= 10;
    } while (res != 0);

    if (is_neg) {
      --it;
      DCHECK(it != outbuf.begin());
      *it = static_cast<typename STR::value_type>('-');
    }
    return STR(it, outbuf.end());
  }
};
| 100 | |
| 101 // Utility to convert a character to a digit in a given base | |
| 102 template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit { | |
| 103 }; | |
| 104 | |
| 105 // Faster specialization for bases <= 10 | |
| 106 template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> { | |
| 107 public: | |
| 108 static bool Convert(CHAR c, uint8* digit) { | |
| 109 if (c >= '0' && c < '0' + BASE) { | |
| 110 *digit = c - '0'; | |
| 111 return true; | |
| 112 } | |
| 113 return false; | |
| 114 } | |
| 115 }; | |
| 116 | |
| 117 // Specialization for bases where 10 < base <= 36 | |
| 118 template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> { | |
| 119 public: | |
| 120 static bool Convert(CHAR c, uint8* digit) { | |
| 121 if (c >= '0' && c <= '9') { | |
| 122 *digit = c - '0'; | |
| 123 } else if (c >= 'a' && c < 'a' + BASE - 10) { | |
| 124 *digit = c - 'a' + 10; | |
| 125 } else if (c >= 'A' && c < 'A' + BASE - 10) { | |
| 126 *digit = c - 'A' + 10; | |
| 127 } else { | |
| 128 return false; | |
| 129 } | |
| 130 return true; | |
| 131 } | |
| 132 }; | |
| 133 | |
| 134 template<int BASE, typename CHAR> bool CharToDigit(CHAR c, uint8* digit) { | |
| 135 return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit); | |
| 136 } | |
| 137 | |
| 138 // There is an IsWhitespace for wchars defined in string_util.h, but it is | |
| 139 // locale independent, whereas the functions we are replacing were | |
| 140 // locale-dependent. TBD what is desired, but for the moment let's not introduce | |
| 141 // a change in behaviour. | |
| 142 template<typename CHAR> class WhitespaceHelper { | |
| 143 }; | |
| 144 | |
| 145 template<> class WhitespaceHelper<char> { | |
| 146 public: | |
| 147 static bool Invoke(char c) { | |
| 148 return 0 != isspace(static_cast<unsigned char>(c)); | |
| 149 } | |
| 150 }; | |
| 151 | |
| 152 template<> class WhitespaceHelper<char16> { | |
| 153 public: | |
| 154 static bool Invoke(char16 c) { | |
| 155 return 0 != iswspace(c); | |
| 156 } | |
| 157 }; | |
| 158 | |
| 159 template<typename CHAR> bool LocalIsWhitespace(CHAR c) { | |
| 160 return WhitespaceHelper<CHAR>::Invoke(c); | |
| 161 } | |
| 162 | |
| 163 // IteratorRangeToNumberTraits should provide: | |
| 164 // - a typedef for iterator_type, the iterator type used as input. | |
| 165 // - a typedef for value_type, the target numeric type. | |
| 166 // - static functions min, max (returning the minimum and maximum permitted | |
| 167 // values) | |
| 168 // - constant kBase, the base in which to interpret the input | |
| 169 template<typename IteratorRangeToNumberTraits> | |
| 170 class IteratorRangeToNumber { | |
| 171 public: | |
| 172 typedef IteratorRangeToNumberTraits traits; | |
| 173 typedef typename traits::iterator_type const_iterator; | |
| 174 typedef typename traits::value_type value_type; | |
| 175 | |
| 176 // Generalized iterator-range-to-number conversion. | |
| 177 // | |
| 178 static bool Invoke(const_iterator begin, | |
| 179 const_iterator end, | |
| 180 value_type* output) { | |
| 181 bool valid = true; | |
| 182 | |
| 183 while (begin != end && LocalIsWhitespace(*begin)) { | |
| 184 valid = false; | |
| 185 ++begin; | |
| 186 } | |
| 187 | |
| 188 if (begin != end && *begin == '-') { | |
| 189 if (!Negative::Invoke(begin + 1, end, output)) { | |
| 190 valid = false; | |
| 191 } | |
| 192 } else { | |
| 193 if (begin != end && *begin == '+') { | |
| 194 ++begin; | |
| 195 } | |
| 196 if (!Positive::Invoke(begin, end, output)) { | |
| 197 valid = false; | |
| 198 } | |
| 199 } | |
| 200 | |
| 201 return valid; | |
| 202 } | |
| 203 | |
| 204 private: | |
| 205 // Sign provides: | |
| 206 // - a static function, CheckBounds, that determines whether the next digit | |
| 207 // causes an overflow/underflow | |
| 208 // - a static function, Increment, that appends the next digit appropriately | |
| 209 // according to the sign of the number being parsed. | |
| 210 template<typename Sign> | |
| 211 class Base { | |
| 212 public: | |
| 213 static bool Invoke(const_iterator begin, const_iterator end, | |
| 214 typename traits::value_type* output) { | |
| 215 *output = 0; | |
| 216 | |
| 217 if (begin == end) { | |
| 218 return false; | |
| 219 } | |
| 220 | |
| 221 // Note: no performance difference was found when using template | |
| 222 // specialization to remove this check in bases other than 16 | |
| 223 if (traits::kBase == 16 && end - begin > 2 && *begin == '0' && | |
| 224 (*(begin + 1) == 'x' || *(begin + 1) == 'X')) { | |
| 225 begin += 2; | |
| 226 } | |
| 227 | |
| 228 for (const_iterator current = begin; current != end; ++current) { | |
| 229 uint8 new_digit = 0; | |
| 230 | |
| 231 if (!CharToDigit<traits::kBase>(*current, &new_digit)) { | |
| 232 return false; | |
| 233 } | |
| 234 | |
| 235 if (current != begin) { | |
| 236 if (!Sign::CheckBounds(output, new_digit)) { | |
| 237 return false; | |
| 238 } | |
| 239 *output *= traits::kBase; | |
| 240 } | |
| 241 | |
| 242 Sign::Increment(new_digit, output); | |
| 243 } | |
| 244 return true; | |
| 245 } | |
| 246 }; | |
| 247 | |
| 248 class Positive : public Base<Positive> { | |
| 249 public: | |
| 250 static bool CheckBounds(value_type* output, uint8 new_digit) { | |
| 251 if (*output > static_cast<value_type>(traits::max() / traits::kBase) || | |
| 252 (*output == static_cast<value_type>(traits::max() / traits::kBase) && | |
| 253 new_digit > traits::max() % traits::kBase)) { | |
| 254 *output = traits::max(); | |
| 255 return false; | |
| 256 } | |
| 257 return true; | |
| 258 } | |
| 259 static void Increment(uint8 increment, value_type* output) { | |
| 260 *output += increment; | |
| 261 } | |
| 262 }; | |
| 263 | |
| 264 class Negative : public Base<Negative> { | |
| 265 public: | |
| 266 static bool CheckBounds(value_type* output, uint8 new_digit) { | |
| 267 if (*output < traits::min() / traits::kBase || | |
| 268 (*output == traits::min() / traits::kBase && | |
| 269 new_digit > 0 - traits::min() % traits::kBase)) { | |
| 270 *output = traits::min(); | |
| 271 return false; | |
| 272 } | |
| 273 return true; | |
| 274 } | |
| 275 static void Increment(uint8 increment, value_type* output) { | |
| 276 *output -= increment; | |
| 277 } | |
| 278 }; | |
| 279 }; | |
| 280 | |
// Traits describing a conversion from an ITERATOR range to a VALUE, with the
// digits interpreted in base BASE.  The permitted range is the full range of
// VALUE as reported by std::numeric_limits.
template <typename ITERATOR, typename VALUE, int BASE>
class BaseIteratorRangeToNumberTraits {
 public:
  typedef ITERATOR iterator_type;
  typedef VALUE value_type;

  // The base in which the input is interpreted.
  static const int kBase = BASE;

  // Smallest value a conversion may produce.
  static value_type min() {
    typedef std::numeric_limits<value_type> Limits;
    return Limits::min();
  }

  // Largest value a conversion may produce.
  static value_type max() {
    typedef std::numeric_limits<value_type> Limits;
    return Limits::max();
  }
};
| 294 | |
| 295 template<typename ITERATOR> | |
| 296 class BaseHexIteratorRangeToIntTraits | |
| 297 : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> { | |
| 298 }; | |
| 299 | |
| 300 template<typename ITERATOR> | |
| 301 class BaseHexIteratorRangeToInt64Traits | |
| 302 : public BaseIteratorRangeToNumberTraits<ITERATOR, int64, 16> { | |
| 303 }; | |
| 304 | |
| 305 typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator> | |
| 306 HexIteratorRangeToIntTraits; | |
| 307 | |
| 308 typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator> | |
| 309 HexIteratorRangeToInt64Traits; | |
| 310 | |
| 311 template<typename STR> | |
| 312 bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) { | |
| 313 DCHECK_EQ(output->size(), 0u); | |
| 314 size_t count = input.size(); | |
| 315 if (count == 0 || (count % 2) != 0) | |
| 316 return false; | |
| 317 for (uintptr_t i = 0; i < count / 2; ++i) { | |
| 318 uint8 msb = 0; // most significant 4 bits | |
| 319 uint8 lsb = 0; // least significant 4 bits | |
| 320 if (!CharToDigit<16>(input[i * 2], &msb) || | |
| 321 !CharToDigit<16>(input[i * 2 + 1], &lsb)) | |
| 322 return false; | |
| 323 output->push_back((msb << 4) | lsb); | |
| 324 } | |
| 325 return true; | |
| 326 } | |
| 327 | |
| 328 template <typename VALUE, int BASE> | |
| 329 class StringPieceToNumberTraits | |
| 330 : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator, | |
| 331 VALUE, | |
| 332 BASE> { | |
| 333 }; | |
| 334 | |
| 335 template <typename VALUE> | |
| 336 bool StringToIntImpl(const StringPiece& input, VALUE* output) { | |
| 337 return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke( | |
| 338 input.begin(), input.end(), output); | |
| 339 } | |
| 340 | |
| 341 template <typename VALUE, int BASE> | |
| 342 class StringPiece16ToNumberTraits | |
| 343 : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator, | |
| 344 VALUE, | |
| 345 BASE> { | |
| 346 }; | |
| 347 | |
| 348 template <typename VALUE> | |
| 349 bool String16ToIntImpl(const StringPiece16& input, VALUE* output) { | |
| 350 return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke( | |
| 351 input.begin(), input.end(), output); | |
| 352 } | |
| 353 | |
| 354 } // namespace | |
| 355 | |
| 356 std::string IntToString(int value) { | |
| 357 return IntToStringT<std::string, int, unsigned int, true>:: | |
| 358 IntToString(value); | |
| 359 } | |
| 360 | |
| 361 string16 IntToString16(int value) { | |
| 362 return IntToStringT<string16, int, unsigned int, true>:: | |
| 363 IntToString(value); | |
| 364 } | |
| 365 | |
| 366 std::string UintToString(unsigned int value) { | |
| 367 return IntToStringT<std::string, unsigned int, unsigned int, false>:: | |
| 368 IntToString(value); | |
| 369 } | |
| 370 | |
| 371 string16 UintToString16(unsigned int value) { | |
| 372 return IntToStringT<string16, unsigned int, unsigned int, false>:: | |
| 373 IntToString(value); | |
| 374 } | |
| 375 | |
| 376 std::string Int64ToString(int64 value) { | |
| 377 return IntToStringT<std::string, int64, uint64, true>:: | |
| 378 IntToString(value); | |
| 379 } | |
| 380 | |
| 381 string16 Int64ToString16(int64 value) { | |
| 382 return IntToStringT<string16, int64, uint64, true>::IntToString(value); | |
| 383 } | |
| 384 | |
| 385 std::string Uint64ToString(uint64 value) { | |
| 386 return IntToStringT<std::string, uint64, uint64, false>:: | |
| 387 IntToString(value); | |
| 388 } | |
| 389 | |
| 390 string16 Uint64ToString16(uint64 value) { | |
| 391 return IntToStringT<string16, uint64, uint64, false>:: | |
| 392 IntToString(value); | |
| 393 } | |
| 394 | |
| 395 std::string DoubleToString(double value) { | |
| 396 // According to g_fmt.cc, it is sufficient to declare a buffer of size 32. | |
| 397 char buffer[32]; | |
| 398 dmg_fp::g_fmt(buffer, value); | |
| 399 return std::string(buffer); | |
| 400 } | |
| 401 | |
| 402 bool StringToInt(const StringPiece& input, int* output) { | |
| 403 return StringToIntImpl(input, output); | |
| 404 } | |
| 405 | |
| 406 bool StringToInt(const StringPiece16& input, int* output) { | |
| 407 return String16ToIntImpl(input, output); | |
| 408 } | |
| 409 | |
| 410 bool StringToUint(const StringPiece& input, unsigned* output) { | |
| 411 return StringToIntImpl(input, output); | |
| 412 } | |
| 413 | |
| 414 bool StringToUint(const StringPiece16& input, unsigned* output) { | |
| 415 return String16ToIntImpl(input, output); | |
| 416 } | |
| 417 | |
| 418 bool StringToInt64(const StringPiece& input, int64* output) { | |
| 419 return StringToIntImpl(input, output); | |
| 420 } | |
| 421 | |
| 422 bool StringToInt64(const StringPiece16& input, int64* output) { | |
| 423 return String16ToIntImpl(input, output); | |
| 424 } | |
| 425 | |
| 426 bool StringToUint64(const StringPiece& input, uint64* output) { | |
| 427 return StringToIntImpl(input, output); | |
| 428 } | |
| 429 | |
| 430 bool StringToUint64(const StringPiece16& input, uint64* output) { | |
| 431 return String16ToIntImpl(input, output); | |
| 432 } | |
| 433 | |
| 434 bool StringToSizeT(const StringPiece& input, size_t* output) { | |
| 435 return StringToIntImpl(input, output); | |
| 436 } | |
| 437 | |
| 438 bool StringToSizeT(const StringPiece16& input, size_t* output) { | |
| 439 return String16ToIntImpl(input, output); | |
| 440 } | |
| 441 | |
| 442 bool StringToDouble(const std::string& input, double* output) { | |
| 443 errno = 0; // Thread-safe? It is on at least Mac, Linux, and Windows. | |
| 444 char* endptr = NULL; | |
| 445 *output = dmg_fp::strtod(input.c_str(), &endptr); | |
| 446 | |
| 447 // Cases to return false: | |
| 448 // - If errno is ERANGE, there was an overflow or underflow. | |
| 449 // - If the input string is empty, there was nothing to parse. | |
| 450 // - If endptr does not point to the end of the string, there are either | |
| 451 // characters remaining in the string after a parsed number, or the string | |
| 452 // does not begin with a parseable number. endptr is compared to the | |
| 453 // expected end given the string's stated length to correctly catch cases | |
| 454 // where the string contains embedded NUL characters. | |
| 455 // - If the first character is a space, there was leading whitespace | |
| 456 return errno == 0 && | |
| 457 !input.empty() && | |
| 458 input.c_str() + input.length() == endptr && | |
| 459 !isspace(input[0]); | |
| 460 } | |
| 461 | |
| 462 // Note: if you need to add String16ToDouble, first ask yourself if it's | |
| 463 // really necessary. If it is, probably the best implementation here is to | |
| 464 // convert to 8-bit and then use the 8-bit version. | |
| 465 | |
| 466 // Note: if you need to add an iterator range version of StringToDouble, first | |
| 467 // ask yourself if it's really necessary. If it is, probably the best | |
| 468 // implementation here is to instantiate a string and use the string version. | |
| 469 | |
// Returns the upper-case hexadecimal encoding of the |size| bytes starting at
// |bytes|: two characters per input byte, most significant nibble first.
std::string HexEncode(const void* bytes, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";

  const char* const data = reinterpret_cast<const char*>(bytes);

  // Every input byte becomes exactly two output characters.
  std::string ret;
  ret.reserve(size * 2);

  for (size_t i = 0; i < size; ++i) {
    const char b = data[i];
    ret.push_back(kHexChars[(b >> 4) & 0xf]);
    ret.push_back(kHexChars[b & 0xf]);
  }
  return ret;
}
| 483 | |
| 484 bool HexStringToInt(const StringPiece& input, int* output) { | |
| 485 return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke( | |
| 486 input.begin(), input.end(), output); | |
| 487 } | |
| 488 | |
| 489 bool HexStringToInt64(const StringPiece& input, int64* output) { | |
| 490 return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke( | |
| 491 input.begin(), input.end(), output); | |
| 492 } | |
| 493 | |
| 494 bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) { | |
| 495 return HexStringToBytesT(input, output); | |
| 496 } | |
| 497 | |
| 498 } // namespace base | |
| OLD | NEW |