| OLD | NEW | 
|---|---|
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "base/string_util.h" | 5 #include "base/string_util.h" | 
| 6 | 6 | 
| 7 #include "build/build_config.h" | 7 #include "build/build_config.h" | 
| 8 | 8 | 
| 9 #include <ctype.h> | 9 #include <ctype.h> | 
| 10 #include <errno.h> | 10 #include <errno.h> | 
| (...skipping 319 matching lines...) | (...skipping 319 matching lines...) | 
| 330   0x3000,  // Ideographic Space | 330   0x3000,  // Ideographic Space | 
| 331   0 | 331   0 | 
| 332 }; | 332 }; | 
| 333 const char kWhitespaceASCII[] = { | 333 const char kWhitespaceASCII[] = { | 
| 334   0x09,    // <control-0009> to <control-000D> | 334   0x09,    // <control-0009> to <control-000D> | 
| 335   0x0A, | 335   0x0A, | 
| 336   0x0B, | 336   0x0B, | 
| 337   0x0C, | 337   0x0C, | 
| 338   0x0D, | 338   0x0D, | 
| 339   0x20,    // Space | 339   0x20,    // Space | 
|  | 340   '\x85',  // <control-0085> | 
|  | 341   '\xa0',  // No-Break Space | 
| 340   0 | 342   0 | 
| 341 }; | 343 }; | 
| 342 const char* const kCodepageUTF8 = "UTF-8"; | 344 const char* const kCodepageUTF8 = "UTF-8"; | 
| 343 | 345 | 
| 344 template<typename STR> | 346 template<typename STR> | 
| 345 TrimPositions TrimStringT(const STR& input, | 347 TrimPositions TrimStringT(const STR& input, | 
| 346                           const typename STR::value_type trim_chars[], | 348                           const typename STR::value_type trim_chars[], | 
| 347                           TrimPositions positions, | 349                           TrimPositions positions, | 
| 348                           STR* output) { | 350                           STR* output) { | 
| 349   // Find the edges of leading/trailing whitespace as desired. | 351   // Find the edges of leading/trailing whitespace as desired. | 
| (...skipping 34 matching lines...) | (...skipping 34 matching lines...) | 
| 384                 std::string* output) { | 386                 std::string* output) { | 
| 385   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | 387   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; | 
| 386 } | 388 } | 
| 387 | 389 | 
| 388 TrimPositions TrimWhitespace(const std::wstring& input, | 390 TrimPositions TrimWhitespace(const std::wstring& input, | 
| 389                              TrimPositions positions, | 391                              TrimPositions positions, | 
| 390                              std::wstring* output) { | 392                              std::wstring* output) { | 
| 391   return TrimStringT(input, kWhitespaceWide, positions, output); | 393   return TrimStringT(input, kWhitespaceWide, positions, output); | 
| 392 } | 394 } | 
| 393 | 395 | 
| 394 TrimPositions TrimWhitespaceASCII(const std::string& input, | 396 TrimPositions TrimWhitespace(const std::string& input, | 
| 395                                   TrimPositions positions, | 397                              TrimPositions positions, | 
| 396                                   std::string* output) { | 398                              std::string* output) { | 
| 397   return TrimStringT(input, kWhitespaceASCII, positions, output); | 399   return TrimStringT(input, kWhitespaceASCII, positions, output); | 
| 398 } | 400 } | 
| 399 | 401 | 
| 400 TrimPositions TrimWhitespaceUTF8(const std::string& input, |  | 
| 401                                  TrimPositions positions, |  | 
| 402                                  std::string* output) { |  | 
| 403   // This implementation is not so fast since it converts the text encoding |  | 
| 404   // twice. Please feel free to file a bug if this function hurts the |  | 
| 405   // performance of Chrome. |  | 
| 406   DCHECK(IsStringUTF8(input)); |  | 
| 407   std::wstring input_wide = UTF8ToWide(input); |  | 
| 408   std::wstring output_wide; |  | 
| 409   TrimPositions result = TrimWhitespace(input_wide, positions, &output_wide); |  | 
| 410   *output = WideToUTF8(output_wide); |  | 
| 411   return result; |  | 
| 412 } |  | 
| 413 |  | 
| 414 // This function is only for backward-compatibility. |  | 
| 415 // To be removed when all callers are updated. |  | 
| 416 TrimPositions TrimWhitespace(const std::string& input, |  | 
| 417                              TrimPositions positions, |  | 
| 418                              std::string* output) { |  | 
| 419   return TrimWhitespaceASCII(input, positions, output); |  | 
| 420 } |  | 
| 421 |  | 
| 422 std::wstring CollapseWhitespace(const std::wstring& text, | 402 std::wstring CollapseWhitespace(const std::wstring& text, | 
| 423                                 bool trim_sequences_with_line_breaks) { | 403                                 bool trim_sequences_with_line_breaks) { | 
| 424   std::wstring result; | 404   std::wstring result; | 
| 425   result.resize(text.size()); | 405   result.resize(text.size()); | 
| 426 | 406 | 
| 427   // Set flags to pretend we're already in a trimmed whitespace sequence, so we | 407   // Set flags to pretend we're already in a trimmed whitespace sequence, so we | 
| 428   // will trim any leading whitespace. | 408   // will trim any leading whitespace. | 
| 429   bool in_whitespace = true; | 409   bool in_whitespace = true; | 
| 430   bool already_trimmed = true; | 410   bool already_trimmed = true; | 
| 431 | 411 | 
| (...skipping 1194 matching lines...) | (...skipping 1194 matching lines...) | 
| 1626   // Each input byte creates two output hex characters. | 1606   // Each input byte creates two output hex characters. | 
| 1627   std::string ret(size * 2, '\0'); | 1607   std::string ret(size * 2, '\0'); | 
| 1628 | 1608 | 
| 1629   for (size_t i = 0; i < size; ++i) { | 1609   for (size_t i = 0; i < size; ++i) { | 
| 1630     char b = reinterpret_cast<const char*>(bytes)[i]; | 1610     char b = reinterpret_cast<const char*>(bytes)[i]; | 
| 1631     ret[(i * 2)] = kHexChars[(b >> 4) & 0xf]; | 1611     ret[(i * 2)] = kHexChars[(b >> 4) & 0xf]; | 
| 1632     ret[(i * 2) + 1] = kHexChars[b & 0xf]; | 1612     ret[(i * 2) + 1] = kHexChars[b & 0xf]; | 
| 1633   } | 1613   } | 
| 1634   return ret; | 1614   return ret; | 
| 1635 } | 1615 } | 
| OLD | NEW | 
|---|