base/file_util_icu.cc - Issue 126223: Replace std::wstring with std::string in locale-name related APIs....

Side by Side Diff: base/file_util_icu.cc

Issue 126223: Replace std::wstring with std::string in locale-name related APIs.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // File utilities that use the ICU library go in this file. Functions using ICU	5 // File utilities that use the ICU library go in this file. Functions using ICU

6 // are separated from the other functions to prevent ICU being pulled in by the	6 // are separated from the other functions to prevent ICU being pulled in by the

7 // linker if there is a false dependency.	7 // linker if there is a false dependency.

8 //	8 //

9 // (The VS2005 linker finds such a false dependency and adds ~300K of ICU to	9 // (The VS2005 linker finds such a false dependency and adds ~300K of ICU to

10 // chrome.exe if this code lives in file_util.cc, even though none of this code	10 // chrome.exe if this code lives in file_util.cc, even though none of this code

11 // is called.)	11 // is called.)

12	12

13 #include "base/file_util.h"	13 #include "base/file_util.h"

14	14

	15 #include "base/singleton.h"

15 #include "base/string_util.h"	16 #include "base/string_util.h"

16 #include "unicode/uniset.h"	17 #include "unicode/uniset.h"

17	18

18 namespace file_util {	19 namespace {

	20 class IllegalCharacters {

	21 public:

	22 bool contains(UChar32 ucs4) {

	23 return !!set->contains(ucs4);

	24 }

19	25

20 void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {	26 bool containsNone(const string16 &s) {

21 DCHECK(file_name);	27 return !!set->containsNone(UnicodeString(s.c_str(), s.size()));

	28 }

22	29

	30 private:

	31 friend class Singleton<IllegalCharacters>;

	32 friend struct DefaultSingletonTraits<IllegalCharacters>;

	33

	34 IllegalCharacters();

	35 ~IllegalCharacters() { }

	36

	37 scoped_ptr<UnicodeSet> set;

	38

	39 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters);

	40 };

	41

	42 IllegalCharacters::IllegalCharacters() {

	43 UErrorCode status = U_ZERO_ERROR;

23 // Control characters, formatting characters, non-characters, and	44 // Control characters, formatting characters, non-characters, and

24 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\').	45 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\').

25 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx	46 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx

26 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx	47 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx

27 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they	48 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they

28 // are legitimate in Arabic and some S/SE Asian scripts. However, when used	49 // are legitimate in Arabic and some S/SE Asian scripts. However, when used

29 // elsewhere, they can be confusing/problematic.	50 // elsewhere, they can be confusing/problematic.

30 // Also, consider wrapping the set with our Singleton class to create and	51 // Also, consider wrapping the set with our Singleton class to create and

31 // freeze it only once. Note that there's a trade-off between memory and	52 // freeze it only once. Note that there's a trade-off between memory and

32 // speed.	53 // speed.

33

34 UErrorCode status = U_ZERO_ERROR;

35 #if defined(WCHAR_T_IS_UTF16)	54 #if defined(WCHAR_T_IS_UTF16)

36 UnicodeSet illegal_characters(UnicodeString(	55 set.reset(new UnicodeSet(UnicodeString(

37 L"[[\"*/:<>?\\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status);	56 L"[[\"*/:<>?\\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status));

38 #else	57 #else

39 UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE(	58 set.reset(new UnicodeSet(UNICODE_STRING_SIMPLE(

40 "[[\"*/:<>?\\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status);	59 "[[\"*/:<>?\\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(),

	60 status));

41 #endif	61 #endif

42 DCHECK(U_SUCCESS(status));	62 DCHECK(U_SUCCESS(status));

43 // Add non-characters. If this becomes a performance bottleneck by	63 // Add non-characters. If this becomes a performance bottleneck by

44 // any chance, check \|ucs4 & 0xFFFEu == 0xFFFEu\|, instead.	64 // any chance, do not add these to \|set\| and change IsFilenameLegal()

45 illegal_characters.add(0xFDD0, 0xFDEF);	65 // to check \|ucs4 & 0xFFFEu == 0xFFFEu\|, in addition to calling

	66 // containsNone().

	67 set->add(0xFDD0, 0xFDEF);

46 for (int i = 0; i <= 0x10; ++i) {	68 for (int i = 0; i <= 0x10; ++i) {

47 int plane_base = 0x10000 * i;	69 int plane_base = 0x10000 * i;

48 illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF);	70 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF);

49 }	71 }

50 illegal_characters.freeze();	72 set->freeze();

51 DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000);	73 }

	74

	75 } // namespace

	76

	77 namespace file_util {

	78

	79 bool IsFilenameLegal(const string16& file_name) {

	80 return Singleton<IllegalCharacters>()->containsNone(file_name);

	81 }

	82

	83 void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {

	84 DCHECK(file_name);

	85

	86 DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) &&

	87 replace_char < 0x10000);

52	88

53 // Remove leading and trailing whitespace.	89 // Remove leading and trailing whitespace.

54 TrimWhitespace(*file_name, TRIM_ALL, file_name);	90 TrimWhitespace(*file_name, TRIM_ALL, file_name);

55	91

	92 if (IsFilenameLegal(WideToUTF16(*file_name)))

	93 return;

	94

56 std::wstring::size_type i = 0;	95 std::wstring::size_type i = 0;

57 std::wstring::size_type length = file_name->size();	96 std::wstring::size_type length = file_name->size();

58 const wchar_t* wstr = file_name->data();	97 const wchar_t* wstr = file_name->data();

59 #if defined(WCHAR_T_IS_UTF16)	98 #if defined(WCHAR_T_IS_UTF16)

60 // Using \|span\| method of UnicodeSet might speed things up a bit, but	99 // Using \|span\| method of UnicodeSet might speed things up a bit, but

61 // it's not likely to matter here.	100 // it's not likely to matter here.

62 std::wstring temp;	101 std::wstring temp;

63 temp.reserve(length);	102 temp.reserve(length);

64 while (i < length) {	103 while (i < length) {

65 UChar32 ucs4;	104 UChar32 ucs4;

66 std::wstring::size_type prev = i;	105 std::wstring::size_type prev = i;

67 U16_NEXT(wstr, i, length, ucs4);	106 U16_NEXT(wstr, i, length, ucs4);

68 if (illegal_characters.contains(ucs4)) {	107 if (Singleton<IllegalCharacters>()->contains(ucs4)) {

69 temp.push_back(replace_char);	108 temp.push_back(replace_char);

70 } else if (ucs4 < 0x10000) {	109 } else if (ucs4 < 0x10000) {

71 temp.push_back(ucs4);	110 temp.push_back(ucs4);

72 } else {	111 } else {

73 temp.push_back(wstr[prev]);	112 temp.push_back(wstr[prev]);

74 temp.push_back(wstr[prev + 1]);	113 temp.push_back(wstr[prev + 1]);

75 }	114 }

76 }	115 }

77 file_name->swap(temp);	116 file_name->swap(temp);

78 #elif defined(WCHAR_T_IS_UTF32)	117 #elif defined(WCHAR_T_IS_UTF32)

79 while (i < length) {	118 while (i < length) {

80 if (illegal_characters.contains(wstr[i])) {	119 if (Singleton<IllegalCharacters>()->contains(wstr[i])) {

81 (*file_name)[i] = replace_char;	120 (*file_name)[i] = replace_char;

82 }	121 }

83 ++i;	122 ++i;

84 }	123 }

85 #else	124 #else

86 #error wchar_t* should be either UTF-16 or UTF-32	125 #error wchar_t* should be either UTF-16 or UTF-32

87 #endif	126 #endif

88 }	127 }

89	128

90 } // namespace	129 } // namespace

OLD	NEW

« no previous file with comments | « base/file_util.h ('k') | chrome/browser/automation/automation_provider.cc » ('j') | no next file with comments »