base/file_util_icu.cc - Issue 267001: Separate out some more ICU from base and into base/i18n....

Side by Side Diff: base/file_util_icu.cc

Issue 267001: Separate out some more ICU from base and into base/i18n.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 // File utilities that use the ICU library go in this file. Functions using ICU

6 // are separated from the other functions to prevent ICU being pulled in by the

7 // linker if there is a false dependency.

8 //

9 // (The VS2005 linker finds such a false dependency and adds ~300K of ICU to

10 // chrome.exe if this code lives in file_util.cc, even though none of this code

11 // is called.)

12

13 #include "base/file_util.h"

14

15 #include "base/singleton.h"

16 #include "base/string_util.h"

17 #include "unicode/uniset.h"

18

19 namespace {

20 class IllegalCharacters {

21 public:

22 bool contains(UChar32 ucs4) {

23 return !!set->contains(ucs4);

24 }

25

26 bool containsNone(const string16 &s) {

27 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size()));

28 }

29

30 private:

31 friend class Singleton<IllegalCharacters>;

32 friend struct DefaultSingletonTraits<IllegalCharacters>;

33

34 IllegalCharacters();

35 ~IllegalCharacters() { }

36

37 scoped_ptr<icu::UnicodeSet> set;

38

39 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters);

40 };

41

42 IllegalCharacters::IllegalCharacters() {

43 UErrorCode status = U_ZERO_ERROR;

44 // Control characters, formatting characters, non-characters, and

45 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\').

46 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx

47 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx

48 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they

49 // are legitimate in Arabic and some S/SE Asian scripts. However, when used

50 // elsewhere, they can be confusing/problematic.

51 // Also, consider wrapping the set with our Singleton class to create and

52 // freeze it only once. Note that there's a trade-off between memory and

53 // speed.

54 #if defined(WCHAR_T_IS_UTF16)

55 set.reset(new icu::UnicodeSet(icu::UnicodeString(

56 L"[[\"*/:<>?\\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status));

57 #else

58 set.reset(new icu::UnicodeSet(UNICODE_STRING_SIMPLE(

59 "[[\"*/:<>?\\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(),

60 status));

61 #endif

62 DCHECK(U_SUCCESS(status));

63 // Add non-characters. If this becomes a performance bottleneck by

64 // any chance, do not add these to \|set\| and change IsFilenameLegal()

65 // to check \|ucs4 & 0xFFFEu == 0xFFFEu\|, in addiition to calling

66 // containsNone().

67 set->add(0xFDD0, 0xFDEF);

68 for (int i = 0; i <= 0x10; ++i) {

69 int plane_base = 0x10000 * i;

70 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF);

71 }

72 set->freeze();

73 }

74

75 } // namespace

76

77 namespace file_util {

78

79 bool IsFilenameLegal(const string16& file_name) {

80 return Singleton<IllegalCharacters>()->containsNone(file_name);

81 }

82

83 void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {

84 DCHECK(file_name);

85

86 DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) &&

87 replace_char < 0x10000);

88

89 // Remove leading and trailing whitespace.

90 TrimWhitespace(*file_name, TRIM_ALL, file_name);

91

92 if (IsFilenameLegal(WideToUTF16(*file_name)))

93 return;

94

95 std::wstring::size_type i = 0;

96 std::wstring::size_type length = file_name->size();

97 const wchar_t* wstr = file_name->data();

98 #if defined(WCHAR_T_IS_UTF16)

99 // Using \|span\| method of UnicodeSet might speed things up a bit, but

100 // it's not likely to matter here.

101 std::wstring temp;

102 temp.reserve(length);

103 while (i < length) {

104 UChar32 ucs4;

105 std::wstring::size_type prev = i;

106 U16_NEXT(wstr, i, length, ucs4);

107 if (Singleton<IllegalCharacters>()->contains(ucs4)) {

108 temp.push_back(replace_char);

109 } else if (ucs4 < 0x10000) {

110 temp.push_back(ucs4);

111 } else {

112 temp.push_back(wstr[prev]);

113 temp.push_back(wstr[prev + 1]);

114 }

115 }

116 file_name->swap(temp);

117 #elif defined(WCHAR_T_IS_UTF32)

118 while (i < length) {

119 if (Singleton<IllegalCharacters>()->contains(wstr[i])) {

120 (*file_name)[i] = replace_char;

121 }

122 ++i;

123 }

124 #else

125 #error wchar_t* should be either UTF-16 or UTF-32

126 #endif

127 }

128

129 } // namespace file_util

OLD	NEW

« no previous file with comments | « base/file_util.h ('k') | base/file_util_posix.cc » ('j') | no next file with comments »