base/i18n/file_util_icu.cc - Issue 895853003: Update from https://crrev.com/314320

Side by Side Diff: base/i18n/file_util_icu.cc

Issue 895853003: Update from https://crrev.com/314320 (Closed) Base URL: https://github.com/domokit/mojo.git@master

Patch Set: Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // File utilities that use the ICU library go in this file.	5 // File utilities that use the ICU library go in this file.

6	6

7 #include "base/i18n/file_util_icu.h"	7 #include "base/i18n/file_util_icu.h"

8	8

9 #include "base/files/file_path.h"	9 #include "base/files/file_path.h"

10 #include "base/i18n/icu_string_conversions.h"	10 #include "base/i18n/icu_string_conversions.h"

(...skipping 12 matching lines...) Expand all Loading...
23 namespace i18n {	23 namespace i18n {

24	24

25 namespace {	25 namespace {

26	26

27 class IllegalCharacters {	27 class IllegalCharacters {

28 public:	28 public:

29 static IllegalCharacters* GetInstance() {	29 static IllegalCharacters* GetInstance() {

30 return Singleton<IllegalCharacters>::get();	30 return Singleton<IllegalCharacters>::get();

31 }	31 }

32	32

33 bool contains(UChar32 ucs4) {	33 bool DisallowedEverywhere(UChar32 ucs4) {

34 return !!set->contains(ucs4);	34 return !!illegal_anywhere_->contains(ucs4);

35 }	35 }

36	36

37 bool containsNone(const string16 &s) {	37 bool DisallowedLeadingOrTrailing(UChar32 ucs4) {

38 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size()));	38 return !!illegal_at_ends_->contains(ucs4);

	39 }

	40

	41 bool IsAllowedName(const string16& s) {

	42 return s.empty() || (!!illegal_anywhere_->containsNone(

	43 icu::UnicodeString(s.c_str(), s.size())) &&

	44 !illegal_at_ends_->contains(*s.begin()) &&

	45 !illegal_at_ends_->contains(*s.rbegin()));

39 }	46 }

40	47

41 private:	48 private:

42 friend class Singleton<IllegalCharacters>;	49 friend class Singleton<IllegalCharacters>;

43 friend struct DefaultSingletonTraits<IllegalCharacters>;	50 friend struct DefaultSingletonTraits<IllegalCharacters>;

44	51

45 IllegalCharacters();	52 IllegalCharacters();

46 ~IllegalCharacters() { }	53 ~IllegalCharacters() { }

47	54

48 scoped_ptr<icu::UnicodeSet> set;	55 // set of characters considered invalid anywhere inside a filename.

	56 scoped_ptr<icu::UnicodeSet> illegal_anywhere_;

	57

	58 // set of characters considered invalid at either end of a filename.

	59 scoped_ptr<icu::UnicodeSet> illegal_at_ends_;

49	60

50 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters);	61 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters);

51 };	62 };

52	63

53 IllegalCharacters::IllegalCharacters() {	64 IllegalCharacters::IllegalCharacters() {

54 UErrorCode status = U_ZERO_ERROR;	65 UErrorCode everywhere_status = U_ZERO_ERROR;

55 // Control characters, formatting characters, non-characters, and	66 UErrorCode ends_status = U_ZERO_ERROR;

56 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\').	67 // Control characters, formatting characters, non-characters, path separators,

	68 // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\').

57 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx	69 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx

58 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx	70 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx

59 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they	71 // Note that code points in the "Other, Format" (Cf) category are ignored on

60 // are legitimate in Arabic and some S/SE Asian scripts. However, when used	72 // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being

61 // elsewhere, they can be confusing/problematic.	73 // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is

62 // Also, consider wrapping the set with our Singleton class to create and	74 // also excluded due to the possibility of interacting poorly with short

63 // freeze it only once. Note that there's a trade-off between memory and	75 // filenames on VFAT. (Related to CVE-2014-9390)

64 // speed.	76 illegal_anywhere_.reset(new icu::UnicodeSet(

65 #if defined(WCHAR_T_IS_UTF16)	77 UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"),

66 set.reset(new icu::UnicodeSet(icu::UnicodeString(	78 everywhere_status));

67 L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status));	79 illegal_at_ends_.reset(new icu::UnicodeSet(

68 #else	80 UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status));

69 set.reset(new icu::UnicodeSet(UNICODE_STRING_SIMPLE(	81 DCHECK(U_SUCCESS(everywhere_status));

70 "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(),	82 DCHECK(U_SUCCESS(ends_status));

71 status));	83

72 #endif

73 DCHECK(U_SUCCESS(status));

74 // Add non-characters. If this becomes a performance bottleneck by	84 // Add non-characters. If this becomes a performance bottleneck by

75 // any chance, do not add these to |set| and change IsFilenameLegal()	85 // any chance, do not add these to |set| and change IsFilenameLegal()

76 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling	86 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling

77 // containsNone().	87 // IsAllowedName().

78 set->add(0xFDD0, 0xFDEF);	88 illegal_anywhere_->add(0xFDD0, 0xFDEF);

79 for (int i = 0; i <= 0x10; ++i) {	89 for (int i = 0; i <= 0x10; ++i) {

80 int plane_base = 0x10000 * i;	90 int plane_base = 0x10000 * i;

81 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF);	91 illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF);

82 }	92 }

83 set->freeze();	93 illegal_anywhere_->freeze();

	94 illegal_at_ends_->freeze();

84 }	95 }

85	96

86 } // namespace	97 } // namespace

87	98

88 bool IsFilenameLegal(const string16& file_name) {	99 bool IsFilenameLegal(const string16& file_name) {

89 return IllegalCharacters::GetInstance()->containsNone(file_name);	100 return IllegalCharacters::GetInstance()->IsAllowedName(file_name);

90 }	101 }

91	102

92 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,	103 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,

93 char replace_char) {	104 char replace_char) {

94 DCHECK(file_name);	105 IllegalCharacters* illegal = IllegalCharacters::GetInstance();

95	106

96 DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char)));	107 DCHECK(!(illegal->DisallowedEverywhere(replace_char)));

	108 DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char)));

97	109

98 // Remove leading and trailing whitespace.

99 TrimWhitespace(*file_name, TRIM_ALL, file_name);

100

101 IllegalCharacters* illegal = IllegalCharacters::GetInstance();

102 int cursor = 0; // The ICU macros expect an int.	110 int cursor = 0; // The ICU macros expect an int.

103 while (cursor < static_cast<int>(file_name->size())) {	111 while (cursor < static_cast<int>(file_name->size())) {

104 int char_begin = cursor;	112 int char_begin = cursor;

105 uint32 code_point;	113 uint32 code_point;

106 #if defined(OS_MACOSX)	114 #if defined(OS_MACOSX)

107 // Mac uses UTF-8 encoding for filenames.	115 // Mac uses UTF-8 encoding for filenames.

108 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),	116 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),

109 code_point);	117 code_point);

110 #elif defined(OS_WIN)	118 #elif defined(OS_WIN)

111 // Windows uses UTF-16 encoding for filenames.	119 // Windows uses UTF-16 encoding for filenames.

112 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),	120 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),

113 code_point);	121 code_point);

114 #elif defined(OS_POSIX)	122 #elif defined(OS_POSIX)

115 // Linux doesn't actually define an encoding. It basically allows anything	123 // Linux doesn't actually define an encoding. It basically allows anything

116 // except for a few special ASCII characters.	124 // except for a few special ASCII characters.

117 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]);	125 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]);

118 if (cur_char >= 0x80)	126 if (cur_char >= 0x80)

119 continue;	127 continue;

120 code_point = cur_char;	128 code_point = cur_char;

121 #else	129 #else

122 NOTREACHED();	130 NOTREACHED();

123 #endif	131 #endif

124	132

125 if (illegal->contains(code_point)) {	133 if (illegal->DisallowedEverywhere(code_point) ||

	134 ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) &&

	135 illegal->DisallowedLeadingOrTrailing(code_point))) {

126 file_name->replace(char_begin, cursor - char_begin, 1, replace_char);	136 file_name->replace(char_begin, cursor - char_begin, 1, replace_char);

127 // We just made the potentially multi-byte/word char into one that only	137 // We just made the potentially multi-byte/word char into one that only

128 // takes one byte/word, so need to adjust the cursor to point to the next	138 // takes one byte/word, so need to adjust the cursor to point to the next

129 // character again.	139 // character again.

130 cursor = char_begin + 1;	140 cursor = char_begin + 1;

131 }	141 }

132 }	142 }

133 }	143 }

134	144

135 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {	145 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {

(...skipping 27 matching lines...) Expand all Loading...
163 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(),	173 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(),

164 kCodepageUTF8,	174 kCodepageUTF8,

165 &normalized_str)) {	175 &normalized_str)) {

166 *file_name = file_name->DirName().Append(FilePath(normalized_str));	176 *file_name = file_name->DirName().Append(FilePath(normalized_str));

167 }	177 }

168 #endif	178 #endif

169 }	179 }

170	180

171 } // namespace i18n	181 } // namespace i18n

172 } // namespace base	182 } // namespace base

OLD	NEW

« no previous file with comments | « base/i18n/file_util_icu.h ('k') | base/i18n/file_util_icu_unittest.cc » ('j') | no next file with comments »