base/i18n/file_util_icu.cc - Issue 271056: Do some cleanup of file path name handling.

Side by Side Diff: base/i18n/file_util_icu.cc

Issue 271056: Do some cleanup of file path name handling. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // File utilities that use the ICU library go in this file.	5 // File utilities that use the ICU library go in this file.

6	6

7 #include "base/i18n/file_util_icu.h"	7 #include "base/i18n/file_util_icu.h"

8	8

9 #include "base/file_path.h"	9 #include "base/file_path.h"

10 #include "base/scoped_ptr.h"	10 #include "base/scoped_ptr.h"

(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
117 };	117 };

118	118

119 } // namespace	119 } // namespace

120	120

121 namespace file_util {	121 namespace file_util {

122	122

123 bool IsFilenameLegal(const string16& file_name) {	123 bool IsFilenameLegal(const string16& file_name) {

124 return Singleton<IllegalCharacters>()->containsNone(file_name);	124 return Singleton<IllegalCharacters>()->containsNone(file_name);

125 }	125 }

126	126

127 void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {	127 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,

	128 char replace_char) {

128 DCHECK(file_name);	129 DCHECK(file_name);

129	130

130 DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) &&	131 DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)));

131 replace_char < 0x10000);

132	132

133 // Remove leading and trailing whitespace.	133 // Remove leading and trailing whitespace.

134 TrimWhitespace(*file_name, TRIM_ALL, file_name);	134 TrimWhitespace(*file_name, TRIM_ALL, file_name);

135	135

136 if (IsFilenameLegal(WideToUTF16(*file_name)))	136 IllegalCharacters* illegal = Singleton<IllegalCharacters>::get();

137 return;	137 int cursor = 0; // The ICU macros expect an int.

	138 while (cursor < static_cast<int>(file_name->size())) {

	139 int char_begin = cursor;

	140 uint32 code_point;

	141 #if defined(OS_MACOSX)

	142 // Mac uses UTF-8 encoding for filenames.

	143 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),

	144 code_point);

	145 #elif defined(OS_WIN)

	146 // Windows uses UTF-16 encoding for filenames.

	147 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),

	148 code_point);

	149 #elif defined(OS_LINUX)

	150 // Linux doesn't actually define an encoding. It basically allows anything

	151 // except for a few special ASCII characters.

	152 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]);

	153 if (cur_char >= 0x80)

	154 continue;

	155 code_point = cur_char;

	156 #else

	157 NOTREACHED();

	158 #endif

138	159

139 std::wstring::size_type i = 0;	160 if (illegal->contains(code_point)) {

140 std::wstring::size_type length = file_name->size();	161 file_name->replace(char_begin, cursor - char_begin, 1, replace_char);

141 const wchar_t* wstr = file_name->data();	162 // We just made the potentially multi-byte/word char into one that only

142 #if defined(WCHAR_T_IS_UTF16)	163 // takes one byte/word, so need to adjust the cursor to point to the next

143 // Using |span| method of UnicodeSet might speed things up a bit, but	164 // character again.

144 // it's not likely to matter here.	165 cursor = char_begin + 1;

145 std::wstring temp;

146 temp.reserve(length);

147 while (i < length) {

148 UChar32 ucs4;

149 std::wstring::size_type prev = i;

150 U16_NEXT(wstr, i, length, ucs4);

151 if (Singleton<IllegalCharacters>()->contains(ucs4)) {

152 temp.push_back(replace_char);

153 } else if (ucs4 < 0x10000) {

154 temp.push_back(ucs4);

155 } else {

156 temp.push_back(wstr[prev]);

157 temp.push_back(wstr[prev + 1]);

158 }	166 }

159 }	167 }

160 file_name->swap(temp);

161 #elif defined(WCHAR_T_IS_UTF32)

162 while (i < length) {

163 if (Singleton<IllegalCharacters>()->contains(wstr[i])) {

164 (*file_name)[i] = replace_char;

165 }

166 ++i;

167 }

168 #else

169 #error wchar_t* should be either UTF-16 or UTF-32

170 #endif

171 }	168 }

172	169

173 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {	170 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {

174 #if defined(OS_WIN)	171 #if defined(OS_WIN)

175 return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(),	172 return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(),

176 b.value().c_str()) < 0;	173 b.value().c_str()) < 0;

177	174

178 #elif defined(OS_POSIX)	175 #elif defined(OS_POSIX)

179 // On linux, the file system encoding is not defined. We assume	176 // On linux, the file system encoding is not defined. We assume

180 // SysNativeMBToWide takes care of it.	177 // SysNativeMBToWide takes care of it.

181 //	178 //

182 // ICU's collator can take strings in OS native encoding. But we convert the	179 // ICU's collator can take strings in OS native encoding. But we convert the

183 // strings to UTF-16 ourselves to ensure conversion consistency.	180 // strings to UTF-16 ourselves to ensure conversion consistency.

184 // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16?	181 // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16?

185 return Singleton<LocaleAwareComparator>()->Compare(	182 return Singleton<LocaleAwareComparator>()->Compare(

186 WideToUTF16(base::SysNativeMBToWide(a.value().c_str())),	183 WideToUTF16(base::SysNativeMBToWide(a.value().c_str())),

187 WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0;	184 WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0;

188 #else	185 #else

189 #error Not implemented on your system	186 #error Not implemented on your system

190 #endif	187 #endif

191 }	188 }

192	189

193 } // namespace	190 } // namespace

OLD	NEW

« no previous file with comments | « base/i18n/file_util_icu.h ('k') | base/i18n/file_util_icu_unittest.cc » ('j') | no next file with comments »