Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(241)

Side by Side Diff: base/utf_offset_string_conversions.cc

Issue 6898026: Eliminate wstring from base/utf_offset_string_conversions.h, net/base/escape.h, and net/base/net_... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/utf_offset_string_conversions.h" 5 #include "base/utf_offset_string_conversions.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/scoped_ptr.h" 9 #include "base/scoped_ptr.h"
10 #include "base/string_piece.h" 10 #include "base/string_piece.h"
11 #include "base/utf_string_conversion_utils.h" 11 #include "base/utf_string_conversion_utils.h"
12 12
13 using base::PrepareForUTF16Or32Output; 13 using base::PrepareForUTF16Or32Output;
14 using base::ReadUnicodeCharacter; 14 using base::ReadUnicodeCharacter;
15 using base::WriteUnicodeCharacter; 15 using base::WriteUnicodeCharacter;
16 16
17 // Generalized Unicode converter -----------------------------------------------
18
19 // Converts the given source Unicode character type to the given destination 17 // Converts the given source Unicode character type to the given destination
20 // Unicode character type as an STL string. The given input buffer and size 18 // Unicode character type as an STL string. The given input buffer and size
21 // determine the source, and the given output STL string will be replaced by 19 // determine the source, and the given output STL string will be replaced by
22 // the result. 20 // the result.
23 template<typename SRC_CHAR> 21 bool ConvertUnicode(const char* src,
24 bool ConvertUnicode(const SRC_CHAR* src,
25 size_t src_len, 22 size_t src_len,
26 std::wstring* output, 23 string16* output,
27 std::vector<size_t>* offsets_for_adjustment) { 24 std::vector<size_t>* offsets_for_adjustment) {
28 if (offsets_for_adjustment) { 25 if (offsets_for_adjustment) {
29 std::for_each(offsets_for_adjustment->begin(), 26 std::for_each(offsets_for_adjustment->begin(),
30 offsets_for_adjustment->end(), 27 offsets_for_adjustment->end(),
31 LimitOffset<std::wstring>(src_len)); 28 LimitOffset<string16>(src_len));
32 } 29 }
33 30
34 // ICU requires 32-bit numbers. 31 // ICU requires 32-bit numbers.
35 bool success = true; 32 bool success = true;
36 AdjustOffset::Adjustments adjustments; 33 OffsetAdjuster offset_adjuster(offsets_for_adjustment);
37 int32 src_len32 = static_cast<int32>(src_len); 34 int32 src_len32 = static_cast<int32>(src_len);
38 for (int32 i = 0; i < src_len32; i++) { 35 for (int32 i = 0; i < src_len32; i++) {
39 uint32 code_point; 36 uint32 code_point;
40 size_t original_i = i; 37 size_t original_i = i;
41 size_t chars_written = 0; 38 size_t chars_written = 0;
42 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 39 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
43 chars_written = WriteUnicodeCharacter(code_point, output); 40 chars_written = WriteUnicodeCharacter(code_point, output);
44 } else { 41 } else {
45 chars_written = WriteUnicodeCharacter(0xFFFD, output); 42 chars_written = WriteUnicodeCharacter(0xFFFD, output);
46 success = false; 43 success = false;
47 } 44 }
48 if (offsets_for_adjustment) { 45 if (offsets_for_adjustment) {
49 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last 46 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
50 // character read, not after it (so that the loop's own increment step 47 // character read, not after it (so that the loop's own increment step
51 // will place it at the right location), so we need to account 48 // will place it at the right location), so we need to account
52 // for that in determining the amount that was read. 49 // for that in determining the amount that was read.
53 adjustments.push_back(AdjustOffset::Adjustment( 50 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i,
54 original_i, i - original_i + 1, chars_written)); 51 i - original_i + 1, chars_written));
55 } 52 }
56 } 53 }
57
58 // Make offset adjustment.
59 if (offsets_for_adjustment && !adjustments.empty()) {
60 std::for_each(offsets_for_adjustment->begin(),
61 offsets_for_adjustment->end(),
62 AdjustOffset(adjustments));
63 }
64
65 return success; 54 return success;
66 } 55 }
67 56
68 // UTF-8 <-> Wide -------------------------------------------------------------- 57 bool UTF8ToUTF16AndAdjustOffset(const char* src,
69 58 size_t src_len,
70 bool UTF8ToWideAndAdjustOffset(const char* src, 59 string16* output,
71 size_t src_len, 60 size_t* offset_for_adjustment) {
72 std::wstring* output,
73 size_t* offset_for_adjustment) {
74 std::vector<size_t> offsets; 61 std::vector<size_t> offsets;
75 if (offset_for_adjustment) 62 if (offset_for_adjustment)
76 offsets.push_back(*offset_for_adjustment); 63 offsets.push_back(*offset_for_adjustment);
77 PrepareForUTF16Or32Output(src, src_len, output); 64 PrepareForUTF16Or32Output(src, src_len, output);
78 bool ret = ConvertUnicode(src, src_len, output, &offsets); 65 bool ret = ConvertUnicode(src, src_len, output, &offsets);
79 if (offset_for_adjustment) 66 if (offset_for_adjustment)
80 *offset_for_adjustment = offsets[0]; 67 *offset_for_adjustment = offsets[0];
81 return ret; 68 return ret;
82 } 69 }
83 70
84 bool UTF8ToWideAndAdjustOffsets(const char* src, 71 bool UTF8ToUTF16AndAdjustOffsets(const char* src,
85 size_t src_len, 72 size_t src_len,
86 std::wstring* output, 73 string16* output,
87 std::vector<size_t>* offsets_for_adjustment) { 74 std::vector<size_t>* offsets_for_adjustment) {
88 PrepareForUTF16Or32Output(src, src_len, output); 75 PrepareForUTF16Or32Output(src, src_len, output);
89 return ConvertUnicode(src, src_len, output, offsets_for_adjustment); 76 return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
90 } 77 }
91 78
92 std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8, 79 string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8,
93 size_t* offset_for_adjustment) {
94 std::vector<size_t> offsets;
95 if (offset_for_adjustment)
96 offsets.push_back(*offset_for_adjustment);
97 std::wstring result;
98 UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result,
99 &offsets);
100 if (offset_for_adjustment)
101 *offset_for_adjustment = offsets[0];
102 return result;
103 }
104
105 std::wstring UTF8ToWideAndAdjustOffsets(const base::StringPiece& utf8,
106 std::vector<size_t>*
107 offsets_for_adjustment) {
108 std::wstring result;
109 UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result,
110 offsets_for_adjustment);
111 return result;
112 }
113
114 // UTF-16 <-> Wide -------------------------------------------------------------
115
116 #if defined(WCHAR_T_IS_UTF16)
117
118 // When wide == UTF-16, then conversions are a NOP.
119 bool UTF16ToWideAndAdjustOffset(const char16* src,
120 size_t src_len,
121 std::wstring* output,
122 size_t* offset_for_adjustment) {
123 output->assign(src, src_len);
124 if (offset_for_adjustment && (*offset_for_adjustment >= src_len))
125 *offset_for_adjustment = std::wstring::npos;
126 return true;
127 }
128
129 bool UTF16ToWideAndAdjustOffsets(const char16* src,
130 size_t src_len,
131 std::wstring* output,
132 std::vector<size_t>* offsets_for_adjustment) {
133 output->assign(src, src_len);
134 if (offsets_for_adjustment) {
135 std::for_each(offsets_for_adjustment->begin(),
136 offsets_for_adjustment->end(),
137 LimitOffset<std::wstring>(src_len));
138 }
139 return true;
140 }
141
142 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
143 size_t* offset_for_adjustment) {
144 if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length()))
145 *offset_for_adjustment = std::wstring::npos;
146 return utf16;
147 }
148
149 std::wstring UTF16ToWideAndAdjustOffsets(
150 const string16& utf16,
151 std::vector<size_t>* offsets_for_adjustment) {
152 if (offsets_for_adjustment) {
153 std::for_each(offsets_for_adjustment->begin(),
154 offsets_for_adjustment->end(),
155 LimitOffset<std::wstring>(utf16.length()));
156 }
157 return utf16;
158 }
159
160 #elif defined(WCHAR_T_IS_UTF32)
161
162 bool UTF16ToWideAndAdjustOffset(const char16* src,
163 size_t src_len,
164 std::wstring* output,
165 size_t* offset_for_adjustment) {
166 std::vector<size_t> offsets;
167 if (offset_for_adjustment)
168 offsets.push_back(*offset_for_adjustment);
169 output->clear();
170 // Assume that normally we won't have any non-BMP characters so the counts
171 // will be the same.
172 output->reserve(src_len);
173 bool ret = ConvertUnicode(src, src_len, output, &offsets);
174 if (offset_for_adjustment)
175 *offset_for_adjustment = offsets[0];
176 return ret;
177 }
178
179 bool UTF16ToWideAndAdjustOffsets(const char16* src,
180 size_t src_len,
181 std::wstring* output,
182 std::vector<size_t>* offsets_for_adjustment) {
183 output->clear();
184 // Assume that normally we won't have any non-BMP characters so the counts
185 // will be the same.
186 output->reserve(src_len);
187 return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
188 }
189
190 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
191 size_t* offset_for_adjustment) { 80 size_t* offset_for_adjustment) {
192 std::vector<size_t> offsets; 81 std::vector<size_t> offsets;
193 if (offset_for_adjustment) 82 if (offset_for_adjustment)
194 offsets.push_back(*offset_for_adjustment); 83 offsets.push_back(*offset_for_adjustment);
195 std::wstring result; 84 string16 result;
196 UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result, 85 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
197 &offsets); 86 &offsets);
198 if (offset_for_adjustment) 87 if (offset_for_adjustment)
199 *offset_for_adjustment = offsets[0]; 88 *offset_for_adjustment = offsets[0];
200 return result; 89 return result;
201 } 90 }
202 91
203 std::wstring UTF16ToWideAndAdjustOffsets( 92 string16 UTF8ToUTF16AndAdjustOffsets(
204 const string16& utf16, 93 const base::StringPiece& utf8,
205 std::vector<size_t>* offsets_for_adjustment) { 94 std::vector<size_t>* offsets_for_adjustment) {
206 std::wstring result; 95 string16 result;
207 UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result, 96 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
208 offsets_for_adjustment); 97 offsets_for_adjustment);
209 return result; 98 return result;
210 } 99 }
211 100
212 #endif // defined(WCHAR_T_IS_UTF32) 101 OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
102 size_t original_length,
103 size_t output_length)
104 : original_offset(original_offset),
105 original_length(original_length),
106 output_length(output_length) {
107 }
213 108
214 AdjustOffset::Adjustment::Adjustment(size_t location, 109 OffsetAdjuster::OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment)
215 size_t old_length, 110 : offsets_for_adjustment_(offsets_for_adjustment) {
216 size_t new_length) 111 }
217 : location(location),
218 old_length(old_length),
219 new_length(new_length) {}
220 112
221 AdjustOffset::AdjustOffset(const Adjustments& adjustments) 113 OffsetAdjuster::~OffsetAdjuster() {
222 : adjustments_(adjustments) {} 114 if (!offsets_for_adjustment_ || adjustments_.empty())
115 return;
116 for (std::vector<size_t>::iterator i(offsets_for_adjustment_->begin());
117 i != offsets_for_adjustment_->end(); ++i)
118 AdjustOffset(i);
119 }
223 120
224 void AdjustOffset::operator()(size_t& offset) { 121 void OffsetAdjuster::Add(const Adjustment& adjustment) {
225 if (offset == std::wstring::npos) 122 adjustments_.push_back(adjustment);
123 }
124
125 void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) {
126 if (*offset == string16::npos)
226 return; 127 return;
227 size_t adjustment = 0; 128 size_t adjustment = 0;
228 for (Adjustments::const_iterator i = adjustments_.begin(); 129 for (std::vector<Adjustment>::const_iterator i = adjustments_.begin();
229 i != adjustments_.end(); ++i) { 130 i != adjustments_.end(); ++i) {
230 size_t location = i->location; 131 if (*offset == i->original_offset && i->output_length == 0) {
231 if (offset == location && i->new_length == 0) { 132 *offset = string16::npos;
232 offset = std::wstring::npos;
233 return; 133 return;
234 } 134 }
235 if (offset <= location) 135 if (*offset <= i->original_offset)
236 break; 136 break;
237 if (offset < (location + i->old_length)) { 137 if (*offset < (i->original_offset + i->original_length)) {
238 offset = std::wstring::npos; 138 *offset = string16::npos;
239 return; 139 return;
240 } 140 }
241 adjustment += (i->old_length - i->new_length); 141 adjustment += (i->original_length - i->output_length);
242 } 142 }
243 offset -= adjustment; 143 *offset -= adjustment;
244 } 144 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698