base/strings/utf_offset_string_conversions.cc - Issue 1647803004: Move base to DEPS

Side by Side Diff: base/strings/utf_offset_string_conversions.cc

Issue 1647803004: Move base to DEPS (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "base/strings/utf_offset_string_conversions.h"

6

7 #include <algorithm>

8

9 #include "base/logging.h"

10 #include "base/memory/scoped_ptr.h"

11 #include "base/strings/string_piece.h"

12 #include "base/strings/utf_string_conversion_utils.h"

13

14 namespace base {

15

16 OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,

17 size_t original_length,

18 size_t output_length)

19 : original_offset(original_offset),

20 original_length(original_length),

21 output_length(output_length) {

22 }

23

24 // static

25 void OffsetAdjuster::AdjustOffsets(

26 const Adjustments& adjustments,

27 std::vector<size_t>* offsets_for_adjustment) {

28 if (!offsets_for_adjustment \|\| adjustments.empty())

29 return;

30 for (std::vector<size_t>::iterator i(offsets_for_adjustment->begin());

31 i != offsets_for_adjustment->end(); ++i)

32 AdjustOffset(adjustments, &(*i));

33 }

34

35 // static

36 void OffsetAdjuster::AdjustOffset(const Adjustments& adjustments,

37 size_t* offset) {

38 if (*offset == string16::npos)

39 return;

40 int adjustment = 0;

41 for (Adjustments::const_iterator i = adjustments.begin();

42 i != adjustments.end(); ++i) {

43 if (*offset <= i->original_offset)

44 break;

45 if (*offset < (i->original_offset + i->original_length)) {

46 *offset = string16::npos;

47 return;

48 }

49 adjustment += static_cast<int>(i->original_length - i->output_length);

50 }

51 *offset -= adjustment;

52 }

53

54 // static

55 void OffsetAdjuster::UnadjustOffsets(

56 const Adjustments& adjustments,

57 std::vector<size_t>* offsets_for_unadjustment) {

58 if (!offsets_for_unadjustment \|\| adjustments.empty())

59 return;

60 for (std::vector<size_t>::iterator i(offsets_for_unadjustment->begin());

61 i != offsets_for_unadjustment->end(); ++i)

62 UnadjustOffset(adjustments, &(*i));

63 }

64

65 // static

66 void OffsetAdjuster::UnadjustOffset(const Adjustments& adjustments,

67 size_t* offset) {

68 if (*offset == string16::npos)

69 return;

70 int adjustment = 0;

71 for (Adjustments::const_iterator i = adjustments.begin();

72 i != adjustments.end(); ++i) {

73 if (*offset + adjustment <= i->original_offset)

74 break;

75 adjustment += static_cast<int>(i->original_length - i->output_length);

76 if ((*offset + adjustment) <

77 (i->original_offset + i->original_length)) {

78 *offset = string16::npos;

79 return;

80 }

81 }

82 *offset += adjustment;

83 }

84

85 // static

86 void OffsetAdjuster::MergeSequentialAdjustments(

87 const Adjustments& first_adjustments,

88 Adjustments* adjustments_on_adjusted_string) {

89 Adjustments::iterator adjusted_iter = adjustments_on_adjusted_string->begin();

90 Adjustments::const_iterator first_iter = first_adjustments.begin();

91 // Simultaneously iterate over all \|adjustments_on_adjusted_string\| and

92 // \|first_adjustments\|, adding adjustments to or correcting the adjustments

93 // in \|adjustments_on_adjusted_string\| as we go. \|shift\| keeps track of the

94 // current number of characters collapsed by \|first_adjustments\| up to this

95 // point. \|currently_collapsing\| keeps track of the number of characters

96 // collapsed by \|first_adjustments\| into the current \|adjusted_iter\|'s

97 // length. These are characters that will change \|shift\| as soon as we're

98 // done processing the current \|adjusted_iter\|; they are not yet reflected in

99 // \|shift\|.

100 size_t shift = 0;

101 size_t currently_collapsing = 0;

102 while (adjusted_iter != adjustments_on_adjusted_string->end()) {

103 if ((first_iter == first_adjustments.end()) \|\|

104 ((adjusted_iter->original_offset + shift +

105 adjusted_iter->original_length) <= first_iter->original_offset)) {

106 // Entire \|adjusted_iter\| (accounting for its shift and including its

107 // whole original length) comes before \|first_iter\|.

108 //

109 // Correct the offset at \|adjusted_iter\| and move onto the next

110 // adjustment that needs revising.

111 adjusted_iter->original_offset += shift;

112 shift += currently_collapsing;

113 currently_collapsing = 0;

114 ++adjusted_iter;

115 } else if ((adjusted_iter->original_offset + shift) >

116 first_iter->original_offset) {

117 // \|first_iter\| comes before the \|adjusted_iter\| (as adjusted by \|shift\|).

118

119 // It's not possible for the adjustments to overlap. (It shouldn't

120 // be possible that we have an \|adjusted_iter->original_offset\| that,

121 // when adjusted by the computed \|shift\|, is in the middle of

122 // \|first_iter\|'s output's length. After all, that would mean the

123 // current adjustment_on_adjusted_string somehow points to an offset

124 // that was supposed to have been eliminated by the first set of

125 // adjustments.)

126 DCHECK_LE(first_iter->original_offset + first_iter->output_length,

127 adjusted_iter->original_offset + shift);

128

129 // Add the \|first_adjustment_iter\| to the full set of adjustments while

130 // making sure \|adjusted_iter\| continues pointing to the same element.

131 // We do this by inserting the \|first_adjustment_iter\| right before

132 // \|adjusted_iter\|, then incrementing \|adjusted_iter\| so it points to

133 // the following element.

134 shift += first_iter->original_length - first_iter->output_length;

135 adjusted_iter = adjustments_on_adjusted_string->insert(

136 adjusted_iter, *first_iter);

137 ++adjusted_iter;

138 ++first_iter;

139 } else {

140 // The first adjustment adjusted something that then got further adjusted

141 // by the second set of adjustments. In other words, \|first_iter\| points

142 // to something in the range covered by \|adjusted_iter\|'s length (after

143 // accounting for \|shift\|). Precisely,

144 // adjusted_iter->original_offset + shift

145 // <=

146 // first_iter->original_offset

147 // <=

148 // adjusted_iter->original_offset + shift +

149 // adjusted_iter->original_length

150

151 // Modify the current \|adjusted_iter\| to include whatever collapsing

152 // happened in \|first_iter\|, then advance to the next \|first_adjustments\|

153 // because we dealt with the current one.

154 const int collapse = static_cast<int>(first_iter->original_length) -

155 static_cast<int>(first_iter->output_length);

156 // This function does not know how to deal with a string that expands and

157 // then gets modified, only strings that collapse and then get modified.

158 DCHECK_GT(collapse, 0);

159 adjusted_iter->original_length += collapse;

160 currently_collapsing += collapse;

161 ++first_iter;

162 }

163 }

164 DCHECK_EQ(0u, currently_collapsing);

165 if (first_iter != first_adjustments.end()) {

166 // Only first adjustments are left. These do not need to be modified.

167 // (Their offsets are already correct with respect to the original string.)

168 // Append them all.

169 DCHECK(adjusted_iter == adjustments_on_adjusted_string->end());

170 adjustments_on_adjusted_string->insert(

171 adjustments_on_adjusted_string->end(), first_iter,

172 first_adjustments.end());

173 }

174 }

175

176 // Converts the given source Unicode character type to the given destination

177 // Unicode character type as a STL string. The given input buffer and size

178 // determine the source, and the given output STL string will be replaced by

179 // the result. If non-NULL, \|adjustments\| is set to reflect the all the

180 // alterations to the string that are not one-character-to-one-character.

181 // It will always be sorted by increasing offset.

182 template<typename SrcChar, typename DestStdString>

183 bool ConvertUnicode(const SrcChar* src,

184 size_t src_len,

185 DestStdString* output,

186 OffsetAdjuster::Adjustments* adjustments) {

187 if (adjustments)

188 adjustments->clear();

189 // ICU requires 32-bit numbers.

190 bool success = true;

191 int32 src_len32 = static_cast<int32>(src_len);

192 for (int32 i = 0; i < src_len32; i++) {

193 uint32 code_point;

194 size_t original_i = i;

195 size_t chars_written = 0;

196 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {

197 chars_written = WriteUnicodeCharacter(code_point, output);

198 } else {

199 chars_written = WriteUnicodeCharacter(0xFFFD, output);

200 success = false;

201 }

202

203 // Only bother writing an adjustment if this modification changed the

204 // length of this character.

205 // NOTE: ReadUnicodeCharacter() adjusts \|i\| to point _at_ the last

206 // character read, not after it (so that incrementing it in the loop

207 // increment will place it at the right location), so we need to account

208 // for that in determining the amount that was read.

209 if (adjustments && ((i - original_i + 1) != chars_written)) {

210 adjustments->push_back(OffsetAdjuster::Adjustment(

211 original_i, i - original_i + 1, chars_written));

212 }

213 }

214 return success;

215 }

216

217 bool UTF8ToUTF16WithAdjustments(

218 const char* src,

219 size_t src_len,

220 string16* output,

221 base::OffsetAdjuster::Adjustments* adjustments) {

222 PrepareForUTF16Or32Output(src, src_len, output);

223 return ConvertUnicode(src, src_len, output, adjustments);

224 }

225

226 string16 UTF8ToUTF16WithAdjustments(

227 const base::StringPiece& utf8,

228 base::OffsetAdjuster::Adjustments* adjustments) {

229 string16 result;

230 UTF8ToUTF16WithAdjustments(utf8.data(), utf8.length(), &result, adjustments);

231 return result;

232 }

233

234 string16 UTF8ToUTF16AndAdjustOffsets(

235 const base::StringPiece& utf8,

236 std::vector<size_t>* offsets_for_adjustment) {

237 std::for_each(offsets_for_adjustment->begin(),

238 offsets_for_adjustment->end(),

239 LimitOffset<base::StringPiece>(utf8.length()));

240 OffsetAdjuster::Adjustments adjustments;

241 string16 result = UTF8ToUTF16WithAdjustments(utf8, &adjustments);

242 OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);

243 return result;

244 }

245

246 std::string UTF16ToUTF8AndAdjustOffsets(

247 const base::StringPiece16& utf16,

248 std::vector<size_t>* offsets_for_adjustment) {

249 std::for_each(offsets_for_adjustment->begin(),

250 offsets_for_adjustment->end(),

251 LimitOffset<base::StringPiece16>(utf16.length()));

252 std::string result;

253 PrepareForUTF8Output(utf16.data(), utf16.length(), &result);

254 OffsetAdjuster::Adjustments adjustments;

255 ConvertUnicode(utf16.data(), utf16.length(), &result, &adjustments);

256 OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);

257 return result;

258 }

259

260 } // namespace base

OLD	NEW

« no previous file with comments | « base/strings/utf_offset_string_conversions.h ('k') | base/strings/utf_offset_string_conversions_unittest.cc » ('j') | no next file with comments »