packages/utf/lib/src/utf/utf32.dart - Issue 2989763002: Update charted to 0.4.8 and roll

Side by Side Diff: packages/utf/lib/src/utf/utf32.dart

Issue 2989763002: Update charted to 0.4.8 and roll (Closed)

Patch Set: Removed Cutch from list of reviewers Created 3 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.

4

5 part of utf;

6

/**
 * Lazily decodes UTF-32 [bytes] into codepoints.
 *
 * The returned iterable creates a fresh [Utf32BytesDecoder] every time its
 * iterator is requested, so only as much of the input as the consumer
 * actually iterates over is converted. The byte order is taken from a
 * leading BOM when one is present and defaults to big-endian otherwise; a
 * leading BOM is always stripped.
 *
 * [offset] and [length] select the range of [bytes] to decode. Set
 * [replacementCodepoint] to null to throw an ArgumentError rather than
 * replace a bad value.
 */
IterableUtf32Decoder decodeUtf32AsIterable(List<int> bytes, [
    int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Invoked once per requested iterator; nothing is decoded up front.
  Utf32BytesDecoder createDecoder() {
    return new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint);
  }

  return new IterableUtf32Decoder._(createDecoder);
}

20

/**
 * Lazily decodes UTF-32BE [bytes] into codepoints.
 *
 * The returned iterable creates a fresh [Utf32beBytesDecoder] every time its
 * iterator is requested, so only as much of the input as the consumer
 * actually iterates over is converted. A leading big-endian BOM is stripped
 * by default; set the optional parameter [stripBom] to false to keep it.
 *
 * [offset] and [length] select the range of [bytes] to decode. Set
 * [replacementCodepoint] to null to throw an ArgumentError rather than
 * replace a bad value.
 */
IterableUtf32Decoder decodeUtf32beAsIterable(List<int> bytes, [
    int offset = 0, int length, bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Invoked once per requested iterator; nothing is decoded up front.
  Utf32BytesDecoder createDecoder() {
    return new Utf32beBytesDecoder(
        bytes, offset, length, stripBom, replacementCodepoint);
  }

  return new IterableUtf32Decoder._(createDecoder);
}

35

/**
 * Lazily decodes UTF-32LE [bytes] into codepoints.
 *
 * The returned iterable creates a fresh [Utf32leBytesDecoder] every time its
 * iterator is requested, so only as much of the input as the consumer
 * actually iterates over is converted. A leading little-endian BOM is
 * stripped by default; set the optional parameter [stripBom] to false to
 * keep it.
 *
 * [offset] and [length] select the range of [bytes] to decode. Set
 * [replacementCodepoint] to null to throw an ArgumentError rather than
 * replace a bad value.
 */
IterableUtf32Decoder decodeUtf32leAsIterable(List<int> bytes, [
    int offset = 0, int length, bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Invoked once per requested iterator; nothing is decoded up front.
  Utf32BytesDecoder createDecoder() {
    return new Utf32leBytesDecoder(
        bytes, offset, length, stripBom, replacementCodepoint);
  }

  return new IterableUtf32Decoder._(createDecoder);
}

50

/**
 * Produces a String from a sequence of UTF-32 encoded bytes.
 *
 * [offset] and [length] restrict decoding to a range of [bytes] (given as
 * ints). The byte order is chosen by [Utf32BytesDecoder]: a leading BOM
 * decides it (and is dropped), otherwise big-endian is used.
 *
 * [replacementCodepoint] overrides the default Unicode replacement
 * character. Set it to null to throw an ArgumentError rather than replace
 * a bad value.
 */
String decodeUtf32(List<int> bytes, [int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  Utf32BytesDecoder decoder =
      new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint);
  List<int> codepoints = decoder.decodeRest();
  return new String.fromCharCodes(codepoints);
}

/**
 * Produces a String from a sequence of UTF-32BE encoded bytes.
 *
 * [offset] and [length] restrict decoding to a range of [bytes] (given as
 * ints). A leading big-endian BOM is skipped unless [stripBom] is false.
 *
 * [replacementCodepoint] overrides the default Unicode replacement
 * character. Set it to null to throw an ArgumentError rather than replace
 * a bad value.
 */
String decodeUtf32be(
    List<int> bytes, [int offset = 0, int length, bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  Utf32BytesDecoder decoder = new Utf32beBytesDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);
  List<int> codepoints = decoder.decodeRest();
  return new String.fromCharCodes(codepoints);
}

75

/**
 * Produces a String from a sequence of UTF-32LE encoded bytes.
 *
 * [offset] and [length] restrict decoding to a range of [bytes] (given as
 * ints). A leading little-endian BOM is skipped unless [stripBom] is false.
 *
 * [replacementCodepoint] overrides the default Unicode replacement
 * character. Set it to null to throw an ArgumentError rather than replace
 * a bad value.
 */
String decodeUtf32le(
    List<int> bytes, [int offset = 0, int length, bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  Utf32BytesDecoder decoder = new Utf32leBytesDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);
  List<int> codepoints = decoder.decodeRest();
  return new String.fromCharCodes(codepoints);
}

88

/**
 * Produces a list of UTF-32 encoded bytes for [str].
 *
 * The output is big-endian and is prefixed with a big-endian
 * byte-order-marker; the work is delegated to [encodeUtf32be] with BOM
 * writing switched on.
 */
List<int> encodeUtf32(String str) {
  const bool withBom = true;
  return encodeUtf32be(str, withBom);
}

95

/**
 * Produces a list of UTF-32BE encoded bytes for [str].
 *
 * Every codepoint returned by [stringToCodepoints] becomes four bytes, most
 * significant byte first. By default no BOM is written; pass true for
 * [writeBOM] to prefix the output with the big-endian BOM.
 */
List<int> encodeUtf32be(String str, [bool writeBOM = false]) {
  List<int> codepoints = stringToCodepoints(str);
  int bomLength = writeBOM ? 4 : 0;
  List<int> bytes = new List<int>(4 * codepoints.length + bomLength);

  if (writeBOM) {
    // Big-endian BOM: two zero bytes, then the high and low BOM bytes.
    bytes[0] = 0;
    bytes[1] = 0;
    bytes[2] = UNICODE_UTF_BOM_HI;
    bytes[3] = UNICODE_UTF_BOM_LO;
  }

  // Write cursor; starts right after the BOM when one was emitted.
  int out = bomLength;
  for (int codepoint in codepoints) {
    bytes[out] = (codepoint >> 24) & UNICODE_BYTE_ZERO_MASK;
    bytes[out + 1] = (codepoint >> 16) & UNICODE_BYTE_ZERO_MASK;
    bytes[out + 2] = (codepoint >> 8) & UNICODE_BYTE_ZERO_MASK;
    bytes[out + 3] = codepoint & UNICODE_BYTE_ZERO_MASK;
    out += 4;
  }
  return bytes;
}

119

/**
 * Produces a list of UTF-32LE encoded bytes for [str].
 *
 * Every codepoint returned by [stringToCodepoints] becomes four bytes,
 * least significant byte first. By default this method produces UTF-32LE
 * bytes with no BOM; pass true for [writeBOM] to prefix the output with the
 * little-endian BOM.
 */
List<int> encodeUtf32le(String str, [bool writeBOM = false]) {
  List<int> codepoints = stringToCodepoints(str);
  int bomLength = writeBOM ? 4 : 0;
  List<int> bytes = new List<int>(4 * codepoints.length + bomLength);

  if (writeBOM) {
    // Little-endian BOM: the low and high BOM bytes, then two zero bytes.
    bytes[0] = UNICODE_UTF_BOM_LO;
    bytes[1] = UNICODE_UTF_BOM_HI;
    bytes[2] = 0;
    bytes[3] = 0;
  }

  // Write cursor; starts right after the BOM when one was emitted.
  int out = bomLength;
  for (int codepoint in codepoints) {
    bytes[out] = codepoint & UNICODE_BYTE_ZERO_MASK;
    bytes[out + 1] = (codepoint >> 8) & UNICODE_BYTE_ZERO_MASK;
    bytes[out + 2] = (codepoint >> 16) & UNICODE_BYTE_ZERO_MASK;
    bytes[out + 3] = (codepoint >> 24) & UNICODE_BYTE_ZERO_MASK;
    out += 4;
  }
  return bytes;
}

143

/**
 * Identifies whether a List of bytes starts (based on [offset]) with a
 * UTF-32 byte-order marker (BOM) of either endianness.
 *
 * The big-endian form is checked first; the little-endian check only runs
 * when that one fails.
 */
bool hasUtf32Bom(
    List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {
    return true;
  }
  return hasUtf32leBom(utf32EncodedBytes, offset, length);
}

153

/**
 * Identifies whether a List of bytes starts (based on [offset]) with a
 * big-endian byte-order marker (BOM).
 *
 * When [length] is null the range extends to the end of
 * [utf32EncodedBytes]. Returns false if the range holds fewer than four
 * bytes.
 */
bool hasUtf32beBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  int end;
  if (length != null) {
    end = offset + length;
  } else {
    end = utf32EncodedBytes.length;
  }
  if (offset + 4 > end) {
    return false;
  }

  // Compared in order, stopping at the first byte that does not match.
  List<int> bom = [0, 0, UNICODE_UTF_BOM_HI, UNICODE_UTF_BOM_LO];
  for (int i = 0; i < 4; i++) {
    if (utf32EncodedBytes[offset + i] != bom[i]) {
      return false;
    }
  }
  return true;
}

165

/**
 * Identifies whether a List of bytes starts (based on [offset]) with a
 * little-endian byte-order marker (BOM).
 *
 * When [length] is null the range extends to the end of
 * [utf32EncodedBytes]. Returns false if the range holds fewer than four
 * bytes.
 */
bool hasUtf32leBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  int end;
  if (length != null) {
    end = offset + length;
  } else {
    end = utf32EncodedBytes.length;
  }
  if (offset + 4 > end) {
    return false;
  }

  // Compared in order, stopping at the first byte that does not match.
  List<int> bom = [UNICODE_UTF_BOM_LO, UNICODE_UTF_BOM_HI, 0, 0];
  for (int i = 0; i < 4; i++) {
    if (utf32EncodedBytes[offset + i] != bom[i]) {
      return false;
    }
  }
  return true;
}

177

/**
 * Signature of the zero-argument callback that [IterableUtf32Decoder] calls
 * to obtain a new [Utf32BytesDecoder].
 */
typedef Utf32BytesDecoder Utf32BytesDecoderProvider();

/**
 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type
 * hands out an iterator on demand, and that iterator only translates bytes
 * as the user of the iterator asks for them. (Note: results are not cached;
 * each access to [iterator] calls [codeunitsProvider] again.)
 */
// TODO(floitsch): Consider removing the extend and switch to implements since
// that's cheaper to allocate.
class IterableUtf32Decoder extends IterableBase<int> {
  final Utf32BytesDecoderProvider codeunitsProvider;

  IterableUtf32Decoder._(this.codeunitsProvider);

  Utf32BytesDecoder get iterator {
    return codeunitsProvider();
  }
}

194

/**
 * Abstract parent class that converts UTF-32 encoded bytes to codepoints.
 *
 * Subclasses supply [decode], which reads the next four bytes from
 * [utf32EncodedBytesIterator] in their own byte order.
 */
abstract class Utf32BytesDecoder implements ListRangeIterator {
  // TODO(kevmoo): should this field be private?
  final ListRangeIterator utf32EncodedBytesIterator;
  final int replacementCodepoint;
  int _current;

  Utf32BytesDecoder._fromListRangeIterator(
      this.utf32EncodedBytesIterator, this.replacementCodepoint);

  /**
   * Picks a decoder from the leading BOM: big-endian or little-endian when
   * the matching BOM is present (the BOM is excluded from the decoded
   * range), big-endian when there is no BOM.
   */
  factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [
      int offset = 0, int length,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
    int byteCount =
        length != null ? length : utf32EncodedBytes.length - offset;

    if (hasUtf32beBom(utf32EncodedBytes, offset, byteCount)) {
      // The range is moved past the BOM here, so the decoder is told not to
      // strip again.
      return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4,
          byteCount - 4, false, replacementCodepoint);
    }
    if (hasUtf32leBom(utf32EncodedBytes, offset, byteCount)) {
      return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4,
          byteCount - 4, false, replacementCodepoint);
    }
    return new Utf32beBytesDecoder(
        utf32EncodedBytes, offset, byteCount, false, replacementCodepoint);
  }

  /**
   * Decodes everything that is left and returns it as a fixed-length list
   * with [remaining] entries.
   */
  List<int> decodeRest() {
    List<int> decoded = new List<int>(remaining);
    for (int index = 0; moveNext(); index++) {
      decoded[index] = current;
    }
    return decoded;
  }

  int get current => _current;

  /**
   * Advances to the next codepoint. Returns false once no bytes are left.
   *
   * A trailing group of fewer than four bytes, or a decoded value rejected
   * by [_validCodepoint], yields [replacementCodepoint]; when that is null
   * an ArgumentError is thrown instead.
   */
  bool moveNext() {
    _current = null;
    int bytesLeft = utf32EncodedBytesIterator.remaining;
    if (bytesLeft == 0) {
      return false;
    }

    if (bytesLeft >= 4) {
      int codepoint = decode();
      if (_validCodepoint(codepoint)) {
        _current = codepoint;
        return true;
      }
    } else {
      // Incomplete trailing group: consume it so iteration can terminate.
      utf32EncodedBytesIterator.skip(bytesLeft);
    }

    // Malformed input: either substitute or report.
    if (replacementCodepoint == null) {
      throw new ArgumentError(
          "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");
    }
    _current = replacementCodepoint;
    return true;
  }

  // The byte iterator's position divided by four (integer division).
  int get position {
    return utf32EncodedBytesIterator.position ~/ 4;
  }

  // Moves back by [by] codepoints, i.e. four bytes each.
  void backup([int by = 1]) {
    utf32EncodedBytesIterator.backup(by * 4);
  }

  // Rounds up, so an incomplete trailing group counts as one more entry.
  int get remaining {
    return (utf32EncodedBytesIterator.remaining + 3) ~/ 4;
  }

  // Moves forward by [count] codepoints, i.e. four bytes each.
  void skip([int count = 1]) {
    utf32EncodedBytesIterator.skip(count * 4);
  }

  /** Reads the next four bytes and combines them into a single value. */
  int decode();
}

280

/**
 * Converts UTF-32BE encoded bytes to codepoints by grouping 4 bytes,
 * most significant byte first, to produce the unicode codepoint.
 */
class Utf32beBytesDecoder extends Utf32BytesDecoder {
  Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
      int length, bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      super._fromListRangeIterator(
          (new ListRange(utf32EncodedBytes, offset, length)).iterator,
          replacementCodepoint) {
    if (stripBom) {
      if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {
        // Step over the four BOM bytes (one codepoint).
        skip();
      }
    }
  }

  int decode() {
    ListRangeIterator source = utf32EncodedBytesIterator;
    source.moveNext();
    int codepoint = source.current;
    // Each further byte is less significant than the ones read before it.
    for (int i = 0; i < 3; i++) {
      source.moveNext();
      codepoint = (codepoint << 8) + source.current;
    }
    return codepoint;
  }
}

309

/**
 * Converts UTF-32LE encoded bytes to codepoints by grouping 4 bytes,
 * least significant byte first, to produce the unicode codepoint.
 */
class Utf32leBytesDecoder extends Utf32BytesDecoder {
  Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
      int length, bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      super._fromListRangeIterator(
          (new ListRange(utf32EncodedBytes, offset, length)).iterator,
          replacementCodepoint) {
    if (stripBom) {
      if (hasUtf32leBom(utf32EncodedBytes, offset, length)) {
        // Step over the four BOM bytes (one codepoint).
        skip();
      }
    }
  }

  int decode() {
    ListRangeIterator source = utf32EncodedBytesIterator;
    source.moveNext();
    int codepoint = source.current;
    // Each further byte is more significant than the ones read before it.
    for (int shift = 8; shift <= 24; shift += 8) {
      source.moveNext();
      codepoint += (source.current << shift);
    }
    return codepoint;
  }
}

338

/**
 * Whether [codepoint] is accepted by the decoder: non-negative and below
 * UNICODE_UTF16_RESERVED_LO, or strictly between UNICODE_UTF16_RESERVED_HI
 * and UNICODE_VALID_RANGE_MAX.
 */
// NOTE(review): the upper bound is exclusive. If UNICODE_VALID_RANGE_MAX is
// itself the last valid codepoint (0x10FFFF), that value is rejected here;
// confirm against the constant's definition.
bool _validCodepoint(int codepoint) {
  bool belowReserved =
      codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO;
  if (belowReserved) {
    return true;
  }
  bool aboveReserved = codepoint > UNICODE_UTF16_RESERVED_HI;
  return aboveReserved && codepoint < UNICODE_VALID_RANGE_MAX;
}

OLD	NEW

« no previous file with comments | « packages/utf/lib/src/utf/utf16.dart ('k') | packages/utf/lib/src/utf/utf8.dart » ('j') | no next file with comments »