Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(142)

Side by Side Diff: pkg/utf/lib/utf_stream.dart

Issue 418433003: pkg/utf: fixed layout, added todos, updated docs and homepage pubspec links (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « pkg/utf/lib/utf8.dart ('k') | pkg/utf/pubspec.yaml » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 part of utf;
6
// TODO(floitsch): make this transformer reusable.
/**
 * Base class for stream transformers that decode chunks of bytes into strings.
 *
 * The same object serves as the [StreamTransformer] and as the [EventSink]
 * that receives the source events. It remembers a single output sink, so the
 * sink callback installed by [bind] throws a [StateError] if the decoder has
 * already been handed one.
 *
 * Subclasses implement [_processBytes] to decode one character at a time and
 * report decoded code points through [_addChar].
 */
abstract class _StringDecoder
    implements StreamTransformer<List<int>, String>, EventSink<List<int>> {
  /// Bytes of an incomplete trailing sequence, kept for the next [add] call.
  List<int> _carry;

  /// Code points decoded from the chunk currently being processed by [add].
  List<int> _buffer;

  /// Code point substituted for invalid input. When null, invalid input is
  /// reported as an [ArgumentError] instead.
  int _replacementChar;

  /// Sink of the output stream; set by the callback installed in [bind].
  EventSink<String> _outSink;

  _StringDecoder(int this._replacementChar);

  /**
   * Returns a stream of decoded strings for the byte chunks of [stream].
   */
  Stream<String> bind(Stream<List<int>> stream) {
    return new Stream.eventTransformed(
        stream,
        (EventSink<String> sink) {
          if (_outSink != null) {
            throw new StateError("String decoder already used");
          }
          _outSink = sink;
          return this;
        });
  }

  /**
   * Decodes [bytes], prefixed by any bytes carried over from the previous
   * chunk, and emits the resulting string on the output sink.
   *
   * A trailing incomplete sequence is stored in [_carry]. Anything thrown
   * while decoding is forwarded to the output sink as an error event.
   */
  void add(List<int> bytes) {
    try {
      _buffer = <int>[];
      List<int> carry = _carry;
      _carry = null;
      int pos = 0;
      int available = bytes.length;
      // Number of entries in _buffer that belong to fully processed
      // sequences.
      int goodChars = 0;
      // If we have carry-over data, start from a negative index: negative
      // positions index into the carry, non-negative ones into bytes.
      if (carry != null) pos = -carry.length;

      // Returns the next input byte and advances pos, or null when the
      // input is exhausted.
      int getNext() {
        if (pos < 0) {
          return carry[pos++ + carry.length];
        } else if (pos < available) {
          return bytes[pos++];
        }
        return null;
      }

      while (pos < available) {
        // Start of the sequence about to be decoded.
        int currentPos = pos;
        int consumed = _processBytes(getNext);
        if (consumed > 0) {
          goodChars = _buffer.length;
        } else if (consumed == 0) {
          // Input ended in the middle of a sequence: drop anything the
          // partial decode added and keep the sequence for the next chunk.
          _buffer.length = goodChars;
          if (currentPos < 0) {
            _carry = <int>[]
              ..addAll(carry)
              ..addAll(bytes);
          } else {
            _carry = bytes.sublist(currentPos);
          }
          break;
        } else {
          // Invalid byte at position pos - 1
          _buffer.length = goodChars;
          _addChar(-1);
          goodChars = _buffer.length;
        }
      }
      // Every exit from the loop leaves _buffer holding exactly goodChars
      // entries, so it can be emitted as is.
      if (_buffer.length > 0) {
        _outSink.add(new String.fromCharCodes(_buffer));
      }
      _buffer = null;
    } catch (e, stackTrace) {
      _outSink.addError(e, stackTrace);
    }
  }

  /**
   * Forwards [error] unchanged to the output sink.
   */
  void addError(Object error, [StackTrace stackTrace]) {
    _outSink.addError(error, stackTrace);
  }

  /**
   * Flushes a pending incomplete sequence and closes the output sink.
   *
   * With a replacement character configured, one replacement is emitted per
   * carried byte. Without one, an [ArgumentError] is reported as an error
   * event on the output sink, mirroring how [add] reports decoding errors.
   * The output sink is closed in both cases.
   */
  void close() {
    if (_carry != null) {
      if (_replacementChar != null) {
        _outSink.add(new String.fromCharCodes(
            new List.filled(_carry.length, _replacementChar)));
      } else {
        // Report through the sink rather than throwing, so that the output
        // sink is still closed below.
        _outSink.addError(new ArgumentError('Invalid codepoint'));
      }
    }
    _outSink.close();
  }

  /**
   * Decodes a single character, pulling bytes from [getNext].
   *
   * As used by [add]: a positive result means a sequence was processed, zero
   * means the input ended inside a sequence, and a negative result means an
   * invalid byte was encountered.
   */
  int _processBytes(int getNext());

  /**
   * Appends [char] to [_buffer].
   *
   * Negative values, surrogates (0xD800 to 0xDFFF) and values above 0x10FFFF
   * are replaced by [_replacementChar]; if that is null an [ArgumentError] is
   * thrown instead.
   */
  void _addChar(int char) {
    void error() {
      if (_replacementChar != null) {
        char = _replacementChar;
      } else {
        throw new ArgumentError('Invalid codepoint');
      }
    }
    if (char < 0) error();
    if (char >= 0xD800 && char <= 0xDFFF) error();
    if (char > 0x10FFFF) error();
    _buffer.add(char);
  }
}
113
/**
 * StringTransformer that decodes a stream of UTF-8 encoded bytes.
 */
class Utf8DecoderTransformer extends _StringDecoder {
  Utf8DecoderTransformer(
      [int replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
      : super(replacementChar);

  /**
   * Decodes one UTF-8 sequence from the bytes supplied by [getNext].
   *
   * Returns the number of bytes in the sequence once a character has been
   * handed to [_addChar], 0 when [getNext] ran out of input in the middle of
   * a sequence, and -1 when a value is not a byte, is not a valid lead byte,
   * or is not a valid continuation byte.
   */
  int _processBytes(int getNext()) {
    int codePoint = getNext();
    // Anything outside 0..255 is not a byte.
    if ((codePoint & 0xFF) != codePoint) return -1;

    // 0xxxxxxx: a single-byte sequence, emitted directly.
    if ((codePoint & 0x80) != 0x80) {
      _addChar(codePoint);
      return 1;
    }

    // Multi-byte sequence: the lead byte determines how many continuation
    // bytes follow and the smallest value that length may encode.
    int trailing;
    int smallest;
    if ((codePoint & 0xe0) == 0xc0) {
      // 110xxxxx
      codePoint &= 0x1F;
      trailing = 1;
      smallest = 0x80;
    } else if ((codePoint & 0xf0) == 0xe0) {
      // 1110xxxx
      codePoint &= 0x0F;
      trailing = 2;
      smallest = 0x800;
    } else if ((codePoint & 0xf8) == 0xf0) {
      // 11110xxx
      codePoint &= 0x07;
      trailing = 3;
      smallest = 0x10000;
    } else if ((codePoint & 0xfc) == 0xf8) {
      // 111110xx
      codePoint &= 0x03;
      trailing = 4;
      smallest = 0x200000;
    } else if ((codePoint & 0xfe) == 0xfc) {
      // 1111110x
      codePoint &= 0x01;
      trailing = 5;
      smallest = 0x4000000;
    } else {
      return -1;
    }

    for (int index = 0; index < trailing; index++) {
      int continuation = getNext();
      // Not enough input; the caller resets and retries with more data.
      if (continuation == null) return 0;
      // Must be a byte of the form 10xxxxxx.
      if ((continuation & 0xff) != continuation ||
          (continuation & 0xc0) != 0x80) {
        return -1;
      }
      codePoint = (codePoint << 6) | (continuation & 0x3f);
      // After the first continuation byte of a sequence with three or more,
      // flag values that already exceed the Unicode range.
      if (index == 0 && trailing >= 3 && (codePoint << 12) > 0x10FFFF) {
        _addChar(-1);
      }
    }

    // A value less than the minimum for its sequence length is invalid.
    _addChar(codePoint < smallest ? -1 : codePoint);
    return trailing + 1;
  }
}
169
170
/**
 * Shared plumbing for stream transformers that encode strings into byte
 * chunks.
 *
 * The same object serves as the [StreamTransformer] and as the [EventSink]
 * that receives the source strings. Subclasses supply the actual encoding in
 * [_processString].
 */
abstract class _StringEncoder
    implements StreamTransformer<String, List<int>>, EventSink<String> {
  /// Sink of the output stream; set by the callback installed in [bind].
  EventSink<List<int>> _outSink;

  /**
   * Returns a stream of encoded byte chunks for the strings of [stream].
   */
  Stream<List<int>> bind(Stream<String> stream) {
    // Remembers the output sink and hands this object back as the sink for
    // incoming events. Only one output sink is ever accepted.
    EventSink<String> attach(EventSink<List<int>> sink) {
      if (_outSink != null) {
        throw new StateError("String encoder already used");
      }
      _outSink = sink;
      return this;
    }

    return new Stream.eventTransformed(stream, attach);
  }

  /**
   * Encodes [data] and emits the result on the output sink.
   */
  void add(String data) {
    List<int> encoded = _processString(data);
    _outSink.add(encoded);
  }

  /**
   * Forwards [error] unchanged to the output sink.
   */
  void addError(Object error, [StackTrace stackTrace]) {
    _outSink.addError(error, stackTrace);
  }

  /**
   * Closes the output sink.
   */
  void close() {
    _outSink.close();
  }

  /**
   * Returns the encoded form of [string].
   */
  List<int> _processString(String string);
}
200
/**
 * StringTransformer that UTF-8 encodes a stream of strings.
 */
class Utf8EncoderTransformer extends _StringEncoder {
  /**
   * Returns the UTF-8 bytes for [string].
   *
   * The string's UTF-16 code units are first converted to code points by
   * `_utf16CodeUnitsToCodepoints` (defined elsewhere in this library;
   * presumably it combines surrogate pairs, to be confirmed there). Each code
   * point is then written as a lead byte followed by zero to three
   * continuation bytes.
   */
  List<int> _processString(String string) {
    List<int> bytes = <int>[];
    List<int> codepoints = _utf16CodeUnitsToCodepoints(string.codeUnits);
    for (int charCode in codepoints) {
      int additionalBytes;
      if (charCode <= 0x007F) {
        // 0xxxxxxx: the code point is its own single byte.
        additionalBytes = 0;
        bytes.add(charCode);
      } else if (charCode <= 0x07FF) {
        // 110xxxxx (xxxxx is top 5 bits).
        bytes.add(((charCode >> 6) & 0x1F) | 0xC0);
        additionalBytes = 1;
      } else if (charCode <= 0xFFFF) {
        // 1110xxxx (xxxx is top 4 bits)
        bytes.add(((charCode >> 12) & 0x0F) | 0xE0);
        additionalBytes = 2;
      } else {
        // 11110xxx (xxx is top 3 bits)
        bytes.add(((charCode >> 18) & 0x07) | 0xF0);
        additionalBytes = 3;
      }
      for (int remaining = additionalBytes; remaining > 0; remaining--) {
        // 10xxxxxx (xxxxxx is next 6 bits from the top).
        bytes.add(((charCode >> (6 * (remaining - 1))) & 0x3F) | 0x80);
      }
    }
    return bytes;
  }
}
OLDNEW
« no previous file with comments | « pkg/utf/lib/utf8.dart ('k') | pkg/utf/pubspec.yaml » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698