OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of utf; | 5 library utf.utf_stream; |
| 6 |
| 7 import 'dart:async'; |
| 8 |
| 9 import 'constants.dart'; |
| 10 import 'util.dart'; |
6 | 11 |
7 // TODO(floitsch): make this transformer reusable. | 12 // TODO(floitsch): make this transformer reusable. |
8 abstract class _StringDecoder | 13 abstract class _StringDecoder |
9 implements StreamTransformer<List<int>, String>, EventSink<List<int>> { | 14 implements StreamTransformer<List<int>, String>, EventSink<List<int>> { |
10 List<int> _carry; | 15 List<int> _carry; |
11 List<int> _buffer; | 16 List<int> _buffer; |
12 int _replacementChar; | 17 int _replacementChar; |
13 | 18 |
14 EventSink<String> _outSink; | 19 EventSink<String> _outSink; |
15 | 20 |
16 _StringDecoder(int this._replacementChar); | 21 _StringDecoder(int this._replacementChar); |
17 | 22 |
18 Stream<String> bind(Stream<List<int>> stream) { | 23 Stream<String> bind(Stream<List<int>> stream) { |
19 return new Stream.eventTransformed( | 24 return new Stream<String>.eventTransformed(stream, |
20 stream, | |
21 (EventSink<String> sink) { | 25 (EventSink<String> sink) { |
22 if (_outSink != null) { | 26 if (_outSink != null) { |
23 throw new StateError("String decoder already used"); | 27 throw new StateError("String decoder already used"); |
24 } | 28 } |
25 _outSink = sink; | 29 _outSink = sink; |
26 return this; | 30 return this; |
27 }); | 31 }); |
28 } | 32 } |
29 | 33 |
30 void add(List<int> bytes) { | 34 void add(List<int> bytes) { |
31 try { | 35 try { |
32 _buffer = <int>[]; | 36 _buffer = <int>[]; |
33 List<int> carry = _carry; | 37 List<int> carry = _carry; |
34 _carry = null; | 38 _carry = null; |
35 int pos = 0; | 39 int pos = 0; |
36 int available = bytes.length; | 40 int available = bytes.length; |
37 // If we have carry-over data, start from negative index, indicating carry | 41 // If we have carry-over data, start from negative index, indicating carry |
(...skipping 72 matching lines...)
110 _buffer.add(char); | 114 _buffer.add(char); |
111 } | 115 } |
112 } | 116 } |
113 | 117 |
114 /** | 118 /** |
115 * StringTransformer that decodes a stream of UTF-8 encoded bytes. | 119 * StringTransformer that decodes a stream of UTF-8 encoded bytes. |
116 */ | 120 */ |
117 class Utf8DecoderTransformer extends _StringDecoder { | 121 class Utf8DecoderTransformer extends _StringDecoder { |
118 Utf8DecoderTransformer( | 122 Utf8DecoderTransformer( |
119 [int replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) | 123 [int replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) |
120 : super(replacementChar); | 124 : super(replacementChar); |
121 | 125 |
122 int _processBytes(int getNext()) { | 126 int _processBytes(int getNext()) { |
123 int value = getNext(); | 127 int value = getNext(); |
124 if ((value & 0xFF) != value) return -1; // Not a byte. | 128 if ((value & 0xFF) != value) return -1; // Not a byte. |
125 if ((value & 0x80) == 0x80) { | 129 if ((value & 0x80) == 0x80) { |
126 int additionalBytes; | 130 int additionalBytes; |
127 int min; | 131 int min; |
128 if ((value & 0xe0) == 0xc0) { // 110xxxxx | 132 if ((value & 0xe0) == 0xc0) { |
| 133 // 110xxxxx |
129 value = value & 0x1F; | 134 value = value & 0x1F; |
130 additionalBytes = 1; | 135 additionalBytes = 1; |
131 min = 0x80; | 136 min = 0x80; |
132 } else if ((value & 0xf0) == 0xe0) { // 1110xxxx | 137 } else if ((value & 0xf0) == 0xe0) { |
| 138 // 1110xxxx |
133 value = value & 0x0F; | 139 value = value & 0x0F; |
134 additionalBytes = 2; | 140 additionalBytes = 2; |
135 min = 0x800; | 141 min = 0x800; |
136 } else if ((value & 0xf8) == 0xf0) { // 11110xxx | 142 } else if ((value & 0xf8) == 0xf0) { |
| 143 // 11110xxx |
137 value = value & 0x07; | 144 value = value & 0x07; |
138 additionalBytes = 3; | 145 additionalBytes = 3; |
139 min = 0x10000; | 146 min = 0x10000; |
140 } else if ((value & 0xfc) == 0xf8) { // 111110xx | 147 } else if ((value & 0xfc) == 0xf8) { |
| 148 // 111110xx |
141 value = value & 0x03; | 149 value = value & 0x03; |
142 additionalBytes = 4; | 150 additionalBytes = 4; |
143 min = 0x200000; | 151 min = 0x200000; |
144 } else if ((value & 0xfe) == 0xfc) { // 1111110x | 152 } else if ((value & 0xfe) == 0xfc) { |
| 153 // 1111110x |
145 value = value & 0x01; | 154 value = value & 0x01; |
146 additionalBytes = 5; | 155 additionalBytes = 5; |
147 min = 0x4000000; | 156 min = 0x4000000; |
148 } else { | 157 } else { |
149 return -1; | 158 return -1; |
150 } | 159 } |
151 for (int i = 0; i < additionalBytes; i++) { | 160 for (int i = 0; i < additionalBytes; i++) { |
152 int next = getNext(); | 161 int next = getNext(); |
153 if (next == null) return 0; // Not enough chars, reset. | 162 if (next == null) return 0; // Not enough chars, reset. |
154 if ((next & 0xc0) != 0x80 || (next & 0xff) != next) return -1; | 163 if ((next & 0xc0) != 0x80 || (next & 0xff) != next) return -1; |
155 value = value << 6 | (next & 0x3f); | 164 value = value << 6 | (next & 0x3f); |
156 if (additionalBytes >= 3 && i == 0 && value << 12 > 0x10FFFF) { | 165 if (additionalBytes >= 3 && i == 0 && value << 12 > 0x10FFFF) { |
157 _addChar(-1); | 166 _addChar(-1); |
158 } | 167 } |
159 } | 168 } |
160 // Invalid charCode if less than minimum expected. | 169 // Invalid charCode if less than minimum expected. |
161 if (value < min) value = -1; | 170 if (value < min) value = -1; |
162 _addChar(value); | 171 _addChar(value); |
163 return 1 + additionalBytes; | 172 return 1 + additionalBytes; |
164 } | 173 } |
165 _addChar(value); | 174 _addChar(value); |
166 return 1; | 175 return 1; |
167 } | 176 } |
168 } | 177 } |
169 | 178 |
170 | |
171 abstract class _StringEncoder | 179 abstract class _StringEncoder |
172 implements StreamTransformer<String, List<int>>, EventSink<String> { | 180 implements StreamTransformer<String, List<int>>, EventSink<String> { |
173 | |
174 EventSink<List<int>> _outSink; | 181 EventSink<List<int>> _outSink; |
175 | 182 |
176 Stream<List<int>> bind(Stream<String> stream) { | 183 Stream<List<int>> bind(Stream<String> stream) { |
177 return new Stream.eventTransformed( | 184 return new Stream<List<int>>.eventTransformed(stream, |
178 stream, | |
179 (EventSink<List<int>> sink) { | 185 (EventSink<List<int>> sink) { |
180 if (_outSink != null) { | 186 if (_outSink != null) { |
181 throw new StateError("String encoder already used"); | 187 throw new StateError("String encoder already used"); |
182 } | 188 } |
183 _outSink = sink; | 189 _outSink = sink; |
184 return this; | 190 return this; |
185 }); | 191 }); |
186 } | 192 } |
187 | 193 |
188 void add(String data) { | 194 void add(String data) { |
189 _outSink.add(_processString(data)); | 195 _outSink.add(_processString(data)); |
190 } | 196 } |
191 | 197 |
192 void addError(error, [StackTrace stackTrace]) { | 198 void addError(error, [StackTrace stackTrace]) { |
193 _outSink.addError(error, stackTrace); | 199 _outSink.addError(error, stackTrace); |
194 } | 200 } |
195 | 201 |
196 void close() { _outSink.close(); } | 202 void close() { |
| 203 _outSink.close(); |
| 204 } |
197 | 205 |
198 List<int> _processString(String string); | 206 List<int> _processString(String string); |
199 } | 207 } |
200 | 208 |
201 /** | 209 /** |
202 * StringTransformer that UTF-8 encodes a stream of strings. | 210 * StringTransformer that UTF-8 encodes a stream of strings. |
203 */ | 211 */ |
204 class Utf8EncoderTransformer extends _StringEncoder { | 212 class Utf8EncoderTransformer extends _StringEncoder { |
205 List<int> _processString(String string) { | 213 List<int> _processString(String string) { |
206 var bytes = <int>[]; | 214 var bytes = <int>[]; |
207 int pos = 0; | |
208 List<int> codepoints = utf16CodeUnitsToCodepoints(string.codeUnits); | 215 List<int> codepoints = utf16CodeUnitsToCodepoints(string.codeUnits); |
209 int length = codepoints.length; | 216 int length = codepoints.length; |
210 for (int i = 0; i < length; i++) { | 217 for (int i = 0; i < length; i++) { |
211 int additionalBytes; | 218 int additionalBytes; |
212 int charCode = codepoints[i]; | 219 int charCode = codepoints[i]; |
213 if (charCode <= 0x007F) { | 220 if (charCode <= 0x007F) { |
214 additionalBytes = 0; | 221 additionalBytes = 0; |
215 bytes.add(charCode); | 222 bytes.add(charCode); |
216 } else if (charCode <= 0x07FF) { | 223 } else if (charCode <= 0x07FF) { |
217 // 110xxxxx (xxxxx is top 5 bits). | 224 // 110xxxxx (xxxxx is top 5 bits). |
218 bytes.add(((charCode >> 6) & 0x1F) | 0xC0); | 225 bytes.add(((charCode >> 6) & 0x1F) | 0xC0); |
219 additionalBytes = 1; | 226 additionalBytes = 1; |
220 } else if (charCode <= 0xFFFF) { | 227 } else if (charCode <= 0xFFFF) { |
221 // 1110xxxx (xxxx is top 4 bits) | 228 // 1110xxxx (xxxx is top 4 bits) |
222 bytes.add(((charCode >> 12) & 0x0F)| 0xE0); | 229 bytes.add(((charCode >> 12) & 0x0F) | 0xE0); |
223 additionalBytes = 2; | 230 additionalBytes = 2; |
224 } else { | 231 } else { |
225 // 11110xxx (xxx is top 3 bits) | 232 // 11110xxx (xxx is top 3 bits) |
226 bytes.add(((charCode >> 18) & 0x07) | 0xF0); | 233 bytes.add(((charCode >> 18) & 0x07) | 0xF0); |
227 additionalBytes = 3; | 234 additionalBytes = 3; |
228 } | 235 } |
229 for (int i = additionalBytes; i > 0; i--) { | 236 for (int i = additionalBytes; i > 0; i--) { |
230 // 10xxxxxx (xxxxxx is next 6 bits from the top). | 237 // 10xxxxxx (xxxxxx is next 6 bits from the top). |
231 bytes.add(((charCode >> (6 * (i - 1))) & 0x3F) | 0x80); | 238 bytes.add(((charCode >> (6 * (i - 1))) & 0x3F) | 0x80); |
232 } | 239 } |
233 pos += additionalBytes + 1; | |
234 } | 240 } |
235 return bytes; | 241 return bytes; |
236 } | 242 } |
237 } | 243 } |
OLD | NEW |