OLD | NEW |
| (Empty) |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 part of utf; | |
6 | |
// TODO(floitsch): make this transformer reusable.
/**
 * Base class for stream transformers that decode chunks of code units
 * (`List<int>`) into strings.
 *
 * The instance acts both as the [StreamTransformer] and as the [EventSink]
 * receiving the source events, so it can be bound to a single stream only;
 * a second [bind] subscription fails with a [StateError].
 *
 * Subclasses implement [_processBytes] to decode one character at a time and
 * report every decoded code point through [_addChar].
 */
abstract class _StringDecoder
    implements StreamTransformer<List<int>, String>, EventSink<List<int>> {
  // Bytes of an incomplete character left over from the previous chunk, or
  // null when there is nothing pending.
  List<int> _carry;
  // Code points decoded from the chunk currently being handled by [add].
  List<int> _buffer;
  // Code point emitted in place of invalid input. When null, invalid input is
  // reported as an ArgumentError instead.
  int _replacementChar;

  // Destination of the decoded strings; set when the bound stream is
  // listened to.
  EventSink<String> _outSink;

  _StringDecoder(int this._replacementChar);

  /**
   * Returns a stream of decoded strings for [stream].
   *
   * Throws a [StateError] (when the result is listened to) if this decoder
   * has already been attached to an output sink.
   */
  Stream<String> bind(Stream<List<int>> stream) {
    return new Stream.eventTransformed(
        stream,
        (EventSink<String> sink) {
          if (_outSink != null) {
            throw new StateError("String decoder already used");
          }
          _outSink = sink;
          return this;
        });
  }

  /**
   * Decodes [bytes], prefixed by any bytes carried over from the previous
   * chunk, and emits the resulting string (if non-empty) on the output sink.
   *
   * An incomplete trailing character is stored in [_carry] for the next
   * call. Any exception raised while decoding is forwarded to the output
   * sink as an error event.
   */
  void add(List<int> bytes) {
    try {
      _buffer = <int>[];
      List<int> carry = _carry;
      _carry = null;
      int pos = 0;
      int available = bytes.length;
      // Number of entries in _buffer that belong to fully processed
      // characters; anything beyond it is partial output to be discarded.
      int goodChars = 0;
      // If we have carry-over data, start from a negative index: positions
      // -carry.length..-1 address the carry, 0..available-1 address bytes.
      if (carry != null) pos = -carry.length;
      while (pos < available) {
        int currentPos = pos;
        // Hands out the next input unit (from the carry first, then from
        // bytes) or null when the input of this call is exhausted.
        int getNext() {
          if (pos < 0) {
            return carry[pos++ + carry.length];
          } else if (pos < available) {
            return bytes[pos++];
          }
          return null;
        }
        int consumed = _processBytes(getNext);
        if (consumed > 0) {
          // A complete character was decoded; commit what it produced.
          goodChars = _buffer.length;
        } else if (consumed == 0) {
          // Ran out of input in the middle of a character: drop its partial
          // output and keep its bytes for the next chunk.
          _buffer.length = goodChars;
          if (currentPos < 0) {
            // The character started inside the carry, so keep the carry and
            // all of the new bytes.
            _carry = <int>[];
            _carry.addAll(carry);
            _carry.addAll(bytes);
          } else {
            _carry = bytes.sublist(currentPos);
          }
          break;
        } else {
          // Invalid byte at position pos - 1: discard partial output and
          // emit the replacement character (or throw, see _addChar).
          _buffer.length = goodChars;
          _addChar(-1);
          goodChars = _buffer.length;
        }
      }
      if (_buffer.length > 0) {
        // Every branch above leaves _buffer.length equal to goodChars, so the
        // whole buffer can be emitted as is.
        _outSink.add(new String.fromCharCodes(_buffer));
      }
      _buffer = null;
    } catch (e, stackTrace) {
      _outSink.addError(e, stackTrace);
    }
  }

  /** Forwards a source error unchanged to the output sink. */
  void addError(Object error, [StackTrace stackTrace]) {
    _outSink.addError(error, stackTrace);
  }

  /**
   * Flushes pending carry-over bytes and closes the output sink.
   *
   * Bytes still waiting for the rest of their character are truncated input:
   * each of them is emitted as the replacement character, or, when no
   * replacement character is configured, an [ArgumentError] is delivered as
   * an error event. The output sink is closed in every case, so listeners
   * always receive their done event.
   */
  void close() {
    if (_carry != null) {
      if (_replacementChar != null) {
        _outSink.add(new String.fromCharCodes(
            new List.filled(_carry.length, _replacementChar)));
      } else {
        // Report through the sink, like add() does, instead of throwing:
        // throwing here would skip _outSink.close() below.
        _outSink.addError(new ArgumentError('Invalid codepoint'));
      }
    }
    _outSink.close();
  }

  /**
   * Decodes a single character from the units supplied by [getNext], which
   * returns null once the available input is exhausted.
   *
   * As interpreted by [add]: a positive result means a character was
   * decoded, zero means the input ended in the middle of a character, and a
   * negative result means the last unit read was invalid.
   */
  int _processBytes(int getNext());

  /**
   * Appends [char] to the output buffer.
   *
   * Negative values, surrogates (0xD800..0xDFFF) and values above 0x10FFFF
   * are replaced by the replacement character; if none is configured an
   * [ArgumentError] is thrown.
   */
  void _addChar(int char) {
    void error() {
      if (_replacementChar != null) {
        char = _replacementChar;
      } else {
        throw new ArgumentError('Invalid codepoint');
      }
    }
    if (char < 0) error();
    if (char >= 0xD800 && char <= 0xDFFF) error();
    if (char > 0x10FFFF) error();
    _buffer.add(char);
  }
}
113 | |
/**
 * Stream transformer that turns chunks of UTF-8 encoded bytes into strings.
 *
 * Invalid input is substituted by [replacementChar], which defaults to
 * [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT].
 */
class Utf8DecoderTransformer extends _StringDecoder {
  Utf8DecoderTransformer(
      [int replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
      : super(replacementChar);

  /**
   * Decodes one UTF-8 sequence read through [getNext].
   *
   * Returns the number of bytes consumed, 0 if the input ran out before the
   * sequence was complete, or -1 if the last byte read was invalid.
   */
  int _processBytes(int getNext()) {
    int codePoint = getNext();
    // Values outside 0..255 are not bytes at all.
    if ((codePoint & 0xFF) != codePoint) return -1;

    // High bit clear: a single-byte character.
    if ((codePoint & 0x80) == 0) {
      _addChar(codePoint);
      return 1;
    }

    // Lead byte of a multi-byte sequence: work out how many continuation
    // bytes follow and the smallest value that legitimately needs this
    // sequence length.
    int trailing;
    int lowerBound;
    if ((codePoint & 0xe0) == 0xc0) {
      // 110xxxxx
      trailing = 1;
      lowerBound = 0x80;
      codePoint &= 0x1F;
    } else if ((codePoint & 0xf0) == 0xe0) {
      // 1110xxxx
      trailing = 2;
      lowerBound = 0x800;
      codePoint &= 0x0F;
    } else if ((codePoint & 0xf8) == 0xf0) {
      // 11110xxx
      trailing = 3;
      lowerBound = 0x10000;
      codePoint &= 0x07;
    } else if ((codePoint & 0xfc) == 0xf8) {
      // 111110xx
      trailing = 4;
      lowerBound = 0x200000;
      codePoint &= 0x03;
    } else if ((codePoint & 0xfe) == 0xfc) {
      // 1111110x
      trailing = 5;
      lowerBound = 0x4000000;
      codePoint &= 0x01;
    } else {
      // A stray continuation byte, or 0xFE/0xFF.
      return -1;
    }

    int index = 0;
    while (index < trailing) {
      int continuation = getNext();
      // Input exhausted in the middle of the sequence; caller will retry.
      if (continuation == null) return 0;
      bool isByte = (continuation & 0xff) == continuation;
      bool hasMarker = (continuation & 0xc0) == 0x80; // 10xxxxxx
      if (!hasMarker || !isByte) return -1;
      codePoint = (codePoint << 6) | (continuation & 0x3f);
      // After the first continuation byte of a 4+ byte sequence, flag values
      // that are already bound to exceed 0x10FFFF.
      if (index == 0 && trailing >= 3 && (codePoint << 12) > 0x10FFFF) {
        _addChar(-1);
      }
      index++;
    }

    // A value below the minimum for its sequence length is an overlong
    // encoding and therefore invalid.
    _addChar(codePoint < lowerBound ? -1 : codePoint);
    return trailing + 1;
  }
}
169 | |
170 | |
/**
 * Shared plumbing for stream transformers that encode strings into chunks
 * of code units.
 *
 * The instance doubles as the [EventSink] for the source stream, so it can
 * be attached to one output sink only. Subclasses provide the encoding in
 * [_processString].
 */
abstract class _StringEncoder
    implements StreamTransformer<String, List<int>>, EventSink<String> {

  // Receiver of the encoded chunks; assigned when the bound stream is
  // listened to.
  EventSink<List<int>> _outSink;

  /**
   * Returns a stream carrying the encoded form of every string in [stream].
   */
  Stream<List<int>> bind(Stream<String> stream) {
    // Claims the output sink for this encoder, refusing a second use.
    EventSink<String> attach(EventSink<List<int>> target) {
      if (_outSink != null) {
        throw new StateError("String encoder already used");
      }
      _outSink = target;
      return this;
    }
    return new Stream.eventTransformed(stream, attach);
  }

  /** Encodes [data] and emits the result as one chunk. */
  void add(String data) {
    List<int> encoded = _processString(data);
    _outSink.add(encoded);
  }

  /** Passes a source error straight through to the output sink. */
  void addError(Object error, [StackTrace stackTrace]) {
    _outSink.addError(error, stackTrace);
  }

  /** Closes the output sink. */
  void close() {
    _outSink.close();
  }

  /** Returns the encoded code units for [string]. */
  List<int> _processString(String string);
}
200 | |
/**
 * StringTransformer that UTF-8 encodes a stream of strings.
 */
class Utf8EncoderTransformer extends _StringEncoder {
  /**
   * Returns the UTF-8 bytes for [string].
   *
   * The string's UTF-16 code units are first converted to code points by
   * [_utf16CodeUnitsToCodepoints]; each code point is then written as a lead
   * byte followed by zero to three continuation bytes.
   */
  List<int> _processString(String string) {
    // Typed literal so the returned list really is a List<int>.
    var bytes = <int>[];
    List<int> codepoints = _utf16CodeUnitsToCodepoints(string.codeUnits);
    int length = codepoints.length;
    for (int i = 0; i < length; i++) {
      int additionalBytes;
      int charCode = codepoints[i];
      if (charCode <= 0x007F) {
        // Single byte, emitted as is.
        additionalBytes = 0;
        bytes.add(charCode);
      } else if (charCode <= 0x07FF) {
        // 110xxxxx (xxxxx is top 5 bits).
        bytes.add(((charCode >> 6) & 0x1F) | 0xC0);
        additionalBytes = 1;
      } else if (charCode <= 0xFFFF) {
        // 1110xxxx (xxxx is top 4 bits)
        bytes.add(((charCode >> 12) & 0x0F) | 0xE0);
        additionalBytes = 2;
      } else {
        // 11110xxx (xxx is top 3 bits)
        bytes.add(((charCode >> 18) & 0x07) | 0xF0);
        additionalBytes = 3;
      }
      // Emit the continuation bytes, most significant 6-bit group first.
      for (int remaining = additionalBytes; remaining > 0; remaining--) {
        // 10xxxxxx (xxxxxx is next 6 bits from the top).
        bytes.add(((charCode >> (6 * (remaining - 1))) & 0x3F) | 0x80);
      }
    }
    return bytes;
  }
}
OLD | NEW |