Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(157)

Side by Side Diff: html/lib/src/encoding_parser.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « html/lib/src/css_class_set.dart ('k') | html/lib/src/inputstream.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 library encoding_parser;
2
3 import 'constants.dart';
4 import 'inputstream.dart';
5
6 // TODO(jmesserly): I converted StopIteration to StateError("No more elements").
7 // Seems strange to throw this from outside of an iterator though.
/// String-like object with an associated position and various extra methods.
///
/// The position starts at `-1`, i.e. one before the first character. Members
/// that consult the position throw a [StateError] once it is greater than or
/// equal to the string length.
class EncodingBytes {
  final String _bytes;
  int _position = -1;

  EncodingBytes(this._bytes);

  /// Number of characters in the wrapped string.
  int get length => _bytes.length;

  /// Advances the position by one and returns the character found there.
  ///
  /// Throws a [StateError] if the new position is at or past the end and a
  /// [RangeError] if it is negative. The position is advanced even when this
  /// method throws.
  String next() {
    var p = _position = _position + 1;
    if (p >= length) {
      throw new StateError("No more elements");
    } else if (p < 0) {
      throw new RangeError(p);
    }
    return _bytes[p];
  }

  /// Moves the position back by one and returns the character found there.
  ///
  /// Throws a [StateError] if the position is at or past the end and a
  /// [RangeError] if it is negative; both checks happen before the move.
  String previous() {
    var p = _position;
    if (p >= length) {
      throw new StateError("No more elements");
    } else if (p < 0) {
      throw new RangeError(p);
    }
    _position = p = p - 1;
    return _bytes[p];
  }

  /// Sets the position to [value].
  ///
  /// Note that the guard looks at the *current* position, not at [value]:
  /// a [StateError] is thrown only if the parser has already run off the end.
  set position(int value) {
    if (_position >= length) {
      throw new StateError("No more elements");
    }
    _position = value;
  }

  /// The current position, reported as `0` while it is still negative.
  ///
  /// Throws a [StateError] if the position is at or past the end.
  int get position {
    if (_position >= length) {
      throw new StateError("No more elements");
    }
    if (_position >= 0) {
      return _position;
    } else {
      return 0;
    }
  }

  /// The character at [position].
  String get currentByte => _bytes[position];

  /// Skip past a list of characters. Defaults to skipping [isWhitespace].
  ///
  /// Returns the first character that is not skipped and leaves the position
  /// on it. If every remaining character is skipped, the position is moved to
  /// the end and `null` is returned.
  String skipChars([CharPreciate skipChars]) {
    if (skipChars == null) skipChars = isWhitespace;
    var p = position; // use property for the error-checking
    while (p < length) {
      var c = _bytes[p];
      if (!skipChars(c)) {
        _position = p;
        return c;
      }
      p += 1;
    }
    _position = p;
    return null;
  }

  /// Skips forward until a character accepted by [untilChars] is found.
  ///
  /// Returns that character and leaves the position on it. If no remaining
  /// character matches, the position is moved to the end and `null` is
  /// returned, mirroring [skipChars]; a following read of [position] then
  /// throws a [StateError].
  String skipUntil(CharPreciate untilChars) {
    var p = position; // use property for the error-checking
    while (p < length) {
      var c = _bytes[p];
      if (untilChars(c)) {
        _position = p;
        return c;
      }
      p += 1;
    }
    // Record that the data is exhausted. Previously the position was left
    // untouched here, so callers could not tell "matched at the current
    // position" from "ran off the end".
    _position = p;
    return null;
  }

  /// Look for a sequence of bytes at the current position. If the bytes
  /// are found return true and advance the position to the byte after the
  /// match. Otherwise return false and leave the position alone.
  bool matchBytes(String bytes) {
    var p = position;
    if (_bytes.length < p + bytes.length) {
      return false;
    }
    var data = _bytes.substring(p, p + bytes.length);
    if (data == bytes) {
      position += bytes.length;
      return true;
    }
    return false;
  }

  /// Look for the next sequence of bytes matching a given sequence. If
  /// a match is found advance the position to the last byte of the match
  /// and return true; otherwise throw a [StateError].
  bool jumpTo(String bytes) {
    var newPosition = _bytes.indexOf(bytes, position);
    if (newPosition >= 0) {
      _position = newPosition + bytes.length - 1;
      return true;
    } else {
      throw new StateError("No more elements");
    }
  }

  /// Returns the characters from [start] up to, but not including, [end].
  ///
  /// [end] defaults to [length]; a negative [end] counts back from the end of
  /// the string.
  String slice(int start, [int end]) {
    if (end == null) end = length;
    if (end < 0) end += length;
    // String.substring takes an end *index*, not a length.
    return _bytes.substring(start, end);
  }
}
123
/// Mini parser for detecting character encoding from meta elements.
///
/// The data is scanned for `<meta` tags. The first `charset` attribute, or
/// charset named inside a `content` attribute, for which `codecName` returns a
/// non-null value becomes [encoding].
class EncodingParser {
  final EncodingBytes data;
  String encoding;

  /// [bytes] - the data to work on for encoding detection.
  EncodingParser(List<int> bytes)
      // Note: this is intentionally interpreting bytes as codepoints.
      : data = new EncodingBytes(new String.fromCharCodes(bytes).toLowerCase());

  /// Scans [data] and returns the detected encoding, or `null` if the data is
  /// exhausted before one is found.
  String getEncoding() {
    // Order matters: longer prefixes must be tried before the shorter
    // prefixes they start with ("<!--" before "<!", "<meta" before "<").
    final methodDispatch = [
      ["<!--", handleComment],
      ["<meta", handleMeta],
      ["</", handlePossibleEndTag],
      ["<!", handleOther],
      ["<?", handleOther],
      ["<", handlePossibleStartTag]
    ];

    try {
      for (;;) {
        for (var dispatch in methodDispatch) {
          if (data.matchBytes(dispatch[0])) {
            var keepParsing = dispatch[1]();
            if (keepParsing) break;

            // We found an encoding. Stop.
            return encoding;
          }
        }
        data.position += 1;
      }
    } on StateError catch (_) {
      // Running off the end of the data surfaces as a StateError.
      // Catch this here to match behavior of Python's StopIteration
      // TODO(jmesserly): refactor to not use exceptions
    }
    return encoding;
  }

  /// Skip over comments.
  bool handleComment() => data.jumpTo("-->");

  /// Handles the bytes following `<meta`.
  ///
  /// Returns `false` once [encoding] has been set, and `true` when scanning
  /// should continue.
  bool handleMeta() {
    if (!isWhitespace(data.currentByte)) {
      // "<meta" is not followed by a space, so this is not a meta element;
      // just keep going.
      return true;
    }
    // We have a valid meta element we want to search for attributes
    while (true) {
      // Try to find the next attribute after the current position
      var attr = getAttribute();
      if (attr == null) return true;

      if (attr[0] == "charset") {
        var tentativeEncoding = attr[1];
        var codec = codecName(tentativeEncoding);
        if (codec != null) {
          encoding = codec;
          return false;
        }
      } else if (attr[0] == "content") {
        var contentParser = new ContentAttrParser(new EncodingBytes(attr[1]));
        var tentativeEncoding = contentParser.parse();
        var codec = codecName(tentativeEncoding);
        if (codec != null) {
          encoding = codec;
          return false;
        }
      }
    }
    return true; // unreachable
  }

  /// Handles the bytes following `<`. Always returns `true`.
  bool handlePossibleStartTag() => handlePossibleTag(false);

  /// Handles the bytes following `</`. Always returns `true`.
  bool handlePossibleEndTag() {
    data.next();
    return handlePossibleTag(true);
  }

  /// Skips over a start or end tag, including its attributes.
  ///
  /// Always returns `true` so that [getEncoding] keeps scanning.
  bool handlePossibleTag(bool endTag) {
    if (!isLetter(data.currentByte)) {
      // If the next byte is not an ascii letter either ignore this
      // fragment (possible start tag case) or treat it according to
      // handleOther
      if (endTag) {
        data.previous();
        handleOther();
      }
      return true;
    }

    var c = data.skipUntil(isSpaceOrAngleBracket);
    if (c == "<") {
      // return to the first step in the overall "two step" algorithm
      // reprocessing the < byte
      data.previous();
    } else {
      // Read all attributes
      var attr = getAttribute();
      while (attr != null) {
        attr = getAttribute();
      }
    }
    return true;
  }

  /// Skips to the next `>`.
  bool handleOther() => data.jumpTo(">");

  /// Return a name,value pair for the next attribute in the stream,
  /// if one is found, or null
  List<String> getAttribute() {
    // Step 1 (skip chars)
    var c = data.skipChars((x) => x == "/" || isWhitespace(x));
    // Step 2
    if (c == ">" || c == null) {
      return null;
    }
    // Step 3
    var attrName = [];
    var attrValue = [];
    // Step 4 attribute name
    while (true) {
      if (c == null) {
        return null;
      } else if (c == "=" && attrName.length > 0) {
        break;
      } else if (isWhitespace(c)) {
        // Step 6!
        // skipChars leaves the position on the first non-whitespace byte and
        // returns it; that is the byte Step 7 has to inspect. Advancing once
        // more here would step over the "=" in `name = value`.
        c = data.skipChars();
        break;
      } else if (c == "/" || c == ">") {
        return [attrName.join(), ""];
      } else if (isLetter(c)) {
        attrName.add(c.toLowerCase());
      } else {
        attrName.add(c);
      }
      // Step 5
      c = data.next();
    }
    // Step 7
    if (c != "=") {
      data.previous();
      return [attrName.join(), ""];
    }
    // Step 8
    data.next();
    // Step 9
    c = data.skipChars();
    // Step 10
    if (c == "'" || c == '"') {
      // 10.1
      var quoteChar = c;
      while (true) {
        // 10.2
        c = data.next();
        if (c == quoteChar) {
          // 10.3
          data.next();
          return [attrName.join(), attrValue.join()];
        } else if (isLetter(c)) {
          // 10.4
          attrValue.add(c.toLowerCase());
        } else {
          // 10.5
          attrValue.add(c);
        }
      }
    } else if (c == ">") {
      return [attrName.join(), ""];
    } else if (c == null) {
      return null;
    } else if (isLetter(c)) {
      attrValue.add(c.toLowerCase());
    } else {
      attrValue.add(c);
    }
    // Step 11
    while (true) {
      c = data.next();
      if (isSpaceOrAngleBracket(c)) {
        return [attrName.join(), attrValue.join()];
      } else if (c == null) {
        return null;
      } else if (isLetter(c)) {
        attrValue.add(c.toLowerCase());
      } else {
        attrValue.add(c);
      }
    }
    return null; // unreachable
  }
}
320
/// Extracts the charset named inside the value of a `content` attribute.
class ContentAttrParser {
  final EncodingBytes data;

  ContentAttrParser(this.data);

  /// Returns the text that follows `charset=` in [data], or `null` when no
  /// such value can be read.
  ///
  /// A value wrapped in single or double quotes is returned without the
  /// quotes. Any [StateError] raised while scanning results in `null`.
  String parse() {
    try {
      // Locate the "charset" keyword; jumpTo throws if it is absent.
      data.jumpTo("charset");
      data.position += 1;
      data.skipChars();
      if (data.currentByte != "=") {
        // No "=" after the keyword: give up on this attribute value.
        return null;
      }
      data.position += 1;
      data.skipChars();

      var opener = data.currentByte;
      if (opener != '"' && opener != "'") {
        // Unquoted value: runs up to the next whitespace character.
        var unquotedStart = data.position;
        try {
          data.skipUntil(isWhitespace);
          return data.slice(unquotedStart, data.position);
        } on StateError catch (_) {
          // Return the whole remaining value
          return data.slice(unquotedStart);
        }
      }

      // Quoted value: take everything up to the matching quote mark.
      data.position += 1;
      var quotedStart = data.position;
      if (!data.jumpTo(opener)) {
        return null;
      }
      return data.slice(quotedStart, data.position);
    } on StateError catch (_) {
      return null;
    }
  }
}
365
/// Returns true when [char] is `>`, `<`, or accepted by [isWhitespace].
bool isSpaceOrAngleBracket(String char) {
  if (char == ">" || char == "<") return true;
  return isWhitespace(char);
}
369
/// Signature of a test applied to a single-character string [char]; the
/// scanning helpers in this file call it once per character and act on the
/// boolean result.
typedef bool CharPreciate(String char);
OLDNEW
« no previous file with comments | « html/lib/src/css_class_set.dart ('k') | html/lib/src/inputstream.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698