| OLD | NEW |
| 1 library encoding_parser; | 1 library encoding_parser; |
| 2 | 2 |
| 3 import 'dart:collection'; | 3 import 'dart:collection'; |
| 4 import 'constants.dart'; | 4 import 'constants.dart'; |
| 5 import 'inputstream.dart'; | 5 import 'inputstream.dart'; |
| 6 | 6 |
| 7 // TODO(jmesserly): I converted StopIteration to StateError("No more elements"). | 7 // TODO(jmesserly): I converted StopIteration to StateError("No more elements"). |
| 8 // Seems strange to throw this from outside of an iterator though. | 8 // Seems strange to throw this from outside of an iterator though. |
| 9 /** | 9 /// String-like object with an associated position and various extra methods |
| 10 * String-like object with an associated position and various extra methods | 10 /// If the position is ever greater than the string length then an exception is |
| 11 * If the position is ever greater than the string length then an exception is | 11 /// raised. |
| 12 * raised. | |
| 13 */ | |
| 14 class EncodingBytes extends IterableBase<String> { | 12 class EncodingBytes extends IterableBase<String> { |
// The text being scanned.
// NOTE(review): EncodingParser builds this with String.fromCharCodes on raw
// bytes, so presumably each character stands for one input byte - confirm.
final String _bytes;
// Raw cursor index. Starts at -1; the `position` getter reports 0 while this
// value is negative.
int _position = -1;

/// Wraps [_bytes] so it can be scanned with a movable position.
EncodingBytes(this._bytes);
| 19 | 17 |
/// Iterates over the underlying data as single-character strings.
Iterator<String> get iterator {
  final characters = _bytes.split('');
  return characters.iterator;
}
| 21 | 19 |
/// The number of characters in the underlying data.
int get length {
  return _bytes.length;
}
| 23 | 21 |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 55 } | 53 } |
| 56 if (_position >= 0) { | 54 if (_position >= 0) { |
| 57 return _position; | 55 return _position; |
| 58 } else { | 56 } else { |
| 59 return 0; | 57 return 0; |
| 60 } | 58 } |
| 61 } | 59 } |
| 62 | 60 |
/// The character found at the current [position].
String get currentByte {
  // Index through the `position` getter rather than the raw `_position` field.
  return _bytes[position];
}
| 64 | 62 |
/// Skips forward over every character accepted by [skipChars], which
/// defaults to [isWhitespace].
///
/// Scanning starts at the current [position]. Returns the first character
/// the predicate rejects and leaves the position on it. If no such character
/// is found, the position is set to the index where scanning stopped and
/// null is returned.
String skipChars([CharPreciate skipChars]) {
  if (skipChars == null) skipChars = isWhitespace;
  // Start from the `position` getter (not `_position`) for its error checking.
  var index = position;
  for (; index < length; index++) {
    final ch = _bytes[index];
    if (!skipChars(ch)) {
      _position = index;
      return ch;
    }
  }
  _position = index;
  return null;
}
| 80 | 78 |
/// Scans forward from the current [position] for the first character
/// accepted by [untilChars].
///
/// Returns that character and leaves the position on it. Returns null when
/// no character matches; in that case the stored position is not modified
/// (unlike [skipChars], which moves the position when it runs off the end).
String skipUntil(CharPreciate untilChars) {
  for (var index = position; index < length; index++) {
    final ch = _bytes[index];
    if (untilChars(ch)) {
      _position = index;
      return ch;
    }
  }
  return null;
}
| 93 | 91 |
/// Tests whether the data at the current [position] begins with [bytes].
///
/// On a match, returns true and moves the position to the character just
/// after the match. Otherwise returns false and leaves the position alone.
bool matchBytes(String bytes) {
  final start = position;
  final end = start + bytes.length;
  // Not enough data left to hold the whole sequence.
  if (end > _bytes.length) return false;
  if (_bytes.substring(start, end) != bytes) return false;
  position += bytes.length;
  return true;
}
| 111 | 107 |
/// Searches from the current [position] for the next occurrence of [bytes].
///
/// When found, moves the position to the last character of the match and
/// returns true. Throws a [StateError] when there is no further occurrence.
bool jumpTo(String bytes) {
  final matchStart = _bytes.indexOf(bytes, position);
  if (matchStart < 0) {
    throw new StateError("No more elements");
  }
  _position = matchStart + bytes.length - 1;
  return true;
}
| 125 | 119 |
/// Returns the characters from index [start] up to, but not including,
/// index [end].
///
/// [end] defaults to [length]. A negative [end] is counted back from the end
/// of the data.
String slice(int start, [int end]) {
  if (end == null) end = length;
  if (end < 0) end += length;
  // String.substring takes an end index, not a length, so `end` is passed
  // as-is; passing `end - start` gave wrong results whenever start != 0.
  return _bytes.substring(start, end);
}
| 131 } | 125 } |
| 132 | 126 |
| 133 /** Mini parser for detecting character encoding from meta elements. */ | 127 /// Mini parser for detecting character encoding from meta elements. |
| 134 class EncodingParser { | 128 class EncodingParser { |
/// The lower-cased input that the handler methods scan through.
final EncodingBytes data;
/// The value returned by [getEncoding].
String encoding;

/// [bytes] - the data to work on for encoding detection.
///
/// Each element of [bytes] is turned into one character and the resulting
/// string is lower-cased before being wrapped in [EncodingBytes].
EncodingParser(List<int> bytes)
    // Note: this is intentionally interpreting bytes as codepoints.
    : data = new EncodingBytes(new String.fromCharCodes(bytes).toLowerCase());
| 142 | 136 |
| 143 String getEncoding() { | 137 String getEncoding() { |
| 144 final methodDispatch = [ | 138 final methodDispatch = [ |
| 145 ["<!--", handleComment], | 139 ["<!--", handleComment], |
| 146 ["<meta", handleMeta], | 140 ["<meta", handleMeta], |
| 147 ["</", handlePossibleEndTag], | 141 ["</", handlePossibleEndTag], |
| 148 ["<!", handleOther], | 142 ["<!", handleOther], |
| (...skipping 17 matching lines...) Expand all Loading... |
| 166 if (!keepParsing) { | 160 if (!keepParsing) { |
| 167 break; | 161 break; |
| 168 } | 162 } |
| 169 } | 163 } |
| 170 } on StateError catch (e) { | 164 } on StateError catch (e) { |
| 171 // Catch this here to match behavior of Python's StopIteration | 165 // Catch this here to match behavior of Python's StopIteration |
| 172 } | 166 } |
| 173 return encoding; | 167 return encoding; |
| 174 } | 168 } |
| 175 | 169 |
/// Skips over a comment by jumping to the end of the next "-->" in [data].
///
/// Returns whatever [EncodingBytes.jumpTo] returns for that marker.
bool handleComment() {
  return data.jumpTo("-->");
}
| 178 | 172 |
| 179 bool handleMeta() { | 173 bool handleMeta() { |
| 180 if (!isWhitespace(data.currentByte)) { | 174 if (!isWhitespace(data.currentByte)) { |
| 181 // if we have <meta not followed by a space so just keep going | 175 // if we have <meta not followed by a space so just keep going |
| 182 return true; | 176 return true; |
| 183 } | 177 } |
| 184 // We have a valid meta element we want to search for attributes | 178 // We have a valid meta element we want to search for attributes |
| 185 while (true) { | 179 while (true) { |
| 186 // Try to find the next attribute after the current position | 180 // Try to find the next attribute after the current position |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 236 var attr = getAttribute(); | 230 var attr = getAttribute(); |
| 237 while (attr != null) { | 231 while (attr != null) { |
| 238 attr = getAttribute(); | 232 attr = getAttribute(); |
| 239 } | 233 } |
| 240 } | 234 } |
| 241 return true; | 235 return true; |
| 242 } | 236 } |
| 243 | 237 |
/// Jumps to the next ">" in [data].
///
/// Returns whatever [EncodingBytes.jumpTo] returns for that character.
bool handleOther() {
  return data.jumpTo(">");
}
| 245 | 239 |
| 246 /** | 240 /// Return a name,value pair for the next attribute in the stream, |
| 247 * Return a name,value pair for the next attribute in the stream, | 241 /// if one is found, or null |
| 248 * if one is found, or null | |
| 249 */ | |
| 250 List<String> getAttribute() { | 242 List<String> getAttribute() { |
| 251 // Step 1 (skip chars) | 243 // Step 1 (skip chars) |
| 252 var c = data.skipChars((x) => x == "/" || isWhitespace(x)); | 244 var c = data.skipChars((x) => x == "/" || isWhitespace(x)); |
| 253 // Step 2 | 245 // Step 2 |
| 254 if (c == ">" || c == null) { | 246 if (c == ">" || c == null) { |
| 255 return null; | 247 return null; |
| 256 } | 248 } |
| 257 // Step 3 | 249 // Step 3 |
| 258 var attrName = []; | 250 var attrName = []; |
| 259 var attrValue = []; | 251 var attrValue = []; |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 377 } | 369 } |
| 378 } | 370 } |
| 379 } | 371 } |
| 380 | 372 |
| 381 | 373 |
/// Whether [char] is ">", "<", or a character accepted by [isWhitespace].
bool isSpaceOrAngleBracket(String char) =>
    char == ">" || char == "<" || isWhitespace(char);
| 385 | 377 |
/// Signature of a test applied to a single-character string; used as the
/// predicate type for the skip methods of EncodingBytes.
// NOTE(review): the name looks like a misspelling of "CharPredicate"; it is
// left unchanged because it is referenced by name in method signatures.
typedef bool CharPreciate(String char);
| OLD | NEW |