| OLD | NEW |
| 1 library encoding_parser; | 1 library encoding_parser; |
| 2 | 2 |
| 3 import 'constants.dart'; | 3 import 'constants.dart'; |
| 4 import 'inputstream.dart'; | 4 import 'inputstream.dart'; |
| 5 | 5 |
| 6 // TODO(jmesserly): I converted StopIteration to StateError("No more elements"). | 6 // TODO(jmesserly): I converted StopIteration to StateError("No more elements"). |
| 7 // Seems strange to throw this from outside of an iterator though. | 7 // Seems strange to throw this from outside of an iterator though. |
| 8 /// String-like object with an associated position and various extra methods | 8 /// String-like object with an associated position and various extra methods |
| 9 /// If the position is ever greater than the string length then an exception is | 9 /// If the position is ever greater than the string length then an exception is |
| 10 /// raised. | 10 /// raised. |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 114 } | 114 } |
| 115 } | 115 } |
| 116 | 116 |
/// Returns the part of `_bytes` from index [start] (inclusive) up to index
/// [end] (exclusive).
///
/// When [end] is omitted it defaults to [length]. A negative [end] is
/// counted back from [length], so `slice(0, -1)` drops the last character.
String slice(int start, [int end]) {
  if (end == null) end = length;
  if (end < 0) end += length;
  // String.substring expects an end *index*, not a length. Passing
  // `end - start` returned the wrong range (or threw a RangeError) for any
  // non-zero [start].
  return _bytes.substring(start, end);
}
| 122 } | 122 } |
| 123 | 123 |
/// Signature of a markup handler used by the encoding scanner.
///
/// A handler returns true when scanning should continue and false once an
/// encoding has been found.
typedef bool _MethodHandler();

/// Associates a literal [pattern] with the [handler] to invoke when the input
/// matches that pattern.
class _DispatchEntry {
  /// Callback run when [pattern] matches.
  final _MethodHandler handler;

  /// Text that has to match for [handler] to be run.
  final String pattern;

  _DispatchEntry(String prefix, _MethodHandler callback)
      : pattern = prefix,
        handler = callback;
}
| 131 |
| 124 /// Mini parser for detecting character encoding from meta elements. | 132 /// Mini parser for detecting character encoding from meta elements. |
| 125 class EncodingParser { | 133 class EncodingParser { |
| 126 final EncodingBytes data; | 134 final EncodingBytes data; |
| 127 String encoding; | 135 String encoding; |
| 128 | 136 |
/// [bytes] - the data to work on for encoding detection.
EncodingParser(List<int> bytes)
    : data = new EncodingBytes(_asLowerCaseText(bytes));

/// Builds the lower-cased string that the parser scans.
///
/// Note: every element of [bytes] is intentionally interpreted as a
/// codepoint rather than being decoded with any particular charset.
static String _asLowerCaseText(List<int> bytes) {
  final text = new String.fromCharCodes(bytes);
  return text.toLowerCase();
}
| 133 | 141 |
/// Scans [data] for an encoding declaration and returns [encoding] as it
/// stands when scanning stops.
///
/// At each position the patterns below are tried in list order and only the
/// first one that matches is dispatched, which is why the more specific
/// patterns are listed ahead of the bare `"<"`. A handler returning false
/// means an encoding was found and ends the scan immediately.
String getEncoding() {
  final handlers = <_DispatchEntry>[
    new _DispatchEntry("<!--", handleComment),
    new _DispatchEntry("<meta", handleMeta),
    new _DispatchEntry("</", handlePossibleEndTag),
    new _DispatchEntry("<!", handleOther),
    new _DispatchEntry("<?", handleOther),
    new _DispatchEntry("<", handlePossibleStartTag),
  ];

  try {
    while (true) {
      for (var i = 0; i < handlers.length; i++) {
        final entry = handlers[i];
        if (!data.matchBytes(entry.pattern)) continue;

        // We found an encoding. Stop.
        if (!entry.handler()) return encoding;

        // The handler asked to keep parsing; do not try the remaining
        // patterns at this position.
        break;
      }
      data.position += 1;
    }
  } on StateError catch (_) {
    // Catch this here to match behavior of Python's StopIteration
    // TODO(jmesserly): refactor to not use exceptions
  }
  return encoding;
}
| 163 | 171 |
/// Skips over a comment by jumping to its closing `"-->"`.
///
/// The result of `data.jumpTo` is handed back unchanged; [getEncoding]
/// treats it as the keep-parsing flag.
bool handleComment() {
  return data.jumpTo("-->");
}
| 166 | 174 |
| 167 bool handleMeta() { | 175 bool handleMeta() { |
| (...skipping 17 matching lines...) Expand all Loading... |
| 185 } else if (attr[0] == "content") { | 193 } else if (attr[0] == "content") { |
| 186 var contentParser = new ContentAttrParser(new EncodingBytes(attr[1])); | 194 var contentParser = new ContentAttrParser(new EncodingBytes(attr[1])); |
| 187 var tentativeEncoding = contentParser.parse(); | 195 var tentativeEncoding = contentParser.parse(); |
| 188 var codec = codecName(tentativeEncoding); | 196 var codec = codecName(tentativeEncoding); |
| 189 if (codec != null) { | 197 if (codec != null) { |
| 190 encoding = codec; | 198 encoding = codec; |
| 191 return false; | 199 return false; |
| 192 } | 200 } |
| 193 } | 201 } |
| 194 } | 202 } |
| 195 return true; // unreachable | |
| 196 } | 203 } |
| 197 | 204 |
/// Handles what may be a start tag by delegating to [handlePossibleTag] with
/// `endTag` set to false.
bool handlePossibleStartTag() {
  return handlePossibleTag(false);
}
| 199 | 206 |
/// Handles what may be an end tag.
///
/// Advances [data] once via `data.next()` and then delegates to
/// [handlePossibleTag] with `endTag` set to true.
bool handlePossibleEndTag() {
  // The value returned by next() is not needed here, only the advance.
  data.next();
  final keepParsing = handlePossibleTag(true);
  return keepParsing;
}
| 204 | 211 |
| 205 bool handlePossibleTag(bool endTag) { | 212 bool handlePossibleTag(bool endTag) { |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 307 if (isSpaceOrAngleBracket(c)) { | 314 if (isSpaceOrAngleBracket(c)) { |
| 308 return [attrName.join(), attrValue.join()]; | 315 return [attrName.join(), attrValue.join()]; |
| 309 } else if (c == null) { | 316 } else if (c == null) { |
| 310 return null; | 317 return null; |
| 311 } else if (isLetter(c)) { | 318 } else if (isLetter(c)) { |
| 312 attrValue.add(c.toLowerCase()); | 319 attrValue.add(c.toLowerCase()); |
| 313 } else { | 320 } else { |
| 314 attrValue.add(c); | 321 attrValue.add(c); |
| 315 } | 322 } |
| 316 } | 323 } |
| 317 return null; // unreachable | |
| 318 } | 324 } |
| 319 } | 325 } |
| 320 | 326 |
| 321 class ContentAttrParser { | 327 class ContentAttrParser { |
| 322 final EncodingBytes data; | 328 final EncodingBytes data; |
| 323 | 329 |
/// Creates a parser that reads from [data].
ContentAttrParser(EncodingBytes data) : data = data;
| 325 | 331 |
| 326 String parse() { | 332 String parse() { |
| 327 try { | 333 try { |
| (...skipping 17 matching lines...) Expand all Loading... |
| 345 return data.slice(oldPosition, data.position); | 351 return data.slice(oldPosition, data.position); |
| 346 } else { | 352 } else { |
| 347 return null; | 353 return null; |
| 348 } | 354 } |
| 349 } else { | 355 } else { |
| 350 // Unquoted value | 356 // Unquoted value |
| 351 var oldPosition = data.position; | 357 var oldPosition = data.position; |
| 352 try { | 358 try { |
| 353 data.skipUntil(isWhitespace); | 359 data.skipUntil(isWhitespace); |
| 354 return data.slice(oldPosition, data.position); | 360 return data.slice(oldPosition, data.position); |
| 355 } on StateError catch (e) { | 361 } on StateError catch (_) { |
| 356 //Return the whole remaining value | 362 //Return the whole remaining value |
| 357 return data.slice(oldPosition); | 363 return data.slice(oldPosition); |
| 358 } | 364 } |
| 359 } | 365 } |
| 360 } on StateError catch (e) { | 366 } on StateError catch (_) { |
| 361 return null; | 367 return null; |
| 362 } | 368 } |
| 363 } | 369 } |
| 364 } | 370 } |
| 365 | 371 |
/// Whether [char] is `"<"`, `">"`, or a character accepted by
/// `isWhitespace`.
bool isSpaceOrAngleBracket(String char) {
  if (char == "<" || char == ">") return true;
  return isWhitespace(char);
}
| 369 | 375 |
/// Signature of a test applied to a single-character string.
// NOTE(review): the name looks like a misspelling of "CharPredicate"; it is
// kept as-is because other code may refer to it.
typedef bool CharPreciate(String ch);
| OLD | NEW |