Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(712)

Side by Side Diff: pkg/third_party/html5lib/lib/parser.dart

Issue 421503004: Switch transformers over to source_span (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Code review changes Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /// This library has a parser for HTML5 documents, that lets you parse HTML 1 /// This library has a parser for HTML5 documents, that lets you parse HTML
2 /// easily from a script or server side application: 2 /// easily from a script or server side application:
3 /// 3 ///
4 /// import 'package:html5lib/parser.dart' show parse; 4 /// import 'package:html5lib/parser.dart' show parse;
5 /// import 'package:html5lib/dom.dart'; 5 /// import 'package:html5lib/dom.dart';
6 /// main() { 6 /// main() {
7 /// var document = parse( 7 /// var document = parse(
8 /// '<body>Hello world! <a href="www.html5rocks.com">HTML5 rocks!'); 8 /// '<body>Hello world! <a href="www.html5rocks.com">HTML5 rocks!');
9 /// print(document.outerHtml); 9 /// print(document.outerHtml);
10 /// } 10 /// }
11 /// 11 ///
12 /// The resulting document you get back has a DOM-like API for easy tree 12 /// The resulting document you get back has a DOM-like API for easy tree
13 /// traversal and manipulation. 13 /// traversal and manipulation.
14 library parser; 14 library parser;
15 15
16 import 'dart:collection'; 16 import 'dart:collection';
17 import 'dart:math'; 17 import 'dart:math';
18 import 'package:source_maps/span.dart' show Span, FileSpan; 18 import 'package:source_span/source_span.dart';
19 19
20 import 'src/treebuilder.dart'; 20 import 'src/treebuilder.dart';
21 import 'src/constants.dart'; 21 import 'src/constants.dart';
22 import 'src/encoding_parser.dart'; 22 import 'src/encoding_parser.dart';
23 import 'src/token.dart'; 23 import 'src/token.dart';
24 import 'src/tokenizer.dart'; 24 import 'src/tokenizer.dart';
25 import 'src/utils.dart'; 25 import 'src/utils.dart';
26 import 'dom.dart'; 26 import 'dom.dart';
27 27
28 /// Parse the [input] html5 document into a tree. The [input] can be 28 /// Parse the [input] html5 document into a tree. The [input] can be
29 /// a [String], [List<int>] of bytes or an [HtmlTokenizer]. 29 /// a [String], [List<int>] of bytes or an [HtmlTokenizer].
30 /// 30 ///
31 /// If [input] is not a [HtmlTokenizer], you can optionally specify the file's 31 /// If [input] is not a [HtmlTokenizer], you can optionally specify the file's
32 /// [encoding], which must be a string. If specified that encoding will be 32 /// [encoding], which must be a string. If specified that encoding will be
33 /// used regardless of any BOM or later declaration (such as in a meta element). 33 /// used regardless of any BOM or later declaration (such as in a meta element).
34 /// 34 ///
35 /// Set [generateSpans] if you want to generate [Span]s, otherwise the 35 /// Set [generateSpans] if you want to generate [SourceSpan]s, otherwise the
36 /// [Node.sourceSpan] property will be `null`. When using [generateSpans] you 36 /// [Node.sourceSpan] property will be `null`. When using [generateSpans] you
37 /// can additionally pass [sourceUrl] to indicate where the [input] was 37 /// can additionally pass [sourceUrl] to indicate where the [input] was
38 /// extracted from. 38 /// extracted from.
39 Document parse(input, {String encoding, bool generateSpans: false, 39 Document parse(input, {String encoding, bool generateSpans: false,
40 String sourceUrl}) { 40 String sourceUrl}) {
41 var p = new HtmlParser(input, encoding: encoding, 41 var p = new HtmlParser(input, encoding: encoding,
42 generateSpans: generateSpans, sourceUrl: sourceUrl); 42 generateSpans: generateSpans, sourceUrl: sourceUrl);
43 return p.parse(); 43 return p.parse();
44 } 44 }
45 45
46 46
47 /// Parse the [input] html5 document fragment into a tree. The [input] can be 47 /// Parse the [input] html5 document fragment into a tree. The [input] can be
48 /// a [String], [List<int>] of bytes or an [HtmlTokenizer]. The [container] 48 /// a [String], [List<int>] of bytes or an [HtmlTokenizer]. The [container]
49 /// element can optionally be specified, otherwise it defaults to "div". 49 /// element can optionally be specified, otherwise it defaults to "div".
50 /// 50 ///
51 /// If [input] is not a [HtmlTokenizer], you can optionally specify the file's 51 /// If [input] is not a [HtmlTokenizer], you can optionally specify the file's
52 /// [encoding], which must be a string. If specified, that encoding will be used, 52 /// [encoding], which must be a string. If specified, that encoding will be used,
53 /// regardless of any BOM or later declaration (such as in a meta element). 53 /// regardless of any BOM or later declaration (such as in a meta element).
54 /// 54 ///
55 /// Set [generateSpans] if you want to generate [Span]s, otherwise the 55 /// Set [generateSpans] if you want to generate [SourceSpan]s, otherwise the
56 /// [Node.sourceSpan] property will be `null`. When using [generateSpans] you can 56 /// [Node.sourceSpan] property will be `null`. When using [generateSpans] you can
57 /// additionally pass [sourceUrl] to indicate where the [input] was extracted 57 /// additionally pass [sourceUrl] to indicate where the [input] was extracted
58 /// from. 58 /// from.
59 DocumentFragment parseFragment(input, {String container: "div", 59 DocumentFragment parseFragment(input, {String container: "div",
60 String encoding, bool generateSpans: false, String sourceUrl}) { 60 String encoding, bool generateSpans: false, String sourceUrl}) {
61 var p = new HtmlParser(input, encoding: encoding, 61 var p = new HtmlParser(input, encoding: encoding,
62 generateSpans: generateSpans, sourceUrl: sourceUrl); 62 generateSpans: generateSpans, sourceUrl: sourceUrl);
63 return p.parseFragment(container); 63 return p.parseFragment(container);
64 } 64 }
65 65
66 66
67 /// Parser for HTML, which generates a tree structure from a stream of 67 /// Parser for HTML, which generates a tree structure from a stream of
68 /// (possibly malformed) characters. 68 /// (possibly malformed) characters.
69 class HtmlParser { 69 class HtmlParser {
70 /// Raise an exception on the first error encountered. 70 /// Raise an exception on the first error encountered.
71 final bool strict; 71 final bool strict;
72 72
73 /// True to generate [Span]s for the [Node.sourceSpan] property. 73 /// True to generate [SourceSpan]s for the [Node.sourceSpan] property.
74 final bool generateSpans; 74 final bool generateSpans;
75 75
76 final HtmlTokenizer tokenizer; 76 final HtmlTokenizer tokenizer;
77 77
78 final TreeBuilder tree; 78 final TreeBuilder tree;
79 79
80 final List<ParseError> errors = <ParseError>[]; 80 final List<ParseError> errors = <ParseError>[];
81 81
82 String container; 82 String container;
83 83
(...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after
356 reprocessPhases.add(phase); 356 reprocessPhases.add(phase);
357 reprocess = phase.processEOF(); 357 reprocess = phase.processEOF();
358 if (reprocess) { 358 if (reprocess) {
359 assert(!reprocessPhases.contains(phase)); 359 assert(!reprocessPhases.contains(phase));
360 } 360 }
361 } 361 }
362 } 362 }
363 363
364 /// The last span available. Used for EOF errors if we don't have something 364 /// The last span available. Used for EOF errors if we don't have something
365 /// better. 365 /// better.
366 Span get _lastSpan { 366 SourceSpan get _lastSpan {
367 if (tokenizer.stream.fileInfo == null) return null;
367 var pos = tokenizer.stream.position; 368 var pos = tokenizer.stream.position;
368 return new FileSpan(tokenizer.stream.fileInfo, pos, pos); 369 return tokenizer.stream.fileInfo.location(pos).pointSpan();
369 } 370 }
370 371
371 void parseError(Span span, String errorcode, 372 void parseError(SourceSpan span, String errorcode,
372 [Map datavars = const {}]) { 373 [Map datavars = const {}]) {
373 374
374 if (!generateSpans && span == null) { 375 if (!generateSpans && span == null) {
375 span = _lastSpan; 376 span = _lastSpan;
376 } 377 }
377 378
378 var err = new ParseError(errorcode, span, datavars); 379 var err = new ParseError(errorcode, span, datavars);
379 errors.add(err); 380 errors.add(err);
380 if (strict) throw err; 381 if (strict) throw err;
381 } 382 }
(...skipping 1788 matching lines...) Expand 10 before | Expand all | Expand 10 after
2170 super(parser); 2171 super(parser);
2171 2172
2172 void flushCharacters() { 2173 void flushCharacters() {
2173 if (characterTokens.length == 0) return; 2174 if (characterTokens.length == 0) return;
2174 2175
2175 // TODO(sigmund,jmesserly): remove '' (dartbug.com/8480) 2176 // TODO(sigmund,jmesserly): remove '' (dartbug.com/8480)
2176 var data = characterTokens.map((t) => t.data).join(''); 2177 var data = characterTokens.map((t) => t.data).join('');
2177 var span = null; 2178 var span = null;
2178 2179
2179 if (parser.generateSpans) { 2180 if (parser.generateSpans) {
2180 span = new FileSpan.union( 2181 span = characterTokens[0].span.expand(characterTokens.last.span);
2181 characterTokens[0].span,
2182 characterTokens.last.span);
2183 } 2182 }
2184 2183
2185 if (!allWhitespace(data)) { 2184 if (!allWhitespace(data)) {
2186 parser._inTablePhase.insertText(new CharactersToken(data)..span = span); 2185 parser._inTablePhase.insertText(new CharactersToken(data)..span = span);
2187 } else if (data.length > 0) { 2186 } else if (data.length > 0) {
2188 tree.insertText(data, span); 2187 tree.insertText(data, span);
2189 } 2188 }
2190 characterTokens = <StringToken>[]; 2189 characterTokens = <StringToken>[];
2191 } 2190 }
2192 2191
(...skipping 1133 matching lines...) Expand 10 before | Expand all | Expand 10 after
3326 3325
3327 Token processEndTag(EndTagToken token) { 3326 Token processEndTag(EndTagToken token) {
3328 parser.parseError(token.span, "expected-eof-but-got-end-tag", 3327 parser.parseError(token.span, "expected-eof-but-got-end-tag",
3329 {"name": token.name}); 3328 {"name": token.name});
3330 return null; 3329 return null;
3331 } 3330 }
3332 } 3331 }
3333 3332
3334 3333
3335 /// Error in parsed document. 3334 /// Error in parsed document.
3336 class ParseError implements Exception { 3335 class ParseError implements SourceSpanException {
3337 final String errorCode; 3336 final String errorCode;
3338 final Span span; 3337 final SourceSpan span;
3339 final Map data; 3338 final Map data;
3340 3339
3341 ParseError(this.errorCode, this.span, this.data); 3340 ParseError(this.errorCode, this.span, this.data);
3342 3341
3343 int get line => span.start.line; 3342 int get line => span.start.line;
3344 3343
3345 int get column => span.start.column; 3344 int get column => span.start.column;
3346 3345
3347 /// Gets the human readable error message for this error. Use 3346 /// Gets the human readable error message for this error. Use
3348 /// [span.getLocationMessage] or [toString] to get a message including span 3347 /// [span.getLocationMessage] or [toString] to get a message including span
3349 /// information. If there is a file associated with the span, both 3348 /// information. If there is a file associated with the span, both
3350 /// [span.getLocationMessage] and [toString] are equivalent. Otherwise, 3349 /// [span.getLocationMessage] and [toString] are equivalent. Otherwise,
3351 /// [span.getLocationMessage] will not show any source url information, but 3350 /// [span.getLocationMessage] will not show any source url information, but
3352 /// [toString] will include 'ParserError:' as a prefix. 3351 /// [toString] will include 'ParserError:' as a prefix.
3353 String get message => formatStr(errorMessages[errorCode], data); 3352 String get message => formatStr(errorMessages[errorCode], data);
3354 3353
3355 String toString() { 3354 String toString({color}) {
3356 var res = span.getLocationMessage(message); 3355 var res = span.message(message, color: color);
3357 return span.sourceUrl == null ? 'ParserError on $res' : 'On $res'; 3356 return span.sourceUrl == null ? 'ParserError on $res' : 'On $res';
3358 } 3357 }
3359 } 3358 }
3360 3359
3361 3360
3362 /// Convenience function to get the pair of namespace and localName. 3361 /// Convenience function to get the pair of namespace and localName.
3363 Pair<String, String> getElementNameTuple(Element e) { 3362 Pair<String, String> getElementNameTuple(Element e) {
3364 var ns = e.namespaceUri; 3363 var ns = e.namespaceUri;
3365 if (ns == null) ns = Namespaces.html; 3364 if (ns == null) ns = Namespaces.html;
3366 return new Pair(ns, e.localName); 3365 return new Pair(ns, e.localName);
3367 } 3366 }
OLDNEW
« no previous file with comments | « pkg/third_party/html5lib/lib/dom.dart ('k') | pkg/third_party/html5lib/lib/src/inputstream.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698