Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: editor/tools/plugins/com.google.dart.engine/src/com/google/dart/engine/html/parser/HtmlParser.java

Issue 82903007: Improved HTML parsing (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Clean-up Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2013, the Dart project authors. 2 * Copyright (c) 2013, the Dart project authors.
3 * 3 *
4 * Licensed under the Eclipse Public License v1.0 (the "License"); you may not use this file except 4 * Licensed under the Eclipse Public License v1.0 (the "License"); you may not use this file except
5 * in compliance with the License. You may obtain a copy of the License at 5 * in compliance with the License. You may obtain a copy of the License at
6 * 6 *
7 * http://www.eclipse.org/legal/epl-v10.html 7 * http://www.eclipse.org/legal/epl-v10.html
8 * 8 *
9 * Unless required by applicable law or agreed to in writing, software distributed under the License 9 * Unless required by applicable law or agreed to in writing, software distributed under the License
10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 * or implied. See the License for the specific language governing permissions and limitations under 11 * or implied. See the License for the specific language governing permissions and limitations under
12 * the License. 12 * the License.
13 */ 13 */
14 package com.google.dart.engine.html.parser; 14 package com.google.dart.engine.html.parser;
15 15
16 import com.google.dart.engine.ast.CompilationUnit;
17 import com.google.dart.engine.ast.Expression;
18 import com.google.dart.engine.error.AnalysisErrorListener;
19 import com.google.dart.engine.html.ast.AttributeWithEmbeddedExpressions;
20 import com.google.dart.engine.html.ast.EmbeddedExpression;
21 import com.google.dart.engine.html.ast.HtmlScriptTagNode;
16 import com.google.dart.engine.html.ast.HtmlUnit; 22 import com.google.dart.engine.html.ast.HtmlUnit;
23 import com.google.dart.engine.html.ast.TagWithEmbeddedExpressions;
24 import com.google.dart.engine.html.ast.XmlAttributeNode;
17 import com.google.dart.engine.html.ast.XmlNode; 25 import com.google.dart.engine.html.ast.XmlNode;
18 import com.google.dart.engine.html.ast.XmlTagNode; 26 import com.google.dart.engine.html.ast.XmlTagNode;
19 import com.google.dart.engine.html.scanner.HtmlScanResult; 27 import com.google.dart.engine.html.scanner.HtmlScanResult;
20 import com.google.dart.engine.html.scanner.HtmlScanner; 28 import com.google.dart.engine.html.scanner.HtmlScanner;
21 import com.google.dart.engine.html.scanner.Token; 29 import com.google.dart.engine.html.scanner.Token;
30 import com.google.dart.engine.html.scanner.TokenType;
31 import com.google.dart.engine.parser.Parser;
32 import com.google.dart.engine.scanner.Scanner;
33 import com.google.dart.engine.scanner.SubSequenceReader;
22 import com.google.dart.engine.source.Source; 34 import com.google.dart.engine.source.Source;
35 import com.google.dart.engine.utilities.source.LineInfo;
36 import com.google.dart.engine.utilities.source.LineInfo.Location;
23 37
38 import java.util.ArrayList;
24 import java.util.Arrays; 39 import java.util.Arrays;
25 import java.util.HashSet; 40 import java.util.HashSet;
26 import java.util.List; 41 import java.util.List;
27 import java.util.Set; 42 import java.util.Set;
28 43
29 /** 44 /**
30 * Instances of the class {@code HtmlParser} are used to parse tokens into an AST structure comprised 45 * Instances of the class {@code HtmlParser} are used to parse tokens into an AST structure comprised
31 * of {@link XmlNode}s. 46 * of {@link XmlNode}s.
32 * 47 *
33 * @coverage dart.engine.html 48 * @coverage dart.engine.html
34 */ 49 */
35 public class HtmlParser extends XmlParser { 50 public class HtmlParser extends XmlParser {
51 /**
52 * The line information associated with the source being parsed.
53 */
54 private LineInfo lineInfo;
36 55
56 /**
57 * The error listener to which errors will be reported.
58 */
59 private AnalysisErrorListener errorListener;
60
61 private static final String APPLICATION_DART_IN_DOUBLE_QUOTES = "\"application/dart\"";
62 private static final String APPLICATION_DART_IN_SINGLE_QUOTES = "'application/dart'";
63 private static final String OPENING_DELIMITER = "{{";
64 private static final String CLOSING_DELIMITER = "}}";
65 private static final String SCRIPT = "script";
66 private static final String TYPE = "type";
67
68 /**
69 * A set containing the names of tags that do not have a closing tag.
70 */
37 public static Set<String> SELF_CLOSING = new HashSet<String>(Arrays.asList(new String[] { 71 public static Set<String> SELF_CLOSING = new HashSet<String>(Arrays.asList(new String[] {
38 "area", "base", "basefont", "br", "col", "frame", "hr", "img", "input", "link", "meta", 72 "area", "base", "basefont", "br", "col", "frame", "hr", "img", "input", "link", "meta",
39 "param", "!", "h1", "h2", "h3", "h4", "h5", "h6"})); 73 "param", "!", "h1", "h2", "h3", "h4", "h5", "h6"}));
40 74
41 /** 75 /**
42 * Construct a parser for the specified source. 76 * Construct a parser for the specified source.
43 * 77 *
44 * @param source the source being parsed 78 * @param source the source being parsed
79 * @param errorListener the error listener to which errors will be reported
45 */ 80 */
46 public HtmlParser(Source source) { 81 public HtmlParser(Source source, AnalysisErrorListener errorListener) {
47 super(source); 82 super(source);
83 this.errorListener = errorListener;
84 }
85
86 public Token getEndToken(Token tag, List<XmlAttributeNode> attributes, Token attributeEnd,
87 List<XmlTagNode> tagNodes, Token contentEnd, Token closingTag, Token nodeEnd) {
88 if (nodeEnd != null) {
89 return nodeEnd;
90 }
91 if (closingTag != null) {
92 return closingTag;
93 }
94 if (contentEnd != null) {
95 return contentEnd;
96 }
97 if (!tagNodes.isEmpty()) {
98 return tagNodes.get(tagNodes.size() - 1).getEndToken();
99 }
100 if (attributeEnd != null) {
101 return attributeEnd;
102 }
103 if (!attributes.isEmpty()) {
104 return attributes.get(attributes.size() - 1).getEndToken();
105 }
106 return tag;
48 } 107 }
49 108
50 /** 109 /**
51 * Parse the tokens specified by the given scan result. 110 * Parse the tokens specified by the given scan result.
52 * 111 *
53 * @param scanResult the result of scanning an HTML source (not {@code null}) 112 * @param scanResult the result of scanning an HTML source (not {@code null})
54 * @return the parse result (not {@code null}) 113 * @return the parse result (not {@code null})
55 */ 114 */
56 public HtmlParseResult parse(HtmlScanResult scanResult) { 115 public HtmlParseResult parse(HtmlScanResult scanResult) {
116 int[] lineStarts = scanResult.getLineStarts();
117 lineInfo = new LineInfo(lineStarts);
57 Token firstToken = scanResult.getToken(); 118 Token firstToken = scanResult.getToken();
58 List<XmlTagNode> tagNodes = parseTopTagNodes(firstToken); 119 List<XmlTagNode> tagNodes = parseTopTagNodes(firstToken);
59 HtmlUnit unit = new HtmlUnit(firstToken, tagNodes, getCurrentToken()); 120 HtmlUnit unit = new HtmlUnit(firstToken, tagNodes, getCurrentToken());
60 return new HtmlParseResult( 121 return new HtmlParseResult(
61 scanResult.getModificationTime(), 122 scanResult.getModificationTime(),
62 firstToken, 123 firstToken,
63 scanResult.getLineStarts(), 124 scanResult.getLineStarts(),
64 unit); 125 unit);
65 } 126 }
66 127
67 /** 128 /**
68 * Scan then parse the specified source. 129 * Scan then parse the specified source.
69 * 130 *
70 * @param source the source to be scanned and parsed (not {@code null}) 131 * @param source the source to be scanned and parsed (not {@code null})
71 * @return the parse result (not {@code null}) 132 * @return the parse result (not {@code null})
72 */ 133 */
73 public HtmlParseResult parse(Source source) throws Exception { 134 public HtmlParseResult parse(Source source) throws Exception {
74 HtmlScanner scanner = new HtmlScanner(source); 135 HtmlScanner scanner = new HtmlScanner(source);
75 source.getContents(scanner); 136 source.getContents(scanner);
76 return parse(scanner.getResult()); 137 return parse(scanner.getResult());
77 } 138 }
78 139
79 @Override 140 @Override
141 protected XmlAttributeNode createAttributeNode(Token name, Token equals, Token value) {
142 ArrayList<EmbeddedExpression> expressions = new ArrayList<EmbeddedExpression>();
143 addEmbeddedExpressions(expressions, value);
144 if (expressions.isEmpty()) {
145 return new XmlAttributeNode(name, equals, value);
146 }
147 return new AttributeWithEmbeddedExpressions(
148 name,
149 equals,
150 value,
151 expressions.toArray(new EmbeddedExpression[expressions.size()]));
152 }
153
154 @Override
155 protected XmlTagNode createTagNode(Token nodeStart, Token tag, List<XmlAttributeNode> attributes,
156 Token attributeEnd, List<XmlTagNode> tagNodes, Token contentEnd, Token closingTag,
157 Token nodeEnd) {
158 if (isScriptNode(tag, attributes, tagNodes)) {
159 HtmlScriptTagNode tagNode = new HtmlScriptTagNode(
160 nodeStart,
161 tag,
162 attributes,
163 attributeEnd,
164 tagNodes,
165 contentEnd,
166 closingTag,
167 nodeEnd);
168 String contents = tagNode.getContent();
169 int contentOffset = attributeEnd.getEnd();
170 Location location = lineInfo.getLocation(contentOffset);
171 Scanner scanner = new Scanner(
172 getSource(),
173 new SubSequenceReader(contents, contentOffset),
174 errorListener);
175 scanner.setSourceStart(location.getLineNumber(), location.getColumnNumber());
176 com.google.dart.engine.scanner.Token firstToken = scanner.tokenize();
177 Parser parser = new Parser(getSource(), errorListener);
178 CompilationUnit unit = parser.parseCompilationUnit(firstToken);
179 unit.setLineInfo(lineInfo);
180 tagNode.setScript(unit);
181 return tagNode;
182 }
183 Token token = nodeStart;
184 Token endToken = getEndToken(
185 tag,
186 attributes,
187 attributeEnd,
188 tagNodes,
189 contentEnd,
190 closingTag,
191 nodeEnd);
192 ArrayList<EmbeddedExpression> expressions = new ArrayList<EmbeddedExpression>();
193 while (token != endToken) {
194 if (token.getType() == TokenType.TEXT) {
195 addEmbeddedExpressions(expressions, token);
196 }
197 token = token.getNext();
198 }
199 if (expressions.isEmpty()) {
200 return super.createTagNode(
201 nodeStart,
202 tag,
203 attributes,
204 attributeEnd,
205 tagNodes,
206 contentEnd,
207 closingTag,
208 nodeEnd);
209 }
210 return new TagWithEmbeddedExpressions(
211 nodeStart,
212 tag,
213 attributes,
214 attributeEnd,
215 tagNodes,
216 contentEnd,
217 closingTag,
218 nodeEnd,
219 expressions.toArray(new EmbeddedExpression[expressions.size()]));
220 }
221
222 @Override
80 protected boolean isSelfClosing(Token tag) { 223 protected boolean isSelfClosing(Token tag) {
81 return SELF_CLOSING.contains(tag.getLexeme()); 224 return SELF_CLOSING.contains(tag.getLexeme());
82 } 225 }
226
227 /**
228 * Parse the value of the given token for embedded expressions, and add any embedded expressions
229 * that are found to the given list of expressions.
230 *
231 * @param expressions the list to which embedded expressions are to be added
232 * @param token the token whose value is to be parsed
233 */
234 private void addEmbeddedExpressions(ArrayList<EmbeddedExpression> expressions, Token token) {
235 String lexeme = token.getLexeme();
236 int startIndex = lexeme.indexOf(OPENING_DELIMITER);
237 while (startIndex >= 0) {
238 int endIndex = lexeme.indexOf(CLOSING_DELIMITER, startIndex + 2);
239 if (endIndex < 0) {
240 // TODO(brianwilkerson) Should we report this error or will it be reported by something else?
241 return;
242 }
243 int offset = token.getOffset();
244 expressions.add(new EmbeddedExpression(startIndex, parseEmbeddedExpression(
245 lexeme.substring(startIndex + 2, endIndex),
246 offset + startIndex), endIndex));
247 startIndex = lexeme.indexOf(OPENING_DELIMITER, endIndex + 2);
248 }
249 }
250
251 /**
252 * Determine if the specified node is a Dart script.
253 *
254 * @param node the node to be tested (not {@code null})
255 * @return {@code true} if the node is a Dart script
256 */
257 private boolean isScriptNode(Token tag, List<XmlAttributeNode> attributes,
258 List<XmlTagNode> tagNodes) {
259 if (tagNodes.size() != 0 || !tag.getLexeme().equals(SCRIPT)) {
260 return false;
261 }
262 for (XmlAttributeNode attribute : attributes) {
263 if (attribute.getName().getLexeme().equals(TYPE)) {
264 Token valueToken = attribute.getValue();
265 if (valueToken != null) {
266 String value = valueToken.getLexeme();
267 if (value.equals(APPLICATION_DART_IN_DOUBLE_QUOTES)
268 || value.equals(APPLICATION_DART_IN_SINGLE_QUOTES)) {
269 return true;
270 }
271 }
272 }
273 }
274 return false;
275 }
276
277 /**
278 * Given the contents of an embedded expression that occurs at the given offset, parse it as a
279 * Dart expression. The contents should not include the expression's delimiters.
280 *
281 * @param contents the contents of the expression
282 * @param contentOffset the offset of the expression in the larger file
283 * @return the Dart expression that was parsed
284 */
285 private Expression parseEmbeddedExpression(String contents, int contentOffset) {
286 Location location = lineInfo.getLocation(contentOffset);
287 Scanner scanner = new Scanner(
288 getSource(),
289 new SubSequenceReader(contents, contentOffset),
290 errorListener);
291 scanner.setSourceStart(location.getLineNumber(), location.getColumnNumber());
292 com.google.dart.engine.scanner.Token firstToken = scanner.tokenize();
293 Parser parser = new Parser(getSource(), errorListener);
294 return parser.parseExpression(firstToken);
295 }
83 } 296 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698