Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(176)

Side by Side Diff: ios/third_party/blink/src/html_tokenizer.mm

Issue 1031023002: Upstream ios/web/ HTML tokenizer (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "ios/third_party/blink/src/html_tokenizer.h"
29
30 #include "html_markup_tokenizer_inlines.h"
31
32 namespace WebCore {
33
// Shorthand wrappers around the generic state-machine macros (BEGIN_STATE,
// RECONSUME_IN, ADVANCE_TO, SWITCH_TO). Each one forwards the state name
// together with the HTMLTokenizer class name, so the state bodies in
// nextToken() only have to spell the state.
// NOTE(review): the generic macros are not defined in this file; presumably
// they come from html_markup_tokenizer_inlines.h -- confirm.
34 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
35 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
36 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
37 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
38
// Constructs a tokenizer that starts in DataState with no token attached
// (m_token is null until nextToken() is called) and with the additional
// allowed character set to '\0'. The input stream preprocessor is handed a
// pointer to this tokenizer.
39 HTMLTokenizer::HTMLTokenizer()
40 : m_state(HTMLTokenizer::DataState)
41 , m_token(nullptr)
42 , m_additionalAllowedCharacter('\0')
43 , m_inputStreamPreprocessor(this)
44 {
45 }
46
// The destructor body is empty; in particular m_token is not deleted here
// (nextToken() only stores the address of a token supplied by the caller).
47 HTMLTokenizer::~HTMLTokenizer()
48 {
49 }
50
// Returns the tokenizer to its initial configuration: DataState, no current
// token, and '\0' as the additional allowed character.
// m_inputStreamPreprocessor is not touched here.
51 void HTMLTokenizer::reset()
52 {
53 m_state = HTMLTokenizer::DataState;
// nullptr rather than 0, matching the constructor's initializer for m_token.
54 m_token = nullptr;
55 m_additionalAllowedCharacter = '\0';
56 }
57
// Consumes one character from |source| (via source.next()) and reports
// whether the current token is a Character token.
//
// Precondition (asserted): the current token is either a Character token or
// still Uninitialized.
// Returns true when m_token is a Character token, false otherwise.
58 bool HTMLTokenizer::flushBufferedEndTag(CharacterProvider& source)
59 {
60 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
61 source.next();
62 if (m_token->type() == HTMLToken::Character)
63 return true;
64
65 return false;
66 }
67
// FLUSH_AND_ADVANCE_TO(stateName): statement macro intended for use inside a
// function that has |source| and |cc| in scope and a label named |stateName|.
// In order, it:
//   1. records |stateName| in m_state;
//   2. calls flushBufferedEndTag(source) and, if that returns true, returns
//      true from the enclosing function;
//   3. if |source| is empty or the preprocessor's peek() fails, returns
//      haveBufferedCharacterToken() from the enclosing function;
//   4. otherwise loads the next input character into |cc| and jumps to the
//      |stateName| label.
// The do { ... } while (false) wrapper lets the macro be used as a single
// statement. It is not invoked anywhere in the code shown in this file.
68 #define FLUSH_AND_ADVANCE_TO(stateName) \
69 do { \
70 m_state = HTMLTokenizer::stateName; \
71 if (flushBufferedEndTag(source)) \
72 return true; \
73 if (source.isEmpty() \
74 || !m_inputStreamPreprocessor.peek(source)) \
75 return haveBufferedCharacterToken(); \
76 cc = m_inputStreamPreprocessor.nextInputCharacter(); \
77 goto stateName; \
78 } while (false)
79
// Runs the tokenizer state machine over |source|, building |token|.
//
// |source| supplies the input characters; |token| is the token being filled
// in. When a token is already in progress the caller is expected to pass the
// same token object again (see the ASSERT below); the tokenizer keeps its
// address in m_token.
//
// Return value: true on the explicit `return true` path (pending character
// data is handed back when '<' is seen in DataState); every other exit
// returns whatever haveBufferedCharacterToken(), emitEndOfFile(),
// emitAndResumeIn() or emitAndReconsumeIn() returns.
// NOTE(review): presumably true means "a token is ready for the caller" --
// confirm against the declarations in html_tokenizer.h.
// The code after the switch is not expected to be reached.
80 bool HTMLTokenizer::nextToken(CharacterProvider& source, HTMLToken& token)
81 {
82 // If we have a token in progress, then we're supposed to be called back
83 // with the same token so we can finish it.
84 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
85 m_token = &token;
86
// Nothing can be read right now; the result is whatever
// haveBufferedCharacterToken() reports.
87 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
88 return haveBufferedCharacterToken();
89 UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
90
91 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
// Each HTML_BEGIN_STATE(...) { ... } END_STATE() pair below handles one
// tokenizer state and dispatches on the current input character cc.
92 switch (m_state) {
93 HTML_BEGIN_STATE(DataState) {
94 if (cc == '<') {
95 if (m_token->type() == HTMLToken::Character) {
96 // We have a bunch of character tokens queued up that we
97 // are emitting lazily here.
98 return true;
99 }
100 HTML_ADVANCE_TO(TagOpenState);
101 } else if (cc == kEndOfFileMarker)
102 return emitEndOfFile(source);
103 else {
104 m_token->ensureIsCharacterToken();
105 HTML_ADVANCE_TO(DataState);
106 }
107 }
108 END_STATE()
109
110 HTML_BEGIN_STATE(TagOpenState) {
111 if (cc == '!')
112 HTML_ADVANCE_TO(MarkupDeclarationOpenState);
113 else if (cc == '/')
114 HTML_ADVANCE_TO(EndTagOpenState);
115 else if (isASCIIUpper(cc)) {
116 m_token->beginStartTag(toLowerCase(cc));
117 HTML_ADVANCE_TO(TagNameState);
118 } else if (isASCIILower(cc)) {
119 m_token->beginStartTag(cc);
120 HTML_ADVANCE_TO(TagNameState);
121 } else if (cc == '?') {
122 parseError();
123 // The spec consumes the current character before switching
124 // to the bogus comment state, but it's easier to implement
125 // if we reconsume the current character.
126 HTML_RECONSUME_IN(BogusCommentState);
127 } else {
128 parseError();
129 m_token->ensureIsCharacterToken();
130 HTML_RECONSUME_IN(DataState);
131 }
132 }
133 END_STATE()
134
135 HTML_BEGIN_STATE(EndTagOpenState) {
136 if (isASCIIUpper(cc)) {
137 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
138 HTML_ADVANCE_TO(TagNameState);
139 } else if (isASCIILower(cc)) {
140 m_token->beginEndTag(static_cast<LChar>(cc));
141 HTML_ADVANCE_TO(TagNameState);
142 } else if (cc == '>') {
143 parseError();
144 HTML_ADVANCE_TO(DataState);
145 } else if (cc == kEndOfFileMarker) {
146 parseError();
147 m_token->ensureIsCharacterToken();
148 HTML_RECONSUME_IN(DataState);
149 } else {
150 parseError();
151 HTML_RECONSUME_IN(BogusCommentState);
152 }
153 }
154 END_STATE()
155
156 HTML_BEGIN_STATE(TagNameState) {
157 if (isTokenizerWhitespace(cc))
158 HTML_ADVANCE_TO(BeforeAttributeNameState);
159 else if (cc == '/')
160 HTML_ADVANCE_TO(SelfClosingStartTagState);
161 else if (cc == '>')
162 return emitAndResumeIn(source, HTMLTokenizer::DataState);
163 else if (isASCIIUpper(cc)) {
164 m_token->appendToName(toLowerCase(cc));
165 HTML_ADVANCE_TO(TagNameState);
166 } else if (cc == kEndOfFileMarker) {
167 parseError();
168 HTML_RECONSUME_IN(DataState);
169 } else {
170 m_token->appendToName(cc);
171 HTML_ADVANCE_TO(TagNameState);
172 }
173 }
174 END_STATE()
175
// None of the attribute states below store anything on m_token; they only
// track state transitions until the tag is closed.
176 HTML_BEGIN_STATE(BeforeAttributeNameState) {
177 if (isTokenizerWhitespace(cc))
178 HTML_ADVANCE_TO(BeforeAttributeNameState);
179 else if (cc == '/')
180 HTML_ADVANCE_TO(SelfClosingStartTagState);
181 else if (cc == '>')
182 return emitAndResumeIn(source, HTMLTokenizer::DataState);
183 else if (isASCIIUpper(cc)) {
184 HTML_ADVANCE_TO(AttributeNameState);
185 } else if (cc == kEndOfFileMarker) {
186 parseError();
187 HTML_RECONSUME_IN(DataState);
188 } else {
189 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
190 parseError();
191 HTML_ADVANCE_TO(AttributeNameState);
192 }
193 }
194 END_STATE()
195
196 HTML_BEGIN_STATE(AttributeNameState) {
197 if (isTokenizerWhitespace(cc)) {
198 HTML_ADVANCE_TO(AfterAttributeNameState);
199 } else if (cc == '/') {
200 HTML_ADVANCE_TO(SelfClosingStartTagState);
201 } else if (cc == '=') {
202 HTML_ADVANCE_TO(BeforeAttributeValueState);
203 } else if (cc == '>') {
204 return emitAndResumeIn(source, HTMLTokenizer::DataState);
205 } else if (isASCIIUpper(cc)) {
206 HTML_ADVANCE_TO(AttributeNameState);
207 } else if (cc == kEndOfFileMarker) {
208 parseError();
209 HTML_RECONSUME_IN(DataState);
210 } else {
// NOTE: '=' is already handled by an earlier branch of this chain, so the
// cc == '=' comparison below can never be true here.
211 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
212 parseError();
213 HTML_ADVANCE_TO(AttributeNameState);
214 }
215 }
216 END_STATE()
217
218 HTML_BEGIN_STATE(AfterAttributeNameState) {
219 if (isTokenizerWhitespace(cc))
220 HTML_ADVANCE_TO(AfterAttributeNameState);
221 else if (cc == '/')
222 HTML_ADVANCE_TO(SelfClosingStartTagState);
223 else if (cc == '=')
224 HTML_ADVANCE_TO(BeforeAttributeValueState);
225 else if (cc == '>')
226 return emitAndResumeIn(source, HTMLTokenizer::DataState);
227 else if (isASCIIUpper(cc)) {
228 HTML_ADVANCE_TO(AttributeNameState);
229 } else if (cc == kEndOfFileMarker) {
230 parseError();
231 HTML_RECONSUME_IN(DataState);
232 } else {
233 if (cc == '"' || cc == '\'' || cc == '<')
234 parseError();
235 HTML_ADVANCE_TO(AttributeNameState);
236 }
237 }
238 END_STATE()
239
240 HTML_BEGIN_STATE(BeforeAttributeValueState) {
241 if (isTokenizerWhitespace(cc))
242 HTML_ADVANCE_TO(BeforeAttributeValueState);
243 else if (cc == '"') {
244 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
245 } else if (cc == '&') {
246 HTML_RECONSUME_IN(AttributeValueUnquotedState);
247 } else if (cc == '\'') {
248 HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
249 } else if (cc == '>') {
250 parseError();
251 return emitAndResumeIn(source, HTMLTokenizer::DataState);
252 } else if (cc == kEndOfFileMarker) {
253 parseError();
254 HTML_RECONSUME_IN(DataState);
255 } else {
256 if (cc == '<' || cc == '=' || cc == '`')
257 parseError();
258 HTML_ADVANCE_TO(AttributeValueUnquotedState);
259 }
260 }
261 END_STATE()
262
263 HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
264 if (cc == '"') {
265 HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
266 } else if (cc == kEndOfFileMarker) {
267 parseError();
268 HTML_RECONSUME_IN(DataState);
269 } else {
270 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
271 }
272 }
273 END_STATE()
274
275 HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
276 if (cc == '\'') {
277 HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
278 } else if (cc == kEndOfFileMarker) {
279 parseError();
280 HTML_RECONSUME_IN(DataState);
281 } else {
282 HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
283 }
284 }
285 END_STATE()
286
287 HTML_BEGIN_STATE(AttributeValueUnquotedState) {
288 if (isTokenizerWhitespace(cc)) {
289 HTML_ADVANCE_TO(BeforeAttributeNameState);
290 } else if (cc == '>') {
291 return emitAndResumeIn(source, HTMLTokenizer::DataState);
292 } else if (cc == kEndOfFileMarker) {
293 parseError();
294 HTML_RECONSUME_IN(DataState);
295 } else {
296 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
297 parseError();
298 HTML_ADVANCE_TO(AttributeValueUnquotedState);
299 }
300 }
301 END_STATE()
302
303 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
304 if (isTokenizerWhitespace(cc))
305 HTML_ADVANCE_TO(BeforeAttributeNameState);
306 else if (cc == '/')
307 HTML_ADVANCE_TO(SelfClosingStartTagState);
308 else if (cc == '>')
309 return emitAndResumeIn(source, HTMLTokenizer::DataState);
310 else if (cc == kEndOfFileMarker) {
311 parseError();
312 HTML_RECONSUME_IN(DataState);
313 } else {
314 parseError();
315 HTML_RECONSUME_IN(BeforeAttributeNameState);
316 }
317 }
318 END_STATE()
319
320 HTML_BEGIN_STATE(SelfClosingStartTagState) {
321 if (cc == '>') {
322 return emitAndResumeIn(source, HTMLTokenizer::DataState);
323 } else if (cc == kEndOfFileMarker) {
324 parseError();
325 HTML_RECONSUME_IN(DataState);
326 } else {
327 parseError();
328 HTML_RECONSUME_IN(BeforeAttributeNameState);
329 }
330 }
331 END_STATE()
332
333 HTML_BEGIN_STATE(BogusCommentState) {
334 m_token->beginComment();
335 HTML_RECONSUME_IN(ContinueBogusCommentState);
336 }
337 END_STATE()
338
339 HTML_BEGIN_STATE(ContinueBogusCommentState) {
340 if (cc == '>')
341 return emitAndResumeIn(source, HTMLTokenizer::DataState);
342 else if (cc == kEndOfFileMarker)
343 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
344 else {
345 HTML_ADVANCE_TO(ContinueBogusCommentState);
346 }
347 }
348 END_STATE()
349
350 HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
351 DEFINE_STATIC_LOCAL_STRING(dashDashString, "--");
352 DEFINE_STATIC_LOCAL_STRING(doctypeString, "doctype");
353 if (cc == '-') {
354 if (source.startsWith(dashDashString, dashDashStringLength)) {
355 advanceAndASSERT(source, '-');
356 advanceAndASSERT(source, '-');
357 m_token->beginComment();
358 HTML_SWITCH_TO(CommentStartState);
359 } else if (source.remainingBytes() < dashDashStringLength)
360 return haveBufferedCharacterToken();
361 } else if (cc == 'D' || cc == 'd') {
362 if (source.startsWith(doctypeString, doctypeStringLength, true)) {
363 advanceStringAndASSERTIgnoringCase(source, doctypeString);
364 HTML_SWITCH_TO(DOCTYPEState);
365 } else if (source.remainingBytes() < doctypeStringLength)
366 return haveBufferedCharacterToken();
367 }
368 parseError();
369 HTML_RECONSUME_IN(BogusCommentState);
370 }
371 END_STATE()
372
373 HTML_BEGIN_STATE(CommentStartState) {
374 if (cc == '-')
375 HTML_ADVANCE_TO(CommentStartDashState);
376 else if (cc == '>') {
377 parseError();
378 return emitAndResumeIn(source, HTMLTokenizer::DataState);
379 } else if (cc == kEndOfFileMarker) {
380 parseError();
381 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
382 } else {
383 HTML_ADVANCE_TO(CommentState);
384 }
385 }
386 END_STATE()
387
388 HTML_BEGIN_STATE(CommentStartDashState) {
389 if (cc == '-')
390 HTML_ADVANCE_TO(CommentEndState);
391 else if (cc == '>') {
392 parseError();
393 return emitAndResumeIn(source, HTMLTokenizer::DataState);
394 } else if (cc == kEndOfFileMarker) {
395 parseError();
396 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
397 } else {
398 HTML_ADVANCE_TO(CommentState);
399 }
400 }
401 END_STATE()
402
403 HTML_BEGIN_STATE(CommentState) {
404 if (cc == '-')
405 HTML_ADVANCE_TO(CommentEndDashState);
406 else if (cc == kEndOfFileMarker) {
407 parseError();
408 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
409 } else {
410 HTML_ADVANCE_TO(CommentState);
411 }
412 }
413 END_STATE()
414
415 HTML_BEGIN_STATE(CommentEndDashState) {
416 if (cc == '-')
417 HTML_ADVANCE_TO(CommentEndState);
418 else if (cc == kEndOfFileMarker) {
419 parseError();
420 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
421 } else {
422 HTML_ADVANCE_TO(CommentState);
423 }
424 }
425 END_STATE()
426
427 HTML_BEGIN_STATE(CommentEndState) {
428 if (cc == '>')
429 return emitAndResumeIn(source, HTMLTokenizer::DataState);
430 else if (cc == '!') {
431 parseError();
432 HTML_ADVANCE_TO(CommentEndBangState);
433 } else if (cc == '-') {
434 parseError();
435 HTML_ADVANCE_TO(CommentEndState);
436 } else if (cc == kEndOfFileMarker) {
437 parseError();
438 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
439 } else {
440 parseError();
441 HTML_ADVANCE_TO(CommentState);
442 }
443 }
444 END_STATE()
445
446 HTML_BEGIN_STATE(CommentEndBangState) {
447 if (cc == '-') {
448 HTML_ADVANCE_TO(CommentEndDashState);
449 } else if (cc == '>')
450 return emitAndResumeIn(source, HTMLTokenizer::DataState);
451 else if (cc == kEndOfFileMarker) {
452 parseError();
453 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
454 } else {
455 HTML_ADVANCE_TO(CommentState);
456 }
457 }
458 END_STATE()
459
460 HTML_BEGIN_STATE(DOCTYPEState) {
461 if (isTokenizerWhitespace(cc))
462 HTML_ADVANCE_TO(BeforeDOCTYPENameState);
463 else if (cc == kEndOfFileMarker) {
464 parseError();
465 m_token->beginDOCTYPE();
466 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
467 } else {
468 parseError();
469 HTML_RECONSUME_IN(BeforeDOCTYPENameState);
470 }
471 }
472 END_STATE()
473
474 HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
475 if (isTokenizerWhitespace(cc))
476 HTML_ADVANCE_TO(BeforeDOCTYPENameState);
477 else if (cc == '>') {
478 parseError();
479 m_token->beginDOCTYPE();
480 return emitAndResumeIn(source, HTMLTokenizer::DataState);
481 } else if (cc == kEndOfFileMarker) {
482 parseError();
483 m_token->beginDOCTYPE();
484 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
485 } else {
486 m_token->beginDOCTYPE();
487 HTML_ADVANCE_TO(DOCTYPENameState);
488 }
489 }
490 END_STATE()
491
492 HTML_BEGIN_STATE(DOCTYPENameState) {
493 if (isTokenizerWhitespace(cc))
494 HTML_ADVANCE_TO(AfterDOCTYPENameState);
495 else if (cc == '>')
496 return emitAndResumeIn(source, HTMLTokenizer::DataState);
497 else if (cc == kEndOfFileMarker) {
498 parseError();
499 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
500 } else {
501 HTML_ADVANCE_TO(DOCTYPENameState);
502 }
503 }
504 END_STATE()
505
506 HTML_BEGIN_STATE(AfterDOCTYPENameState) {
507 if (isTokenizerWhitespace(cc))
508 HTML_ADVANCE_TO(AfterDOCTYPENameState);
509 else if (cc == '>')
510 return emitAndResumeIn(source, HTMLTokenizer::DataState);
511 else if (cc == kEndOfFileMarker) {
512 parseError();
513 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
514 } else {
515 DEFINE_STATIC_LOCAL_STRING(publicString, "public");
516 DEFINE_STATIC_LOCAL_STRING(systemString, "system");
517 if (cc == 'P' || cc == 'p') {
518 if (source.startsWith(publicString, publicStringLength, true)) {
519 advanceStringAndASSERTIgnoringCase(source, publicString);
520 HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
521 } else if (source.remainingBytes() < publicStringLength)
522 return haveBufferedCharacterToken();
523 } else if (cc == 'S' || cc == 's') {
524 if (source.startsWith(systemString, systemStringLength, true)) {
525 advanceStringAndASSERTIgnoringCase(source, systemString);
526 HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
527 } else if (source.remainingBytes() < systemStringLength)
528 return haveBufferedCharacterToken();
529 }
530 parseError();
531 HTML_ADVANCE_TO(BogusDOCTYPEState);
532 }
533 }
534 END_STATE()
535
536 HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
537 if (isTokenizerWhitespace(cc))
538 HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
539 else if (cc == '"') {
540 parseError();
541 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
542 } else if (cc == '\'') {
543 parseError();
544 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
545 } else if (cc == '>') {
546 parseError();
547 return emitAndResumeIn(source, HTMLTokenizer::DataState);
548 } else if (cc == kEndOfFileMarker) {
549 parseError();
550 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
551 } else {
552 parseError();
553 HTML_ADVANCE_TO(BogusDOCTYPEState);
554 }
555 }
556 END_STATE()
557
558 HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
559 if (isTokenizerWhitespace(cc))
560 HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
561 else if (cc == '"') {
562 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
563 } else if (cc == '\'') {
564 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
565 } else if (cc == '>') {
566 parseError();
567 return emitAndResumeIn(source, HTMLTokenizer::DataState);
568 } else if (cc == kEndOfFileMarker) {
569 parseError();
570 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
571 } else {
572 parseError();
573 HTML_ADVANCE_TO(BogusDOCTYPEState);
574 }
575 }
576 END_STATE()
577
578 HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
579 if (cc == '"')
580 HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
581 else if (cc == '>') {
582 parseError();
583 return emitAndResumeIn(source, HTMLTokenizer::DataState);
584 } else if (cc == kEndOfFileMarker) {
585 parseError();
586 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
587 } else {
588 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
589 }
590 }
591 END_STATE()
592
593 HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
594 if (cc == '\'')
595 HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
596 else if (cc == '>') {
597 parseError();
598 return emitAndResumeIn(source, HTMLTokenizer::DataState);
599 } else if (cc == kEndOfFileMarker) {
600 parseError();
601 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
602 } else {
603 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
604 }
605 }
606 END_STATE()
607
608 HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
609 if (isTokenizerWhitespace(cc))
610 HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
611 else if (cc == '>')
612 return emitAndResumeIn(source, HTMLTokenizer::DataState);
613 else if (cc == '"') {
614 parseError();
615 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
616 } else if (cc == '\'') {
617 parseError();
618 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
619 } else if (cc == kEndOfFileMarker) {
620 parseError();
621 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
622 } else {
623 parseError();
624 HTML_ADVANCE_TO(BogusDOCTYPEState);
625 }
626 }
627 END_STATE()
628
629 HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
630 if (isTokenizerWhitespace(cc))
631 HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
632 else if (cc == '>')
633 return emitAndResumeIn(source, HTMLTokenizer::DataState);
634 else if (cc == '"') {
635 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
636 } else if (cc == '\'') {
637 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
638 } else if (cc == kEndOfFileMarker) {
639 parseError();
640 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
641 } else {
642 parseError();
643 HTML_ADVANCE_TO(BogusDOCTYPEState);
644 }
645 }
646 END_STATE()
647
648 HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
649 if (isTokenizerWhitespace(cc))
650 HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
651 else if (cc == '"') {
652 parseError();
653 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
654 } else if (cc == '\'') {
655 parseError();
656 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
657 } else if (cc == '>') {
658 parseError();
659 return emitAndResumeIn(source, HTMLTokenizer::DataState);
660 } else if (cc == kEndOfFileMarker) {
661 parseError();
662 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
663 } else {
664 parseError();
665 HTML_ADVANCE_TO(BogusDOCTYPEState);
666 }
667 }
668 END_STATE()
669
670 HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
671 if (isTokenizerWhitespace(cc))
672 HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
673 else if (cc == '"') {
674 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
675 } else if (cc == '\'') {
676 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
677 } else if (cc == '>') {
678 parseError();
679 return emitAndResumeIn(source, HTMLTokenizer::DataState);
680 } else if (cc == kEndOfFileMarker) {
681 parseError();
682 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
683 } else {
684 parseError();
685 HTML_ADVANCE_TO(BogusDOCTYPEState);
686 }
687 }
688 END_STATE()
689
690 HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
691 if (cc == '"')
692 HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
693 else if (cc == '>') {
694 parseError();
695 return emitAndResumeIn(source, HTMLTokenizer::DataState);
696 } else if (cc == kEndOfFileMarker) {
697 parseError();
698 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
699 } else {
700 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
701 }
702 }
703 END_STATE()
704
705 HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
706 if (cc == '\'')
707 HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
708 else if (cc == '>') {
709 parseError();
710 return emitAndResumeIn(source, HTMLTokenizer::DataState);
711 } else if (cc == kEndOfFileMarker) {
712 parseError();
713 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
714 } else {
715 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
716 }
717 }
718 END_STATE()
719
720 HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
721 if (isTokenizerWhitespace(cc))
722 HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
723 else if (cc == '>')
724 return emitAndResumeIn(source, HTMLTokenizer::DataState);
725 else if (cc == kEndOfFileMarker) {
726 parseError();
727 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
728 } else {
729 parseError();
730 HTML_ADVANCE_TO(BogusDOCTYPEState);
731 }
732 }
733 END_STATE()
734
735 HTML_BEGIN_STATE(BogusDOCTYPEState) {
736 if (cc == '>')
737 return emitAndResumeIn(source, HTMLTokenizer::DataState);
738 else if (cc == kEndOfFileMarker)
739 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
740 HTML_ADVANCE_TO(BogusDOCTYPEState);
741 }
742 END_STATE()
743
// NOTE(review): no other state in this function transitions into the CDATA
// states below; they are only entered if m_state is set to one of them
// somewhere outside this function -- confirm whether that can happen.
744 HTML_BEGIN_STATE(CDATASectionState) {
745 if (cc == ']')
746 HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
747 else if (cc == kEndOfFileMarker)
748 HTML_RECONSUME_IN(DataState);
749 else {
750 m_token->ensureIsCharacterToken();
751 HTML_ADVANCE_TO(CDATASectionState);
752 }
753 }
754 END_STATE()
755
756 HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
757 if (cc == ']')
758 HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
759 else {
760 m_token->ensureIsCharacterToken();
761 HTML_RECONSUME_IN(CDATASectionState);
762 }
763 }
764 END_STATE()
765
766 HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
767 if (cc == '>')
768 HTML_ADVANCE_TO(DataState);
769 else {
770 m_token->ensureIsCharacterToken();
771 HTML_RECONSUME_IN(CDATASectionState);
772 }
773 }
774 END_STATE()
775
776 }
777
778 ASSERT_NOT_REACHED();
779 return false;
780 }
781
// Hook invoked by nextToken() wherever the state machine detects malformed
// input. It currently only calls notImplemented(); no error information is
// recorded or passed on by this function.
782 inline void HTMLTokenizer::parseError()
783 {
784 notImplemented();
785 }
786
787 }
OLDNEW
« no previous file with comments | « ios/third_party/blink/src/html_tokenizer.h ('k') | ios/third_party/blink/src/html_tokenizer_adapter.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698