OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
| 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 5 * |
| 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions |
| 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. |
| 11 * 2. Redistributions in binary form must reproduce the above copyright |
| 12 * notice, this list of conditions and the following disclaimer in the |
| 13 * documentation and/or other materials provided with the distribution. |
| 14 * |
| 15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 */ |
| 27 |
| 28 #include "ios/third_party/blink/src/html_tokenizer.h" |
| 29 |
| 30 #include "html_markup_tokenizer_inlines.h" |
| 31 |
| 32 namespace WebCore { |
| 33 |
// Shorthands that bind the generic tokenizer state macros (BEGIN_STATE,
// RECONSUME_IN, ADVANCE_TO, SWITCH_TO) to HTMLTokenizer, so the state machine
// below only has to name the target state. The generic macros are defined
// outside this file (presumably in html_markup_tokenizer_inlines.h -- confirm).
| 34 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName) |
| 35 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName) |
| 36 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName) |
| 37 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName) |
| 38 |
| 39 HTMLTokenizer::HTMLTokenizer() |
| 40 : m_state(HTMLTokenizer::DataState) |
| 41 , m_token(nullptr) |
| 42 , m_additionalAllowedCharacter('\0') |
| 43 , m_inputStreamPreprocessor(this) |
| 44 { |
| 45 } |
| 46 |
| 47 HTMLTokenizer::~HTMLTokenizer() |
| 48 { |
| 49 } |
| 50 |
| 51 void HTMLTokenizer::reset() |
| 52 { |
| 53 m_state = HTMLTokenizer::DataState; |
| 54 m_token = 0; |
| 55 m_additionalAllowedCharacter = '\0'; |
| 56 } |
| 57 |
| 58 bool HTMLTokenizer::flushBufferedEndTag(CharacterProvider& source) |
| 59 { |
| 60 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLTok
en::Uninitialized); |
| 61 source.next(); |
| 62 if (m_token->type() == HTMLToken::Character) |
| 63 return true; |
| 64 |
| 65 return false; |
| 66 } |
| 67 |
// Sets m_state to |stateName| and calls flushBufferedEndTag(source); if that
// returns true, the enclosing function returns true. Otherwise, when the
// source is empty or the preprocessor cannot peek a character, the enclosing
// function returns haveBufferedCharacterToken(); else the peeked character is
// stored in |cc| and control jumps to the |stateName| label. The expansion
// site must have |source| and |cc| in scope.
// NOTE(review): no use of this macro appears in the visible part of this file.
| 68 #define FLUSH_AND_ADVANCE_TO(stateName) \ |
| 69 do { \ |
| 70 m_state = HTMLTokenizer::stateName; \ |
| 71 if (flushBufferedEndTag(source)) \ |
| 72 return true; \ |
| 73 if (source.isEmpty() \ |
| 74 || !m_inputStreamPreprocessor.peek(source)) \ |
| 75 return haveBufferedCharacterToken(); \ |
| 76 cc = m_inputStreamPreprocessor.nextInputCharacter(); \ |
| 77 goto stateName; \ |
| 78 } while (false) |
| 79 |
// Runs the tokenizer state machine over |source|, building the result in
// |token| (its address is kept in m_token for the duration).
//
// Returns haveBufferedCharacterToken() when no input character can be peeked,
// returns true from DataState when a '<' is seen while a Character token is
// pending, and otherwise returns whatever the emit* helpers return when a
// state finishes a token.
//
// In the visible code only tag names are accumulated (beginStartTag,
// beginEndTag, appendToName). Attribute names and values, comment text and
// DOCTYPE names and identifiers are scanned past without being appended to
// the token.
//
// The HTML_ADVANCE_TO / HTML_SWITCH_TO / HTML_RECONSUME_IN macros expand to
// macros defined outside this file; they presumably update m_state and jump
// to the named state label -- confirm against their definitions.
| 80 bool HTMLTokenizer::nextToken(CharacterProvider& source, HTMLToken& token) |
| 81 { |
| 82 // If we have a token in progress, then we're supposed to be called back |
| 83 // with the same token so we can finish it. |
| 84 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitial
ized); |
| 85 m_token = &token; |
| 86 |
| 87 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) |
| 88 return haveBufferedCharacterToken(); |
| 89 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
| 90 |
| 91 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 |
| 92 switch (m_state) { |
| 93 HTML_BEGIN_STATE(DataState) { |
| 94 if (cc == '<') { |
| 95 if (m_token->type() == HTMLToken::Character) { |
| 96 // We have a bunch of character tokens queued up that we |
| 97 // are emitting lazily here. |
| 98 return true; |
| 99 } |
| 100 HTML_ADVANCE_TO(TagOpenState); |
| 101 } else if (cc == kEndOfFileMarker) |
| 102 return emitEndOfFile(source); |
| 103 else { |
| 104 m_token->ensureIsCharacterToken(); |
| 105 HTML_ADVANCE_TO(DataState); |
| 106 } |
| 107 } |
| 108 END_STATE() |
| 109 |
| 110 HTML_BEGIN_STATE(TagOpenState) { |
| 111 if (cc == '!') |
| 112 HTML_ADVANCE_TO(MarkupDeclarationOpenState); |
| 113 else if (cc == '/') |
| 114 HTML_ADVANCE_TO(EndTagOpenState); |
| 115 else if (isASCIIUpper(cc)) { |
| 116 m_token->beginStartTag(toLowerCase(cc)); |
| 117 HTML_ADVANCE_TO(TagNameState); |
| 118 } else if (isASCIILower(cc)) { |
| 119 m_token->beginStartTag(cc); |
| 120 HTML_ADVANCE_TO(TagNameState); |
| 121 } else if (cc == '?') { |
| 122 parseError(); |
| 123 // The spec consumes the current character before switching |
| 124 // to the bogus comment state, but it's easier to implement |
| 125 // if we reconsume the current character. |
| 126 HTML_RECONSUME_IN(BogusCommentState); |
| 127 } else { |
| 128 parseError(); |
| 129 m_token->ensureIsCharacterToken(); |
| 130 HTML_RECONSUME_IN(DataState); |
| 131 } |
| 132 } |
| 133 END_STATE() |
| 134 |
| 135 HTML_BEGIN_STATE(EndTagOpenState) { |
| 136 if (isASCIIUpper(cc)) { |
| 137 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); |
| 138 HTML_ADVANCE_TO(TagNameState); |
| 139 } else if (isASCIILower(cc)) { |
| 140 m_token->beginEndTag(static_cast<LChar>(cc)); |
| 141 HTML_ADVANCE_TO(TagNameState); |
| 142 } else if (cc == '>') { |
| 143 parseError(); |
| 144 HTML_ADVANCE_TO(DataState); |
| 145 } else if (cc == kEndOfFileMarker) { |
| 146 parseError(); |
| 147 m_token->ensureIsCharacterToken(); |
| 148 HTML_RECONSUME_IN(DataState); |
| 149 } else { |
| 150 parseError(); |
| 151 HTML_RECONSUME_IN(BogusCommentState); |
| 152 } |
| 153 } |
| 154 END_STATE() |
| 155 |
| 156 HTML_BEGIN_STATE(TagNameState) { |
| 157 if (isTokenizerWhitespace(cc)) |
| 158 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 159 else if (cc == '/') |
| 160 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 161 else if (cc == '>') |
| 162 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 163 else if (isASCIIUpper(cc)) { |
| 164 m_token->appendToName(toLowerCase(cc)); |
| 165 HTML_ADVANCE_TO(TagNameState); |
| 166 } else if (cc == kEndOfFileMarker) { |
| 167 parseError(); |
| 168 HTML_RECONSUME_IN(DataState); |
| 169 } else { |
| 170 m_token->appendToName(cc); |
| 171 HTML_ADVANCE_TO(TagNameState); |
| 172 } |
| 173 } |
| 174 END_STATE() |
| 175 |
// From here through SelfClosingStartTagState the attribute states only walk
// over the input; no attribute name or value is stored on the token.
| 176 HTML_BEGIN_STATE(BeforeAttributeNameState) { |
| 177 if (isTokenizerWhitespace(cc)) |
| 178 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 179 else if (cc == '/') |
| 180 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 181 else if (cc == '>') |
| 182 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 183 else if (isASCIIUpper(cc)) { |
| 184 HTML_ADVANCE_TO(AttributeNameState); |
| 185 } else if (cc == kEndOfFileMarker) { |
| 186 parseError(); |
| 187 HTML_RECONSUME_IN(DataState); |
| 188 } else { |
| 189 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') |
| 190 parseError(); |
| 191 HTML_ADVANCE_TO(AttributeNameState); |
| 192 } |
| 193 } |
| 194 END_STATE() |
| 195 |
| 196 HTML_BEGIN_STATE(AttributeNameState) { |
| 197 if (isTokenizerWhitespace(cc)) { |
| 198 HTML_ADVANCE_TO(AfterAttributeNameState); |
| 199 } else if (cc == '/') { |
| 200 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 201 } else if (cc == '=') { |
| 202 HTML_ADVANCE_TO(BeforeAttributeValueState); |
| 203 } else if (cc == '>') { |
| 204 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 205 } else if (isASCIIUpper(cc)) { |
| 206 HTML_ADVANCE_TO(AttributeNameState); |
| 207 } else if (cc == kEndOfFileMarker) { |
| 208 parseError(); |
| 209 HTML_RECONSUME_IN(DataState); |
| 210 } else { |
| 211 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') |
| 212 parseError(); |
| 213 HTML_ADVANCE_TO(AttributeNameState); |
| 214 } |
| 215 } |
| 216 END_STATE() |
| 217 |
| 218 HTML_BEGIN_STATE(AfterAttributeNameState) { |
| 219 if (isTokenizerWhitespace(cc)) |
| 220 HTML_ADVANCE_TO(AfterAttributeNameState); |
| 221 else if (cc == '/') |
| 222 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 223 else if (cc == '=') |
| 224 HTML_ADVANCE_TO(BeforeAttributeValueState); |
| 225 else if (cc == '>') |
| 226 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 227 else if (isASCIIUpper(cc)) { |
| 228 HTML_ADVANCE_TO(AttributeNameState); |
| 229 } else if (cc == kEndOfFileMarker) { |
| 230 parseError(); |
| 231 HTML_RECONSUME_IN(DataState); |
| 232 } else { |
| 233 if (cc == '"' || cc == '\'' || cc == '<') |
| 234 parseError(); |
| 235 HTML_ADVANCE_TO(AttributeNameState); |
| 236 } |
| 237 } |
| 238 END_STATE() |
| 239 |
| 240 HTML_BEGIN_STATE(BeforeAttributeValueState) { |
| 241 if (isTokenizerWhitespace(cc)) |
| 242 HTML_ADVANCE_TO(BeforeAttributeValueState); |
| 243 else if (cc == '"') { |
| 244 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); |
| 245 } else if (cc == '&') { |
| 246 HTML_RECONSUME_IN(AttributeValueUnquotedState); |
| 247 } else if (cc == '\'') { |
| 248 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); |
| 249 } else if (cc == '>') { |
| 250 parseError(); |
| 251 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 252 } else if (cc == kEndOfFileMarker) { |
| 253 parseError(); |
| 254 HTML_RECONSUME_IN(DataState); |
| 255 } else { |
| 256 if (cc == '<' || cc == '=' || cc == '`') |
| 257 parseError(); |
| 258 HTML_ADVANCE_TO(AttributeValueUnquotedState); |
| 259 } |
| 260 } |
| 261 END_STATE() |
| 262 |
| 263 HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) { |
| 264 if (cc == '"') { |
| 265 HTML_ADVANCE_TO(AfterAttributeValueQuotedState); |
| 266 } else if (cc == kEndOfFileMarker) { |
| 267 parseError(); |
| 268 HTML_RECONSUME_IN(DataState); |
| 269 } else { |
| 270 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); |
| 271 } |
| 272 } |
| 273 END_STATE() |
| 274 |
| 275 HTML_BEGIN_STATE(AttributeValueSingleQuotedState) { |
| 276 if (cc == '\'') { |
| 277 HTML_ADVANCE_TO(AfterAttributeValueQuotedState); |
| 278 } else if (cc == kEndOfFileMarker) { |
| 279 parseError(); |
| 280 HTML_RECONSUME_IN(DataState); |
| 281 } else { |
| 282 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); |
| 283 } |
| 284 } |
| 285 END_STATE() |
| 286 |
| 287 HTML_BEGIN_STATE(AttributeValueUnquotedState) { |
| 288 if (isTokenizerWhitespace(cc)) { |
| 289 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 290 } else if (cc == '>') { |
| 291 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 292 } else if (cc == kEndOfFileMarker) { |
| 293 parseError(); |
| 294 HTML_RECONSUME_IN(DataState); |
| 295 } else { |
| 296 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') |
| 297 parseError(); |
| 298 HTML_ADVANCE_TO(AttributeValueUnquotedState); |
| 299 } |
| 300 } |
| 301 END_STATE() |
| 302 |
| 303 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { |
| 304 if (isTokenizerWhitespace(cc)) |
| 305 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 306 else if (cc == '/') |
| 307 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 308 else if (cc == '>') |
| 309 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 310 else if (cc == kEndOfFileMarker) { |
| 311 parseError(); |
| 312 HTML_RECONSUME_IN(DataState); |
| 313 } else { |
| 314 parseError(); |
| 315 HTML_RECONSUME_IN(BeforeAttributeNameState); |
| 316 } |
| 317 } |
| 318 END_STATE() |
| 319 |
| 320 HTML_BEGIN_STATE(SelfClosingStartTagState) { |
| 321 if (cc == '>') { |
| 322 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 323 } else if (cc == kEndOfFileMarker) { |
| 324 parseError(); |
| 325 HTML_RECONSUME_IN(DataState); |
| 326 } else { |
| 327 parseError(); |
| 328 HTML_RECONSUME_IN(BeforeAttributeNameState); |
| 329 } |
| 330 } |
| 331 END_STATE() |
| 332 |
| 333 HTML_BEGIN_STATE(BogusCommentState) { |
| 334 m_token->beginComment(); |
| 335 HTML_RECONSUME_IN(ContinueBogusCommentState); |
| 336 } |
| 337 END_STATE() |
| 338 |
| 339 HTML_BEGIN_STATE(ContinueBogusCommentState) { |
| 340 if (cc == '>') |
| 341 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 342 else if (cc == kEndOfFileMarker) |
| 343 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 344 else { |
| 345 HTML_ADVANCE_TO(ContinueBogusCommentState); |
| 346 } |
| 347 } |
| 348 END_STATE() |
| 349 |
// Only "--" (comment) and a case-insensitive "doctype" are recognised here;
// anything else falls through to the bogus comment path. When fewer bytes
// remain than the keyword needs, the function returns early with
// haveBufferedCharacterToken().
// NOTE(review): nothing in this function transitions into CDATASectionState
// from a non-CDATA state, so those states run only if m_state is set to one
// of them elsewhere -- confirm.
| 350 HTML_BEGIN_STATE(MarkupDeclarationOpenState) { |
| 351 DEFINE_STATIC_LOCAL_STRING(dashDashString, "--"); |
| 352 DEFINE_STATIC_LOCAL_STRING(doctypeString, "doctype"); |
| 353 if (cc == '-') { |
| 354 if (source.startsWith(dashDashString, dashDashStringLength)) { |
| 355 advanceAndASSERT(source, '-'); |
| 356 advanceAndASSERT(source, '-'); |
| 357 m_token->beginComment(); |
| 358 HTML_SWITCH_TO(CommentStartState); |
| 359 } else if (source.remainingBytes() < dashDashStringLength) |
| 360 return haveBufferedCharacterToken(); |
| 361 } else if (cc == 'D' || cc == 'd') { |
| 362 if (source.startsWith(doctypeString, doctypeStringLength, true)) { |
| 363 advanceStringAndASSERTIgnoringCase(source, doctypeString); |
| 364 HTML_SWITCH_TO(DOCTYPEState); |
| 365 } else if (source.remainingBytes() < doctypeStringLength) |
| 366 return haveBufferedCharacterToken(); |
| 367 } |
| 368 parseError(); |
| 369 HTML_RECONSUME_IN(BogusCommentState); |
| 370 } |
| 371 END_STATE() |
| 372 |
| 373 HTML_BEGIN_STATE(CommentStartState) { |
| 374 if (cc == '-') |
| 375 HTML_ADVANCE_TO(CommentStartDashState); |
| 376 else if (cc == '>') { |
| 377 parseError(); |
| 378 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 379 } else if (cc == kEndOfFileMarker) { |
| 380 parseError(); |
| 381 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 382 } else { |
| 383 HTML_ADVANCE_TO(CommentState); |
| 384 } |
| 385 } |
| 386 END_STATE() |
| 387 |
| 388 HTML_BEGIN_STATE(CommentStartDashState) { |
| 389 if (cc == '-') |
| 390 HTML_ADVANCE_TO(CommentEndState); |
| 391 else if (cc == '>') { |
| 392 parseError(); |
| 393 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 394 } else if (cc == kEndOfFileMarker) { |
| 395 parseError(); |
| 396 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 397 } else { |
| 398 HTML_ADVANCE_TO(CommentState); |
| 399 } |
| 400 } |
| 401 END_STATE() |
| 402 |
| 403 HTML_BEGIN_STATE(CommentState) { |
| 404 if (cc == '-') |
| 405 HTML_ADVANCE_TO(CommentEndDashState); |
| 406 else if (cc == kEndOfFileMarker) { |
| 407 parseError(); |
| 408 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 409 } else { |
| 410 HTML_ADVANCE_TO(CommentState); |
| 411 } |
| 412 } |
| 413 END_STATE() |
| 414 |
| 415 HTML_BEGIN_STATE(CommentEndDashState) { |
| 416 if (cc == '-') |
| 417 HTML_ADVANCE_TO(CommentEndState); |
| 418 else if (cc == kEndOfFileMarker) { |
| 419 parseError(); |
| 420 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 421 } else { |
| 422 HTML_ADVANCE_TO(CommentState); |
| 423 } |
| 424 } |
| 425 END_STATE() |
| 426 |
| 427 HTML_BEGIN_STATE(CommentEndState) { |
| 428 if (cc == '>') |
| 429 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 430 else if (cc == '!') { |
| 431 parseError(); |
| 432 HTML_ADVANCE_TO(CommentEndBangState); |
| 433 } else if (cc == '-') { |
| 434 parseError(); |
| 435 HTML_ADVANCE_TO(CommentEndState); |
| 436 } else if (cc == kEndOfFileMarker) { |
| 437 parseError(); |
| 438 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 439 } else { |
| 440 parseError(); |
| 441 HTML_ADVANCE_TO(CommentState); |
| 442 } |
| 443 } |
| 444 END_STATE() |
| 445 |
| 446 HTML_BEGIN_STATE(CommentEndBangState) { |
| 447 if (cc == '-') { |
| 448 HTML_ADVANCE_TO(CommentEndDashState); |
| 449 } else if (cc == '>') |
| 450 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 451 else if (cc == kEndOfFileMarker) { |
| 452 parseError(); |
| 453 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 454 } else { |
| 455 HTML_ADVANCE_TO(CommentState); |
| 456 } |
| 457 } |
| 458 END_STATE() |
| 459 |
| 460 HTML_BEGIN_STATE(DOCTYPEState) { |
| 461 if (isTokenizerWhitespace(cc)) |
| 462 HTML_ADVANCE_TO(BeforeDOCTYPENameState); |
| 463 else if (cc == kEndOfFileMarker) { |
| 464 parseError(); |
| 465 m_token->beginDOCTYPE(); |
| 466 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 467 } else { |
| 468 parseError(); |
| 469 HTML_RECONSUME_IN(BeforeDOCTYPENameState); |
| 470 } |
| 471 } |
| 472 END_STATE() |
| 473 |
| 474 HTML_BEGIN_STATE(BeforeDOCTYPENameState) { |
| 475 if (isTokenizerWhitespace(cc)) |
| 476 HTML_ADVANCE_TO(BeforeDOCTYPENameState); |
| 477 else if (cc == '>') { |
| 478 parseError(); |
| 479 m_token->beginDOCTYPE(); |
| 480 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 481 } else if (cc == kEndOfFileMarker) { |
| 482 parseError(); |
| 483 m_token->beginDOCTYPE(); |
| 484 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 485 } else { |
| 486 m_token->beginDOCTYPE(); |
| 487 HTML_ADVANCE_TO(DOCTYPENameState); |
| 488 } |
| 489 } |
| 490 END_STATE() |
| 491 |
| 492 HTML_BEGIN_STATE(DOCTYPENameState) { |
| 493 if (isTokenizerWhitespace(cc)) |
| 494 HTML_ADVANCE_TO(AfterDOCTYPENameState); |
| 495 else if (cc == '>') |
| 496 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 497 else if (cc == kEndOfFileMarker) { |
| 498 parseError(); |
| 499 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 500 } else { |
| 501 HTML_ADVANCE_TO(DOCTYPENameState); |
| 502 } |
| 503 } |
| 504 END_STATE() |
| 505 |
| 506 HTML_BEGIN_STATE(AfterDOCTYPENameState) { |
| 507 if (isTokenizerWhitespace(cc)) |
| 508 HTML_ADVANCE_TO(AfterDOCTYPENameState); |
// NOTE(review): plain `if` here, where the sibling states use `else if`.
// Equivalent only if HTML_ADVANCE_TO never falls through (presumably it jumps
// to the target state) -- confirm against the macro definition.
| 509 if (cc == '>') |
| 510 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 511 else if (cc == kEndOfFileMarker) { |
| 512 parseError(); |
| 513 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 514 } else { |
| 515 DEFINE_STATIC_LOCAL_STRING(publicString, "public"); |
| 516 DEFINE_STATIC_LOCAL_STRING(systemString, "system"); |
| 517 if (cc == 'P' || cc == 'p') { |
| 518 if (source.startsWith(publicString, publicStringLength, true)) { |
| 519 advanceStringAndASSERTIgnoringCase(source, publicString); |
| 520 HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState); |
| 521 } else if (source.remainingBytes() < publicStringLength) |
| 522 return haveBufferedCharacterToken(); |
| 523 } else if (cc == 'S' || cc == 's') { |
| 524 if (source.startsWith(systemString, systemStringLength, true)) { |
| 525 advanceStringAndASSERTIgnoringCase(source, systemString); |
| 526 HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState); |
| 527 } else if (source.remainingBytes() < systemStringLength) |
| 528 return haveBufferedCharacterToken(); |
| 529 } |
| 530 parseError(); |
| 531 HTML_ADVANCE_TO(BogusDOCTYPEState); |
| 532 } |
| 533 } |
| 534 END_STATE() |
| 535 |
| 536 HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) { |
| 537 if (isTokenizerWhitespace(cc)) |
| 538 HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); |
| 539 else if (cc == '"') { |
| 540 parseError(); |
| 541 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); |
| 542 } else if (cc == '\'') { |
| 543 parseError(); |
| 544 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); |
| 545 } else if (cc == '>') { |
| 546 parseError(); |
| 547 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 548 } else if (cc == kEndOfFileMarker) { |
| 549 parseError(); |
| 550 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 551 } else { |
| 552 parseError(); |
| 553 HTML_ADVANCE_TO(BogusDOCTYPEState); |
| 554 } |
| 555 } |
| 556 END_STATE() |
| 557 |
| 558 HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) { |
| 559 if (isTokenizerWhitespace(cc)) |
| 560 HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); |
| 561 else if (cc == '"') { |
| 562 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); |
| 563 } else if (cc == '\'') { |
| 564 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); |
| 565 } else if (cc == '>') { |
| 566 parseError(); |
| 567 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 568 } else if (cc == kEndOfFileMarker) { |
| 569 parseError(); |
| 570 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 571 } else { |
| 572 parseError(); |
| 573 HTML_ADVANCE_TO(BogusDOCTYPEState); |
| 574 } |
| 575 } |
| 576 END_STATE() |
| 577 |
| 578 HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) { |
| 579 if (cc == '"') |
| 580 HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); |
| 581 else if (cc == '>') { |
| 582 parseError(); |
| 583 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 584 } else if (cc == kEndOfFileMarker) { |
| 585 parseError(); |
| 586 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 587 } else { |
| 588 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); |
| 589 } |
| 590 } |
| 591 END_STATE() |
| 592 |
| 593 HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) { |
| 594 if (cc == '\'') |
| 595 HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); |
| 596 else if (cc == '>') { |
| 597 parseError(); |
| 598 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 599 } else if (cc == kEndOfFileMarker) { |
| 600 parseError(); |
| 601 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 602 } else { |
| 603 HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); |
| 604 } |
| 605 } |
| 606 END_STATE() |
| 607 |
| 608 HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) { |
| 609 if (isTokenizerWhitespace(cc)) |
| 610 HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); |
| 611 else if (cc == '>') |
| 612 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 613 else if (cc == '"') { |
| 614 parseError(); |
| 615 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
| 616 } else if (cc == '\'') { |
| 617 parseError(); |
| 618 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
| 619 } else if (cc == kEndOfFileMarker) { |
| 620 parseError(); |
| 621 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 622 } else { |
| 623 parseError(); |
| 624 HTML_ADVANCE_TO(BogusDOCTYPEState); |
| 625 } |
| 626 } |
| 627 END_STATE() |
| 628 |
| 629 HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) { |
| 630 if (isTokenizerWhitespace(cc)) |
| 631 HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); |
| 632 else if (cc == '>') |
| 633 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 634 else if (cc == '"') { |
| 635 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
| 636 } else if (cc == '\'') { |
| 637 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
| 638 } else if (cc == kEndOfFileMarker) { |
| 639 parseError(); |
| 640 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 641 } else { |
| 642 parseError(); |
| 643 HTML_ADVANCE_TO(BogusDOCTYPEState); |
| 644 } |
| 645 } |
| 646 END_STATE() |
| 647 |
| 648 HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) { |
| 649 if (isTokenizerWhitespace(cc)) |
| 650 HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); |
| 651 else if (cc == '"') { |
| 652 parseError(); |
| 653 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
| 654 } else if (cc == '\'') { |
| 655 parseError(); |
| 656 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
| 657 } else if (cc == '>') { |
| 658 parseError(); |
| 659 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 660 } else if (cc == kEndOfFileMarker) { |
| 661 parseError(); |
| 662 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 663 } else { |
| 664 parseError(); |
| 665 HTML_ADVANCE_TO(BogusDOCTYPEState); |
| 666 } |
| 667 } |
| 668 END_STATE() |
| 669 |
| 670 HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) { |
| 671 if (isTokenizerWhitespace(cc)) |
| 672 HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); |
// NOTE(review): plain `if` again rather than `else if`; same caveat as in
// AfterDOCTYPENameState -- relies on HTML_ADVANCE_TO not falling through.
| 673 if (cc == '"') { |
| 674 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
| 675 } else if (cc == '\'') { |
| 676 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
| 677 } else if (cc == '>') { |
| 678 parseError(); |
| 679 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 680 } else if (cc == kEndOfFileMarker) { |
| 681 parseError(); |
| 682 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 683 } else { |
| 684 parseError(); |
| 685 HTML_ADVANCE_TO(BogusDOCTYPEState); |
| 686 } |
| 687 } |
| 688 END_STATE() |
| 689 |
| 690 HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) { |
| 691 if (cc == '"') |
| 692 HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState); |
| 693 else if (cc == '>') { |
| 694 parseError(); |
| 695 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 696 } else if (cc == kEndOfFileMarker) { |
| 697 parseError(); |
| 698 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 699 } else { |
| 700 HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
| 701 } |
| 702 } |
| 703 END_STATE() |
| 704 |
| 705 HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) { |
| 706 if (cc == '\'') |
| 707 HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState); |
| 708 else if (cc == '>') { |
| 709 parseError(); |
| 710 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 711 } else if (cc == kEndOfFileMarker) { |
| 712 parseError(); |
| 713 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 714 } else { |
| 715 HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
| 716 } |
| 717 } |
| 718 END_STATE() |
| 719 |
| 720 HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) { |
| 721 if (isTokenizerWhitespace(cc)) |
| 722 HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState); |
| 723 else if (cc == '>') |
| 724 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 725 else if (cc == kEndOfFileMarker) { |
| 726 parseError(); |
| 727 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 728 } else { |
| 729 parseError(); |
| 730 HTML_ADVANCE_TO(BogusDOCTYPEState); |
| 731 } |
| 732 } |
| 733 END_STATE() |
| 734 |
| 735 HTML_BEGIN_STATE(BogusDOCTYPEState) { |
| 736 if (cc == '>') |
| 737 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 738 else if (cc == kEndOfFileMarker) |
| 739 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
| 740 HTML_ADVANCE_TO(BogusDOCTYPEState); |
| 741 } |
| 742 END_STATE() |
| 743 |
| 744 HTML_BEGIN_STATE(CDATASectionState) { |
| 745 if (cc == ']') |
| 746 HTML_ADVANCE_TO(CDATASectionRightSquareBracketState); |
| 747 else if (cc == kEndOfFileMarker) |
| 748 HTML_RECONSUME_IN(DataState); |
| 749 else { |
| 750 m_token->ensureIsCharacterToken(); |
| 751 HTML_ADVANCE_TO(CDATASectionState); |
| 752 } |
| 753 } |
| 754 END_STATE() |
| 755 |
| 756 HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) { |
| 757 if (cc == ']') |
| 758 HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState); |
| 759 else { |
| 760 m_token->ensureIsCharacterToken(); |
| 761 HTML_RECONSUME_IN(CDATASectionState); |
| 762 } |
| 763 } |
| 764 END_STATE() |
| 765 |
| 766 HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) { |
| 767 if (cc == '>') |
| 768 HTML_ADVANCE_TO(DataState); |
| 769 else { |
| 770 m_token->ensureIsCharacterToken(); |
| 771 HTML_RECONSUME_IN(CDATASectionState); |
| 772 } |
| 773 } |
| 774 END_STATE() |
| 775 |
| 776 } |
| 777 |
// Every state is expected to leave via a return or a state-transition macro,
// so reaching this point is treated as a logic error.
| 778 ASSERT_NOT_REACHED(); |
| 779 return false; |
| 780 } |
| 781 |
// Hook the state machine calls wherever it detects a parse error. The body
// only forwards to notImplemented(); no error information is stored here.
| 782 inline void HTMLTokenizer::parseError() |
| 783 { |
| 784 notImplemented(); |
| 785 } |
| 786 |
| 787 } |
OLD | NEW |