| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ | 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
| 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 5 * | 5 * |
| 6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
| 8 * are met: | 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
| (...skipping 27 matching lines...) Expand all Loading... |
| 38 | 38 |
| 39 // Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used | 39 // Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used |
| 40 // from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe. | 40 // from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe. |
| 41 #undef DEFINE_STATIC_LOCAL | 41 #undef DEFINE_STATIC_LOCAL |
| 42 | 42 |
| 43 namespace blink { | 43 namespace blink { |
| 44 | 44 |
| 45 using namespace HTMLNames; | 45 using namespace HTMLNames; |
| 46 | 46 |
| 47 static inline UChar toLowerCase(UChar cc) { | 47 static inline UChar toLowerCase(UChar cc) { |
| 48 ASSERT(isASCIIUpper(cc)); | 48 DCHECK(isASCIIUpper(cc)); |
| 49 const int lowerCaseOffset = 0x20; | 49 const int lowerCaseOffset = 0x20; |
| 50 return cc + lowerCaseOffset; | 50 return cc + lowerCaseOffset; |
| 51 } | 51 } |
| 52 | 52 |
| 53 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, | 53 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, |
| 54 const String& string) { | 54 const String& string) { |
| 55 if (vector.size() != string.length()) | 55 if (vector.size() != string.length()) |
| 56 return false; | 56 return false; |
| 57 | 57 |
| 58 if (!string.length()) | 58 if (!string.length()) |
| (...skipping 22 matching lines...) Expand all Loading... |
| 81 m_additionalAllowedCharacter = '\0'; | 81 m_additionalAllowedCharacter = '\0'; |
| 82 } | 82 } |
| 83 | 83 |
| 84 inline bool HTMLTokenizer::processEntity(SegmentedString& source) { | 84 inline bool HTMLTokenizer::processEntity(SegmentedString& source) { |
| 85 bool notEnoughCharacters = false; | 85 bool notEnoughCharacters = false; |
| 86 DecodedHTMLEntity decodedEntity; | 86 DecodedHTMLEntity decodedEntity; |
| 87 bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters); | 87 bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters); |
| 88 if (notEnoughCharacters) | 88 if (notEnoughCharacters) |
| 89 return false; | 89 return false; |
| 90 if (!success) { | 90 if (!success) { |
| 91 ASSERT(decodedEntity.isEmpty()); | 91 DCHECK(decodedEntity.isEmpty()); |
| 92 bufferCharacter('&'); | 92 bufferCharacter('&'); |
| 93 } else { | 93 } else { |
| 94 for (unsigned i = 0; i < decodedEntity.length; ++i) | 94 for (unsigned i = 0; i < decodedEntity.length; ++i) |
| 95 bufferCharacter(decodedEntity.data[i]); | 95 bufferCharacter(decodedEntity.data[i]); |
| 96 } | 96 } |
| 97 return true; | 97 return true; |
| 98 } | 98 } |
| 99 | 99 |
| 100 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) { | 100 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) { |
| 101 ASSERT(m_token->type() == HTMLToken::Character || | 101 DCHECK(m_token->type() == HTMLToken::Character || |
| 102 m_token->type() == HTMLToken::Uninitialized); | 102 m_token->type() == HTMLToken::Uninitialized); |
| 103 source.advanceAndUpdateLineNumber(); | 103 source.advanceAndUpdateLineNumber(); |
| 104 if (m_token->type() == HTMLToken::Character) | 104 if (m_token->type() == HTMLToken::Character) |
| 105 return true; | 105 return true; |
| 106 m_token->beginEndTag(m_bufferedEndTagName); | 106 m_token->beginEndTag(m_bufferedEndTagName); |
| 107 m_bufferedEndTagName.clear(); | 107 m_bufferedEndTagName.clear(); |
| 108 m_appropriateEndTagName.clear(); | 108 m_appropriateEndTagName.clear(); |
| 109 m_temporaryBuffer.clear(); | 109 m_temporaryBuffer.clear(); |
| 110 return false; | 110 return false; |
| 111 } | 111 } |
| (...skipping 12 matching lines...) Expand all Loading... |
| 124 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, | 124 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, |
| 125 HTMLTokenizer::State state) { | 125 HTMLTokenizer::State state) { |
| 126 m_state = state; | 126 m_state = state; |
| 127 flushBufferedEndTag(source); | 127 flushBufferedEndTag(source); |
| 128 return true; | 128 return true; |
| 129 } | 129 } |
| 130 | 130 |
| 131 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) { | 131 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) { |
| 132 // If we have a token in progress, then we're supposed to be called back | 132 // If we have a token in progress, then we're supposed to be called back |
| 133 // with the same token so we can finish it. | 133 // with the same token so we can finish it. |
| 134 ASSERT(!m_token || m_token == &token || | 134 DCHECK(!m_token || m_token == &token || |
| 135 token.type() == HTMLToken::Uninitialized); | 135 token.type() == HTMLToken::Uninitialized); |
| 136 m_token = &token; | 136 m_token = &token; |
| 137 | 137 |
| 138 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { | 138 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { |
| 139 // FIXME: This should call flushBufferedEndTag(). | 139 // FIXME: This should call flushBufferedEndTag(). |
| 140 // We started an end tag during our last iteration. | 140 // We started an end tag during our last iteration. |
| 141 m_token->beginEndTag(m_bufferedEndTagName); | 141 m_token->beginEndTag(m_bufferedEndTagName); |
| 142 m_bufferedEndTagName.clear(); | 142 m_bufferedEndTagName.clear(); |
| 143 m_appropriateEndTagName.clear(); | 143 m_appropriateEndTagName.clear(); |
| 144 m_temporaryBuffer.clear(); | 144 m_temporaryBuffer.clear(); |
| (...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 298 } else { | 298 } else { |
| 299 m_token->appendToName(cc); | 299 m_token->appendToName(cc); |
| 300 HTML_ADVANCE_TO(TagNameState); | 300 HTML_ADVANCE_TO(TagNameState); |
| 301 } | 301 } |
| 302 } | 302 } |
| 303 END_STATE() | 303 END_STATE() |
| 304 | 304 |
| 305 HTML_BEGIN_STATE(RCDATALessThanSignState) { | 305 HTML_BEGIN_STATE(RCDATALessThanSignState) { |
| 306 if (cc == '/') { | 306 if (cc == '/') { |
| 307 m_temporaryBuffer.clear(); | 307 m_temporaryBuffer.clear(); |
| 308 ASSERT(m_bufferedEndTagName.isEmpty()); | 308 DCHECK(m_bufferedEndTagName.isEmpty()); |
| 309 HTML_ADVANCE_TO(RCDATAEndTagOpenState); | 309 HTML_ADVANCE_TO(RCDATAEndTagOpenState); |
| 310 } else { | 310 } else { |
| 311 bufferCharacter('<'); | 311 bufferCharacter('<'); |
| 312 HTML_RECONSUME_IN(RCDATAState); | 312 HTML_RECONSUME_IN(RCDATAState); |
| 313 } | 313 } |
| 314 } | 314 } |
| 315 END_STATE() | 315 END_STATE() |
| 316 | 316 |
| 317 HTML_BEGIN_STATE(RCDATAEndTagOpenState) { | 317 HTML_BEGIN_STATE(RCDATAEndTagOpenState) { |
| 318 if (isASCIIUpper(cc)) { | 318 if (isASCIIUpper(cc)) { |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 363 m_bufferedEndTagName.clear(); | 363 m_bufferedEndTagName.clear(); |
| 364 m_temporaryBuffer.clear(); | 364 m_temporaryBuffer.clear(); |
| 365 HTML_RECONSUME_IN(RCDATAState); | 365 HTML_RECONSUME_IN(RCDATAState); |
| 366 } | 366 } |
| 367 } | 367 } |
| 368 END_STATE() | 368 END_STATE() |
| 369 | 369 |
| 370 HTML_BEGIN_STATE(RAWTEXTLessThanSignState) { | 370 HTML_BEGIN_STATE(RAWTEXTLessThanSignState) { |
| 371 if (cc == '/') { | 371 if (cc == '/') { |
| 372 m_temporaryBuffer.clear(); | 372 m_temporaryBuffer.clear(); |
| 373 ASSERT(m_bufferedEndTagName.isEmpty()); | 373 DCHECK(m_bufferedEndTagName.isEmpty()); |
| 374 HTML_ADVANCE_TO(RAWTEXTEndTagOpenState); | 374 HTML_ADVANCE_TO(RAWTEXTEndTagOpenState); |
| 375 } else { | 375 } else { |
| 376 bufferCharacter('<'); | 376 bufferCharacter('<'); |
| 377 HTML_RECONSUME_IN(RAWTEXTState); | 377 HTML_RECONSUME_IN(RAWTEXTState); |
| 378 } | 378 } |
| 379 } | 379 } |
| 380 END_STATE() | 380 END_STATE() |
| 381 | 381 |
| 382 HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) { | 382 HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) { |
| 383 if (isASCIIUpper(cc)) { | 383 if (isASCIIUpper(cc)) { |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 428 m_bufferedEndTagName.clear(); | 428 m_bufferedEndTagName.clear(); |
| 429 m_temporaryBuffer.clear(); | 429 m_temporaryBuffer.clear(); |
| 430 HTML_RECONSUME_IN(RAWTEXTState); | 430 HTML_RECONSUME_IN(RAWTEXTState); |
| 431 } | 431 } |
| 432 } | 432 } |
| 433 END_STATE() | 433 END_STATE() |
| 434 | 434 |
| 435 HTML_BEGIN_STATE(ScriptDataLessThanSignState) { | 435 HTML_BEGIN_STATE(ScriptDataLessThanSignState) { |
| 436 if (cc == '/') { | 436 if (cc == '/') { |
| 437 m_temporaryBuffer.clear(); | 437 m_temporaryBuffer.clear(); |
| 438 ASSERT(m_bufferedEndTagName.isEmpty()); | 438 DCHECK(m_bufferedEndTagName.isEmpty()); |
| 439 HTML_ADVANCE_TO(ScriptDataEndTagOpenState); | 439 HTML_ADVANCE_TO(ScriptDataEndTagOpenState); |
| 440 } else if (cc == '!') { | 440 } else if (cc == '!') { |
| 441 bufferCharacter('<'); | 441 bufferCharacter('<'); |
| 442 bufferCharacter('!'); | 442 bufferCharacter('!'); |
| 443 HTML_ADVANCE_TO(ScriptDataEscapeStartState); | 443 HTML_ADVANCE_TO(ScriptDataEscapeStartState); |
| 444 } else { | 444 } else { |
| 445 bufferCharacter('<'); | 445 bufferCharacter('<'); |
| 446 HTML_RECONSUME_IN(ScriptDataState); | 446 HTML_RECONSUME_IN(ScriptDataState); |
| 447 } | 447 } |
| 448 } | 448 } |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 566 } else { | 566 } else { |
| 567 bufferCharacter(cc); | 567 bufferCharacter(cc); |
| 568 HTML_ADVANCE_TO(ScriptDataEscapedState); | 568 HTML_ADVANCE_TO(ScriptDataEscapedState); |
| 569 } | 569 } |
| 570 } | 570 } |
| 571 END_STATE() | 571 END_STATE() |
| 572 | 572 |
| 573 HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) { | 573 HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) { |
| 574 if (cc == '/') { | 574 if (cc == '/') { |
| 575 m_temporaryBuffer.clear(); | 575 m_temporaryBuffer.clear(); |
| 576 ASSERT(m_bufferedEndTagName.isEmpty()); | 576 DCHECK(m_bufferedEndTagName.isEmpty()); |
| 577 HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState); | 577 HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState); |
| 578 } else if (isASCIIUpper(cc)) { | 578 } else if (isASCIIUpper(cc)) { |
| 579 bufferCharacter('<'); | 579 bufferCharacter('<'); |
| 580 bufferCharacter(cc); | 580 bufferCharacter(cc); |
| 581 m_temporaryBuffer.clear(); | 581 m_temporaryBuffer.clear(); |
| 582 m_temporaryBuffer.push_back(toLowerCase(cc)); | 582 m_temporaryBuffer.push_back(toLowerCase(cc)); |
| 583 HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState); | 583 HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState); |
| 584 } else if (isASCIILower(cc)) { | 584 } else if (isASCIILower(cc)) { |
| 585 bufferCharacter('<'); | 585 bufferCharacter('<'); |
| 586 bufferCharacter(cc); | 586 bufferCharacter(cc); |
| (...skipping 336 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 923 | 923 |
| 924 HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) { | 924 HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) { |
| 925 bool notEnoughCharacters = false; | 925 bool notEnoughCharacters = false; |
| 926 DecodedHTMLEntity decodedEntity; | 926 DecodedHTMLEntity decodedEntity; |
| 927 bool success = | 927 bool success = |
| 928 consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, | 928 consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, |
| 929 m_additionalAllowedCharacter); | 929 m_additionalAllowedCharacter); |
| 930 if (notEnoughCharacters) | 930 if (notEnoughCharacters) |
| 931 return haveBufferedCharacterToken(); | 931 return haveBufferedCharacterToken(); |
| 932 if (!success) { | 932 if (!success) { |
| 933 ASSERT(decodedEntity.isEmpty()); | 933 DCHECK(decodedEntity.isEmpty()); |
| 934 m_token->appendToAttributeValue('&'); | 934 m_token->appendToAttributeValue('&'); |
| 935 } else { | 935 } else { |
| 936 for (unsigned i = 0; i < decodedEntity.length; ++i) | 936 for (unsigned i = 0; i < decodedEntity.length; ++i) |
| 937 m_token->appendToAttributeValue(decodedEntity.data[i]); | 937 m_token->appendToAttributeValue(decodedEntity.data[i]); |
| 938 } | 938 } |
| 939 // We're supposed to switch back to the attribute value state that | 939 // We're supposed to switch back to the attribute value state that |
| 940 // we were in when we were switched into this state. Rather than | 940 // we were in when we were switched into this state. Rather than |
| 941 // keeping track of this explictly, we observe that the previous | 941 // keeping track of this explictly, we observe that the previous |
| 942 // state can be determined by m_additionalAllowedCharacter. | 942 // state can be determined by m_additionalAllowedCharacter. |
| 943 if (m_additionalAllowedCharacter == '"') | 943 if (m_additionalAllowedCharacter == '"') |
| 944 HTML_SWITCH_TO(AttributeValueDoubleQuotedState); | 944 HTML_SWITCH_TO(AttributeValueDoubleQuotedState); |
| 945 else if (m_additionalAllowedCharacter == '\'') | 945 else if (m_additionalAllowedCharacter == '\'') |
| 946 HTML_SWITCH_TO(AttributeValueSingleQuotedState); | 946 HTML_SWITCH_TO(AttributeValueSingleQuotedState); |
| 947 else if (m_additionalAllowedCharacter == '>') | 947 else if (m_additionalAllowedCharacter == '>') |
| 948 HTML_SWITCH_TO(AttributeValueUnquotedState); | 948 HTML_SWITCH_TO(AttributeValueUnquotedState); |
| 949 else | 949 else |
| 950 ASSERT_NOT_REACHED(); | 950 NOTREACHED(); |
| 951 } | 951 } |
| 952 END_STATE() | 952 END_STATE() |
| 953 | 953 |
| 954 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { | 954 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { |
| 955 if (isTokenizerWhitespace(cc)) | 955 if (isTokenizerWhitespace(cc)) |
| 956 HTML_ADVANCE_TO(BeforeAttributeNameState); | 956 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 957 else if (cc == '/') | 957 else if (cc == '/') |
| 958 HTML_ADVANCE_TO(SelfClosingStartTagState); | 958 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 959 else if (cc == '>') | 959 else if (cc == '>') |
| 960 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 960 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| (...skipping 545 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1506 HTML_ADVANCE_TO(DataState); | 1506 HTML_ADVANCE_TO(DataState); |
| 1507 } else { | 1507 } else { |
| 1508 bufferCharacter(']'); | 1508 bufferCharacter(']'); |
| 1509 bufferCharacter(']'); | 1509 bufferCharacter(']'); |
| 1510 HTML_RECONSUME_IN(CDATASectionState); | 1510 HTML_RECONSUME_IN(CDATASectionState); |
| 1511 } | 1511 } |
| 1512 } | 1512 } |
| 1513 END_STATE() | 1513 END_STATE() |
| 1514 } | 1514 } |
| 1515 | 1515 |
| 1516 ASSERT_NOT_REACHED(); | 1516 NOTREACHED(); |
| 1517 return false; | 1517 return false; |
| 1518 } | 1518 } |
| 1519 | 1519 |
| 1520 String HTMLTokenizer::bufferedCharacters() const { | 1520 String HTMLTokenizer::bufferedCharacters() const { |
| 1521 // FIXME: Add an assert about m_state. | 1521 // FIXME: Add an assert about m_state. |
| 1522 StringBuilder characters; | 1522 StringBuilder characters; |
| 1523 characters.reserveCapacity(numberOfBufferedCharacters()); | 1523 characters.reserveCapacity(numberOfBufferedCharacters()); |
| 1524 characters.append('<'); | 1524 characters.append('<'); |
| 1525 characters.append('/'); | 1525 characters.append('/'); |
| 1526 characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size()); | 1526 characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size()); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 1542 threadSafeMatch(tagName, noframesTag) || | 1542 threadSafeMatch(tagName, noframesTag) || |
| 1543 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled)) | 1543 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled)) |
| 1544 setState(HTMLTokenizer::RAWTEXTState); | 1544 setState(HTMLTokenizer::RAWTEXTState); |
| 1545 } | 1545 } |
| 1546 | 1546 |
| 1547 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) { | 1547 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) { |
| 1548 return vectorEqualsString(m_temporaryBuffer, expectedString); | 1548 return vectorEqualsString(m_temporaryBuffer, expectedString); |
| 1549 } | 1549 } |
| 1550 | 1550 |
| 1551 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) { | 1551 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) { |
| 1552 ASSERT(isEndTagBufferingState(m_state)); | 1552 DCHECK(isEndTagBufferingState(m_state)); |
| 1553 m_bufferedEndTagName.push_back(cc); | 1553 m_bufferedEndTagName.push_back(cc); |
| 1554 } | 1554 } |
| 1555 | 1555 |
| 1556 inline bool HTMLTokenizer::isAppropriateEndTag() { | 1556 inline bool HTMLTokenizer::isAppropriateEndTag() { |
| 1557 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) | 1557 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) |
| 1558 return false; | 1558 return false; |
| 1559 | 1559 |
| 1560 size_t numCharacters = m_bufferedEndTagName.size(); | 1560 size_t numCharacters = m_bufferedEndTagName.size(); |
| 1561 | 1561 |
| 1562 for (size_t i = 0; i < numCharacters; i++) { | 1562 for (size_t i = 0; i < numCharacters; i++) { |
| 1563 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) | 1563 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) |
| 1564 return false; | 1564 return false; |
| 1565 } | 1565 } |
| 1566 | 1566 |
| 1567 return true; | 1567 return true; |
| 1568 } | 1568 } |
| 1569 | 1569 |
| 1570 inline void HTMLTokenizer::parseError() { | 1570 inline void HTMLTokenizer::parseError() { |
| 1571 #if DCHECK_IS_ON() | 1571 #if DCHECK_IS_ON() |
| 1572 DVLOG(1) << "Not implemented."; | 1572 DVLOG(1) << "Not implemented."; |
| 1573 #endif | 1573 #endif |
| 1574 } | 1574 } |
| 1575 | 1575 |
| 1576 } // namespace blink | 1576 } // namespace blink |
| OLD | NEW |