OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ | 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
5 * | 5 * |
6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
8 * are met: | 8 * are met: |
9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
(...skipping 27 matching lines...) Expand all Loading... |
38 | 38 |
39 // Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used | 39 // Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used |
40 // from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe. | 40 // from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe. |
41 #undef DEFINE_STATIC_LOCAL | 41 #undef DEFINE_STATIC_LOCAL |
42 | 42 |
43 namespace blink { | 43 namespace blink { |
44 | 44 |
45 using namespace HTMLNames; | 45 using namespace HTMLNames; |
46 | 46 |
// Maps an ASCII uppercase code unit to its lowercase counterpart by adding
// the fixed offset 0x20. The caller must pass an uppercase ASCII character;
// that precondition is only asserted (ASSERT in the OLD column, DCHECK in
// the NEW column), not handled.
47 static inline UChar toLowerCase(UChar cc) { | 47 static inline UChar toLowerCase(UChar cc) { |
48 ASSERT(isASCIIUpper(cc)); | 48 DCHECK(isASCIIUpper(cc)); |
49 const int lowerCaseOffset = 0x20; | 49 const int lowerCaseOffset = 0x20; |
50 return cc + lowerCaseOffset; | 50 return cc + lowerCaseOffset; |
51 } | 51 } |
52 | 52 |
53 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, | 53 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, |
54 const String& string) { | 54 const String& string) { |
55 if (vector.size() != string.length()) | 55 if (vector.size() != string.length()) |
56 return false; | 56 return false; |
57 | 57 |
58 if (!string.length()) | 58 if (!string.length()) |
(...skipping 22 matching lines...) Expand all Loading... |
81 m_additionalAllowedCharacter = '\0'; | 81 m_additionalAllowedCharacter = '\0'; |
82 } | 82 } |
83 | 83 |
// Attempts to consume one HTML entity from |source| through
// consumeHTMLEntity() and buffers the outcome with bufferCharacter().
// Returns false, buffering nothing, when consumeHTMLEntity() reports
// notEnoughCharacters. Returns true in every other case.
84 inline bool HTMLTokenizer::processEntity(SegmentedString& source) { | 84 inline bool HTMLTokenizer::processEntity(SegmentedString& source) { |
85 bool notEnoughCharacters = false; | 85 bool notEnoughCharacters = false; |
86 DecodedHTMLEntity decodedEntity; | 86 DecodedHTMLEntity decodedEntity; |
87 bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters); | 87 bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters); |
88 if (notEnoughCharacters) | 88 if (notEnoughCharacters) |
89 return false; | 89 return false; |
90 if (!success) { | 90 if (!success) { |
// No entity was decoded: the decoded result is asserted to be empty and a
// literal ampersand is buffered in its place.
91 ASSERT(decodedEntity.isEmpty()); | 91 DCHECK(decodedEntity.isEmpty()); |
92 bufferCharacter('&'); | 92 bufferCharacter('&'); |
93 } else { | 93 } else { |
// An entity was decoded: buffer each of its decodedEntity.length elements.
94 for (unsigned i = 0; i < decodedEntity.length; ++i) | 94 for (unsigned i = 0; i < decodedEntity.length; ++i) |
95 bufferCharacter(decodedEntity.data[i]); | 95 bufferCharacter(decodedEntity.data[i]); |
96 } | 96 } |
97 return true; | 97 return true; |
98 } | 98 } |
99 | 99 |
// Advances |source| by one position (updating the line number) and then
// either reports a pending character token or starts the buffered end tag.
// Asserts that the current token is of type Character or Uninitialized.
// Returns true when the current token is a Character token; in that case
// the buffered end tag name is left untouched. Returns false after beginning
// an end tag from m_bufferedEndTagName and clearing m_bufferedEndTagName,
// m_appropriateEndTagName and m_temporaryBuffer.
100 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) { | 100 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) { |
101 ASSERT(m_token->type() == HTMLToken::Character || | 101 DCHECK(m_token->type() == HTMLToken::Character || |
102 m_token->type() == HTMLToken::Uninitialized); | 102 m_token->type() == HTMLToken::Uninitialized); |
103 source.advanceAndUpdateLineNumber(); | 103 source.advanceAndUpdateLineNumber(); |
104 if (m_token->type() == HTMLToken::Character) | 104 if (m_token->type() == HTMLToken::Character) |
105 return true; | 105 return true; |
106 m_token->beginEndTag(m_bufferedEndTagName); | 106 m_token->beginEndTag(m_bufferedEndTagName); |
107 m_bufferedEndTagName.clear(); | 107 m_bufferedEndTagName.clear(); |
108 m_appropriateEndTagName.clear(); | 108 m_appropriateEndTagName.clear(); |
109 m_temporaryBuffer.clear(); | 109 m_temporaryBuffer.clear(); |
110 return false; | 110 return false; |
111 } | 111 } |
(...skipping 12 matching lines...) Expand all Loading... |
// Stores |state| in m_state, then calls flushBufferedEndTag(source).
// The result of flushBufferedEndTag() is discarded; this function always
// returns true.
124 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, | 124 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, |
125 HTMLTokenizer::State state) { | 125 HTMLTokenizer::State state) { |
126 m_state = state; | 126 m_state = state; |
127 flushBufferedEndTag(source); | 127 flushBufferedEndTag(source); |
128 return true; | 128 return true; |
129 } | 129 } |
130 | 130 |
131 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) { | 131 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) { |
132 // If we have a token in progress, then we're supposed to be called back | 132 // If we have a token in progress, then we're supposed to be called back |
133 // with the same token so we can finish it. | 133 // with the same token so we can finish it. |
134 ASSERT(!m_token || m_token == &token || | 134 DCHECK(!m_token || m_token == &token || |
135 token.type() == HTMLToken::Uninitialized); | 135 token.type() == HTMLToken::Uninitialized); |
136 m_token = &token; | 136 m_token = &token; |
137 | 137 |
138 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { | 138 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { |
139 // FIXME: This should call flushBufferedEndTag(). | 139 // FIXME: This should call flushBufferedEndTag(). |
140 // We started an end tag during our last iteration. | 140 // We started an end tag during our last iteration. |
141 m_token->beginEndTag(m_bufferedEndTagName); | 141 m_token->beginEndTag(m_bufferedEndTagName); |
142 m_bufferedEndTagName.clear(); | 142 m_bufferedEndTagName.clear(); |
143 m_appropriateEndTagName.clear(); | 143 m_appropriateEndTagName.clear(); |
144 m_temporaryBuffer.clear(); | 144 m_temporaryBuffer.clear(); |
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
298 } else { | 298 } else { |
299 m_token->appendToName(cc); | 299 m_token->appendToName(cc); |
300 HTML_ADVANCE_TO(TagNameState); | 300 HTML_ADVANCE_TO(TagNameState); |
301 } | 301 } |
302 } | 302 } |
303 END_STATE() | 303 END_STATE() |
304 | 304 |
305 HTML_BEGIN_STATE(RCDATALessThanSignState) { | 305 HTML_BEGIN_STATE(RCDATALessThanSignState) { |
306 if (cc == '/') { | 306 if (cc == '/') { |
307 m_temporaryBuffer.clear(); | 307 m_temporaryBuffer.clear(); |
308 ASSERT(m_bufferedEndTagName.isEmpty()); | 308 DCHECK(m_bufferedEndTagName.isEmpty()); |
309 HTML_ADVANCE_TO(RCDATAEndTagOpenState); | 309 HTML_ADVANCE_TO(RCDATAEndTagOpenState); |
310 } else { | 310 } else { |
311 bufferCharacter('<'); | 311 bufferCharacter('<'); |
312 HTML_RECONSUME_IN(RCDATAState); | 312 HTML_RECONSUME_IN(RCDATAState); |
313 } | 313 } |
314 } | 314 } |
315 END_STATE() | 315 END_STATE() |
316 | 316 |
317 HTML_BEGIN_STATE(RCDATAEndTagOpenState) { | 317 HTML_BEGIN_STATE(RCDATAEndTagOpenState) { |
318 if (isASCIIUpper(cc)) { | 318 if (isASCIIUpper(cc)) { |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
363 m_bufferedEndTagName.clear(); | 363 m_bufferedEndTagName.clear(); |
364 m_temporaryBuffer.clear(); | 364 m_temporaryBuffer.clear(); |
365 HTML_RECONSUME_IN(RCDATAState); | 365 HTML_RECONSUME_IN(RCDATAState); |
366 } | 366 } |
367 } | 367 } |
368 END_STATE() | 368 END_STATE() |
369 | 369 |
370 HTML_BEGIN_STATE(RAWTEXTLessThanSignState) { | 370 HTML_BEGIN_STATE(RAWTEXTLessThanSignState) { |
371 if (cc == '/') { | 371 if (cc == '/') { |
372 m_temporaryBuffer.clear(); | 372 m_temporaryBuffer.clear(); |
373 ASSERT(m_bufferedEndTagName.isEmpty()); | 373 DCHECK(m_bufferedEndTagName.isEmpty()); |
374 HTML_ADVANCE_TO(RAWTEXTEndTagOpenState); | 374 HTML_ADVANCE_TO(RAWTEXTEndTagOpenState); |
375 } else { | 375 } else { |
376 bufferCharacter('<'); | 376 bufferCharacter('<'); |
377 HTML_RECONSUME_IN(RAWTEXTState); | 377 HTML_RECONSUME_IN(RAWTEXTState); |
378 } | 378 } |
379 } | 379 } |
380 END_STATE() | 380 END_STATE() |
381 | 381 |
382 HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) { | 382 HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) { |
383 if (isASCIIUpper(cc)) { | 383 if (isASCIIUpper(cc)) { |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
428 m_bufferedEndTagName.clear(); | 428 m_bufferedEndTagName.clear(); |
429 m_temporaryBuffer.clear(); | 429 m_temporaryBuffer.clear(); |
430 HTML_RECONSUME_IN(RAWTEXTState); | 430 HTML_RECONSUME_IN(RAWTEXTState); |
431 } | 431 } |
432 } | 432 } |
433 END_STATE() | 433 END_STATE() |
434 | 434 |
435 HTML_BEGIN_STATE(ScriptDataLessThanSignState) { | 435 HTML_BEGIN_STATE(ScriptDataLessThanSignState) { |
436 if (cc == '/') { | 436 if (cc == '/') { |
437 m_temporaryBuffer.clear(); | 437 m_temporaryBuffer.clear(); |
438 ASSERT(m_bufferedEndTagName.isEmpty()); | 438 DCHECK(m_bufferedEndTagName.isEmpty()); |
439 HTML_ADVANCE_TO(ScriptDataEndTagOpenState); | 439 HTML_ADVANCE_TO(ScriptDataEndTagOpenState); |
440 } else if (cc == '!') { | 440 } else if (cc == '!') { |
441 bufferCharacter('<'); | 441 bufferCharacter('<'); |
442 bufferCharacter('!'); | 442 bufferCharacter('!'); |
443 HTML_ADVANCE_TO(ScriptDataEscapeStartState); | 443 HTML_ADVANCE_TO(ScriptDataEscapeStartState); |
444 } else { | 444 } else { |
445 bufferCharacter('<'); | 445 bufferCharacter('<'); |
446 HTML_RECONSUME_IN(ScriptDataState); | 446 HTML_RECONSUME_IN(ScriptDataState); |
447 } | 447 } |
448 } | 448 } |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
566 } else { | 566 } else { |
567 bufferCharacter(cc); | 567 bufferCharacter(cc); |
568 HTML_ADVANCE_TO(ScriptDataEscapedState); | 568 HTML_ADVANCE_TO(ScriptDataEscapedState); |
569 } | 569 } |
570 } | 570 } |
571 END_STATE() | 571 END_STATE() |
572 | 572 |
573 HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) { | 573 HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) { |
574 if (cc == '/') { | 574 if (cc == '/') { |
575 m_temporaryBuffer.clear(); | 575 m_temporaryBuffer.clear(); |
576 ASSERT(m_bufferedEndTagName.isEmpty()); | 576 DCHECK(m_bufferedEndTagName.isEmpty()); |
577 HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState); | 577 HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState); |
578 } else if (isASCIIUpper(cc)) { | 578 } else if (isASCIIUpper(cc)) { |
579 bufferCharacter('<'); | 579 bufferCharacter('<'); |
580 bufferCharacter(cc); | 580 bufferCharacter(cc); |
581 m_temporaryBuffer.clear(); | 581 m_temporaryBuffer.clear(); |
582 m_temporaryBuffer.push_back(toLowerCase(cc)); | 582 m_temporaryBuffer.push_back(toLowerCase(cc)); |
583 HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState); | 583 HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState); |
584 } else if (isASCIILower(cc)) { | 584 } else if (isASCIILower(cc)) { |
585 bufferCharacter('<'); | 585 bufferCharacter('<'); |
586 bufferCharacter(cc); | 586 bufferCharacter(cc); |
(...skipping 336 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
923 | 923 |
924 HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) { | 924 HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) { |
925 bool notEnoughCharacters = false; | 925 bool notEnoughCharacters = false; |
926 DecodedHTMLEntity decodedEntity; | 926 DecodedHTMLEntity decodedEntity; |
927 bool success = | 927 bool success = |
928 consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, | 928 consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, |
929 m_additionalAllowedCharacter); | 929 m_additionalAllowedCharacter); |
930 if (notEnoughCharacters) | 930 if (notEnoughCharacters) |
931 return haveBufferedCharacterToken(); | 931 return haveBufferedCharacterToken(); |
932 if (!success) { | 932 if (!success) { |
933 ASSERT(decodedEntity.isEmpty()); | 933 DCHECK(decodedEntity.isEmpty()); |
934 m_token->appendToAttributeValue('&'); | 934 m_token->appendToAttributeValue('&'); |
935 } else { | 935 } else { |
936 for (unsigned i = 0; i < decodedEntity.length; ++i) | 936 for (unsigned i = 0; i < decodedEntity.length; ++i) |
937 m_token->appendToAttributeValue(decodedEntity.data[i]); | 937 m_token->appendToAttributeValue(decodedEntity.data[i]); |
938 } | 938 } |
939 // We're supposed to switch back to the attribute value state that | 939 // We're supposed to switch back to the attribute value state that |
940 // we were in when we were switched into this state. Rather than | 940 // we were in when we were switched into this state. Rather than |
941 // keeping track of this explicitly, we observe that the previous | 941 // keeping track of this explicitly, we observe that the previous |
942 // state can be determined by m_additionalAllowedCharacter. | 942 // state can be determined by m_additionalAllowedCharacter. |
943 if (m_additionalAllowedCharacter == '"') | 943 if (m_additionalAllowedCharacter == '"') |
944 HTML_SWITCH_TO(AttributeValueDoubleQuotedState); | 944 HTML_SWITCH_TO(AttributeValueDoubleQuotedState); |
945 else if (m_additionalAllowedCharacter == '\'') | 945 else if (m_additionalAllowedCharacter == '\'') |
946 HTML_SWITCH_TO(AttributeValueSingleQuotedState); | 946 HTML_SWITCH_TO(AttributeValueSingleQuotedState); |
947 else if (m_additionalAllowedCharacter == '>') | 947 else if (m_additionalAllowedCharacter == '>') |
948 HTML_SWITCH_TO(AttributeValueUnquotedState); | 948 HTML_SWITCH_TO(AttributeValueUnquotedState); |
949 else | 949 else |
950 ASSERT_NOT_REACHED(); | 950 NOTREACHED(); |
951 } | 951 } |
952 END_STATE() | 952 END_STATE() |
953 | 953 |
954 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { | 954 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { |
955 if (isTokenizerWhitespace(cc)) | 955 if (isTokenizerWhitespace(cc)) |
956 HTML_ADVANCE_TO(BeforeAttributeNameState); | 956 HTML_ADVANCE_TO(BeforeAttributeNameState); |
957 else if (cc == '/') | 957 else if (cc == '/') |
958 HTML_ADVANCE_TO(SelfClosingStartTagState); | 958 HTML_ADVANCE_TO(SelfClosingStartTagState); |
959 else if (cc == '>') | 959 else if (cc == '>') |
960 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 960 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
(...skipping 545 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1506 HTML_ADVANCE_TO(DataState); | 1506 HTML_ADVANCE_TO(DataState); |
1507 } else { | 1507 } else { |
1508 bufferCharacter(']'); | 1508 bufferCharacter(']'); |
1509 bufferCharacter(']'); | 1509 bufferCharacter(']'); |
1510 HTML_RECONSUME_IN(CDATASectionState); | 1510 HTML_RECONSUME_IN(CDATASectionState); |
1511 } | 1511 } |
1512 } | 1512 } |
1513 END_STATE() | 1513 END_STATE() |
1514 } | 1514 } |
1515 | 1515 |
1516 ASSERT_NOT_REACHED(); | 1516 NOTREACHED(); |
1517 return false; | 1517 return false; |
1518 } | 1518 } |
1519 | 1519 |
1520 String HTMLTokenizer::bufferedCharacters() const { | 1520 String HTMLTokenizer::bufferedCharacters() const { |
1521 // FIXME: Add an assert about m_state. | 1521 // FIXME: Add an assert about m_state. |
1522 StringBuilder characters; | 1522 StringBuilder characters; |
1523 characters.reserveCapacity(numberOfBufferedCharacters()); | 1523 characters.reserveCapacity(numberOfBufferedCharacters()); |
1524 characters.append('<'); | 1524 characters.append('<'); |
1525 characters.append('/'); | 1525 characters.append('/'); |
1526 characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size()); | 1526 characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size()); |
(...skipping 15 matching lines...) Expand all Loading... |
1542 threadSafeMatch(tagName, noframesTag) || | 1542 threadSafeMatch(tagName, noframesTag) || |
1543 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled)) | 1543 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled)) |
1544 setState(HTMLTokenizer::RAWTEXTState); | 1544 setState(HTMLTokenizer::RAWTEXTState); |
1545 } | 1545 } |
1546 | 1546 |
// Returns the result of comparing m_temporaryBuffer against |expectedString|
// with vectorEqualsString().
1547 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) { | 1547 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) { |
1548 return vectorEqualsString(m_temporaryBuffer, expectedString); | 1548 return vectorEqualsString(m_temporaryBuffer, expectedString); |
1549 } | 1549 } |
1550 | 1550 |
// Appends |cc| to m_bufferedEndTagName. The tokenizer is asserted to be in a
// state accepted by isEndTagBufferingState() (ASSERT in the OLD column,
// DCHECK in the NEW column).
1551 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) { | 1551 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) { |
1552 ASSERT(isEndTagBufferingState(m_state)); | 1552 DCHECK(isEndTagBufferingState(m_state)); |
1553 m_bufferedEndTagName.push_back(cc); | 1553 m_bufferedEndTagName.push_back(cc); |
1554 } | 1554 } |
1555 | 1555 |
// Returns true only when m_bufferedEndTagName and m_appropriateEndTagName
// have the same size and hold equal elements at every index. The comparison
// is exact, element by element, with no case folding performed here.
1556 inline bool HTMLTokenizer::isAppropriateEndTag() { | 1556 inline bool HTMLTokenizer::isAppropriateEndTag() { |
// Different lengths can never match, so bail out before the element scan.
1557 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) | 1557 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) |
1558 return false; | 1558 return false; |
1559 | 1559 |
1560 size_t numCharacters = m_bufferedEndTagName.size(); | 1560 size_t numCharacters = m_bufferedEndTagName.size(); |
1561 | 1561 |
1562 for (size_t i = 0; i < numCharacters; i++) { | 1562 for (size_t i = 0; i < numCharacters; i++) { |
1563 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) | 1563 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) |
1564 return false; | 1564 return false; |
1565 } | 1565 } |
1566 | 1566 |
1567 return true; | 1567 return true; |
1568 } | 1568 } |
1569 | 1569 |
// Parse-error hook. When DCHECK_IS_ON() is true it emits a not-implemented
// message through DVLOG(1); otherwise the body is empty. No tokenizer state
// is read or modified here.
1570 inline void HTMLTokenizer::parseError() { | 1570 inline void HTMLTokenizer::parseError() { |
1571 #if DCHECK_IS_ON() | 1571 #if DCHECK_IS_ON() |
1572 DVLOG(1) << "Not implemented."; | 1572 DVLOG(1) << "Not implemented."; |
1573 #endif | 1573 #endif |
1574 } | 1574 } |
1575 | 1575 |
1576 } // namespace blink | 1576 } // namespace blink |
OLD | NEW |