third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp - Issue 2751483005: Replace ASSERT, ASSERT_NOT_REACHED, and RELEASE_ASSERT in core/html/parser/

Side by Side Diff: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

Issue 2751483005: Replace ASSERT, ASSERT_NOT_REACHED, and RELEASE_ASSERT in core/html/parser/ (Closed)

Patch Set: rebase Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/WebKit/Source/core/html/parser/HTMLTokenizer.h ('k') | third_party/WebKit/Source/core/html/parser/HTMLTreeBuilder.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.	2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.

3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/	3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/

4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.	4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.

5 *	5 *

6 * Redistribution and use in source and binary forms, with or without	6 * Redistribution and use in source and binary forms, with or without

7 * modification, are permitted provided that the following conditions	7 * modification, are permitted provided that the following conditions

8 * are met:	8 * are met:

9 * 1. Redistributions of source code must retain the above copyright	9 * 1. Redistributions of source code must retain the above copyright

10 * notice, this list of conditions and the following disclaimer.	10 * notice, this list of conditions and the following disclaimer.

(...skipping 27 matching lines...) Expand all Loading...
38	38

39 // Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used	39 // Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used

40 // from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe.	40 // from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe.

41 #undef DEFINE_STATIC_LOCAL	41 #undef DEFINE_STATIC_LOCAL

42	42

43 namespace blink {	43 namespace blink {

44	44

45 using namespace HTMLNames;	45 using namespace HTMLNames;

46	46

47 static inline UChar toLowerCase(UChar cc) {	47 static inline UChar toLowerCase(UChar cc) {

48 ASSERT(isASCIIUpper(cc));	48 DCHECK(isASCIIUpper(cc));

49 const int lowerCaseOffset = 0x20;	49 const int lowerCaseOffset = 0x20;

50 return cc + lowerCaseOffset;	50 return cc + lowerCaseOffset;

51 }	51 }

52	52

53 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector,	53 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector,

54 const String& string) {	54 const String& string) {

55 if (vector.size() != string.length())	55 if (vector.size() != string.length())

56 return false;	56 return false;

57	57

58 if (!string.length())	58 if (!string.length())

(...skipping 22 matching lines...) Expand all Loading...
81 m_additionalAllowedCharacter = '\0';	81 m_additionalAllowedCharacter = '\0';

82 }	82 }

83	83

84 inline bool HTMLTokenizer::processEntity(SegmentedString& source) {	84 inline bool HTMLTokenizer::processEntity(SegmentedString& source) {

85 bool notEnoughCharacters = false;	85 bool notEnoughCharacters = false;

86 DecodedHTMLEntity decodedEntity;	86 DecodedHTMLEntity decodedEntity;

87 bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);	87 bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);

88 if (notEnoughCharacters)	88 if (notEnoughCharacters)

89 return false;	89 return false;

90 if (!success) {	90 if (!success) {

91 ASSERT(decodedEntity.isEmpty());	91 DCHECK(decodedEntity.isEmpty());

92 bufferCharacter('&');	92 bufferCharacter('&');

93 } else {	93 } else {

94 for (unsigned i = 0; i < decodedEntity.length; ++i)	94 for (unsigned i = 0; i < decodedEntity.length; ++i)

95 bufferCharacter(decodedEntity.data[i]);	95 bufferCharacter(decodedEntity.data[i]);

96 }	96 }

97 return true;	97 return true;

98 }	98 }

99	99

100 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) {	100 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) {

101 ASSERT(m_token->type() == HTMLToken::Character ||	101 DCHECK(m_token->type() == HTMLToken::Character ||

102 m_token->type() == HTMLToken::Uninitialized);	102 m_token->type() == HTMLToken::Uninitialized);

103 source.advanceAndUpdateLineNumber();	103 source.advanceAndUpdateLineNumber();

104 if (m_token->type() == HTMLToken::Character)	104 if (m_token->type() == HTMLToken::Character)

105 return true;	105 return true;

106 m_token->beginEndTag(m_bufferedEndTagName);	106 m_token->beginEndTag(m_bufferedEndTagName);

107 m_bufferedEndTagName.clear();	107 m_bufferedEndTagName.clear();

108 m_appropriateEndTagName.clear();	108 m_appropriateEndTagName.clear();

109 m_temporaryBuffer.clear();	109 m_temporaryBuffer.clear();

110 return false;	110 return false;

111 }	111 }

(...skipping 12 matching lines...) Expand all Loading...
124 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source,	124 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source,

125 HTMLTokenizer::State state) {	125 HTMLTokenizer::State state) {

126 m_state = state;	126 m_state = state;

127 flushBufferedEndTag(source);	127 flushBufferedEndTag(source);

128 return true;	128 return true;

129 }	129 }

130	130

131 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {	131 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {

132 // If we have a token in progress, then we're supposed to be called back	132 // If we have a token in progress, then we're supposed to be called back

133 // with the same token so we can finish it.	133 // with the same token so we can finish it.

134 ASSERT(!m_token || m_token == &token ||	134 DCHECK(!m_token || m_token == &token ||

135 token.type() == HTMLToken::Uninitialized);	135 token.type() == HTMLToken::Uninitialized);

136 m_token = &token;	136 m_token = &token;

137	137

138 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {	138 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {

139 // FIXME: This should call flushBufferedEndTag().	139 // FIXME: This should call flushBufferedEndTag().

140 // We started an end tag during our last iteration.	140 // We started an end tag during our last iteration.

141 m_token->beginEndTag(m_bufferedEndTagName);	141 m_token->beginEndTag(m_bufferedEndTagName);

142 m_bufferedEndTagName.clear();	142 m_bufferedEndTagName.clear();

143 m_appropriateEndTagName.clear();	143 m_appropriateEndTagName.clear();

144 m_temporaryBuffer.clear();	144 m_temporaryBuffer.clear();

(...skipping 153 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
298 } else {	298 } else {

299 m_token->appendToName(cc);	299 m_token->appendToName(cc);

300 HTML_ADVANCE_TO(TagNameState);	300 HTML_ADVANCE_TO(TagNameState);

301 }	301 }

302 }	302 }

303 END_STATE()	303 END_STATE()

304	304

305 HTML_BEGIN_STATE(RCDATALessThanSignState) {	305 HTML_BEGIN_STATE(RCDATALessThanSignState) {

306 if (cc == '/') {	306 if (cc == '/') {

307 m_temporaryBuffer.clear();	307 m_temporaryBuffer.clear();

308 ASSERT(m_bufferedEndTagName.isEmpty());	308 DCHECK(m_bufferedEndTagName.isEmpty());

309 HTML_ADVANCE_TO(RCDATAEndTagOpenState);	309 HTML_ADVANCE_TO(RCDATAEndTagOpenState);

310 } else {	310 } else {

311 bufferCharacter('<');	311 bufferCharacter('<');

312 HTML_RECONSUME_IN(RCDATAState);	312 HTML_RECONSUME_IN(RCDATAState);

313 }	313 }

314 }	314 }

315 END_STATE()	315 END_STATE()

316	316

317 HTML_BEGIN_STATE(RCDATAEndTagOpenState) {	317 HTML_BEGIN_STATE(RCDATAEndTagOpenState) {

318 if (isASCIIUpper(cc)) {	318 if (isASCIIUpper(cc)) {

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
363 m_bufferedEndTagName.clear();	363 m_bufferedEndTagName.clear();

364 m_temporaryBuffer.clear();	364 m_temporaryBuffer.clear();

365 HTML_RECONSUME_IN(RCDATAState);	365 HTML_RECONSUME_IN(RCDATAState);

366 }	366 }

367 }	367 }

368 END_STATE()	368 END_STATE()

369	369

370 HTML_BEGIN_STATE(RAWTEXTLessThanSignState) {	370 HTML_BEGIN_STATE(RAWTEXTLessThanSignState) {

371 if (cc == '/') {	371 if (cc == '/') {

372 m_temporaryBuffer.clear();	372 m_temporaryBuffer.clear();

373 ASSERT(m_bufferedEndTagName.isEmpty());	373 DCHECK(m_bufferedEndTagName.isEmpty());

374 HTML_ADVANCE_TO(RAWTEXTEndTagOpenState);	374 HTML_ADVANCE_TO(RAWTEXTEndTagOpenState);

375 } else {	375 } else {

376 bufferCharacter('<');	376 bufferCharacter('<');

377 HTML_RECONSUME_IN(RAWTEXTState);	377 HTML_RECONSUME_IN(RAWTEXTState);

378 }	378 }

379 }	379 }

380 END_STATE()	380 END_STATE()

381	381

382 HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) {	382 HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) {

383 if (isASCIIUpper(cc)) {	383 if (isASCIIUpper(cc)) {

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
428 m_bufferedEndTagName.clear();	428 m_bufferedEndTagName.clear();

429 m_temporaryBuffer.clear();	429 m_temporaryBuffer.clear();

430 HTML_RECONSUME_IN(RAWTEXTState);	430 HTML_RECONSUME_IN(RAWTEXTState);

431 }	431 }

432 }	432 }

433 END_STATE()	433 END_STATE()

434	434

435 HTML_BEGIN_STATE(ScriptDataLessThanSignState) {	435 HTML_BEGIN_STATE(ScriptDataLessThanSignState) {

436 if (cc == '/') {	436 if (cc == '/') {

437 m_temporaryBuffer.clear();	437 m_temporaryBuffer.clear();

438 ASSERT(m_bufferedEndTagName.isEmpty());	438 DCHECK(m_bufferedEndTagName.isEmpty());

439 HTML_ADVANCE_TO(ScriptDataEndTagOpenState);	439 HTML_ADVANCE_TO(ScriptDataEndTagOpenState);

440 } else if (cc == '!') {	440 } else if (cc == '!') {

441 bufferCharacter('<');	441 bufferCharacter('<');

442 bufferCharacter('!');	442 bufferCharacter('!');

443 HTML_ADVANCE_TO(ScriptDataEscapeStartState);	443 HTML_ADVANCE_TO(ScriptDataEscapeStartState);

444 } else {	444 } else {

445 bufferCharacter('<');	445 bufferCharacter('<');

446 HTML_RECONSUME_IN(ScriptDataState);	446 HTML_RECONSUME_IN(ScriptDataState);

447 }	447 }

448 }	448 }

(...skipping 117 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
566 } else {	566 } else {

567 bufferCharacter(cc);	567 bufferCharacter(cc);

568 HTML_ADVANCE_TO(ScriptDataEscapedState);	568 HTML_ADVANCE_TO(ScriptDataEscapedState);

569 }	569 }

570 }	570 }

571 END_STATE()	571 END_STATE()

572	572

573 HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) {	573 HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) {

574 if (cc == '/') {	574 if (cc == '/') {

575 m_temporaryBuffer.clear();	575 m_temporaryBuffer.clear();

576 ASSERT(m_bufferedEndTagName.isEmpty());	576 DCHECK(m_bufferedEndTagName.isEmpty());

577 HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState);	577 HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState);

578 } else if (isASCIIUpper(cc)) {	578 } else if (isASCIIUpper(cc)) {

579 bufferCharacter('<');	579 bufferCharacter('<');

580 bufferCharacter(cc);	580 bufferCharacter(cc);

581 m_temporaryBuffer.clear();	581 m_temporaryBuffer.clear();

582 m_temporaryBuffer.push_back(toLowerCase(cc));	582 m_temporaryBuffer.push_back(toLowerCase(cc));

583 HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);	583 HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);

584 } else if (isASCIILower(cc)) {	584 } else if (isASCIILower(cc)) {

585 bufferCharacter('<');	585 bufferCharacter('<');

586 bufferCharacter(cc);	586 bufferCharacter(cc);

(...skipping 336 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
923	923

924 HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {	924 HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {

925 bool notEnoughCharacters = false;	925 bool notEnoughCharacters = false;

926 DecodedHTMLEntity decodedEntity;	926 DecodedHTMLEntity decodedEntity;

927 bool success =	927 bool success =

928 consumeHTMLEntity(source, decodedEntity, notEnoughCharacters,	928 consumeHTMLEntity(source, decodedEntity, notEnoughCharacters,

929 m_additionalAllowedCharacter);	929 m_additionalAllowedCharacter);

930 if (notEnoughCharacters)	930 if (notEnoughCharacters)

931 return haveBufferedCharacterToken();	931 return haveBufferedCharacterToken();

932 if (!success) {	932 if (!success) {

933 ASSERT(decodedEntity.isEmpty());	933 DCHECK(decodedEntity.isEmpty());

934 m_token->appendToAttributeValue('&');	934 m_token->appendToAttributeValue('&');

935 } else {	935 } else {

936 for (unsigned i = 0; i < decodedEntity.length; ++i)	936 for (unsigned i = 0; i < decodedEntity.length; ++i)

937 m_token->appendToAttributeValue(decodedEntity.data[i]);	937 m_token->appendToAttributeValue(decodedEntity.data[i]);

938 }	938 }

939 // We're supposed to switch back to the attribute value state that	939 // We're supposed to switch back to the attribute value state that

940 // we were in when we were switched into this state. Rather than	940 // we were in when we were switched into this state. Rather than

941 // keeping track of this explicitly, we observe that the previous	941 // keeping track of this explicitly, we observe that the previous

942 // state can be determined by m_additionalAllowedCharacter.	942 // state can be determined by m_additionalAllowedCharacter.

943 if (m_additionalAllowedCharacter == '"')	943 if (m_additionalAllowedCharacter == '"')

944 HTML_SWITCH_TO(AttributeValueDoubleQuotedState);	944 HTML_SWITCH_TO(AttributeValueDoubleQuotedState);

945 else if (m_additionalAllowedCharacter == '\'')	945 else if (m_additionalAllowedCharacter == '\'')

946 HTML_SWITCH_TO(AttributeValueSingleQuotedState);	946 HTML_SWITCH_TO(AttributeValueSingleQuotedState);

947 else if (m_additionalAllowedCharacter == '>')	947 else if (m_additionalAllowedCharacter == '>')

948 HTML_SWITCH_TO(AttributeValueUnquotedState);	948 HTML_SWITCH_TO(AttributeValueUnquotedState);

949 else	949 else

950 ASSERT_NOT_REACHED();	950 NOTREACHED();

951 }	951 }

952 END_STATE()	952 END_STATE()

953	953

954 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {	954 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {

955 if (isTokenizerWhitespace(cc))	955 if (isTokenizerWhitespace(cc))

956 HTML_ADVANCE_TO(BeforeAttributeNameState);	956 HTML_ADVANCE_TO(BeforeAttributeNameState);

957 else if (cc == '/')	957 else if (cc == '/')

958 HTML_ADVANCE_TO(SelfClosingStartTagState);	958 HTML_ADVANCE_TO(SelfClosingStartTagState);

959 else if (cc == '>')	959 else if (cc == '>')

960 return emitAndResumeIn(source, HTMLTokenizer::DataState);	960 return emitAndResumeIn(source, HTMLTokenizer::DataState);

(...skipping 545 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1506 HTML_ADVANCE_TO(DataState);	1506 HTML_ADVANCE_TO(DataState);

1507 } else {	1507 } else {

1508 bufferCharacter(']');	1508 bufferCharacter(']');

1509 bufferCharacter(']');	1509 bufferCharacter(']');

1510 HTML_RECONSUME_IN(CDATASectionState);	1510 HTML_RECONSUME_IN(CDATASectionState);

1511 }	1511 }

1512 }	1512 }

1513 END_STATE()	1513 END_STATE()

1514 }	1514 }

1515	1515

1516 ASSERT_NOT_REACHED();	1516 NOTREACHED();

1517 return false;	1517 return false;

1518 }	1518 }

1519	1519

1520 String HTMLTokenizer::bufferedCharacters() const {	1520 String HTMLTokenizer::bufferedCharacters() const {

1521 // FIXME: Add an assert about m_state.	1521 // FIXME: Add an assert about m_state.

1522 StringBuilder characters;	1522 StringBuilder characters;

1523 characters.reserveCapacity(numberOfBufferedCharacters());	1523 characters.reserveCapacity(numberOfBufferedCharacters());

1524 characters.append('<');	1524 characters.append('<');

1525 characters.append('/');	1525 characters.append('/');

1526 characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size());	1526 characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size());

(...skipping 15 matching lines...) Expand all Loading...
1542 threadSafeMatch(tagName, noframesTag) ||	1542 threadSafeMatch(tagName, noframesTag) ||

1543 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled))	1543 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled))

1544 setState(HTMLTokenizer::RAWTEXTState);	1544 setState(HTMLTokenizer::RAWTEXTState);

1545 }	1545 }

1546	1546

1547 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) {	1547 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) {

1548 return vectorEqualsString(m_temporaryBuffer, expectedString);	1548 return vectorEqualsString(m_temporaryBuffer, expectedString);

1549 }	1549 }

1550	1550

1551 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) {	1551 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) {

1552 ASSERT(isEndTagBufferingState(m_state));	1552 DCHECK(isEndTagBufferingState(m_state));

1553 m_bufferedEndTagName.push_back(cc);	1553 m_bufferedEndTagName.push_back(cc);

1554 }	1554 }

1555	1555

1556 inline bool HTMLTokenizer::isAppropriateEndTag() {	1556 inline bool HTMLTokenizer::isAppropriateEndTag() {

1557 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())	1557 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())

1558 return false;	1558 return false;

1559	1559

1560 size_t numCharacters = m_bufferedEndTagName.size();	1560 size_t numCharacters = m_bufferedEndTagName.size();

1561	1561

1562 for (size_t i = 0; i < numCharacters; i++) {	1562 for (size_t i = 0; i < numCharacters; i++) {

1563 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])	1563 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])

1564 return false;	1564 return false;

1565 }	1565 }

1566	1566

1567 return true;	1567 return true;

1568 }	1568 }

1569	1569

1570 inline void HTMLTokenizer::parseError() {	1570 inline void HTMLTokenizer::parseError() {

1571 #if DCHECK_IS_ON()	1571 #if DCHECK_IS_ON()

1572 DVLOG(1) << "Not implemented.";	1572 DVLOG(1) << "Not implemented.";

1573 #endif	1573 #endif

1574 }	1574 }

1575	1575

1576 } // namespace blink	1576 } // namespace blink

OLD	NEW