sky/engine/core/html/parser/HTMLTokenizer.h - Issue 678903002: Remove more unused HTMLTokenizer features

Side by Side Diff: sky/engine/core/html/parser/HTMLTokenizer.h

Issue 678903002: Remove more unused HTMLTokenizer features (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.	2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.

3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.	3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.

4 *	4 *

5 * Redistribution and use in source and binary forms, with or without	5 * Redistribution and use in source and binary forms, with or without

6 * modification, are permitted provided that the following conditions	6 * modification, are permitted provided that the following conditions

7 * are met:	7 * are met:

8 * 1. Redistributions of source code must retain the above copyright	8 * 1. Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * 2. Redistributions in binary form must reproduce the above copyright	10 * 2. Redistributions in binary form must reproduce the above copyright

(...skipping 29 matching lines...) Expand all Loading...
40 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); }	40 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); }

41 ~HTMLTokenizer();	41 ~HTMLTokenizer();

42	42

43 void reset();	43 void reset();

44	44

45 enum State {	45 enum State {

46 DataState,	46 DataState,

47 CharacterReferenceInDataState,	47 CharacterReferenceInDataState,

48 RAWTEXTState,	48 RAWTEXTState,

49 ScriptDataState,	49 ScriptDataState,

50 PLAINTEXTState,

51 TagOpenState,	50 TagOpenState,

52 EndTagOpenState,	51 EndTagOpenState,

53 TagNameState,	52 TagNameState,

54 RAWTEXTLessThanSignState,	53 RAWTEXTLessThanSignState,

55 RAWTEXTEndTagOpenState,	54 RAWTEXTEndTagOpenState,

56 RAWTEXTEndTagNameState,	55 RAWTEXTEndTagNameState,

57 ScriptDataLessThanSignState,	56 ScriptDataLessThanSignState,

58 ScriptDataEndTagOpenState,	57 ScriptDataEndTagOpenState,

59 ScriptDataEndTagNameState,	58 ScriptDataEndTagNameState,

60 ScriptDataEscapeStartState,	59 ScriptDataEscapeStartState,

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
93 CommentEndDashState,	92 CommentEndDashState,

94 CommentEndState,	93 CommentEndState,

95 CommentEndBangState,	94 CommentEndBangState,

96 };	95 };

97	96

98 // This function returns true if it emits a token. Otherwise, callers	97 // This function returns true if it emits a token. Otherwise, callers

99 // must provide the same (in progress) token on the next call (unless	98 // must provide the same (in progress) token on the next call (unless

100 // they call reset() first).	99 // they call reset() first).

101 bool nextToken(SegmentedString&, HTMLToken&);	100 bool nextToken(SegmentedString&, HTMLToken&);

102	101

103 // Returns a copy of any characters buffered internally by the tokenizer.

104 // The tokenizer buffers characters when searching for the </script> token

105 // that terminates a script element.

106 String bufferedCharacters() const;

107

108 size_t numberOfBufferedCharacters() const

109 {

110 // Notice that we add 2 to the length of the m_temporaryBuffer to

111 // account for the "</" characters, which are effectively buffered in

112 // the tokenizer's state machine.

113 return m_temporaryBuffer.size() ? m_temporaryBuffer.size() + 2 : 0;

114 }

115

116 // Updates the tokenizer's state according to the given tag name. This is

117 // an approximation of how the tree builder would update the tokenizer's

118 // state. This method is useful for approximating HTML tokenization. To

119 // get exactly the correct tokenization, you need the real tree builder.

120 //

121 // The main failures in the approximation are as follows:

122 //

123 // * The first set of character tokens emitted for a <pre> element might

124 // contain an extra leading newline.

125 // * The replacement of U+0000 with U+FFFD will not be sensitive to the

126 // tree builder's insertion mode.

127 // * CDATA sections in foreign content will be tokenized as bogus comments

128 // instead of as character tokens.

129 //

130 void updateStateFor(const String& tagName);

131

132 State state() const { return m_state; }	102 State state() const { return m_state; }

133 void setState(State state) { m_state = state; }	103 void setState(State state) { m_state = state; }

134	104

135 private:	105 private:

136 HTMLTokenizer();	106 HTMLTokenizer();

137	107

138 inline bool processEntity(SegmentedString&);	108 inline bool processEntity(SegmentedString&);

139	109

140 inline void parseError();	110 inline void parseError();

141	111

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
217	187

218 // We occasionally want to emit both a character token and an end tag	188 // We occasionally want to emit both a character token and an end tag

219 // token (e.g., when lexing script). We buffer the name of the end tag	189 // token (e.g., when lexing script). We buffer the name of the end tag

220 // token here so we remember it next time we re-enter the tokenizer.	190 // token here so we remember it next time we re-enter the tokenizer.

221 Vector<LChar, 32> m_bufferedEndTagName;	191 Vector<LChar, 32> m_bufferedEndTagName;

222 };	192 };

223	193

224 }	194 }

225	195

226 #endif	196 #endif

OLD	NEW

« no previous file with comments | « no previous file | sky/engine/core/html/parser/HTMLTokenizer.cpp » ('j') | no next file with comments »