Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(155)

Side by Side Diff: sky/engine/core/html/parser/HTMLTokenizer.h

Issue 678903002: Remove more unused HTMLTokenizer features (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | sky/engine/core/html/parser/HTMLTokenizer.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 29 matching lines...) Expand all
40 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } 40 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); }
41 ~HTMLTokenizer(); 41 ~HTMLTokenizer();
42 42
43 void reset(); 43 void reset();
44 44
45 enum State { 45 enum State {
46 DataState, 46 DataState,
47 CharacterReferenceInDataState, 47 CharacterReferenceInDataState,
48 RAWTEXTState, 48 RAWTEXTState,
49 ScriptDataState, 49 ScriptDataState,
50 PLAINTEXTState,
51 TagOpenState, 50 TagOpenState,
52 EndTagOpenState, 51 EndTagOpenState,
53 TagNameState, 52 TagNameState,
54 RAWTEXTLessThanSignState, 53 RAWTEXTLessThanSignState,
55 RAWTEXTEndTagOpenState, 54 RAWTEXTEndTagOpenState,
56 RAWTEXTEndTagNameState, 55 RAWTEXTEndTagNameState,
57 ScriptDataLessThanSignState, 56 ScriptDataLessThanSignState,
58 ScriptDataEndTagOpenState, 57 ScriptDataEndTagOpenState,
59 ScriptDataEndTagNameState, 58 ScriptDataEndTagNameState,
60 ScriptDataEscapeStartState, 59 ScriptDataEscapeStartState,
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 CommentEndDashState, 92 CommentEndDashState,
94 CommentEndState, 93 CommentEndState,
95 CommentEndBangState, 94 CommentEndBangState,
96 }; 95 };
97 96
98 // This function returns true if it emits a token. Otherwise, callers 97 // This function returns true if it emits a token. Otherwise, callers
99 // must provide the same (in progress) token on the next call (unless 98 // must provide the same (in progress) token on the next call (unless
100 // they call reset() first). 99 // they call reset() first).
101 bool nextToken(SegmentedString&, HTMLToken&); 100 bool nextToken(SegmentedString&, HTMLToken&);
102 101
103 // Returns a copy of any characters buffered internally by the tokenizer.
104 // The tokenizer buffers characters when searching for the </script> token
105 // that terminates a script element.
106 String bufferedCharacters() const;
107
108 size_t numberOfBufferedCharacters() const
109 {
110 // Notice that we add 2 to the length of the m_temporaryBuffer to
111 // account for the "</" characters, which are effectively buffered in
112 // the tokenizer's state machine.
113 return m_temporaryBuffer.size() ? m_temporaryBuffer.size() + 2 : 0;
114 }
115
116 // Updates the tokenizer's state according to the given tag name. This is
117 // an approximation of how the tree builder would update the tokenizer's
118 // state. This method is useful for approximating HTML tokenization. To
119 // get exactly the correct tokenization, you need the real tree builder.
120 //
121 // The main failures in the approximation are as follows:
122 //
123 // * The first set of character tokens emitted for a <pre> element might
124 // contain an extra leading newline.
125 // * The replacement of U+0000 with U+FFFD will not be sensitive to the
126 // tree builder's insertion mode.
127 // * CDATA sections in foreign content will be tokenized as bogus comments
128 // instead of as character tokens.
129 //
130 void updateStateFor(const String& tagName);
131
132 State state() const { return m_state; } 102 State state() const { return m_state; }
133 void setState(State state) { m_state = state; } 103 void setState(State state) { m_state = state; }
134 104
135 private: 105 private:
136 HTMLTokenizer(); 106 HTMLTokenizer();
137 107
138 inline bool processEntity(SegmentedString&); 108 inline bool processEntity(SegmentedString&);
139 109
140 inline void parseError(); 110 inline void parseError();
141 111
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
217 187
218 // We occasionally want to emit both a character token and an end tag 188 // We occasionally want to emit both a character token and an end tag
219 // token (e.g., when lexing script). We buffer the name of the end tag 189 // token (e.g., when lexing script). We buffer the name of the end tag
220 // token here so we remember it next time we re-enter the tokenizer. 190 // token here so we remember it next time we re-enter the tokenizer.
221 Vector<LChar, 32> m_bufferedEndTagName; 191 Vector<LChar, 32> m_bufferedEndTagName;
222 }; 192 };
223 193
224 } 194 }
225 195
226 #endif 196 #endif
OLDNEW
« no previous file with comments | « no previous file | sky/engine/core/html/parser/HTMLTokenizer.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698