OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
53 return false; | 53 return false; |
54 | 54 |
55 if (!string.length()) | 55 if (!string.length()) |
56 return true; | 56 return true; |
57 | 57 |
58 return equal(string.impl(), vector.data(), vector.size()); | 58 return equal(string.impl(), vector.data(), vector.size()); |
59 } | 59 } |
60 | 60 |
61 void WebVTTTokenizer::reset() | 61 void WebVTTTokenizer::reset() |
62 { | 62 { |
63 m_state = WebVTTTokenizerState::DataState; | |
64 m_token = 0; | 63 m_token = 0; |
65 m_buffer.clear(); | 64 m_buffer.clear(); |
66 } | 65 } |
67 | 66 |
68 bool WebVTTTokenizer::nextToken(SegmentedString& source, WebVTTToken& token) | 67 bool WebVTTTokenizer::nextToken(SegmentedString& source, WebVTTToken& token) |
69 { | 68 { |
70 // If we have a token in progress, then we're supposed to be called back | 69 // If we have a token in progress, then we're supposed to be called back |
71 // with the same token so we can finish it. | 70 // with the same token so we can finish it. |
72 ASSERT(!m_token || m_token == &token || token.type() == WebVTTTokenTypes::Un
initialized); | 71 ASSERT(!m_token || m_token == &token || token.type() == WebVTTTokenTypes::Un
initialized); |
73 m_token = &token; | 72 m_token = &token; |
74 | 73 |
75 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) | 74 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) |
76 return haveBufferedCharacterToken(); | 75 return haveBufferedCharacterToken(); |
77 | 76 |
78 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); | 77 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
79 | 78 |
| 79 m_state = WebVTTTokenizerState::DataState; |
| 80 |
80 // 4.8.10.13.4 WebVTT cue text tokenizer | 81 // 4.8.10.13.4 WebVTT cue text tokenizer |
81 switch (m_state) { | 82 switch (m_state) { |
82 WEBVTT_BEGIN_STATE(DataState) { | 83 WEBVTT_BEGIN_STATE(DataState) { |
83 if (cc == '&') { | 84 if (cc == '&') { |
84 m_buffer.append(static_cast<LChar>(cc)); | 85 m_buffer.append(static_cast<LChar>(cc)); |
85 WEBVTT_ADVANCE_TO(EscapeState); | 86 WEBVTT_ADVANCE_TO(EscapeState); |
86 } else if (cc == '<') { | 87 } else if (cc == '<') { |
87 // FIXME: the explicit Vector conversion copies into a temporary | 88 // FIXME: the explicit Vector conversion copies into a temporary |
88 // and is wasteful. | 89 // and is wasteful. |
89 if (m_token->type() == WebVTTTokenTypes::Uninitialized | 90 if (m_token->type() == WebVTTTokenTypes::Uninitialized |
90 || vectorEqualsString<UChar>(Vector<UChar, 32>(m_token->char
acters()), emptyString())) | 91 || vectorEqualsString<UChar>(Vector<UChar, 32>(m_token->char
acters()), emptyString())) { |
91 WEBVTT_ADVANCE_TO(TagState); | 92 WEBVTT_ADVANCE_TO(TagState); |
92 else | 93 } else { |
93 return emitAndResumeIn(source, WebVTTTokenizerState::TagStat
e); | 94 // We don't want to advance input or perform a state transit
ion - just return a (new) token. |
| 95 // (On the next call to nextToken we will see '<' again, but
take the other branch in this if instead.) |
| 96 return emitToken(WebVTTTokenTypes::Character); |
| 97 } |
94 } else if (cc == kEndOfFileMarker) { | 98 } else if (cc == kEndOfFileMarker) { |
95 return emitEndOfFile(source); | 99 return emitToken(WebVTTTokenTypes::Character); |
96 } else { | 100 } else { |
97 bufferCharacter(cc); | 101 bufferCharacter(cc); |
98 WEBVTT_ADVANCE_TO(DataState); | 102 WEBVTT_ADVANCE_TO(DataState); |
99 } | 103 } |
100 } | 104 } |
101 END_STATE() | 105 END_STATE() |
102 | 106 |
103 WEBVTT_BEGIN_STATE(EscapeState) { | 107 WEBVTT_BEGIN_STATE(EscapeState) { |
104 if (cc == ';') { | 108 if (cc == ';') { |
105 if (vectorEqualsString(m_buffer, "&")) { | 109 if (vectorEqualsString(m_buffer, "&")) { |
(...skipping 12 matching lines...) Expand all Loading... |
118 m_buffer.append(static_cast<LChar>(cc)); | 122 m_buffer.append(static_cast<LChar>(cc)); |
119 m_token->appendToCharacter(m_buffer); | 123 m_token->appendToCharacter(m_buffer); |
120 } | 124 } |
121 m_buffer.clear(); | 125 m_buffer.clear(); |
122 WEBVTT_ADVANCE_TO(DataState); | 126 WEBVTT_ADVANCE_TO(DataState); |
123 } else if (isASCIIAlphanumeric(cc)) { | 127 } else if (isASCIIAlphanumeric(cc)) { |
124 m_buffer.append(static_cast<LChar>(cc)); | 128 m_buffer.append(static_cast<LChar>(cc)); |
125 WEBVTT_ADVANCE_TO(EscapeState); | 129 WEBVTT_ADVANCE_TO(EscapeState); |
126 } else if (cc == kEndOfFileMarker) { | 130 } else if (cc == kEndOfFileMarker) { |
127 m_token->appendToCharacter(m_buffer); | 131 m_token->appendToCharacter(m_buffer); |
128 return emitEndOfFile(source); | 132 return emitToken(WebVTTTokenTypes::Character); |
129 } else { | 133 } else { |
130 if (!vectorEqualsString(m_buffer, "&")) | 134 if (!vectorEqualsString(m_buffer, "&")) |
131 m_token->appendToCharacter(m_buffer); | 135 m_token->appendToCharacter(m_buffer); |
132 m_buffer.clear(); | 136 m_buffer.clear(); |
133 WEBVTT_ADVANCE_TO(DataState); | 137 WEBVTT_ADVANCE_TO(DataState); |
134 } | 138 } |
135 } | 139 } |
136 END_STATE() | 140 END_STATE() |
137 | 141 |
138 WEBVTT_BEGIN_STATE(TagState) { | 142 WEBVTT_BEGIN_STATE(TagState) { |
139 if (isTokenizerWhitespace(cc)) { | 143 if (isTokenizerWhitespace(cc)) { |
140 m_token->beginEmptyStartTag(); | 144 m_token->beginEmptyStartTag(); |
141 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 145 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
142 } else if (cc == '.') { | 146 } else if (cc == '.') { |
143 m_token->beginEmptyStartTag(); | 147 m_token->beginEmptyStartTag(); |
144 WEBVTT_ADVANCE_TO(StartTagClassState); | 148 WEBVTT_ADVANCE_TO(StartTagClassState); |
145 } else if (cc == '/') { | 149 } else if (cc == '/') { |
146 WEBVTT_ADVANCE_TO(EndTagOpenState); | 150 WEBVTT_ADVANCE_TO(EndTagOpenState); |
147 } else if (WTF::isASCIIDigit(cc)) { | 151 } else if (WTF::isASCIIDigit(cc)) { |
148 m_token->beginTimestampTag(cc); | 152 m_token->beginTimestampTag(cc); |
149 WEBVTT_ADVANCE_TO(TimestampTagState); | 153 WEBVTT_ADVANCE_TO(TimestampTagState); |
150 } else if (cc == '>' || cc == kEndOfFileMarker) { | 154 } else if (cc == '>' || cc == kEndOfFileMarker) { |
151 m_token->beginEmptyStartTag(); | 155 m_token->beginEmptyStartTag(); |
152 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 156 return advanceAndEmitToken(source, WebVTTTokenTypes::StartTag); |
153 } else { | 157 } else { |
154 m_token->beginStartTag(cc); | 158 m_token->beginStartTag(cc); |
155 WEBVTT_ADVANCE_TO(StartTagState); | 159 WEBVTT_ADVANCE_TO(StartTagState); |
156 } | 160 } |
157 } | 161 } |
158 END_STATE() | 162 END_STATE() |
159 | 163 |
160 WEBVTT_BEGIN_STATE(StartTagState) { | 164 WEBVTT_BEGIN_STATE(StartTagState) { |
161 if (isTokenizerWhitespace(cc)) { | 165 if (isTokenizerWhitespace(cc)) { |
162 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 166 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
163 } else if (cc == '.') { | 167 } else if (cc == '.') { |
164 WEBVTT_ADVANCE_TO(StartTagClassState); | 168 WEBVTT_ADVANCE_TO(StartTagClassState); |
165 } else if (cc == '>' || cc == kEndOfFileMarker) { | 169 } else if (cc == '>' || cc == kEndOfFileMarker) { |
166 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 170 return advanceAndEmitToken(source, WebVTTTokenTypes::StartTag); |
167 } else { | 171 } else { |
168 m_token->appendToName(cc); | 172 m_token->appendToName(cc); |
169 WEBVTT_ADVANCE_TO(StartTagState); | 173 WEBVTT_ADVANCE_TO(StartTagState); |
170 } | 174 } |
171 } | 175 } |
172 END_STATE() | 176 END_STATE() |
173 | 177 |
174 WEBVTT_BEGIN_STATE(StartTagClassState) { | 178 WEBVTT_BEGIN_STATE(StartTagClassState) { |
175 if (isTokenizerWhitespace(cc)) { | 179 if (isTokenizerWhitespace(cc)) { |
176 m_token->addNewClass(); | 180 m_token->addNewClass(); |
177 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 181 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
178 } else if (cc == '.') { | 182 } else if (cc == '.') { |
179 m_token->addNewClass(); | 183 m_token->addNewClass(); |
180 WEBVTT_ADVANCE_TO(StartTagClassState); | 184 WEBVTT_ADVANCE_TO(StartTagClassState); |
181 } else if (cc == '>' || cc == kEndOfFileMarker) { | 185 } else if (cc == '>' || cc == kEndOfFileMarker) { |
182 m_token->addNewClass(); | 186 m_token->addNewClass(); |
183 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 187 return advanceAndEmitToken(source, WebVTTTokenTypes::StartTag); |
184 } else { | 188 } else { |
185 m_token->appendToClass(cc); | 189 m_token->appendToClass(cc); |
186 WEBVTT_ADVANCE_TO(StartTagClassState); | 190 WEBVTT_ADVANCE_TO(StartTagClassState); |
187 } | 191 } |
188 | 192 |
189 } | 193 } |
190 END_STATE() | 194 END_STATE() |
191 | 195 |
192 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { | 196 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { |
193 if (cc == '>' || cc == kEndOfFileMarker) { | 197 if (cc == '>' || cc == kEndOfFileMarker) { |
194 m_token->addNewAnnotation(); | 198 m_token->addNewAnnotation(); |
195 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 199 return advanceAndEmitToken(source, WebVTTTokenTypes::StartTag); |
196 } | 200 } |
197 m_token->appendToAnnotation(cc); | 201 m_token->appendToAnnotation(cc); |
198 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 202 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
199 } | 203 } |
200 END_STATE() | 204 END_STATE() |
201 | 205 |
202 WEBVTT_BEGIN_STATE(EndTagOpenState) { | 206 WEBVTT_BEGIN_STATE(EndTagOpenState) { |
203 if (cc == '>' || cc == kEndOfFileMarker) { | 207 if (cc == '>' || cc == kEndOfFileMarker) { |
204 m_token->beginEndTag('\0'); | 208 m_token->beginEndTag('\0'); |
205 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 209 return advanceAndEmitToken(source, WebVTTTokenTypes::EndTag); |
206 } | 210 } |
207 m_token->beginEndTag(cc); | 211 m_token->beginEndTag(cc); |
208 WEBVTT_ADVANCE_TO(EndTagState); | 212 WEBVTT_ADVANCE_TO(EndTagState); |
209 } | 213 } |
210 END_STATE() | 214 END_STATE() |
211 | 215 |
212 WEBVTT_BEGIN_STATE(EndTagState) { | 216 WEBVTT_BEGIN_STATE(EndTagState) { |
213 if (cc == '>' || cc == kEndOfFileMarker) | 217 if (cc == '>' || cc == kEndOfFileMarker) |
214 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 218 return advanceAndEmitToken(source, WebVTTTokenTypes::EndTag); |
215 m_token->appendToName(cc); | 219 m_token->appendToName(cc); |
216 WEBVTT_ADVANCE_TO(EndTagState); | 220 WEBVTT_ADVANCE_TO(EndTagState); |
217 } | 221 } |
218 END_STATE() | 222 END_STATE() |
219 | 223 |
220 WEBVTT_BEGIN_STATE(TimestampTagState) { | 224 WEBVTT_BEGIN_STATE(TimestampTagState) { |
221 if (cc == '>' || cc == kEndOfFileMarker) | 225 if (cc == '>' || cc == kEndOfFileMarker) |
222 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 226 return advanceAndEmitToken(source, WebVTTTokenTypes::TimestampTa
g); |
223 m_token->appendToTimestamp(cc); | 227 m_token->appendToTimestamp(cc); |
224 WEBVTT_ADVANCE_TO(TimestampTagState); | 228 WEBVTT_ADVANCE_TO(TimestampTagState); |
225 } | 229 } |
226 END_STATE() | 230 END_STATE() |
227 | 231 |
228 } | 232 } |
229 | 233 |
230 ASSERT_NOT_REACHED(); | 234 ASSERT_NOT_REACHED(); |
231 return false; | 235 return false; |
232 } | 236 } |
233 | 237 |
234 } | 238 } |
235 | 239 |
OLD | NEW |