OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 20 matching lines...) Expand all Loading... |
31 #include "config.h" | 31 #include "config.h" |
32 | 32 |
33 #include "core/html/track/vtt/VTTTokenizer.h" | 33 #include "core/html/track/vtt/VTTTokenizer.h" |
34 | 34 |
35 #include "core/xml/parser/MarkupTokenizerInlines.h" | 35 #include "core/xml/parser/MarkupTokenizerInlines.h" |
36 #include "wtf/text/StringBuilder.h" | 36 #include "wtf/text/StringBuilder.h" |
37 #include "wtf/unicode/CharacterNames.h" | 37 #include "wtf/unicode/CharacterNames.h" |
38 | 38 |
39 namespace WebCore { | 39 namespace WebCore { |
40 | 40 |
41 #define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(VTTTokenizerState, stateName) | 41 #define WEBVTT_BEGIN_STATE(stateName) case stateName: stateName: |
42 #define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(VTTTokenizerState, stateName) | 42 #define WEBVTT_ADVANCE_TO(stateName) \ |
| 43 do { \ |
| 44 state = stateName; \ |
| 45 ASSERT(!m_input.isEmpty()); \ |
| 46 m_inputStreamPreprocessor.advance(m_input); \ |
| 47 cc = m_inputStreamPreprocessor.nextInputCharacter(); \ |
| 48 goto stateName; \ |
| 49 } while (false) |
43 | 50 |
44 template<unsigned charactersCount> | 51 template<unsigned charactersCount> |
45 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)
[charactersCount]) | 52 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)
[charactersCount]) |
46 { | 53 { |
47 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersC
ount - 1); | 54 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersC
ount - 1); |
48 } | 55 } |
49 | 56 |
50 static void addNewClass(StringBuilder& classes, const StringBuilder& newClass) | 57 static void addNewClass(StringBuilder& classes, const StringBuilder& newClass) |
51 { | 58 { |
52 if (!classes.isEmpty()) | 59 if (!classes.isEmpty()) |
53 classes.append(' '); | 60 classes.append(' '); |
54 classes.append(newClass); | 61 classes.append(newClass); |
55 } | 62 } |
56 | 63 |
| 64 inline bool emitToken(VTTToken& resultToken, const VTTToken& token) |
| 65 { |
| 66 resultToken = token; |
| 67 return true; |
| 68 } |
| 69 |
| 70 inline bool advanceAndEmitToken(SegmentedString& source, VTTToken& resultToken,
const VTTToken& token) |
| 71 { |
| 72 source.advanceAndUpdateLineNumber(); |
| 73 return emitToken(resultToken, token); |
| 74 } |
| 75 |
57 VTTTokenizer::VTTTokenizer(const String& input) | 76 VTTTokenizer::VTTTokenizer(const String& input) |
58 : m_input(input) | 77 : m_input(input) |
59 , m_inputStreamPreprocessor(this) | 78 , m_inputStreamPreprocessor(this) |
60 { | 79 { |
61 reset(); | |
62 | |
63 // Append a EOF marker and close the input "stream". | 80 // Append a EOF marker and close the input "stream". |
64 ASSERT(!m_input.isClosed()); | 81 ASSERT(!m_input.isClosed()); |
65 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); | 82 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); |
66 m_input.close(); | 83 m_input.close(); |
67 } | 84 } |
68 | 85 |
69 void VTTTokenizer::reset() | |
70 { | |
71 m_token = 0; | |
72 } | |
73 | |
74 bool VTTTokenizer::nextToken(VTTToken& token) | 86 bool VTTTokenizer::nextToken(VTTToken& token) |
75 { | 87 { |
76 // If we have a token in progress, then we're supposed to be called back | |
77 // with the same token so we can finish it. | |
78 ASSERT(!m_token || m_token == &token); | |
79 m_token = &token; | |
80 | |
81 if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input)) | 88 if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input)) |
82 return haveBufferedCharacterToken(); | 89 return false; |
83 | 90 |
84 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); | 91 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
85 if (cc == kEndOfFileMarker) { | 92 if (cc == kEndOfFileMarker) { |
86 m_inputStreamPreprocessor.advance(m_input); | 93 m_inputStreamPreprocessor.advance(m_input); |
87 return false; | 94 return false; |
88 } | 95 } |
89 | 96 |
90 StringBuilder buffer; | 97 StringBuilder buffer; |
91 StringBuilder result; | 98 StringBuilder result; |
92 StringBuilder classes; | 99 StringBuilder classes; |
93 m_state = VTTTokenizerState::DataState; | 100 enum { |
94 | 101 DataState, |
95 // The ADVANCE_TO helper macros expect this name ('source') on the input var
iable. | 102 EscapeState, |
96 SegmentedString& source = m_input; | 103 TagState, |
| 104 StartTagState, |
| 105 StartTagClassState, |
| 106 StartTagAnnotationState, |
| 107 EndTagState, |
| 108 TimestampTagState, |
| 109 } state = DataState; |
97 | 110 |
98 // 4.8.10.13.4 WebVTT cue text tokenizer | 111 // 4.8.10.13.4 WebVTT cue text tokenizer |
99 switch (m_state) { | 112 switch (state) { |
100 WEBVTT_BEGIN_STATE(DataState) { | 113 WEBVTT_BEGIN_STATE(DataState) { |
101 if (cc == '&') { | 114 if (cc == '&') { |
102 buffer.append(static_cast<LChar>(cc)); | 115 buffer.append(static_cast<LChar>(cc)); |
103 WEBVTT_ADVANCE_TO(EscapeState); | 116 WEBVTT_ADVANCE_TO(EscapeState); |
104 } else if (cc == '<') { | 117 } else if (cc == '<') { |
105 if (result.isEmpty()) { | 118 if (result.isEmpty()) { |
106 WEBVTT_ADVANCE_TO(TagState); | 119 WEBVTT_ADVANCE_TO(TagState); |
107 } else { | 120 } else { |
108 // We don't want to advance input or perform a state transit
ion - just return a (new) token. | 121 // We don't want to advance input or perform a state transit
ion - just return a (new) token. |
109 // (On the next call to nextToken we will see '<' again, but
take the other branch in this if instead.) | 122 // (On the next call to nextToken we will see '<' again, but
take the other branch in this if instead.) |
110 return emitToken(VTTToken::StringToken(result.toString())); | 123 return emitToken(token, VTTToken::StringToken(result.toStrin
g())); |
111 } | 124 } |
112 } else if (cc == kEndOfFileMarker) { | 125 } else if (cc == kEndOfFileMarker) { |
113 return advanceAndEmitToken(source, VTTToken::StringToken(result.
toString())); | 126 return advanceAndEmitToken(m_input, token, VTTToken::StringToken
(result.toString())); |
114 } else { | 127 } else { |
115 result.append(cc); | 128 result.append(cc); |
116 WEBVTT_ADVANCE_TO(DataState); | 129 WEBVTT_ADVANCE_TO(DataState); |
117 } | 130 } |
118 } | 131 } |
119 END_STATE() | 132 END_STATE() |
120 | 133 |
121 WEBVTT_BEGIN_STATE(EscapeState) { | 134 WEBVTT_BEGIN_STATE(EscapeState) { |
122 if (cc == ';') { | 135 if (cc == ';') { |
123 if (equalLiteral(buffer, "&")) { | 136 if (equalLiteral(buffer, "&")) { |
(...skipping 12 matching lines...) Expand all Loading... |
136 buffer.append(static_cast<LChar>(cc)); | 149 buffer.append(static_cast<LChar>(cc)); |
137 result.append(buffer); | 150 result.append(buffer); |
138 } | 151 } |
139 buffer.clear(); | 152 buffer.clear(); |
140 WEBVTT_ADVANCE_TO(DataState); | 153 WEBVTT_ADVANCE_TO(DataState); |
141 } else if (isASCIIAlphanumeric(cc)) { | 154 } else if (isASCIIAlphanumeric(cc)) { |
142 buffer.append(static_cast<LChar>(cc)); | 155 buffer.append(static_cast<LChar>(cc)); |
143 WEBVTT_ADVANCE_TO(EscapeState); | 156 WEBVTT_ADVANCE_TO(EscapeState); |
144 } else if (cc == '<') { | 157 } else if (cc == '<') { |
145 result.append(buffer); | 158 result.append(buffer); |
146 return emitToken(VTTToken::StringToken(result.toString())); | 159 return emitToken(token, VTTToken::StringToken(result.toString())
); |
147 } else if (cc == kEndOfFileMarker) { | 160 } else if (cc == kEndOfFileMarker) { |
148 result.append(buffer); | 161 result.append(buffer); |
149 return advanceAndEmitToken(source, VTTToken::StringToken(result.
toString())); | 162 return advanceAndEmitToken(m_input, token, VTTToken::StringToken
(result.toString())); |
150 } else { | 163 } else { |
151 result.append(buffer); | 164 result.append(buffer); |
152 buffer.clear(); | 165 buffer.clear(); |
153 | 166 |
154 if (cc == '&') { | 167 if (cc == '&') { |
155 buffer.append(static_cast<LChar>(cc)); | 168 buffer.append(static_cast<LChar>(cc)); |
156 WEBVTT_ADVANCE_TO(EscapeState); | 169 WEBVTT_ADVANCE_TO(EscapeState); |
157 } | 170 } |
158 result.append(cc); | 171 result.append(cc); |
159 WEBVTT_ADVANCE_TO(DataState); | 172 WEBVTT_ADVANCE_TO(DataState); |
160 } | 173 } |
161 } | 174 } |
162 END_STATE() | 175 END_STATE() |
163 | 176 |
164 WEBVTT_BEGIN_STATE(TagState) { | 177 WEBVTT_BEGIN_STATE(TagState) { |
165 if (isTokenizerWhitespace(cc)) { | 178 if (isTokenizerWhitespace(cc)) { |
166 ASSERT(result.isEmpty()); | 179 ASSERT(result.isEmpty()); |
167 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 180 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
168 } else if (cc == '.') { | 181 } else if (cc == '.') { |
169 ASSERT(result.isEmpty()); | 182 ASSERT(result.isEmpty()); |
170 WEBVTT_ADVANCE_TO(StartTagClassState); | 183 WEBVTT_ADVANCE_TO(StartTagClassState); |
171 } else if (cc == '/') { | 184 } else if (cc == '/') { |
172 WEBVTT_ADVANCE_TO(EndTagState); | 185 WEBVTT_ADVANCE_TO(EndTagState); |
173 } else if (WTF::isASCIIDigit(cc)) { | 186 } else if (WTF::isASCIIDigit(cc)) { |
174 result.append(cc); | 187 result.append(cc); |
175 WEBVTT_ADVANCE_TO(TimestampTagState); | 188 WEBVTT_ADVANCE_TO(TimestampTagState); |
176 } else if (cc == '>' || cc == kEndOfFileMarker) { | 189 } else if (cc == '>' || cc == kEndOfFileMarker) { |
177 ASSERT(result.isEmpty()); | 190 ASSERT(result.isEmpty()); |
178 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring())); | 191 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(re
sult.toString())); |
179 } else { | 192 } else { |
180 result.append(cc); | 193 result.append(cc); |
181 WEBVTT_ADVANCE_TO(StartTagState); | 194 WEBVTT_ADVANCE_TO(StartTagState); |
182 } | 195 } |
183 } | 196 } |
184 END_STATE() | 197 END_STATE() |
185 | 198 |
186 WEBVTT_BEGIN_STATE(StartTagState) { | 199 WEBVTT_BEGIN_STATE(StartTagState) { |
187 if (isTokenizerWhitespace(cc)) { | 200 if (isTokenizerWhitespace(cc)) { |
188 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 201 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
189 } else if (cc == '.') { | 202 } else if (cc == '.') { |
190 WEBVTT_ADVANCE_TO(StartTagClassState); | 203 WEBVTT_ADVANCE_TO(StartTagClassState); |
191 } else if (cc == '>' || cc == kEndOfFileMarker) { | 204 } else if (cc == '>' || cc == kEndOfFileMarker) { |
192 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring())); | 205 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(re
sult.toString())); |
193 } else { | 206 } else { |
194 result.append(cc); | 207 result.append(cc); |
195 WEBVTT_ADVANCE_TO(StartTagState); | 208 WEBVTT_ADVANCE_TO(StartTagState); |
196 } | 209 } |
197 } | 210 } |
198 END_STATE() | 211 END_STATE() |
199 | 212 |
200 WEBVTT_BEGIN_STATE(StartTagClassState) { | 213 WEBVTT_BEGIN_STATE(StartTagClassState) { |
201 if (isTokenizerWhitespace(cc)) { | 214 if (isTokenizerWhitespace(cc)) { |
202 addNewClass(classes, buffer); | 215 addNewClass(classes, buffer); |
203 buffer.clear(); | 216 buffer.clear(); |
204 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 217 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
205 } else if (cc == '.') { | 218 } else if (cc == '.') { |
206 addNewClass(classes, buffer); | 219 addNewClass(classes, buffer); |
207 buffer.clear(); | 220 buffer.clear(); |
208 WEBVTT_ADVANCE_TO(StartTagClassState); | 221 WEBVTT_ADVANCE_TO(StartTagClassState); |
209 } else if (cc == '>' || cc == kEndOfFileMarker) { | 222 } else if (cc == '>' || cc == kEndOfFileMarker) { |
210 addNewClass(classes, buffer); | 223 addNewClass(classes, buffer); |
211 buffer.clear(); | 224 buffer.clear(); |
212 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring(), classes.toAtomicString())); | 225 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(re
sult.toString(), classes.toAtomicString())); |
213 } else { | 226 } else { |
214 buffer.append(cc); | 227 buffer.append(cc); |
215 WEBVTT_ADVANCE_TO(StartTagClassState); | 228 WEBVTT_ADVANCE_TO(StartTagClassState); |
216 } | 229 } |
217 } | 230 } |
218 END_STATE() | 231 END_STATE() |
219 | 232 |
220 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { | 233 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { |
221 if (cc == '>' || cc == kEndOfFileMarker) { | 234 if (cc == '>' || cc == kEndOfFileMarker) { |
222 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring(), classes.toAtomicString(), buffer.toAtomicString())); | 235 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(re
sult.toString(), classes.toAtomicString(), buffer.toAtomicString())); |
223 } | 236 } |
224 buffer.append(cc); | 237 buffer.append(cc); |
225 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 238 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
226 } | 239 } |
227 END_STATE() | 240 END_STATE() |
228 | 241 |
229 WEBVTT_BEGIN_STATE(EndTagState) { | 242 WEBVTT_BEGIN_STATE(EndTagState) { |
230 if (cc == '>' || cc == kEndOfFileMarker) | 243 if (cc == '>' || cc == kEndOfFileMarker) |
231 return advanceAndEmitToken(source, VTTToken::EndTag(result.toStr
ing())); | 244 return advanceAndEmitToken(m_input, token, VTTToken::EndTag(resu
lt.toString())); |
232 result.append(cc); | 245 result.append(cc); |
233 WEBVTT_ADVANCE_TO(EndTagState); | 246 WEBVTT_ADVANCE_TO(EndTagState); |
234 } | 247 } |
235 END_STATE() | 248 END_STATE() |
236 | 249 |
237 WEBVTT_BEGIN_STATE(TimestampTagState) { | 250 WEBVTT_BEGIN_STATE(TimestampTagState) { |
238 if (cc == '>' || cc == kEndOfFileMarker) | 251 if (cc == '>' || cc == kEndOfFileMarker) |
239 return advanceAndEmitToken(source, VTTToken::TimestampTag(result
.toString())); | 252 return advanceAndEmitToken(m_input, token, VTTToken::TimestampTa
g(result.toString())); |
240 result.append(cc); | 253 result.append(cc); |
241 WEBVTT_ADVANCE_TO(TimestampTagState); | 254 WEBVTT_ADVANCE_TO(TimestampTagState); |
242 } | 255 } |
243 END_STATE() | 256 END_STATE() |
244 | 257 |
245 } | 258 } |
246 | 259 |
247 ASSERT_NOT_REACHED(); | 260 ASSERT_NOT_REACHED(); |
248 return false; | 261 return false; |
249 } | 262 } |
250 | 263 |
251 } | 264 } |
252 | 265 |
OLD | NEW |