OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 15 matching lines...) Expand all Loading... |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 */ | 29 */ |
30 | 30 |
31 #include "config.h" | 31 #include "config.h" |
32 | 32 |
33 #include "core/html/track/vtt/VTTTokenizer.h" | 33 #include "core/html/track/vtt/VTTTokenizer.h" |
34 | 34 |
35 #include "core/xml/parser/MarkupTokenizerInlines.h" | 35 #include "core/xml/parser/MarkupTokenizerInlines.h" |
| 36 #include "wtf/text/StringBuilder.h" |
36 #include "wtf/unicode/CharacterNames.h" | 37 #include "wtf/unicode/CharacterNames.h" |
37 | 38 |
38 namespace WebCore { | 39 namespace WebCore { |
39 | 40 |
// Convenience wrappers that pass VTTTokenizerState as the first argument to the
// generic BEGIN_STATE / ADVANCE_TO macros. Those macros are defined outside this
// file (presumably in MarkupTokenizerInlines.h - confirm). Unchanged by this diff.
40 #define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(VTTTokenizerState, stateName) | 41 #define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(VTTTokenizerState, stateName) |
41 #define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(VTTTokenizerState, stateName) | 42 #define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(VTTTokenizerState, stateName) |
42 | 43 |
// Compares the contents of a StringBuilder with a character array via WTF::equal().
// The length passed is charactersCount - 1, i.e. the array size minus one; for a
// string-literal argument that is the literal's length without its terminating NUL.
// The array is reinterpreted as LChar, so callers pass 8-bit literals.
// Unchanged by this diff (only the line numbers shift).
43 template<unsigned charactersCount> | 44 template<unsigned charactersCount> |
44 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)
[charactersCount]) | 45 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)
[charactersCount]) |
45 { | 46 {
46 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersC
ount - 1); | 47 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersC
ount - 1); |
47 } | 48 } |
48 | 49 |
// Added by this change (NEW column only): file-local helper that appends
// |newClass| to the space-separated list held in |classes|.
// A single ' ' separator is written first whenever |classes| already has content.
// |newClass| is not checked for emptiness, so an empty class name appended to a
// non-empty list still contributes a separator.
| 50 static void addNewClass(StringBuilder& classes, const StringBuilder& newClass) |
| 51 { |
| 52 if (!classes.isEmpty()) |
| 53 classes.append(' '); |
| 54 classes.append(newClass); |
| 55 } |
| 56 |
// Constructs a tokenizer over |input|.
// m_input is initialized from |input| and m_inputStreamPreprocessor is given
// |this|. After reset(), a one-character string holding kEndOfFileMarker is
// appended to m_input and the stream is closed; the ASSERT checks that the stream
// was not already closed before the append. Unchanged by this diff.
49 VTTTokenizer::VTTTokenizer(const String& input) | 57 VTTTokenizer::VTTTokenizer(const String& input) |
50 : m_input(input) | 58 : m_input(input) |
51 , m_inputStreamPreprocessor(this) | 59 , m_inputStreamPreprocessor(this) |
52 { | 60 { |
53 reset(); | 61 reset(); |
54 | 62 |
55 // Append a EOF marker and close the input "stream". | 63 // Append a EOF marker and close the input "stream". |
56 ASSERT(!m_input.isClosed()); | 64 ASSERT(!m_input.isClosed()); |
57 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); | 65 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); |
58 m_input.close(); | 66 m_input.close(); |
59 } | 67 } |
60 | 68 |
// Clears the tokenizer's reference to the current token by setting m_token to 0.
// OLD side additionally cleared the m_buffer member; the NEW side drops that line
// because nextToken() now accumulates into function-local StringBuilders instead.
61 void VTTTokenizer::reset() | 69 void VTTTokenizer::reset() |
62 { | 70 { |
63 m_token = 0; | 71 m_token = 0; |
64 m_buffer.clear(); | |
65 } | 72 } |
66 | 73 |
// Tokenizes the next piece of WebVTT cue text from m_input.
// (Side-by-side diff rows: left column is OLD, right column is NEW.)
//
// Before the state machine runs:
//  - asserts that m_token is unset or already points at |token|, then stores
//    &token in m_token;
//  - returns haveBufferedCharacterToken() if m_input is empty or the
//    preprocessor's peek() fails;
//  - if the next character is kEndOfFileMarker, advances past it and returns false.
//
// The state machine always starts in DataState and moves between states with the
// WEBVTT_BEGIN_STATE / WEBVTT_ADVANCE_TO macros. Every terminating branch returns
// the result of emitToken() or advanceAndEmitToken(); the trailing `return false`
// follows ASSERT_NOT_REACHED().
//
// What this change does: OLD accumulated text directly in the token
// (m_token->appendToData / addNewClass / addNewAnnotation) plus the m_buffer
// member, and emitted by passing a VTTTokenTypes value. NEW accumulates in three
// function-local StringBuilders and hands a constructed VTTToken
// (StringToken / StartTag / EndTag / TimestampTag) to the emit helpers.
67 bool VTTTokenizer::nextToken(VTTToken& token) | 74 bool VTTTokenizer::nextToken(VTTToken& token) |
68 { | 75 { |
69 // If we have a token in progress, then we're supposed to be called back | 76 // If we have a token in progress, then we're supposed to be called back |
70 // with the same token so we can finish it. | 77 // with the same token so we can finish it. |
71 ASSERT(!m_token || m_token == &token); | 78 ASSERT(!m_token || m_token == &token); |
72 m_token = &token; | 79 m_token = &token; |
73 | 80 |
74 if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input)) | 81 if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input)) |
75 return haveBufferedCharacterToken(); | 82 return haveBufferedCharacterToken(); |
76 | 83 |
77 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); | 84 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
78 if (cc == kEndOfFileMarker) { | 85 if (cc == kEndOfFileMarker) { |
79 m_inputStreamPreprocessor.advance(m_input); | 86 m_inputStreamPreprocessor.advance(m_input); |
80 return false; | 87 return false; |
81 } | 88 } |
82 | 89 |
// NEW side only: scratch builders that live for this call. Judging by their uses
// below, |result| collects character data, tag names and timestamp text;
// |buffer| collects escape sequences, individual class names and annotation text;
// |classes| collects the space-joined class list built by addNewClass().
| 90 StringBuilder buffer; |
| 91 StringBuilder result; |
| 92 StringBuilder classes; |
83 m_state = VTTTokenizerState::DataState; | 93 m_state = VTTTokenizerState::DataState; |
84 | 94 |
85 // The ADVANCE_TO helper macros expect this name ('source') on the input var
iable. | 95 // The ADVANCE_TO helper macros expect this name ('source') on the input var
iable. |
86 SegmentedString& source = m_input; | 96 SegmentedString& source = m_input; |
87 | 97 |
88 // 4.8.10.13.4 WebVTT cue text tokenizer | 98 // 4.8.10.13.4 WebVTT cue text tokenizer |
89 switch (m_state) { | 99 switch (m_state) {
90 WEBVTT_BEGIN_STATE(DataState) { | 100 WEBVTT_BEGIN_STATE(DataState) { |
91 if (cc == '&') { | 101 if (cc == '&') { |
92 m_buffer.append(static_cast<LChar>(cc)); | 102 buffer.append(static_cast<LChar>(cc)); |
93 WEBVTT_ADVANCE_TO(EscapeState); | 103 WEBVTT_ADVANCE_TO(EscapeState); |
94 } else if (cc == '<') { | 104 } else if (cc == '<') { |
95 if (m_token->characters().isEmpty()) { | 105 if (result.isEmpty()) { |
96 WEBVTT_ADVANCE_TO(TagState); | 106 WEBVTT_ADVANCE_TO(TagState); |
97 } else { | 107 } else { |
98 // We don't want to advance input or perform a state transit
ion - just return a (new) token. | 108 // We don't want to advance input or perform a state transit
ion - just return a (new) token. |
99 // (On the next call to nextToken we will see '<' again, but
take the other branch in this if instead.) | 109 // (On the next call to nextToken we will see '<' again, but
take the other branch in this if instead.) |
100 return emitToken(VTTTokenTypes::Character); | 110 return emitToken(VTTToken::StringToken(result.toString())); |
101 } | 111 } |
102 } else if (cc == kEndOfFileMarker) { | 112 } else if (cc == kEndOfFileMarker) { |
103 return advanceAndEmitToken(source, VTTTokenTypes::Character); | 113 return advanceAndEmitToken(source, VTTToken::StringToken(result.
toString())); |
104 } else { | 114 } else { |
105 m_token->appendToData(cc); | 115 result.append(cc); |
106 WEBVTT_ADVANCE_TO(DataState); | 116 WEBVTT_ADVANCE_TO(DataState); |
107 } | 117 } |
108 } | 118 } |
109 END_STATE() | 119 END_STATE() |
110 | 120 |
// EscapeState: |buffer| starts with the '&' that DataState appended. On ';' the
// buffer is compared against six literals and the matching character is emitted;
// an unrecognized sequence is copied through verbatim, including the ';'.
// NOTE(review): four of the literals are rendered below as "&", "<", ">" and " ".
// That looks like HTML-entity decoding by the diff viewer of "&amp", "&lt", "&gt"
// and "&nbsp" - confirm against the real file before relying on this text.
111 WEBVTT_BEGIN_STATE(EscapeState) { | 121 WEBVTT_BEGIN_STATE(EscapeState) { |
112 if (cc == ';') { | 122 if (cc == ';') { |
113 if (equalLiteral(m_buffer, "&")) { | 123 if (equalLiteral(buffer, "&")) { |
114 m_token->appendToData('&'); | 124 result.append('&'); |
115 } else if (equalLiteral(m_buffer, "<")) { | 125 } else if (equalLiteral(buffer, "<")) { |
116 m_token->appendToData('<'); | 126 result.append('<'); |
117 } else if (equalLiteral(m_buffer, ">")) { | 127 } else if (equalLiteral(buffer, ">")) { |
118 m_token->appendToData('>'); | 128 result.append('>'); |
119 } else if (equalLiteral(m_buffer, "&lrm")) { | 129 } else if (equalLiteral(buffer, "&lrm")) { |
120 m_token->appendToData(leftToRightMark); | 130 result.append(leftToRightMark); |
121 } else if (equalLiteral(m_buffer, "&rlm")) { | 131 } else if (equalLiteral(buffer, "&rlm")) { |
122 m_token->appendToData(rightToLeftMark); | 132 result.append(rightToLeftMark); |
123 } else if (equalLiteral(m_buffer, " ")) { | 133 } else if (equalLiteral(buffer, " ")) { |
124 m_token->appendToData(noBreakSpace); | 134 result.append(noBreakSpace); |
125 } else { | 135 } else { |
126 m_buffer.append(static_cast<LChar>(cc)); | 136 buffer.append(static_cast<LChar>(cc)); |
127 m_token->appendToData(m_buffer); | 137 result.append(buffer); |
128 } | 138 } |
129 m_buffer.clear(); | 139 buffer.clear(); |
130 WEBVTT_ADVANCE_TO(DataState); | 140 WEBVTT_ADVANCE_TO(DataState); |
131 } else if (isASCIIAlphanumeric(cc)) { | 141 } else if (isASCIIAlphanumeric(cc)) { |
132 m_buffer.append(static_cast<LChar>(cc)); | 142 buffer.append(static_cast<LChar>(cc)); |
133 WEBVTT_ADVANCE_TO(EscapeState); | 143 WEBVTT_ADVANCE_TO(EscapeState); |
134 } else if (cc == kEndOfFileMarker) { | 144 } else if (cc == kEndOfFileMarker) { |
135 m_token->appendToData(m_buffer); | 145 result.append(buffer); |
136 return advanceAndEmitToken(source, VTTTokenTypes::Character); | 146 return advanceAndEmitToken(source, VTTToken::StringToken(result.
toString())); |
// NOTE(review): in the fallback branch below, a buffer holding only "&" is
// discarded and the current character |cc| is never appended to the output.
// If WEBVTT_ADVANCE_TO consumes |cc| (macro body not visible here), input such as
// "a & b" would lose characters - compare with the "anything else" rule of the
// escape state in the WebVTT cue text tokenizer spec. Same on OLD and NEW sides.
137 } else { | 147 } else { |
138 if (!equalLiteral(m_buffer, "&")) | 148 if (!equalLiteral(buffer, "&")) |
139 m_token->appendToData(m_buffer); | 149 result.append(buffer); |
140 m_buffer.clear(); | 150 buffer.clear(); |
141 WEBVTT_ADVANCE_TO(DataState); | 151 WEBVTT_ADVANCE_TO(DataState); |
142 } | 152 } |
143 } | 153 } |
144 END_STATE() | 154 END_STATE() |
145 | 155 |
146 WEBVTT_BEGIN_STATE(TagState) { | 156 WEBVTT_BEGIN_STATE(TagState) { |
147 if (isTokenizerWhitespace(cc)) { | 157 if (isTokenizerWhitespace(cc)) { |
148 ASSERT(m_token->name().isEmpty()); | 158 ASSERT(result.isEmpty()); |
149 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 159 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
150 } else if (cc == '.') { | 160 } else if (cc == '.') { |
151 ASSERT(m_token->name().isEmpty()); | 161 ASSERT(result.isEmpty()); |
152 WEBVTT_ADVANCE_TO(StartTagClassState); | 162 WEBVTT_ADVANCE_TO(StartTagClassState); |
153 } else if (cc == '/') { | 163 } else if (cc == '/') { |
154 WEBVTT_ADVANCE_TO(EndTagState); | 164 WEBVTT_ADVANCE_TO(EndTagState); |
155 } else if (WTF::isASCIIDigit(cc)) { | 165 } else if (WTF::isASCIIDigit(cc)) { |
156 m_token->appendToData(cc); | 166 result.append(cc); |
157 WEBVTT_ADVANCE_TO(TimestampTagState); | 167 WEBVTT_ADVANCE_TO(TimestampTagState); |
158 } else if (cc == '>' || cc == kEndOfFileMarker) { | 168 } else if (cc == '>' || cc == kEndOfFileMarker) { |
159 ASSERT(m_token->name().isEmpty()); | 169 ASSERT(result.isEmpty()); |
160 return advanceAndEmitToken(source, VTTTokenTypes::StartTag); | 170 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring())); |
161 } else { | 171 } else { |
162 m_token->appendToData(cc); | 172 result.append(cc); |
163 WEBVTT_ADVANCE_TO(StartTagState); | 173 WEBVTT_ADVANCE_TO(StartTagState); |
164 } | 174 } |
165 } | 175 } |
166 END_STATE() | 176 END_STATE() |
167 | 177 |
168 WEBVTT_BEGIN_STATE(StartTagState) { | 178 WEBVTT_BEGIN_STATE(StartTagState) { |
169 if (isTokenizerWhitespace(cc)) { | 179 if (isTokenizerWhitespace(cc)) { |
170 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 180 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
171 } else if (cc == '.') { | 181 } else if (cc == '.') { |
172 WEBVTT_ADVANCE_TO(StartTagClassState); | 182 WEBVTT_ADVANCE_TO(StartTagClassState); |
173 } else if (cc == '>' || cc == kEndOfFileMarker) { | 183 } else if (cc == '>' || cc == kEndOfFileMarker) { |
174 return advanceAndEmitToken(source, VTTTokenTypes::StartTag); | 184 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring())); |
175 } else { | 185 } else { |
176 m_token->appendToData(cc); | 186 result.append(cc); |
177 WEBVTT_ADVANCE_TO(StartTagState); | 187 WEBVTT_ADVANCE_TO(StartTagState); |
178 } | 188 } |
179 } | 189 } |
180 END_STATE() | 190 END_STATE() |
181 | 191 |
182 WEBVTT_BEGIN_STATE(StartTagClassState) { | 192 WEBVTT_BEGIN_STATE(StartTagClassState) { |
183 if (isTokenizerWhitespace(cc)) { | 193 if (isTokenizerWhitespace(cc)) { |
184 m_token->addNewClass(m_buffer); | 194 addNewClass(classes, buffer); |
185 m_buffer.clear(); | 195 buffer.clear(); |
186 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 196 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
187 } else if (cc == '.') { | 197 } else if (cc == '.') { |
188 m_token->addNewClass(m_buffer); | 198 addNewClass(classes, buffer); |
189 m_buffer.clear(); | 199 buffer.clear(); |
190 WEBVTT_ADVANCE_TO(StartTagClassState); | 200 WEBVTT_ADVANCE_TO(StartTagClassState); |
191 } else if (cc == '>' || cc == kEndOfFileMarker) { | 201 } else if (cc == '>' || cc == kEndOfFileMarker) { |
192 m_token->addNewClass(m_buffer); | 202 addNewClass(classes, buffer); |
193 m_buffer.clear(); | 203 buffer.clear(); |
194 return advanceAndEmitToken(source, VTTTokenTypes::StartTag); | 204 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring(), classes.toAtomicString())); |
195 } else { | 205 } else { |
196 m_buffer.append(cc); | 206 buffer.append(cc); |
197 WEBVTT_ADVANCE_TO(StartTagClassState); | 207 WEBVTT_ADVANCE_TO(StartTagClassState); |
198 } | 208 } |
199 } | 209 } |
200 END_STATE() | 210 END_STATE() |
201 | 211 |
202 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { | 212 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { |
203 if (cc == '>' || cc == kEndOfFileMarker) { | 213 if (cc == '>' || cc == kEndOfFileMarker) { |
204 m_token->addNewAnnotation(m_buffer); | 214 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring(), classes.toAtomicString(), buffer.toAtomicString())); |
205 m_buffer.clear(); | |
206 return advanceAndEmitToken(source, VTTTokenTypes::StartTag); | |
207 } | 215 } |
208 m_buffer.append(cc); | 216 buffer.append(cc); |
209 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 217 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
210 } | 218 } |
211 END_STATE() | 219 END_STATE() |
212 | 220 |
213 WEBVTT_BEGIN_STATE(EndTagState) { | 221 WEBVTT_BEGIN_STATE(EndTagState) { |
214 if (cc == '>' || cc == kEndOfFileMarker) | 222 if (cc == '>' || cc == kEndOfFileMarker) |
215 return advanceAndEmitToken(source, VTTTokenTypes::EndTag); | 223 return advanceAndEmitToken(source, VTTToken::EndTag(result.toStr
ing())); |
216 m_token->appendToData(cc); | 224 result.append(cc); |
217 WEBVTT_ADVANCE_TO(EndTagState); | 225 WEBVTT_ADVANCE_TO(EndTagState); |
218 } | 226 } |
219 END_STATE() | 227 END_STATE() |
220 | 228 |
221 WEBVTT_BEGIN_STATE(TimestampTagState) { | 229 WEBVTT_BEGIN_STATE(TimestampTagState) { |
222 if (cc == '>' || cc == kEndOfFileMarker) | 230 if (cc == '>' || cc == kEndOfFileMarker) |
223 return advanceAndEmitToken(source, VTTTokenTypes::TimestampTag); | 231 return advanceAndEmitToken(source, VTTToken::TimestampTag(result
.toString())); |
224 m_token->appendToData(cc); | 232 result.append(cc); |
225 WEBVTT_ADVANCE_TO(TimestampTagState); | 233 WEBVTT_ADVANCE_TO(TimestampTagState); |
226 } | 234 } |
227 END_STATE() | 235 END_STATE() |
228 | 236 |
229 } | 237 } |
230 | 238 |
231 ASSERT_NOT_REACHED(); | 239 ASSERT_NOT_REACHED(); |
232 return false; | 240 return false; |
233 } | 241 } |
234 | 242 |
235 } | 243 } |
236 | 244 |
OLD | NEW |