OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 28 matching lines...) Expand all Loading... |
39 | 39 |
40 #define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(VTTTokenizerState, stateName) | 40 #define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(VTTTokenizerState, stateName) |
41 #define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(VTTTokenizerState, stateName) | 41 #define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(VTTTokenizerState, stateName) |
42 | 42 |
// equalLiteral: compares the StringBuilder `s` against the string literal
// `characters` by forwarding to WTF::equal. The array length charactersCount
// counts the literal's terminating NUL, so charactersCount - 1 characters are
// compared. These rows are the same in the OLD and NEW columns.
43 template<unsigned charactersCount> | 43 template<unsigned charactersCount> |
44 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)
[charactersCount]) | 44 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)
[charactersCount]) |
45 { | 45 { |
46 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersC
ount - 1); | 46 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersC
ount - 1); |
47 } | 47 } |
48 | 48 |
// VTTTokenizer constructor.
// OLD column: takes no arguments and only initializes the preprocessor and
// calls reset().
// NEW column: takes the input string, stores it in the member m_input, then
// appends a one-character segment holding kEndOfFileMarker and closes m_input.
// The ASSERT checks that m_input has not already been closed before the append.
49 VTTTokenizer::VTTTokenizer() | 49 VTTTokenizer::VTTTokenizer(const String& input) |
50 : m_inputStreamPreprocessor(this) | 50 : m_input(input) |
| 51 , m_inputStreamPreprocessor(this) |
51 { | 52 { |
52 reset(); | 53 reset(); |
| 54 |
| 55 // Append an EOF marker and close the input "stream". |
| 56 ASSERT(!m_input.isClosed()); |
| 57 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); |
| 58 m_input.close(); |
53 } | 59 } |
54 | 60 |
// reset: drops the pointer to the token in progress (m_token = 0) and empties
// m_buffer. These rows are the same in the OLD and NEW columns.
55 void VTTTokenizer::reset() | 61 void VTTTokenizer::reset() |
56 { | 62 { |
57 m_token = 0; | 63 m_token = 0; |
58 m_buffer.clear(); | 64 m_buffer.clear(); |
59 } | 65 } |
60 | 66 |
// nextToken: runs the WebVTT cue text tokenizer state machine and fills `token`.
// OLD column: the input is a caller-supplied SegmentedString named `source`.
// NEW column: the input is the member m_input; a local reference named `source`
// is bound to it because the ADVANCE_TO helper macros refer to that name.
// Return value, as far as the visible rows show:
//   - haveBufferedCharacterToken() when the input is empty or cannot be peeked;
//   - false when the first character read is kEndOfFileMarker (NEW column only);
//   - otherwise whatever emitToken() / advanceAndEmitToken() returns from the
//     state that completes the token.
// The diff viewer elided some rows ("skipping N matching lines"), so not every
// tokenizer state appears below.
61 bool VTTTokenizer::nextToken(SegmentedString& source, VTTToken& token) | 67 bool VTTTokenizer::nextToken(VTTToken& token) |
62 { | 68 { |
63 // If we have a token in progress, then we're supposed to be called back | 69 // If we have a token in progress, then we're supposed to be called back |
64 // with the same token so we can finish it. | 70 // with the same token so we can finish it. |
65 ASSERT(!m_token || m_token == &token || token.type() == VTTTokenTypes::Unini
tialized); | 71 ASSERT(!m_token || m_token == &token); |
66 m_token = &token; | 72 m_token = &token; |
67 | 73 |
68 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) | 74 if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input)) |
69 return haveBufferedCharacterToken(); | 75 return haveBufferedCharacterToken(); |
70 | 76 |
71 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); | 77 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
// NEW only: if the very first character is the EOF marker, step past it and
// report that no token was produced.
| 78 if (cc == kEndOfFileMarker) { |
| 79 m_inputStreamPreprocessor.advance(m_input); |
| 80 return false; |
| 81 } |
72 | 82 |
73 m_state = VTTTokenizerState::DataState; | 83 m_state = VTTTokenizerState::DataState; |
74 | 84 |
| 85 // The ADVANCE_TO helper macros expect this name ('source') on the input var
iable. |
| 86 SegmentedString& source = m_input; |
| 87 |
75 // 4.8.10.13.4 WebVTT cue text tokenizer | 88 // 4.8.10.13.4 WebVTT cue text tokenizer |
76 switch (m_state) { | 89 switch (m_state) { |
77 WEBVTT_BEGIN_STATE(DataState) { | 90 WEBVTT_BEGIN_STATE(DataState) { |
78 if (cc == '&') { | 91 if (cc == '&') { |
79 m_buffer.append(static_cast<LChar>(cc)); | 92 m_buffer.append(static_cast<LChar>(cc)); |
80 WEBVTT_ADVANCE_TO(EscapeState); | 93 WEBVTT_ADVANCE_TO(EscapeState); |
81 } else if (cc == '<') { | 94 } else if (cc == '<') { |
82 if (m_token->type() == VTTTokenTypes::Uninitialized || m_token->
characters().isEmpty()) { | 95 if (m_token->characters().isEmpty()) { |
83 WEBVTT_ADVANCE_TO(TagState); | 96 WEBVTT_ADVANCE_TO(TagState); |
84 } else { | 97 } else { |
85 // We don't want to advance input or perform a state transit
ion - just return a (new) token. | 98 // We don't want to advance input or perform a state transit
ion - just return a (new) token. |
86 // (On the next call to nextToken we will see '<' again, but
take the other branch in this if instead.) | 99 // (On the next call to nextToken we will see '<' again, but
take the other branch in this if instead.) |
87 return emitToken(VTTTokenTypes::Character); | 100 return emitToken(VTTTokenTypes::Character); |
88 } | 101 } |
// At the EOF marker the OLD column calls emitToken(); the NEW column calls
// advanceAndEmitToken(source, ...). NOTE(review): presumably this consumes the
// marker appended by the NEW constructor - confirm against advanceAndEmitToken().
89 } else if (cc == kEndOfFileMarker) { | 102 } else if (cc == kEndOfFileMarker) { |
90 return emitToken(VTTTokenTypes::Character); | 103 return advanceAndEmitToken(source, VTTTokenTypes::Character); |
91 } else { | 104 } else { |
92 bufferCharacter(cc); | 105 bufferCharacter(cc); |
93 WEBVTT_ADVANCE_TO(DataState); | 106 WEBVTT_ADVANCE_TO(DataState); |
94 } | 107 } |
95 } | 108 } |
96 END_STATE() | 109 END_STATE() |
97 | 110 |
98 WEBVTT_BEGIN_STATE(EscapeState) { | 111 WEBVTT_BEGIN_STATE(EscapeState) { |
99 if (cc == ';') { | 112 if (cc == ';') { |
100 if (equalLiteral(m_buffer, "&amp")) { | 113 if (equalLiteral(m_buffer, "&amp")) { |
(...skipping 12 matching lines...) Expand all Loading... |
113 m_buffer.append(static_cast<LChar>(cc)); | 126 m_buffer.append(static_cast<LChar>(cc)); |
114 m_token->appendToCharacter(m_buffer); | 127 m_token->appendToCharacter(m_buffer); |
115 } | 128 } |
116 m_buffer.clear(); | 129 m_buffer.clear(); |
117 WEBVTT_ADVANCE_TO(DataState); | 130 WEBVTT_ADVANCE_TO(DataState); |
118 } else if (isASCIIAlphanumeric(cc)) { | 131 } else if (isASCIIAlphanumeric(cc)) { |
119 m_buffer.append(static_cast<LChar>(cc)); | 132 m_buffer.append(static_cast<LChar>(cc)); |
120 WEBVTT_ADVANCE_TO(EscapeState); | 133 WEBVTT_ADVANCE_TO(EscapeState); |
121 } else if (cc == kEndOfFileMarker) { | 134 } else if (cc == kEndOfFileMarker) { |
122 m_token->appendToCharacter(m_buffer); | 135 m_token->appendToCharacter(m_buffer); |
123 return emitToken(VTTTokenTypes::Character); | 136 return advanceAndEmitToken(source, VTTTokenTypes::Character); |
124 } else { | 137 } else { |
125 if (!equalLiteral(m_buffer, "&")) | 138 if (!equalLiteral(m_buffer, "&")) |
126 m_token->appendToCharacter(m_buffer); | 139 m_token->appendToCharacter(m_buffer); |
127 m_buffer.clear(); | 140 m_buffer.clear(); |
128 WEBVTT_ADVANCE_TO(DataState); | 141 WEBVTT_ADVANCE_TO(DataState); |
129 } | 142 } |
130 } | 143 } |
131 END_STATE() | 144 END_STATE() |
132 | 145 |
133 WEBVTT_BEGIN_STATE(TagState) { | 146 WEBVTT_BEGIN_STATE(TagState) { |
134 if (isTokenizerWhitespace(cc)) { | 147 if (isTokenizerWhitespace(cc)) { |
135 m_token->beginEmptyStartTag(); | 148 m_token->beginEmptyStartTag(); |
136 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 149 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
137 } else if (cc == '.') { | 150 } else if (cc == '.') { |
138 m_token->beginEmptyStartTag(); | 151 m_token->beginEmptyStartTag(); |
139 WEBVTT_ADVANCE_TO(StartTagClassState); | 152 WEBVTT_ADVANCE_TO(StartTagClassState); |
// After '/', the OLD column goes to EndTagOpenState; the NEW column goes
// straight to EndTagState (the EndTagOpenState rows further down are OLD only).
140 } else if (cc == '/') { | 153 } else if (cc == '/') { |
141 WEBVTT_ADVANCE_TO(EndTagOpenState); | 154 WEBVTT_ADVANCE_TO(EndTagState); |
142 } else if (WTF::isASCIIDigit(cc)) { | 155 } else if (WTF::isASCIIDigit(cc)) { |
143 m_token->beginTimestampTag(cc); | 156 m_token->beginTimestampTag(cc); |
144 WEBVTT_ADVANCE_TO(TimestampTagState); | 157 WEBVTT_ADVANCE_TO(TimestampTagState); |
145 } else if (cc == '>' || cc == kEndOfFileMarker) { | 158 } else if (cc == '>' || cc == kEndOfFileMarker) { |
146 m_token->beginEmptyStartTag(); | 159 m_token->beginEmptyStartTag(); |
147 return advanceAndEmitToken(source, VTTTokenTypes::StartTag); | 160 return advanceAndEmitToken(source, VTTTokenTypes::StartTag); |
148 } else { | 161 } else { |
149 m_token->beginStartTag(cc); | 162 m_token->beginStartTag(cc); |
150 WEBVTT_ADVANCE_TO(StartTagState); | 163 WEBVTT_ADVANCE_TO(StartTagState); |
151 } | 164 } |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
187 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { | 200 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { |
188 if (cc == '>' || cc == kEndOfFileMarker) { | 201 if (cc == '>' || cc == kEndOfFileMarker) { |
189 m_token->addNewAnnotation(); | 202 m_token->addNewAnnotation(); |
190 return advanceAndEmitToken(source, VTTTokenTypes::StartTag); | 203 return advanceAndEmitToken(source, VTTTokenTypes::StartTag); |
191 } | 204 } |
192 m_token->appendToAnnotation(cc); | 205 m_token->appendToAnnotation(cc); |
193 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 206 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
194 } | 207 } |
195 END_STATE() | 208 END_STATE() |
196 | 209 |
// OLD only: EndTagOpenState called m_token->beginEndTag() with the first name
// character (or '\0' for an immediately closed tag) before entering EndTagState.
// NOTE(review): the visible NEW rows contain no beginEndTag() call - presumably
// the end-tag token is set up elsewhere in the same change; confirm.
197 WEBVTT_BEGIN_STATE(EndTagOpenState) { | |
198 if (cc == '>' || cc == kEndOfFileMarker) { | |
199 m_token->beginEndTag('\0'); | |
200 return advanceAndEmitToken(source, VTTTokenTypes::EndTag); | |
201 } | |
202 m_token->beginEndTag(cc); | |
203 WEBVTT_ADVANCE_TO(EndTagState); | |
204 } | |
205 END_STATE() | |
206 | |
207 WEBVTT_BEGIN_STATE(EndTagState) { | 210 WEBVTT_BEGIN_STATE(EndTagState) { |
208 if (cc == '>' || cc == kEndOfFileMarker) | 211 if (cc == '>' || cc == kEndOfFileMarker) |
209 return advanceAndEmitToken(source, VTTTokenTypes::EndTag); | 212 return advanceAndEmitToken(source, VTTTokenTypes::EndTag); |
210 m_token->appendToName(cc); | 213 m_token->appendToName(cc); |
211 WEBVTT_ADVANCE_TO(EndTagState); | 214 WEBVTT_ADVANCE_TO(EndTagState); |
212 } | 215 } |
213 END_STATE() | 216 END_STATE() |
214 | 217 |
215 WEBVTT_BEGIN_STATE(TimestampTagState) { | 218 WEBVTT_BEGIN_STATE(TimestampTagState) { |
216 if (cc == '>' || cc == kEndOfFileMarker) | 219 if (cc == '>' || cc == kEndOfFileMarker) |
217 return advanceAndEmitToken(source, VTTTokenTypes::TimestampTag); | 220 return advanceAndEmitToken(source, VTTTokenTypes::TimestampTag); |
218 m_token->appendToTimestamp(cc); | 221 m_token->appendToTimestamp(cc); |
219 WEBVTT_ADVANCE_TO(TimestampTagState); | 222 WEBVTT_ADVANCE_TO(TimestampTagState); |
220 } | 223 } |
221 END_STATE() | 224 END_STATE() |
222 | 225 |
223 } | 226 } |
224 | 227 |
225 ASSERT_NOT_REACHED(); | 228 ASSERT_NOT_REACHED(); |
226 return false; | 229 return false; |
227 } | 230 } |
228 | 231 |
229 } | 232 } |
230 | 233 |
OLD | NEW |