| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 20 matching lines...) Expand all Loading... |
| 31 #include "config.h" | 31 #include "config.h" |
| 32 | 32 |
| 33 #include "core/html/track/vtt/VTTTokenizer.h" | 33 #include "core/html/track/vtt/VTTTokenizer.h" |
| 34 | 34 |
| 35 #include "core/xml/parser/MarkupTokenizerInlines.h" | 35 #include "core/xml/parser/MarkupTokenizerInlines.h" |
| 36 #include "wtf/text/StringBuilder.h" | 36 #include "wtf/text/StringBuilder.h" |
| 37 #include "wtf/unicode/CharacterNames.h" | 37 #include "wtf/unicode/CharacterNames.h" |
| 38 | 38 |
| 39 namespace WebCore { | 39 namespace WebCore { |
| 40 | 40 |
| 41 #define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(VTTTokenizerState, stateName) | 41 #define WEBVTT_BEGIN_STATE(stateName) case stateName: stateName: |
| 42 #define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(VTTTokenizerState, stateName) | 42 #define WEBVTT_ADVANCE_TO(stateName) \ |
| 43 do { \ |
| 44 state = stateName; \ |
| 45 ASSERT(!m_input.isEmpty()); \ |
| 46 m_inputStreamPreprocessor.advance(m_input); \ |
| 47 cc = m_inputStreamPreprocessor.nextInputCharacter(); \ |
| 48 goto stateName; \ |
| 49 } while (false) |
| 43 | 50 |
| 44 template<unsigned charactersCount> | 51 template<unsigned charactersCount> |
| 45 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)
[charactersCount]) | 52 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)
[charactersCount]) |
| 46 { | 53 { |
| 47 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersC
ount - 1); | 54 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersC
ount - 1); |
| 48 } | 55 } |
| 49 | 56 |
| 50 static void addNewClass(StringBuilder& classes, const StringBuilder& newClass) | 57 static void addNewClass(StringBuilder& classes, const StringBuilder& newClass) |
| 51 { | 58 { |
| 52 if (!classes.isEmpty()) | 59 if (!classes.isEmpty()) |
| 53 classes.append(' '); | 60 classes.append(' '); |
| 54 classes.append(newClass); | 61 classes.append(newClass); |
| 55 } | 62 } |
| 56 | 63 |
| 64 inline bool emitToken(VTTToken& resultToken, const VTTToken& token) |
| 65 { |
| 66 resultToken = token; |
| 67 return true; |
| 68 } |
| 69 |
| 70 inline bool advanceAndEmitToken(SegmentedString& source, VTTToken& resultToken,
const VTTToken& token) |
| 71 { |
| 72 source.advanceAndUpdateLineNumber(); |
| 73 return emitToken(resultToken, token); |
| 74 } |
| 75 |
| 57 VTTTokenizer::VTTTokenizer(const String& input) | 76 VTTTokenizer::VTTTokenizer(const String& input) |
| 58 : m_input(input) | 77 : m_input(input) |
| 59 , m_inputStreamPreprocessor(this) | 78 , m_inputStreamPreprocessor(this) |
| 60 { | 79 { |
| 61 reset(); | |
| 62 | |
| 63 // Append a EOF marker and close the input "stream". | 80 // Append a EOF marker and close the input "stream". |
| 64 ASSERT(!m_input.isClosed()); | 81 ASSERT(!m_input.isClosed()); |
| 65 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); | 82 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); |
| 66 m_input.close(); | 83 m_input.close(); |
| 67 } | 84 } |
| 68 | 85 |
| 69 void VTTTokenizer::reset() | |
| 70 { | |
| 71 m_token = 0; | |
| 72 } | |
| 73 | |
| 74 bool VTTTokenizer::nextToken(VTTToken& token) | 86 bool VTTTokenizer::nextToken(VTTToken& token) |
| 75 { | 87 { |
| 76 // If we have a token in progress, then we're supposed to be called back | |
| 77 // with the same token so we can finish it. | |
| 78 ASSERT(!m_token || m_token == &token); | |
| 79 m_token = &token; | |
| 80 | |
| 81 if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input)) | 88 if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input)) |
| 82 return haveBufferedCharacterToken(); | 89 return false; |
| 83 | 90 |
| 84 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); | 91 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
| 85 if (cc == kEndOfFileMarker) { | 92 if (cc == kEndOfFileMarker) { |
| 86 m_inputStreamPreprocessor.advance(m_input); | 93 m_inputStreamPreprocessor.advance(m_input); |
| 87 return false; | 94 return false; |
| 88 } | 95 } |
| 89 | 96 |
| 90 StringBuilder buffer; | 97 StringBuilder buffer; |
| 91 StringBuilder result; | 98 StringBuilder result; |
| 92 StringBuilder classes; | 99 StringBuilder classes; |
| 93 m_state = VTTTokenizerState::DataState; | 100 enum { |
| 94 | 101 DataState, |
| 95 // The ADVANCE_TO helper macros expect this name ('source') on the input var
iable. | 102 EscapeState, |
| 96 SegmentedString& source = m_input; | 103 TagState, |
| 104 StartTagState, |
| 105 StartTagClassState, |
| 106 StartTagAnnotationState, |
| 107 EndTagState, |
| 108 TimestampTagState, |
| 109 } state = DataState; |
| 97 | 110 |
| 98 // 4.8.10.13.4 WebVTT cue text tokenizer | 111 // 4.8.10.13.4 WebVTT cue text tokenizer |
| 99 switch (m_state) { | 112 switch (state) { |
| 100 WEBVTT_BEGIN_STATE(DataState) { | 113 WEBVTT_BEGIN_STATE(DataState) { |
| 101 if (cc == '&') { | 114 if (cc == '&') { |
| 102 buffer.append(static_cast<LChar>(cc)); | 115 buffer.append(static_cast<LChar>(cc)); |
| 103 WEBVTT_ADVANCE_TO(EscapeState); | 116 WEBVTT_ADVANCE_TO(EscapeState); |
| 104 } else if (cc == '<') { | 117 } else if (cc == '<') { |
| 105 if (result.isEmpty()) { | 118 if (result.isEmpty()) { |
| 106 WEBVTT_ADVANCE_TO(TagState); | 119 WEBVTT_ADVANCE_TO(TagState); |
| 107 } else { | 120 } else { |
| 108 // We don't want to advance input or perform a state transit
ion - just return a (new) token. | 121 // We don't want to advance input or perform a state transit
ion - just return a (new) token. |
| 109 // (On the next call to nextToken we will see '<' again, but
take the other branch in this if instead.) | 122 // (On the next call to nextToken we will see '<' again, but
take the other branch in this if instead.) |
| 110 return emitToken(VTTToken::StringToken(result.toString())); | 123 return emitToken(token, VTTToken::StringToken(result.toStrin
g())); |
| 111 } | 124 } |
| 112 } else if (cc == kEndOfFileMarker) { | 125 } else if (cc == kEndOfFileMarker) { |
| 113 return advanceAndEmitToken(source, VTTToken::StringToken(result.
toString())); | 126 return advanceAndEmitToken(m_input, token, VTTToken::StringToken
(result.toString())); |
| 114 } else { | 127 } else { |
| 115 result.append(cc); | 128 result.append(cc); |
| 116 WEBVTT_ADVANCE_TO(DataState); | 129 WEBVTT_ADVANCE_TO(DataState); |
| 117 } | 130 } |
| 118 } | 131 } |
| 119 END_STATE() | 132 END_STATE() |
| 120 | 133 |
| 121 WEBVTT_BEGIN_STATE(EscapeState) { | 134 WEBVTT_BEGIN_STATE(EscapeState) { |
| 122 if (cc == ';') { | 135 if (cc == ';') { |
| 123 if (equalLiteral(buffer, "&")) { | 136 if (equalLiteral(buffer, "&")) { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 136 buffer.append(static_cast<LChar>(cc)); | 149 buffer.append(static_cast<LChar>(cc)); |
| 137 result.append(buffer); | 150 result.append(buffer); |
| 138 } | 151 } |
| 139 buffer.clear(); | 152 buffer.clear(); |
| 140 WEBVTT_ADVANCE_TO(DataState); | 153 WEBVTT_ADVANCE_TO(DataState); |
| 141 } else if (isASCIIAlphanumeric(cc)) { | 154 } else if (isASCIIAlphanumeric(cc)) { |
| 142 buffer.append(static_cast<LChar>(cc)); | 155 buffer.append(static_cast<LChar>(cc)); |
| 143 WEBVTT_ADVANCE_TO(EscapeState); | 156 WEBVTT_ADVANCE_TO(EscapeState); |
| 144 } else if (cc == '<') { | 157 } else if (cc == '<') { |
| 145 result.append(buffer); | 158 result.append(buffer); |
| 146 return emitToken(VTTToken::StringToken(result.toString())); | 159 return emitToken(token, VTTToken::StringToken(result.toString())
); |
| 147 } else if (cc == kEndOfFileMarker) { | 160 } else if (cc == kEndOfFileMarker) { |
| 148 result.append(buffer); | 161 result.append(buffer); |
| 149 return advanceAndEmitToken(source, VTTToken::StringToken(result.
toString())); | 162 return advanceAndEmitToken(m_input, token, VTTToken::StringToken
(result.toString())); |
| 150 } else { | 163 } else { |
| 151 result.append(buffer); | 164 result.append(buffer); |
| 152 buffer.clear(); | 165 buffer.clear(); |
| 153 | 166 |
| 154 if (cc == '&') { | 167 if (cc == '&') { |
| 155 buffer.append(static_cast<LChar>(cc)); | 168 buffer.append(static_cast<LChar>(cc)); |
| 156 WEBVTT_ADVANCE_TO(EscapeState); | 169 WEBVTT_ADVANCE_TO(EscapeState); |
| 157 } | 170 } |
| 158 result.append(cc); | 171 result.append(cc); |
| 159 WEBVTT_ADVANCE_TO(DataState); | 172 WEBVTT_ADVANCE_TO(DataState); |
| 160 } | 173 } |
| 161 } | 174 } |
| 162 END_STATE() | 175 END_STATE() |
| 163 | 176 |
| 164 WEBVTT_BEGIN_STATE(TagState) { | 177 WEBVTT_BEGIN_STATE(TagState) { |
| 165 if (isTokenizerWhitespace(cc)) { | 178 if (isTokenizerWhitespace(cc)) { |
| 166 ASSERT(result.isEmpty()); | 179 ASSERT(result.isEmpty()); |
| 167 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 180 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| 168 } else if (cc == '.') { | 181 } else if (cc == '.') { |
| 169 ASSERT(result.isEmpty()); | 182 ASSERT(result.isEmpty()); |
| 170 WEBVTT_ADVANCE_TO(StartTagClassState); | 183 WEBVTT_ADVANCE_TO(StartTagClassState); |
| 171 } else if (cc == '/') { | 184 } else if (cc == '/') { |
| 172 WEBVTT_ADVANCE_TO(EndTagState); | 185 WEBVTT_ADVANCE_TO(EndTagState); |
| 173 } else if (WTF::isASCIIDigit(cc)) { | 186 } else if (WTF::isASCIIDigit(cc)) { |
| 174 result.append(cc); | 187 result.append(cc); |
| 175 WEBVTT_ADVANCE_TO(TimestampTagState); | 188 WEBVTT_ADVANCE_TO(TimestampTagState); |
| 176 } else if (cc == '>' || cc == kEndOfFileMarker) { | 189 } else if (cc == '>' || cc == kEndOfFileMarker) { |
| 177 ASSERT(result.isEmpty()); | 190 ASSERT(result.isEmpty()); |
| 178 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring())); | 191 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(re
sult.toString())); |
| 179 } else { | 192 } else { |
| 180 result.append(cc); | 193 result.append(cc); |
| 181 WEBVTT_ADVANCE_TO(StartTagState); | 194 WEBVTT_ADVANCE_TO(StartTagState); |
| 182 } | 195 } |
| 183 } | 196 } |
| 184 END_STATE() | 197 END_STATE() |
| 185 | 198 |
| 186 WEBVTT_BEGIN_STATE(StartTagState) { | 199 WEBVTT_BEGIN_STATE(StartTagState) { |
| 187 if (isTokenizerWhitespace(cc)) { | 200 if (isTokenizerWhitespace(cc)) { |
| 188 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 201 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| 189 } else if (cc == '.') { | 202 } else if (cc == '.') { |
| 190 WEBVTT_ADVANCE_TO(StartTagClassState); | 203 WEBVTT_ADVANCE_TO(StartTagClassState); |
| 191 } else if (cc == '>' || cc == kEndOfFileMarker) { | 204 } else if (cc == '>' || cc == kEndOfFileMarker) { |
| 192 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring())); | 205 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(re
sult.toString())); |
| 193 } else { | 206 } else { |
| 194 result.append(cc); | 207 result.append(cc); |
| 195 WEBVTT_ADVANCE_TO(StartTagState); | 208 WEBVTT_ADVANCE_TO(StartTagState); |
| 196 } | 209 } |
| 197 } | 210 } |
| 198 END_STATE() | 211 END_STATE() |
| 199 | 212 |
| 200 WEBVTT_BEGIN_STATE(StartTagClassState) { | 213 WEBVTT_BEGIN_STATE(StartTagClassState) { |
| 201 if (isTokenizerWhitespace(cc)) { | 214 if (isTokenizerWhitespace(cc)) { |
| 202 addNewClass(classes, buffer); | 215 addNewClass(classes, buffer); |
| 203 buffer.clear(); | 216 buffer.clear(); |
| 204 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 217 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| 205 } else if (cc == '.') { | 218 } else if (cc == '.') { |
| 206 addNewClass(classes, buffer); | 219 addNewClass(classes, buffer); |
| 207 buffer.clear(); | 220 buffer.clear(); |
| 208 WEBVTT_ADVANCE_TO(StartTagClassState); | 221 WEBVTT_ADVANCE_TO(StartTagClassState); |
| 209 } else if (cc == '>' || cc == kEndOfFileMarker) { | 222 } else if (cc == '>' || cc == kEndOfFileMarker) { |
| 210 addNewClass(classes, buffer); | 223 addNewClass(classes, buffer); |
| 211 buffer.clear(); | 224 buffer.clear(); |
| 212 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring(), classes.toAtomicString())); | 225 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(re
sult.toString(), classes.toAtomicString())); |
| 213 } else { | 226 } else { |
| 214 buffer.append(cc); | 227 buffer.append(cc); |
| 215 WEBVTT_ADVANCE_TO(StartTagClassState); | 228 WEBVTT_ADVANCE_TO(StartTagClassState); |
| 216 } | 229 } |
| 217 } | 230 } |
| 218 END_STATE() | 231 END_STATE() |
| 219 | 232 |
| 220 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { | 233 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { |
| 221 if (cc == '>' || cc == kEndOfFileMarker) { | 234 if (cc == '>' || cc == kEndOfFileMarker) { |
| 222 return advanceAndEmitToken(source, VTTToken::StartTag(result.toS
tring(), classes.toAtomicString(), buffer.toAtomicString())); | 235 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(re
sult.toString(), classes.toAtomicString(), buffer.toAtomicString())); |
| 223 } | 236 } |
| 224 buffer.append(cc); | 237 buffer.append(cc); |
| 225 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 238 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| 226 } | 239 } |
| 227 END_STATE() | 240 END_STATE() |
| 228 | 241 |
| 229 WEBVTT_BEGIN_STATE(EndTagState) { | 242 WEBVTT_BEGIN_STATE(EndTagState) { |
| 230 if (cc == '>' || cc == kEndOfFileMarker) | 243 if (cc == '>' || cc == kEndOfFileMarker) |
| 231 return advanceAndEmitToken(source, VTTToken::EndTag(result.toStr
ing())); | 244 return advanceAndEmitToken(m_input, token, VTTToken::EndTag(resu
lt.toString())); |
| 232 result.append(cc); | 245 result.append(cc); |
| 233 WEBVTT_ADVANCE_TO(EndTagState); | 246 WEBVTT_ADVANCE_TO(EndTagState); |
| 234 } | 247 } |
| 235 END_STATE() | 248 END_STATE() |
| 236 | 249 |
| 237 WEBVTT_BEGIN_STATE(TimestampTagState) { | 250 WEBVTT_BEGIN_STATE(TimestampTagState) { |
| 238 if (cc == '>' || cc == kEndOfFileMarker) | 251 if (cc == '>' || cc == kEndOfFileMarker) |
| 239 return advanceAndEmitToken(source, VTTToken::TimestampTag(result
.toString())); | 252 return advanceAndEmitToken(m_input, token, VTTToken::TimestampTa
g(result.toString())); |
| 240 result.append(cc); | 253 result.append(cc); |
| 241 WEBVTT_ADVANCE_TO(TimestampTagState); | 254 WEBVTT_ADVANCE_TO(TimestampTagState); |
| 242 } | 255 } |
| 243 END_STATE() | 256 END_STATE() |
| 244 | 257 |
| 245 } | 258 } |
| 246 | 259 |
| 247 ASSERT_NOT_REACHED(); | 260 ASSERT_NOT_REACHED(); |
| 248 return false; | 261 return false; |
| 249 } | 262 } |
| 250 | 263 |
| 251 } | 264 } |
| 252 | 265 |
| OLD | NEW |