| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 53 return false; | 53 return false; |
| 54 | 54 |
| 55 if (!string.length()) | 55 if (!string.length()) |
| 56 return true; | 56 return true; |
| 57 | 57 |
| 58 return equal(string.impl(), vector.data(), vector.size()); | 58 return equal(string.impl(), vector.data(), vector.size()); |
| 59 } | 59 } |
| 60 | 60 |
| 61 void WebVTTTokenizer::reset() | 61 void WebVTTTokenizer::reset() |
| 62 { | 62 { |
| 63 m_state = WebVTTTokenizerState::DataState; | |
| 64 m_token = 0; | 63 m_token = 0; |
| 65 m_buffer.clear(); | 64 m_buffer.clear(); |
| 66 } | 65 } |
| 67 | 66 |
| 68 bool WebVTTTokenizer::nextToken(SegmentedString& source, WebVTTToken& token) | 67 bool WebVTTTokenizer::nextToken(SegmentedString& source, WebVTTToken& token) |
| 69 { | 68 { |
| 70 // If we have a token in progress, then we're supposed to be called back | 69 // If we have a token in progress, then we're supposed to be called back |
| 71 // with the same token so we can finish it. | 70 // with the same token so we can finish it. |
| 72 ASSERT(!m_token || m_token == &token || token.type() == WebVTTTokenTypes::Uninitialized); | 71 ASSERT(!m_token || m_token == &token || token.type() == WebVTTTokenTypes::Uninitialized); |
| 73 m_token = &token; | 72 m_token = &token; |
| 74 | 73 |
| 75 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) | 74 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) |
| 76 return haveBufferedCharacterToken(); | 75 return haveBufferedCharacterToken(); |
| 77 | 76 |
| 78 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); | 77 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
| 79 | 78 |
| | 79 m_state = WebVTTTokenizerState::DataState; |
| | 80 |
| 80 // 4.8.10.13.4 WebVTT cue text tokenizer | 81 // 4.8.10.13.4 WebVTT cue text tokenizer |
| 81 switch (m_state) { | 82 switch (m_state) { |
| 82 WEBVTT_BEGIN_STATE(DataState) { | 83 WEBVTT_BEGIN_STATE(DataState) { |
| 83 if (cc == '&') { | 84 if (cc == '&') { |
| 84 m_buffer.append(static_cast<LChar>(cc)); | 85 m_buffer.append(static_cast<LChar>(cc)); |
| 85 WEBVTT_ADVANCE_TO(EscapeState); | 86 WEBVTT_ADVANCE_TO(EscapeState); |
| 86 } else if (cc == '<') { | 87 } else if (cc == '<') { |
| 87 // FIXME: the explicit Vector conversion copies into a temporary | 88 // FIXME: the explicit Vector conversion copies into a temporary |
| 88 // and is wasteful. | 89 // and is wasteful. |
| 89 if (m_token->type() == WebVTTTokenTypes::Uninitialized | 90 if (m_token->type() == WebVTTTokenTypes::Uninitialized |
| 90 || vectorEqualsString<UChar>(Vector<UChar, 32>(m_token->characters()), emptyString())) | 91 || vectorEqualsString<UChar>(Vector<UChar, 32>(m_token->characters()), emptyString())) { |
| 91 WEBVTT_ADVANCE_TO(TagState); | 92 WEBVTT_ADVANCE_TO(TagState); |
| 92 else | 93 } else { |
| 93 return emitAndResumeIn(source, WebVTTTokenizerState::TagState); | 94 // We don't want to advance input or perform a state transition - just return a (new) token. |
| | 95 // (On the next call to nextToken we will see '<' again, but take the other branch in this if instead.) |
| | 96 return emitToken(WebVTTTokenTypes::Character); |
| | 97 } |
| 94 } else if (cc == kEndOfFileMarker) { | 98 } else if (cc == kEndOfFileMarker) { |
| 95 return emitEndOfFile(source); | 99 return emitToken(WebVTTTokenTypes::Character); |
| 96 } else { | 100 } else { |
| 97 bufferCharacter(cc); | 101 bufferCharacter(cc); |
| 98 WEBVTT_ADVANCE_TO(DataState); | 102 WEBVTT_ADVANCE_TO(DataState); |
| 99 } | 103 } |
| 100 } | 104 } |
| 101 END_STATE() | 105 END_STATE() |
| 102 | 106 |
| 103 WEBVTT_BEGIN_STATE(EscapeState) { | 107 WEBVTT_BEGIN_STATE(EscapeState) { |
| 104 if (cc == ';') { | 108 if (cc == ';') { |
| 105 if (vectorEqualsString(m_buffer, "&")) { | 109 if (vectorEqualsString(m_buffer, "&")) { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 118 m_buffer.append(static_cast<LChar>(cc)); | 122 m_buffer.append(static_cast<LChar>(cc)); |
| 119 m_token->appendToCharacter(m_buffer); | 123 m_token->appendToCharacter(m_buffer); |
| 120 } | 124 } |
| 121 m_buffer.clear(); | 125 m_buffer.clear(); |
| 122 WEBVTT_ADVANCE_TO(DataState); | 126 WEBVTT_ADVANCE_TO(DataState); |
| 123 } else if (isASCIIAlphanumeric(cc)) { | 127 } else if (isASCIIAlphanumeric(cc)) { |
| 124 m_buffer.append(static_cast<LChar>(cc)); | 128 m_buffer.append(static_cast<LChar>(cc)); |
| 125 WEBVTT_ADVANCE_TO(EscapeState); | 129 WEBVTT_ADVANCE_TO(EscapeState); |
| 126 } else if (cc == kEndOfFileMarker) { | 130 } else if (cc == kEndOfFileMarker) { |
| 127 m_token->appendToCharacter(m_buffer); | 131 m_token->appendToCharacter(m_buffer); |
| 128 return emitEndOfFile(source); | 132 return emitToken(WebVTTTokenTypes::Character); |
| 129 } else { | 133 } else { |
| 130 if (!vectorEqualsString(m_buffer, "&")) | 134 if (!vectorEqualsString(m_buffer, "&")) |
| 131 m_token->appendToCharacter(m_buffer); | 135 m_token->appendToCharacter(m_buffer); |
| 132 m_buffer.clear(); | 136 m_buffer.clear(); |
| 133 WEBVTT_ADVANCE_TO(DataState); | 137 WEBVTT_ADVANCE_TO(DataState); |
| 134 } | 138 } |
| 135 } | 139 } |
| 136 END_STATE() | 140 END_STATE() |
| 137 | 141 |
| 138 WEBVTT_BEGIN_STATE(TagState) { | 142 WEBVTT_BEGIN_STATE(TagState) { |
| 139 if (isTokenizerWhitespace(cc)) { | 143 if (isTokenizerWhitespace(cc)) { |
| 140 m_token->beginEmptyStartTag(); | 144 m_token->beginEmptyStartTag(); |
| 141 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 145 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| 142 } else if (cc == '.') { | 146 } else if (cc == '.') { |
| 143 m_token->beginEmptyStartTag(); | 147 m_token->beginEmptyStartTag(); |
| 144 WEBVTT_ADVANCE_TO(StartTagClassState); | 148 WEBVTT_ADVANCE_TO(StartTagClassState); |
| 145 } else if (cc == '/') { | 149 } else if (cc == '/') { |
| 146 WEBVTT_ADVANCE_TO(EndTagOpenState); | 150 WEBVTT_ADVANCE_TO(EndTagOpenState); |
| 147 } else if (WTF::isASCIIDigit(cc)) { | 151 } else if (WTF::isASCIIDigit(cc)) { |
| 148 m_token->beginTimestampTag(cc); | 152 m_token->beginTimestampTag(cc); |
| 149 WEBVTT_ADVANCE_TO(TimestampTagState); | 153 WEBVTT_ADVANCE_TO(TimestampTagState); |
| 150 } else if (cc == '>' || cc == kEndOfFileMarker) { | 154 } else if (cc == '>' || cc == kEndOfFileMarker) { |
| 151 m_token->beginEmptyStartTag(); | 155 m_token->beginEmptyStartTag(); |
| 152 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 156 return advanceAndEmitToken(source, WebVTTTokenTypes::StartTag); |
| 153 } else { | 157 } else { |
| 154 m_token->beginStartTag(cc); | 158 m_token->beginStartTag(cc); |
| 155 WEBVTT_ADVANCE_TO(StartTagState); | 159 WEBVTT_ADVANCE_TO(StartTagState); |
| 156 } | 160 } |
| 157 } | 161 } |
| 158 END_STATE() | 162 END_STATE() |
| 159 | 163 |
| 160 WEBVTT_BEGIN_STATE(StartTagState) { | 164 WEBVTT_BEGIN_STATE(StartTagState) { |
| 161 if (isTokenizerWhitespace(cc)) { | 165 if (isTokenizerWhitespace(cc)) { |
| 162 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 166 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| 163 } else if (cc == '.') { | 167 } else if (cc == '.') { |
| 164 WEBVTT_ADVANCE_TO(StartTagClassState); | 168 WEBVTT_ADVANCE_TO(StartTagClassState); |
| 165 } else if (cc == '>' || cc == kEndOfFileMarker) { | 169 } else if (cc == '>' || cc == kEndOfFileMarker) { |
| 166 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 170 return advanceAndEmitToken(source, WebVTTTokenTypes::StartTag); |
| 167 } else { | 171 } else { |
| 168 m_token->appendToName(cc); | 172 m_token->appendToName(cc); |
| 169 WEBVTT_ADVANCE_TO(StartTagState); | 173 WEBVTT_ADVANCE_TO(StartTagState); |
| 170 } | 174 } |
| 171 } | 175 } |
| 172 END_STATE() | 176 END_STATE() |
| 173 | 177 |
| 174 WEBVTT_BEGIN_STATE(StartTagClassState) { | 178 WEBVTT_BEGIN_STATE(StartTagClassState) { |
| 175 if (isTokenizerWhitespace(cc)) { | 179 if (isTokenizerWhitespace(cc)) { |
| 176 m_token->addNewClass(); | 180 m_token->addNewClass(); |
| 177 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 181 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| 178 } else if (cc == '.') { | 182 } else if (cc == '.') { |
| 179 m_token->addNewClass(); | 183 m_token->addNewClass(); |
| 180 WEBVTT_ADVANCE_TO(StartTagClassState); | 184 WEBVTT_ADVANCE_TO(StartTagClassState); |
| 181 } else if (cc == '>' || cc == kEndOfFileMarker) { | 185 } else if (cc == '>' || cc == kEndOfFileMarker) { |
| 182 m_token->addNewClass(); | 186 m_token->addNewClass(); |
| 183 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 187 return advanceAndEmitToken(source, WebVTTTokenTypes::StartTag); |
| 184 } else { | 188 } else { |
| 185 m_token->appendToClass(cc); | 189 m_token->appendToClass(cc); |
| 186 WEBVTT_ADVANCE_TO(StartTagClassState); | 190 WEBVTT_ADVANCE_TO(StartTagClassState); |
| 187 } | 191 } |
| 188 | 192 |
| 189 } | 193 } |
| 190 END_STATE() | 194 END_STATE() |
| 191 | 195 |
| 192 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { | 196 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { |
| 193 if (cc == '>' || cc == kEndOfFileMarker) { | 197 if (cc == '>' || cc == kEndOfFileMarker) { |
| 194 m_token->addNewAnnotation(); | 198 m_token->addNewAnnotation(); |
| 195 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 199 return advanceAndEmitToken(source, WebVTTTokenTypes::StartTag); |
| 196 } | 200 } |
| 197 m_token->appendToAnnotation(cc); | 201 m_token->appendToAnnotation(cc); |
| 198 WEBVTT_ADVANCE_TO(StartTagAnnotationState); | 202 WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| 199 } | 203 } |
| 200 END_STATE() | 204 END_STATE() |
| 201 | 205 |
| 202 WEBVTT_BEGIN_STATE(EndTagOpenState) { | 206 WEBVTT_BEGIN_STATE(EndTagOpenState) { |
| 203 if (cc == '>' || cc == kEndOfFileMarker) { | 207 if (cc == '>' || cc == kEndOfFileMarker) { |
| 204 m_token->beginEndTag('\0'); | 208 m_token->beginEndTag('\0'); |
| 205 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 209 return advanceAndEmitToken(source, WebVTTTokenTypes::EndTag); |
| 206 } | 210 } |
| 207 m_token->beginEndTag(cc); | 211 m_token->beginEndTag(cc); |
| 208 WEBVTT_ADVANCE_TO(EndTagState); | 212 WEBVTT_ADVANCE_TO(EndTagState); |
| 209 } | 213 } |
| 210 END_STATE() | 214 END_STATE() |
| 211 | 215 |
| 212 WEBVTT_BEGIN_STATE(EndTagState) { | 216 WEBVTT_BEGIN_STATE(EndTagState) { |
| 213 if (cc == '>' || cc == kEndOfFileMarker) | 217 if (cc == '>' || cc == kEndOfFileMarker) |
| 214 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 218 return advanceAndEmitToken(source, WebVTTTokenTypes::EndTag); |
| 215 m_token->appendToName(cc); | 219 m_token->appendToName(cc); |
| 216 WEBVTT_ADVANCE_TO(EndTagState); | 220 WEBVTT_ADVANCE_TO(EndTagState); |
| 217 } | 221 } |
| 218 END_STATE() | 222 END_STATE() |
| 219 | 223 |
| 220 WEBVTT_BEGIN_STATE(TimestampTagState) { | 224 WEBVTT_BEGIN_STATE(TimestampTagState) { |
| 221 if (cc == '>' || cc == kEndOfFileMarker) | 225 if (cc == '>' || cc == kEndOfFileMarker) |
| 222 return emitAndResumeIn(source, WebVTTTokenizerState::DataState); | 226 return advanceAndEmitToken(source, WebVTTTokenTypes::TimestampTag); |
| 223 m_token->appendToTimestamp(cc); | 227 m_token->appendToTimestamp(cc); |
| 224 WEBVTT_ADVANCE_TO(TimestampTagState); | 228 WEBVTT_ADVANCE_TO(TimestampTagState); |
| 225 } | 229 } |
| 226 END_STATE() | 230 END_STATE() |
| 227 | 231 |
| 228 } | 232 } |
| 229 | 233 |
| 230 ASSERT_NOT_REACHED(); | 234 ASSERT_NOT_REACHED(); |
| 231 return false; | 235 return false; |
| 232 } | 236 } |
| 233 | 237 |
| 234 } | 238 } |
| 235 | 239 |
| OLD | NEW |