Source/core/html/track/vtt/VTTTokenizer.cpp - Issue 75243004: Replace character vectors with StringBuilders in the WebVTT tokenizer

Side by Side Diff: Source/core/html/track/vtt/VTTTokenizer.cpp

Issue 75243004: Replace character vectors with StringBuilders in the WebVTT tokenizer (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Rebase after symbolnames changed Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2011 Google Inc. All rights reserved.	2 * Copyright (C) 2011 Google Inc. All rights reserved.

3 *	3 *

4 * Redistribution and use in source and binary forms, with or without	4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions are	5 * modification, are permitted provided that the following conditions are

6 * met:	6 * met:

7 *	7 *

8 * * Redistributions of source code must retain the above copyright	8 * * Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * * Redistributions in binary form must reproduce the above	10 * * Redistributions in binary form must reproduce the above

(...skipping 22 matching lines...) Expand all Loading...
33 #include "core/html/track/vtt/VTTTokenizer.h"	33 #include "core/html/track/vtt/VTTTokenizer.h"

34	34

35 #include "core/xml/parser/MarkupTokenizerInlines.h"	35 #include "core/xml/parser/MarkupTokenizerInlines.h"

36 #include "wtf/unicode/CharacterNames.h"	36 #include "wtf/unicode/CharacterNames.h"

37	37

38 namespace WebCore {	38 namespace WebCore {

39	39

40 #define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(VTTTokenizerState, stateName)	40 #define WEBVTT_BEGIN_STATE(stateName) BEGIN_STATE(VTTTokenizerState, stateName)

41 #define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(VTTTokenizerState, stateName)	41 #define WEBVTT_ADVANCE_TO(stateName) ADVANCE_TO(VTTTokenizerState, stateName)

42	42

	43 template<unsigned charactersCount>

	44 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters) [charactersCount])

	45 {

	46 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersCount - 1);

	47 }

	48

43 VTTTokenizer::VTTTokenizer()	49 VTTTokenizer::VTTTokenizer()

44 : m_inputStreamPreprocessor(this)	50 : m_inputStreamPreprocessor(this)

45 {	51 {

46 reset();	52 reset();

47 }	53 }

48	54

49 template <typename CharacterType>

50 inline bool vectorEqualsString(const Vector<CharacterType, 32>& vector, const String& string)

51 {

52 if (vector.size() != string.length())

53 return false;

54

55 if (!string.length())

56 return true;

57

58 return equal(string.impl(), vector.data(), vector.size());

59 }

60

61 void VTTTokenizer::reset()	55 void VTTTokenizer::reset()

62 {	56 {

63 m_token = 0;	57 m_token = 0;

64 m_buffer.clear();	58 m_buffer.clear();

65 }	59 }

66	60

67 bool VTTTokenizer::nextToken(SegmentedString& source, VTTToken& token)	61 bool VTTTokenizer::nextToken(SegmentedString& source, VTTToken& token)

68 {	62 {

69 // If we have a token in progress, then we're supposed to be called back	63 // If we have a token in progress, then we're supposed to be called back

70 // with the same token so we can finish it.	64 // with the same token so we can finish it.

71 ASSERT(!m_token || m_token == &token || token.type() == VTTTokenTypes::Uninitialized);	65 ASSERT(!m_token || m_token == &token || token.type() == VTTTokenTypes::Uninitialized);

72 m_token = &token;	66 m_token = &token;

73	67

74 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))	68 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))

75 return haveBufferedCharacterToken();	69 return haveBufferedCharacterToken();

76	70

77 UChar cc = m_inputStreamPreprocessor.nextInputCharacter();	71 UChar cc = m_inputStreamPreprocessor.nextInputCharacter();

78	72

79 m_state = VTTTokenizerState::DataState;	73 m_state = VTTTokenizerState::DataState;

80	74

81 // 4.8.10.13.4 WebVTT cue text tokenizer	75 // 4.8.10.13.4 WebVTT cue text tokenizer

82 switch (m_state) {	76 switch (m_state) {

83 WEBVTT_BEGIN_STATE(DataState) {	77 WEBVTT_BEGIN_STATE(DataState) {

84 if (cc == '&') {	78 if (cc == '&') {

85 m_buffer.append(static_cast<LChar>(cc));	79 m_buffer.append(static_cast<LChar>(cc));

86 WEBVTT_ADVANCE_TO(EscapeState);	80 WEBVTT_ADVANCE_TO(EscapeState);

87 } else if (cc == '<') {	81 } else if (cc == '<') {

88 // FIXME: the explicit Vector conversion copies into a temporary	82 if (m_token->type() == VTTTokenTypes::Uninitialized || m_token->characters().isEmpty()) {

89 // and is wasteful.

90 if (m_token->type() == VTTTokenTypes::Uninitialized

91 || vectorEqualsString<UChar>(Vector<UChar, 32>(m_token->characters()), emptyString())) {

92 WEBVTT_ADVANCE_TO(TagState);	83 WEBVTT_ADVANCE_TO(TagState);

93 } else {	84 } else {

94 // We don't want to advance input or perform a state transition - just return a (new) token.	85 // We don't want to advance input or perform a state transition - just return a (new) token.

95 // (On the next call to nextToken we will see '<' again, but take the other branch in this if instead.)	86 // (On the next call to nextToken we will see '<' again, but take the other branch in this if instead.)

96 return emitToken(VTTTokenTypes::Character);	87 return emitToken(VTTTokenTypes::Character);

97 }	88 }

98 } else if (cc == kEndOfFileMarker) {	89 } else if (cc == kEndOfFileMarker) {

99 return emitToken(VTTTokenTypes::Character);	90 return emitToken(VTTTokenTypes::Character);

100 } else {	91 } else {

101 bufferCharacter(cc);	92 bufferCharacter(cc);

102 WEBVTT_ADVANCE_TO(DataState);	93 WEBVTT_ADVANCE_TO(DataState);

103 }	94 }

104 }	95 }

105 END_STATE()	96 END_STATE()

106	97

107 WEBVTT_BEGIN_STATE(EscapeState) {	98 WEBVTT_BEGIN_STATE(EscapeState) {

108 if (cc == ';') {	99 if (cc == ';') {

109 if (vectorEqualsString(m_buffer, "&amp")) {	100 if (equalLiteral(m_buffer, "&amp")) {

110 bufferCharacter('&');	101 bufferCharacter('&');

111 } else if (vectorEqualsString(m_buffer, "&lt")) {	102 } else if (equalLiteral(m_buffer, "&lt")) {

112 bufferCharacter('<');	103 bufferCharacter('<');

113 } else if (vectorEqualsString(m_buffer, "&gt")) {	104 } else if (equalLiteral(m_buffer, "&gt")) {

114 bufferCharacter('>');	105 bufferCharacter('>');

115 } else if (vectorEqualsString(m_buffer, "&lrm")) {	106 } else if (equalLiteral(m_buffer, "&lrm")) {

116 bufferCharacter(leftToRightMark);	107 bufferCharacter(leftToRightMark);

117 } else if (vectorEqualsString(m_buffer, "&rlm")) {	108 } else if (equalLiteral(m_buffer, "&rlm")) {

118 bufferCharacter(rightToLeftMark);	109 bufferCharacter(rightToLeftMark);

119 } else if (vectorEqualsString(m_buffer, "&nbsp")) {	110 } else if (equalLiteral(m_buffer, "&nbsp")) {

120 bufferCharacter(noBreakSpace);	111 bufferCharacter(noBreakSpace);

121 } else {	112 } else {

122 m_buffer.append(static_cast<LChar>(cc));	113 m_buffer.append(static_cast<LChar>(cc));

123 m_token->appendToCharacter(m_buffer);	114 m_token->appendToCharacter(m_buffer);

124 }	115 }

125 m_buffer.clear();	116 m_buffer.clear();

126 WEBVTT_ADVANCE_TO(DataState);	117 WEBVTT_ADVANCE_TO(DataState);

127 } else if (isASCIIAlphanumeric(cc)) {	118 } else if (isASCIIAlphanumeric(cc)) {

128 m_buffer.append(static_cast<LChar>(cc));	119 m_buffer.append(static_cast<LChar>(cc));

129 WEBVTT_ADVANCE_TO(EscapeState);	120 WEBVTT_ADVANCE_TO(EscapeState);

130 } else if (cc == kEndOfFileMarker) {	121 } else if (cc == kEndOfFileMarker) {

131 m_token->appendToCharacter(m_buffer);	122 m_token->appendToCharacter(m_buffer);

132 return emitToken(VTTTokenTypes::Character);	123 return emitToken(VTTTokenTypes::Character);

133 } else {	124 } else {

134 if (!vectorEqualsString(m_buffer, "&"))	125 if (!equalLiteral(m_buffer, "&"))

135 m_token->appendToCharacter(m_buffer);	126 m_token->appendToCharacter(m_buffer);

136 m_buffer.clear();	127 m_buffer.clear();

137 WEBVTT_ADVANCE_TO(DataState);	128 WEBVTT_ADVANCE_TO(DataState);

138 }	129 }

139 }	130 }

140 END_STATE()	131 END_STATE()

141	132

142 WEBVTT_BEGIN_STATE(TagState) {	133 WEBVTT_BEGIN_STATE(TagState) {

143 if (isTokenizerWhitespace(cc)) {	134 if (isTokenizerWhitespace(cc)) {

144 m_token->beginEmptyStartTag();	135 m_token->beginEmptyStartTag();

(...skipping 85 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
230 END_STATE()	221 END_STATE()

231	222

232 }	223 }

233	224

234 ASSERT_NOT_REACHED();	225 ASSERT_NOT_REACHED();

235 return false;	226 return false;

236 }	227 }

237	228

238 }	229 }

239	230

OLD	NEW

« no previous file with comments | « Source/core/html/track/vtt/VTTTokenizer.h ('k') | no next file » | no next file with comments »