sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart - Issue 27510003: Scanner for UTF-8 byte arrays

Side by Side Diff: sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart

Issue 27510003: Scanner for UTF-8 byte arrays (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: fixes compiler tests Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« sdk/lib/_internal/compiler/implementation/js_backend/backend.dart ('K') | « sdk/lib/_internal/compiler/implementation/patch_parser.dart ('k') | sdk/lib/_internal/compiler/implementation/scanner/parser.dart » ('j') | sdk/lib/_internal/compiler/implementation/scanner/scanner.dart » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of scanner_implementation;	5 part of scanner;

6	6

7 abstract	7 abstract class ArrayBasedScanner extends AbstractScanner {

8 class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {	8 ArrayBasedScanner(SourceFile file, bool includeComments)

9 int get charOffset => byteOffset + extraCharOffset;	9 : super(file, includeComments);

10 final Token tokens;

11 Token tail;

12 int tokenStart;

13 int byteOffset;

14 final bool includeComments;

15	10

16 /** Since the input is UTF8, some characters are represented by more	11 /**

17 * than one byte. [extraCharOffset] tracks the difference. */	12 * The stack of open groups, e.g. [: { ... ( .. :]

18 int extraCharOffset;	13 * Each BeginGroupToken has a pointer to the token where the group

	14 * ends. This field is set when scanning the end group token.

	15 */

19 Link<BeginGroupToken> groupingStack = const Link<BeginGroupToken>();	16 Link<BeginGroupToken> groupingStack = const Link<BeginGroupToken>();

20	17

21 ArrayBasedScanner(this.includeComments)	18 /**

22 : this.extraCharOffset = 0,	19 * Appends a token whose kind is determined by [info] and content is defined

23 this.tokenStart = -1,	20 * by the String [value].

24 this.byteOffset = -1,	21 *

25 this.tokens = new Token(EOF_INFO, -1) {	22 * This method is invoked for class names, field names, method names, types,

26 this.tail = this.tokens;	23 * etc.

	24 */

	25 void appendStringToken(PrecedenceInfo info, String value) {

	26 tail.next = new StringToken.fromString(info, value, tokenStart, true);

	27 tail = tail.next;

27 }	28 }

28	29

29 int advance() {	30 /**

30 int next = nextByte();	31 * Appends a fixed token whose kind and content is determined by [info].

31 return next;	32 * Appends an operator token from [info].

	33 *

	34 * An operator token represents operators like ':', '.', ';', '&&', '==', '--',

	35 * '=>', etc.

	36 */

	37 void appendPrecedenceToken(PrecedenceInfo info) {

	38 tail.next = new SymbolToken(info, tokenStart);

	39 tail = tail.next;

32 }	40 }

33	41

	42 /**

	43 * Appends a fixed token based on whether the current char is [choice] or not.

	44 * If the current char is [choice] a fixed token whose kind and content

	45 * is determined by [yes] is appended, otherwise a fixed token whose kind

	46 * and content is determined by [no] is appended.

	47 */

34 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no) {	48 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no) {

35 int next = advance();	49 int next = advance();

36 if (identical(next, choice)) {	50 if (identical(next, choice)) {

37 appendPrecedenceToken(yes);	51 appendPrecedenceToken(yes);

38 return advance();	52 return advance();

39 } else {	53 } else {

40 appendPrecedenceToken(no);	54 appendPrecedenceToken(no);

41 return next;	55 return next;

42 }	56 }

43 }	57 }

44	58

45 void appendPrecedenceToken(PrecedenceInfo info) {	59 /**

46 tail.next = new Token(info, tokenStart);	60 * Appends a keyword token whose kind is determined by [keyword].

47 tail = tail.next;	61 */

48 }

49

50 void appendStringToken(PrecedenceInfo info, String value) {

51 tail.next = new StringToken(info, value, tokenStart);

52 tail = tail.next;

53 }

54

55 void appendKeywordToken(Keyword keyword) {	62 void appendKeywordToken(Keyword keyword) {

56 String syntax = keyword.syntax;	63 String syntax = keyword.syntax;

57

58 // Type parameters and arguments cannot contain 'this' or 'super'.	64 // Type parameters and arguments cannot contain 'this' or 'super'.

59 if (identical(syntax, 'this') \|\| identical(syntax, 'super')) discardOpenLt() ;	65 if (identical(syntax, 'this') \|\| identical(syntax, 'super')) {

	66 discardOpenLt();

	67 }

60 tail.next = new KeywordToken(keyword, tokenStart);	68 tail.next = new KeywordToken(keyword, tokenStart);

61 tail = tail.next;	69 tail = tail.next;

62 }	70 }

63	71

64 void appendEofToken() {	72 void appendEofToken() {

65 tail.next = new Token(EOF_INFO, charOffset);	73 beginToken();

	74 tail.next = new SymbolToken(EOF_INFO, tokenStart);

66 tail = tail.next;	75 tail = tail.next;

67 // EOF points to itself so there's always infinite look-ahead.	76 // EOF points to itself so there's always infinite look-ahead.

68 tail.next = tail;	77 tail.next = tail;

69 discardOpenLt();	78 discardOpenLt();

70 while (!groupingStack.isEmpty) {	79 while (!groupingStack.isEmpty) {

71 unmatchedBeginGroup(groupingStack.head);	80 unmatchedBeginGroup(groupingStack.head);

72 groupingStack = groupingStack.tail;	81 groupingStack = groupingStack.tail;

73 }	82 }

74 }	83 }

75	84

76 void beginToken() {	85 /**

77 tokenStart = charOffset;	86 * Notifies scanning a whitespace character. Note that [appendWhiteSpace] is

	87 * not always invoked for [$SPACE] characters.

	88 *

	89 * This method is used by the scanners to track line breaks and create the

	90 * [lineStarts] map.

	91 */

	92 void appendWhiteSpace(int next) {

	93 if (next == $LF && file != null) {

	94 lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.

	95 }

78 }	96 }

79	97

80 Token firstToken() {	98 /**

81 return tokens.next;	99 * Notifies on [$LF] characters in multi-line commends or strings.
	ngeoffray 2013/10/18 10:19:37 commends -> comments lukas 2013/10/24 16:48:36 Done.
	100 *

	101 * This method is used by the scanners to track line breaks and create the

	102 * [lineStarts] map.

	103 */

	104 void lineFeedInMultiline() {

	105 if (file != null) {

	106 lineStarts.add(stringOffset + 1);

	107 }

82 }	108 }

83	109

84 Token previousToken() {	110 /**

85 return tail;	111 * Appends a token that begins a new group, represented by [value].

86 }	112 * Group begin tokens are '{', '(', '[' and '${'.

87	113 */

88 void addToCharOffset(int offset) {	114 void appendBeginGroup(PrecedenceInfo info) {

89 extraCharOffset += offset;	115 Token token = new BeginGroupToken(info, tokenStart);

90 }

91

92 void appendWhiteSpace(int next) {

93 // Do nothing, we don't collect white space.

94 }

95

96 void appendBeginGroup(PrecedenceInfo info, String value) {

97 Token token = new BeginGroupToken(info, value, tokenStart);

98 tail.next = token;	116 tail.next = token;

99 tail = tail.next;	117 tail = tail.next;

	118

	119 // { ( [ ${ cannot appear inside type parameters / arguments.

100 if (!identical(info.kind, LT_TOKEN)) discardOpenLt();	120 if (!identical(info.kind, LT_TOKEN)) discardOpenLt();

101 groupingStack = groupingStack.prepend(token);	121 groupingStack = groupingStack.prepend(token);

102 }	122 }

103	123

104 int appendEndGroup(PrecedenceInfo info, String value, int openKind) {	124 /**

105 assert(!identical(openKind, LT_TOKEN));	125 * Appends a token that begins a ends group, represented by [value].
	ngeoffray 2013/10/18 10:19:37 a ends -> an end lukas 2013/10/24 16:48:36 On 2013/10/18 10:19:37, ngeoffray wrote: > a ends -> an end Done.
106 appendStringToken(info, value);	126 * It handles the group end tokens '}', ')' and ']'. The tokens '>' and

	127 * '>>' are handled separately by [appendGt] and [appendGtGt].

	128 */

	129 int appendEndGroup(PrecedenceInfo info, int openKind) {

	130 assert(!identical(openKind, LT_TOKEN)); // openKind is < for > and >>

	131 appendPrecedenceToken(info);

	132 // Don't report unmatched errors for <; it is also the less-than operator.

107 discardOpenLt();	133 discardOpenLt();

108 if (groupingStack.isEmpty) {	134 if (groupingStack.isEmpty) {

109 return advance();	135 return advance();

110 }	136 }

111 BeginGroupToken begin = groupingStack.head;	137 BeginGroupToken begin = groupingStack.head;

112 if (!identical(begin.kind, openKind)) {	138 if (!identical(begin.kind, openKind)) {

113 if (!identical(openKind, OPEN_CURLY_BRACKET_TOKEN) \|\|	139 if (!identical(openKind, OPEN_CURLY_BRACKET_TOKEN) \|\|

114 !identical(begin.kind, STRING_INTERPOLATION_TOKEN)) {	140 !identical(begin.kind, STRING_INTERPOLATION_TOKEN)) {

115 // Not ending string interpolation.	141 // Not ending string interpolation.

116 return error(new SourceString('Unmatched ${begin.stringValue}'));	142 unmatchedBeginGroup(begin);

	143 return advance();

117 }	144 }

118 // We're ending an interpolated expression.	145 // We're ending an interpolated expression.

119 begin.endGroup = tail;	146 begin.endGroup = tail;

120 groupingStack = groupingStack.tail;	147 groupingStack = groupingStack.tail;

121 // Using "start-of-text" to signal that we're back in string	148 // Using "start-of-text" to signal that we're back in string

122 // scanning mode.	149 // scanning mode.

123 return $STX;	150 return $STX;

124 }	151 }

125 begin.endGroup = tail;	152 begin.endGroup = tail;

126 groupingStack = groupingStack.tail;	153 groupingStack = groupingStack.tail;

127 return advance();	154 return advance();

128 }	155 }

129	156

130 void appendGt(PrecedenceInfo info, String value) {	157 /**

131 appendStringToken(info, value);	158 * Appends a token for '>'.

	159 * This method does not issue unmatched errors, because > is also the

	160 * greater-than operator. It does not necessarily have to close a group.

	161 */

	162 void appendGt(PrecedenceInfo info) {

	163 appendPrecedenceToken(info);

132 if (groupingStack.isEmpty) return;	164 if (groupingStack.isEmpty) return;

133 if (identical(groupingStack.head.kind, LT_TOKEN)) {	165 if (identical(groupingStack.head.kind, LT_TOKEN)) {

134 groupingStack.head.endGroup = tail;	166 groupingStack.head.endGroup = tail;

135 groupingStack = groupingStack.tail;

136 }

137 }

138

139 void appendGtGt(PrecedenceInfo info, String value) {

140 appendStringToken(info, value);

141 if (groupingStack.isEmpty) return;

142 if (identical(groupingStack.head.kind, LT_TOKEN)) {

143 groupingStack = groupingStack.tail;

144 }

145 if (groupingStack.isEmpty) return;

146 if (identical(groupingStack.head.kind, LT_TOKEN)) {

147 groupingStack.head.endGroup = tail;

148 groupingStack = groupingStack.tail;	167 groupingStack = groupingStack.tail;

149 }	168 }

150 }	169 }

151	170

152 void appendGtGtGt(PrecedenceInfo info, String value) {	171 /**

153 appendStringToken(info, value);	172 * Appends a token for '>>'.

	173 * This method does not issue unmatched errors, because >> is also the

	174 * shift operator. It does not necessarily have to close a group.

	175 */

	176 void appendGtGt(PrecedenceInfo info) {

	177 appendPrecedenceToken(info);

154 if (groupingStack.isEmpty) return;	178 if (groupingStack.isEmpty) return;

155 if (identical(groupingStack.head.kind, LT_TOKEN)) {	179 if (identical(groupingStack.head.kind, LT_TOKEN)) {

	180 // Don't assign endGroup: in "T<U<V>>", the '>>' token closes the outer

	181 // '<', the inner '<' is left without endGroup.

156 groupingStack = groupingStack.tail;	182 groupingStack = groupingStack.tail;

157 }	183 }

158 if (groupingStack.isEmpty) return;	184 if (groupingStack.isEmpty) return;

159 if (identical(groupingStack.head.kind, LT_TOKEN)) {

160 groupingStack = groupingStack.tail;

161 }

162 if (groupingStack.isEmpty) return;

163 if (identical(groupingStack.head.kind, LT_TOKEN)) {	185 if (identical(groupingStack.head.kind, LT_TOKEN)) {

164 groupingStack.head.endGroup = tail;	186 groupingStack.head.endGroup = tail;

165 groupingStack = groupingStack.tail;	187 groupingStack = groupingStack.tail;

166 }	188 }

167 }	189 }

168	190

169 void appendComment() {	191 void appendComment(start, bool asciiOnly) {

170 if (!includeComments) return;	192 if (!includeComments) return;

171 SourceString value = utf8String(tokenStart, -1);	193 appendSubstringToken(COMMENT_INFO, start, asciiOnly);

172 appendByteStringToken(COMMENT_INFO, value);

173 }	194 }

174	195

	196 /**

	197 * We call this method to discard '<' from the "grouping" stack
	ngeoffray 2013/10/18 10:19:37 Replace 'We' by the actual callers. lukas 2013/10/24 16:48:36 On 2013/10/18 10:19:37, ngeoffray wrote: > Replace 'We' by the actual callers. Done.
	198 * (maintained by subclasses).

	199 *

	200 * [PartialParser.skipExpression] relies on the fact that we do not

	201 * create groups for stuff like:

	202 * [:a = b < c, d = e > f:].

	203 *

	204 * In other words, this method is called when the scanner recognizes

	205 * something which cannot possibly be part of a type

	206 * parameter/argument list.

	207 */

175 void discardOpenLt() {	208 void discardOpenLt() {

176 while (!groupingStack.isEmpty	209 while (!groupingStack.isEmpty

177 && identical(groupingStack.head.kind, LT_TOKEN)) {	210 && identical(groupingStack.head.kind, LT_TOKEN)) {

178 groupingStack = groupingStack.tail;	211 groupingStack = groupingStack.tail;

179 }	212 }

180 }	213 }

181	214 }

182 void unmatchedBeginGroup(BeginGroupToken begin);

183 }

OLD	NEW