tools/gn/string_utils.cc - Issue 1268973003: Enhance GN string interpolation.

Side by Side Diff: tools/gn/string_utils.cc

Issue 1268973003: Enhance GN string interpolation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: grammar Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "tools/gn/string_utils.h"	5 #include "tools/gn/string_utils.h"

6	6

7 #include "tools/gn/err.h"	7 #include "tools/gn/err.h"

	8 #include "tools/gn/input_file.h"

	9 #include "tools/gn/parser.h"

8 #include "tools/gn/scope.h"	10 #include "tools/gn/scope.h"

9 #include "tools/gn/token.h"	11 #include "tools/gn/token.h"

10 #include "tools/gn/tokenizer.h"	12 #include "tools/gn/tokenizer.h"

11 #include "tools/gn/value.h"	13 #include "tools/gn/value.h"

12	14

13 namespace {	15 namespace {

14	16

15 // Constructs an Err indicating a range inside a string. We assume that the	17 // Constructs an Err indicating a range inside a string. We assume that the

16 // token has quotes around it that are not counted by the offset.	18 // token has quotes around it that are not counted by the offset.

17 Err ErrInsideStringToken(const Token& token, size_t offset, size_t size,	19 Err ErrInsideStringToken(const Token& token, size_t offset, size_t size,

18 const std::string& msg,	20 const std::string& msg,

19 const std::string& help = std::string()) {	21 const std::string& help = std::string()) {

20 // The "+1" is skipping over the " at the beginning of the token.	22 // The "+1" is skipping over the " at the beginning of the token.

21 int int_offset = static_cast<int>(offset);	23 int int_offset = static_cast<int>(offset);

22 Location begin_loc(token.location().file(),	24 Location begin_loc(token.location().file(),

23 token.location().line_number(),	25 token.location().line_number(),

24 token.location().char_offset() + int_offset + 1,	26 token.location().char_offset() + int_offset + 1,

25 token.location().byte() + int_offset + 1);	27 token.location().byte() + int_offset + 1);

26 Location end_loc(	28 Location end_loc(

27 token.location().file(),	29 token.location().file(),

28 token.location().line_number(),	30 token.location().line_number(),

29 token.location().char_offset() + int_offset + 1 + static_cast<int>(size),	31 token.location().char_offset() + int_offset + 1 + static_cast<int>(size),

30 token.location().byte() + int_offset + 1 + static_cast<int>(size));	32 token.location().byte() + int_offset + 1 + static_cast<int>(size));

31 return Err(LocationRange(begin_loc, end_loc), msg, help);	33 return Err(LocationRange(begin_loc, end_loc), msg, help);

32 }	34 }

33	35

34 // Given the character input[i] indicating the $ in a string, locates the	36 // Notes about expression interpolation. This is based loosely on Dart but is

35 // identifier and places its range in \|identifier\|, and updates \|i\| to	37 // slightly less flexible. In Dart, seeing the ${ in a string is something

36 // point to the last character consumed.	38 // the toplevel parser knows about, and it will recurse into the block

	39 // treating it as a first-class {...} block. So even things like this work:

	40 // "hello ${"foo}"*2+"bar"}" => "hello foo}foo}bar"

	41 // (you can see it did not get confused by the nested strings or the nested "}"

	42 // inside the block).

37 //	43 //

38 // On error returns false and sets the error.	44 // This is cool but complicates the parser for almost no benefit for this

39 bool LocateInlineIdenfitier(const Token& token,	45 // non-general-purpose programming language. The main reason expressions are
	Dirk Pranke 2015/08/03 15:21:19 typo: "language".
40 const char* input, size_t size,	46 // supported here at all are to support "${scope.variable}" and "${list[0]}",

41 size_t* i,	47 // neither of which have any of these edge-cases.

42 base::StringPiece* identifier,	48 //

43 Err* err) {	49 // In this simplified approach, we search for the terminating '}' and execute
	Dirk Pranke 2015/08/03 15:21:19 typo: "terminating".
	50 // the result. This means we can't support any expressions with embedded '}'

	51 // or '"'. To keep things from getting confusing about what's supported and

	52 // what's not, only identifier and accessor expressions are allowed (neither

	53 // of these run into any of these edge-cases).

	54 bool AppendInterpolatedExpression(Scope* scope,

	55 const Token& token,

	56 const char* input,

	57 size_t begin_offset,

	58 size_t end_offset,

	59 std::string* output,

	60 Err* err) {

	61 SourceFile empty_source_file; // Prevent most vexing parse.

	62 InputFile input_file(empty_source_file);

	63 input_file.SetContents(

	64 std::string(&input[begin_offset], end_offset - begin_offset));

	65

	66 // Tokenize.

	67 std::vector<Token> tokens = Tokenizer::Tokenize(&input_file, err);

	68 if (err->has_error()) {

	69 // The error will point into our temporary buffer, rewrite it to refer

	70 // to the original token. This will make the location information less

	71 // precise, but generally there won't be complicated things in string

	72 // interpolations.

	73 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,

	74 err->message(), err->help_text());

	75 return false;

	76 }

	77

	78 // Parse.

	79 scoped_ptr<ParseNode> node = Parser::ParseExpression(tokens, err);

	80 if (err->has_error()) {

	81 // Rewrite error as above.

	82 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,

	83 err->message(), err->help_text());

	84 return false;

	85 }

	86 if (!(node->AsIdentifier() \|\| node->AsAccessor())) {

	87 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,

	88 "Invalid string interpolation.",

	89 "The thing inside the ${} must be an identifier ${foo},\n"

	90 "a scope access ${foo.bar}, or a list access ${foo[0]}.");

	91 return false;

	92 }

	93

	94 // Evaluate.

	95 Value result = node->Execute(scope, err);

	96 if (err->has_error()) {

	97 // Rewrite error as above.

	98 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,

	99 err->message(), err->help_text());

	100 return false;

	101 }

	102

	103 output->append(result.ToString(false));

	104 return true;

	105 }

	106

	107 bool AppendInterpolatedIdentifier(Scope* scope,

	108 const Token& token,

	109 const char* input,

	110 size_t begin_offset,

	111 size_t end_offset,

	112 std::string* output,

	113 Err* err) {

	114 base::StringPiece identifier(&input[begin_offset],

	115 end_offset - begin_offset);

	116 const Value* value = scope->GetValue(identifier, true);

	117 if (!value) {

	118 // We assume the input points inside the token.

	119 *err = ErrInsideStringToken(

	120 token, identifier.data() - token.value().data() - 1, identifier.size(),

	121 "Undefined identifier in string expansion.",

	122 std::string("\"") + identifier + "\" is not currently in scope.");

	123 return false;

	124 }

	125

	126 output->append(value->ToString(false));

	127 return true;

	128 }

	129

	130 // Handles string interpolations: $identifier and ${expression}

	131 //

	132 // \|*i\| is the index into \|input\| of the $. This will be updated to point to

	133 // the last character consumed on success. The token is the original string

	134 // to blame on failure.

	135 //

	136 // On failure, returns false and sets the error. On success, appends the

	137 // result of the interpolation to \|*output\|.

	138 bool AppendStringInterpolation(Scope* scope,

	139 const Token& token,

	140 const char* input, size_t size,

	141 size_t* i,

	142 std::string* output,

	143 Err* err) {

44 size_t dollars_index = *i;	144 size_t dollars_index = *i;

45 (*i)++;	145 (*i)++;

46 if (*i == size) {	146 if (*i == size) {

47 *err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.",	147 *err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.",

48 "I was expecting an identifier after the $.");	148 "I was expecting an identifier or {...} after the $.");

49 return false;	149 return false;

50 }	150 }

51	151

52 bool has_brackets;

53 if (input[*i] == '{') {	152 if (input[*i] == '{') {

	153 // Bracketed expression.

54 (*i)++;	154 (*i)++;

	155 size_t begin_offset = *i;

	156

	157 // Find the closing } and check for non-identifier chars. Don't need to

	158 // bother checking for the more-restricted first character of an identifier

	159 // since the {} unambiguously denotes the range, and identifiers with

	160 // invalid names just won't be found later.

	161 bool has_non_ident_chars = false;

	162 while (i < size && input[i] != '}') {

	163 has_non_ident_chars \|= Tokenizer::IsIdentifierContinuingChar(input[*i]);

	164 (*i)++;

	165 }

55 if (*i == size) {	166 if (*i == size) {

56 *err = ErrInsideStringToken(token, dollars_index, 2,	167 err = ErrInsideStringToken(token, dollars_index, i - dollars_index,

57 "${ at end of string.",	168 "Unterminated ${...");

58 "I was expecting an identifier inside the ${...}.");

59 return false;	169 return false;

60 }	170 }

61 has_brackets = true;	171

62 } else {	172 // In the common case, the thing inside the {} will actually be a

63 has_brackets = false;	173 // simple identifier. Avoid all the complicated parsing of accessors

	174 // in this case.

	175 if (!has_non_ident_chars) {

	176 return AppendInterpolatedIdentifier(scope, token, input, begin_offset,

	177 *i, output, err);

	178 }

	179 return AppendInterpolatedExpression(scope, token, input, begin_offset, *i,

	180 output, err);

64 }	181 }

65	182

66 // First char is special.	183 // Simple identifier.

	184 // The first char of an identifier is more restricted.

67 if (!Tokenizer::IsIdentifierFirstChar(input[*i])) {	185 if (!Tokenizer::IsIdentifierFirstChar(input[*i])) {

68 *err = ErrInsideStringToken(	186 *err = ErrInsideStringToken(

69 token, dollars_index, *i - dollars_index + 1,	187 token, dollars_index, *i - dollars_index + 1,

70 "$ not followed by an identifier char.",	188 "$ not followed by an identifier char.",

71 "It you want a literal $ use \"\\$\".");	189 "It you want a literal $ use \"\\$\".");

72 return false;	190 return false;

73 }	191 }

74 size_t begin_offset = *i;	192 size_t begin_offset = *i;

75 (*i)++;	193 (*i)++;

76	194

77 // Find the first non-identifier char following the string.	195 // Find the first non-identifier char following the string.

78 while (i < size && Tokenizer::IsIdentifierContinuingChar(input[i]))	196 while (i < size && Tokenizer::IsIdentifierContinuingChar(input[i]))

79 (*i)++;	197 (*i)++;

80 size_t end_offset = *i;	198 size_t end_offset = *i;

81	199 (*i)--; // Back up to mark the last character consumed.

82 // If we started with a bracket, validate that there's an ending one. Leave	200 return AppendInterpolatedIdentifier(scope, token, input, begin_offset,

83 // *i pointing to the last char we consumed (backing up one).	201 end_offset, output, err);

84 if (has_brackets) {

85 if (*i == size) {

86 err = ErrInsideStringToken(token, dollars_index, i - dollars_index,

87 "Unterminated ${...");

88 return false;

89 } else if (input[*i] != '}') {

90 err = ErrInsideStringToken(token, i, 1, "Not an identifier in string exp ansion.",

91 "The contents of ${...} should be an identifier. "

92 "This character is out of sorts.");

93 return false;

94 }

95 // We want to consume the bracket but also back up one, so *i is unchanged.

96 } else {

97 (*i)--;

98 }

99

100 *identifier = base::StringPiece(&input[begin_offset],

101 end_offset - begin_offset);

102 return true;

103 }

104

105 bool AppendIdentifierValue(Scope* scope,

106 const Token& token,

107 const base::StringPiece& identifier,

108 std::string* output,

109 Err* err) {

110 const Value* value = scope->GetValue(identifier, true);

111 if (!value) {

112 // We assume the identifier points inside the token.

113 *err = ErrInsideStringToken(

114 token, identifier.data() - token.value().data() - 1, identifier.size(),

115 "Undefined identifier in string expansion.",

116 std::string("\"") + identifier + "\" is not currently in scope.");

117 return false;

118 }

119

120 output->append(value->ToString(false));

121 return true;

122 }	202 }

123	203

124 } // namespace	204 } // namespace

125	205

126 bool ExpandStringLiteral(Scope* scope,	206 bool ExpandStringLiteral(Scope* scope,

127 const Token& literal,	207 const Token& literal,

128 Value* result,	208 Value* result,

129 Err* err) {	209 Err* err) {

130 DCHECK(literal.type() == Token::STRING);	210 DCHECK(literal.type() == Token::STRING);

131 DCHECK(literal.value().size() > 1); // Should include quotes.	211 DCHECK(literal.value().size() > 1); // Should include quotes.

(...skipping 14 matching lines...) Expand all Loading...
146 case '$':	226 case '$':

147 output.push_back(input[i + 1]);	227 output.push_back(input[i + 1]);

148 i++;	228 i++;

149 continue;	229 continue;

150 default: // Everything else has no meaning: pass the literal.	230 default: // Everything else has no meaning: pass the literal.

151 break;	231 break;

152 }	232 }

153 }	233 }

154 output.push_back(input[i]);	234 output.push_back(input[i]);

155 } else if (input[i] == '$') {	235 } else if (input[i] == '$') {

156 base::StringPiece identifier;	236 if (!AppendStringInterpolation(scope, literal, input, size, &i,

157 if (!LocateInlineIdenfitier(literal, input, size, &i, &identifier, err))	237 &output, err))

158 return false;

159 if (!AppendIdentifierValue(scope, literal, identifier, &output, err))

160 return false;	238 return false;

161 } else {	239 } else {

162 output.push_back(input[i]);	240 output.push_back(input[i]);

163 }	241 }

164 }	242 }

165 return true;	243 return true;

166 }	244 }

167	245

168 std::string RemovePrefix(const std::string& str, const std::string& prefix) {	246 std::string RemovePrefix(const std::string& str, const std::string& prefix) {

169 CHECK(str.size() >= prefix.size() &&	247 CHECK(str.size() >= prefix.size() &&

170 str.compare(0, prefix.size(), prefix) == 0);	248 str.compare(0, prefix.size(), prefix) == 0);

171 return str.substr(prefix.size());	249 return str.substr(prefix.size());

172 }	250 }

173	251

174 void TrimTrailingSlash(std::string* str) {	252 void TrimTrailingSlash(std::string* str) {

175 if (!str->empty()) {	253 if (!str->empty()) {

176 DCHECK((*str)[str->size() - 1] == '/');	254 DCHECK((*str)[str->size() - 1] == '/');

177 str->resize(str->size() - 1);	255 str->resize(str->size() - 1);

178 }	256 }

179 }	257 }

OLD	NEW

« no previous file with comments | « tools/gn/parser.cc ('k') | tools/gn/string_utils_unittest.cc » ('j') | no next file with comments »