Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1370)

Side by Side Diff: tools/gn/string_utils.cc

Issue 1268973003: Enhance GN string interpolation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: grammar Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/gn/parser.cc ('k') | tools/gn/string_utils_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "tools/gn/string_utils.h" 5 #include "tools/gn/string_utils.h"
6 6
7 #include "tools/gn/err.h" 7 #include "tools/gn/err.h"
8 #include "tools/gn/input_file.h"
9 #include "tools/gn/parser.h"
8 #include "tools/gn/scope.h" 10 #include "tools/gn/scope.h"
9 #include "tools/gn/token.h" 11 #include "tools/gn/token.h"
10 #include "tools/gn/tokenizer.h" 12 #include "tools/gn/tokenizer.h"
11 #include "tools/gn/value.h" 13 #include "tools/gn/value.h"
12 14
13 namespace { 15 namespace {
14 16
15 // Constructs an Err indicating a range inside a string. We assume that the 17 // Constructs an Err indicating a range inside a string. We assume that the
16 // token has quotes around it that are not counted by the offset. 18 // token has quotes around it that are not counted by the offset.
17 Err ErrInsideStringToken(const Token& token, size_t offset, size_t size, 19 Err ErrInsideStringToken(const Token& token, size_t offset, size_t size,
18 const std::string& msg, 20 const std::string& msg,
19 const std::string& help = std::string()) { 21 const std::string& help = std::string()) {
20 // The "+1" is skipping over the " at the beginning of the token. 22 // The "+1" is skipping over the " at the beginning of the token.
21 int int_offset = static_cast<int>(offset); 23 int int_offset = static_cast<int>(offset);
22 Location begin_loc(token.location().file(), 24 Location begin_loc(token.location().file(),
23 token.location().line_number(), 25 token.location().line_number(),
24 token.location().char_offset() + int_offset + 1, 26 token.location().char_offset() + int_offset + 1,
25 token.location().byte() + int_offset + 1); 27 token.location().byte() + int_offset + 1);
26 Location end_loc( 28 Location end_loc(
27 token.location().file(), 29 token.location().file(),
28 token.location().line_number(), 30 token.location().line_number(),
29 token.location().char_offset() + int_offset + 1 + static_cast<int>(size), 31 token.location().char_offset() + int_offset + 1 + static_cast<int>(size),
30 token.location().byte() + int_offset + 1 + static_cast<int>(size)); 32 token.location().byte() + int_offset + 1 + static_cast<int>(size));
31 return Err(LocationRange(begin_loc, end_loc), msg, help); 33 return Err(LocationRange(begin_loc, end_loc), msg, help);
32 } 34 }
33 35
34 // Given the character input[i] indicating the $ in a string, locates the 36 // Notes about expression interpolation. This is based loosely on Dart but is
35 // identifier and places its range in |*identifier|, and updates |*i| to 37 // slightly less flexible. In Dart, seeing the ${ in a string is something
36 // point to the last character consumed. 38 // the toplevel parser knows about, and it will recurse into the block
39 // treating it as a first-class {...} block. So even things like this work:
40 // "hello ${"foo}"*2+"bar"}" => "hello foo}foo}bar"
41 // (you can see it did not get confused by the nested strings or the nested "}"
42 // inside the block).
37 // 43 //
38 // On error returns false and sets the error. 44 // This is cool but complicates the parser for almost no benefit for this
39 bool LocateInlineIdenfitier(const Token& token, 45 // non-general-purpose programming langhage. The main reason expressions are
Dirk Pranke 2015/08/03 15:21:19 typo: "language".
40 const char* input, size_t size, 46 // supported here at all are to support "${scope.variable}" and "${list[0]}",
41 size_t* i, 47 // neither of which have any of these edge-cases.
42 base::StringPiece* identifier, 48 //
43 Err* err) { 49 // In this simplified approach, we search for the termianting '}' and execute
Dirk Pranke 2015/08/03 15:21:19 typo: "terminating".
50 // the result. This means we can't support any expressions with embedded '}'
51 // or '"'. To keep things from getting confusing about what's supported and
52 // what's not, only identifier and accessor expressions are allowed (neither
53 // of these run into any of these edge-cases).
54 bool AppendInterpolatedExpression(Scope* scope,
55 const Token& token,
56 const char* input,
57 size_t begin_offset,
58 size_t end_offset,
59 std::string* output,
60 Err* err) {
61 SourceFile empty_source_file; // Prevent most vexing parse.
62 InputFile input_file(empty_source_file);
63 input_file.SetContents(
64 std::string(&input[begin_offset], end_offset - begin_offset));
65
66 // Tokenize.
67 std::vector<Token> tokens = Tokenizer::Tokenize(&input_file, err);
68 if (err->has_error()) {
69 // The error will point into our temporary buffer, rewrite it to refer
70 // to the original token. This will make the location information less
71 // precise, but generally there won't be complicated things in string
72 // interpolations.
73 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,
74 err->message(), err->help_text());
75 return false;
76 }
77
78 // Parse.
79 scoped_ptr<ParseNode> node = Parser::ParseExpression(tokens, err);
80 if (err->has_error()) {
81 // Rewrite error as above.
82 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,
83 err->message(), err->help_text());
84 return false;
85 }
86 if (!(node->AsIdentifier() || node->AsAccessor())) {
87 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,
88 "Invalid string interpolation.",
89 "The thing inside the ${} must be an identifier ${foo},\n"
90 "a scope access ${foo.bar}, or a list access ${foo[0]}.");
91 return false;
92 }
93
94 // Evaluate.
95 Value result = node->Execute(scope, err);
96 if (err->has_error()) {
97 // Rewrite error as above.
98 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,
99 err->message(), err->help_text());
100 return false;
101 }
102
103 output->append(result.ToString(false));
104 return true;
105 }
106
107 bool AppendInterpolatedIdentifier(Scope* scope,
108 const Token& token,
109 const char* input,
110 size_t begin_offset,
111 size_t end_offset,
112 std::string* output,
113 Err* err) {
114 base::StringPiece identifier(&input[begin_offset],
115 end_offset - begin_offset);
116 const Value* value = scope->GetValue(identifier, true);
117 if (!value) {
118 // We assume the input points inside the token.
119 *err = ErrInsideStringToken(
120 token, identifier.data() - token.value().data() - 1, identifier.size(),
121 "Undefined identifier in string expansion.",
122 std::string("\"") + identifier + "\" is not currently in scope.");
123 return false;
124 }
125
126 output->append(value->ToString(false));
127 return true;
128 }
129
130 // Handles string interpolations: $identifier and ${expression}
131 //
132 // |*i| is the index into |input| of the $. This will be updated to point to
133 // the last character consumed on success. The token is the original string
134 // to blame on failure.
135 //
136 // On failure, returns false and sets the error. On success, appends the
137 // result of the interpolation to |*output|.
138 bool AppendStringInterpolation(Scope* scope,
139 const Token& token,
140 const char* input, size_t size,
141 size_t* i,
142 std::string* output,
143 Err* err) {
44 size_t dollars_index = *i; 144 size_t dollars_index = *i;
45 (*i)++; 145 (*i)++;
46 if (*i == size) { 146 if (*i == size) {
47 *err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.", 147 *err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.",
48 "I was expecting an identifier after the $."); 148 "I was expecting an identifier or {...} after the $.");
49 return false; 149 return false;
50 } 150 }
51 151
52 bool has_brackets;
53 if (input[*i] == '{') { 152 if (input[*i] == '{') {
153 // Bracketed expression.
54 (*i)++; 154 (*i)++;
155 size_t begin_offset = *i;
156
157 // Find the closing } and check for non-identifier chars. Don't need to
158 // bother checking for the more-restricted first character of an identifier
159 // since the {} unambiguously denotes the range, and identifiers with
160 // invalid names just won't be found later.
161 bool has_non_ident_chars = false;
162 while (*i < size && input[*i] != '}') {
163 has_non_ident_chars |= Tokenizer::IsIdentifierContinuingChar(input[*i]);
164 (*i)++;
165 }
55 if (*i == size) { 166 if (*i == size) {
56 *err = ErrInsideStringToken(token, dollars_index, 2, 167 *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index,
57 "${ at end of string.", 168 "Unterminated ${...");
58 "I was expecting an identifier inside the ${...}.");
59 return false; 169 return false;
60 } 170 }
61 has_brackets = true; 171
62 } else { 172 // In the common case, the thing inside the {} will actually be a
63 has_brackets = false; 173 // simple identifier. Avoid all the complicated parsing of accessors
174 // in this case.
175 if (!has_non_ident_chars) {
176 return AppendInterpolatedIdentifier(scope, token, input, begin_offset,
177 *i, output, err);
178 }
179 return AppendInterpolatedExpression(scope, token, input, begin_offset, *i,
180 output, err);
64 } 181 }
65 182
66 // First char is special. 183 // Simple identifier.
184 // The first char of an identifier is more restricted.
67 if (!Tokenizer::IsIdentifierFirstChar(input[*i])) { 185 if (!Tokenizer::IsIdentifierFirstChar(input[*i])) {
68 *err = ErrInsideStringToken( 186 *err = ErrInsideStringToken(
69 token, dollars_index, *i - dollars_index + 1, 187 token, dollars_index, *i - dollars_index + 1,
70 "$ not followed by an identifier char.", 188 "$ not followed by an identifier char.",
71 "It you want a literal $ use \"\\$\"."); 189 "It you want a literal $ use \"\\$\".");
72 return false; 190 return false;
73 } 191 }
74 size_t begin_offset = *i; 192 size_t begin_offset = *i;
75 (*i)++; 193 (*i)++;
76 194
77 // Find the first non-identifier char following the string. 195 // Find the first non-identifier char following the string.
78 while (*i < size && Tokenizer::IsIdentifierContinuingChar(input[*i])) 196 while (*i < size && Tokenizer::IsIdentifierContinuingChar(input[*i]))
79 (*i)++; 197 (*i)++;
80 size_t end_offset = *i; 198 size_t end_offset = *i;
81 199 (*i)--; // Back up to mark the last character consumed.
82 // If we started with a bracket, validate that there's an ending one. Leave 200 return AppendInterpolatedIdentifier(scope, token, input, begin_offset,
83 // *i pointing to the last char we consumed (backing up one). 201 end_offset, output, err);
84 if (has_brackets) {
85 if (*i == size) {
86 *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index,
87 "Unterminated ${...");
88 return false;
89 } else if (input[*i] != '}') {
90 *err = ErrInsideStringToken(token, *i, 1, "Not an identifier in string expansion.",
91 "The contents of ${...} should be an identifier. "
92 "This character is out of sorts.");
93 return false;
94 }
95 // We want to consume the bracket but also back up one, so *i is unchanged.
96 } else {
97 (*i)--;
98 }
99
100 *identifier = base::StringPiece(&input[begin_offset],
101 end_offset - begin_offset);
102 return true;
103 }
104
105 bool AppendIdentifierValue(Scope* scope,
106 const Token& token,
107 const base::StringPiece& identifier,
108 std::string* output,
109 Err* err) {
110 const Value* value = scope->GetValue(identifier, true);
111 if (!value) {
112 // We assume the identifier points inside the token.
113 *err = ErrInsideStringToken(
114 token, identifier.data() - token.value().data() - 1, identifier.size(),
115 "Undefined identifier in string expansion.",
116 std::string("\"") + identifier + "\" is not currently in scope.");
117 return false;
118 }
119
120 output->append(value->ToString(false));
121 return true;
122 } 202 }
123 203
124 } // namespace 204 } // namespace
125 205
126 bool ExpandStringLiteral(Scope* scope, 206 bool ExpandStringLiteral(Scope* scope,
127 const Token& literal, 207 const Token& literal,
128 Value* result, 208 Value* result,
129 Err* err) { 209 Err* err) {
130 DCHECK(literal.type() == Token::STRING); 210 DCHECK(literal.type() == Token::STRING);
131 DCHECK(literal.value().size() > 1); // Should include quotes. 211 DCHECK(literal.value().size() > 1); // Should include quotes.
(...skipping 14 matching lines...) Expand all
146 case '$': 226 case '$':
147 output.push_back(input[i + 1]); 227 output.push_back(input[i + 1]);
148 i++; 228 i++;
149 continue; 229 continue;
150 default: // Everything else has no meaning: pass the literal. 230 default: // Everything else has no meaning: pass the literal.
151 break; 231 break;
152 } 232 }
153 } 233 }
154 output.push_back(input[i]); 234 output.push_back(input[i]);
155 } else if (input[i] == '$') { 235 } else if (input[i] == '$') {
156 base::StringPiece identifier; 236 if (!AppendStringInterpolation(scope, literal, input, size, &i,
157 if (!LocateInlineIdenfitier(literal, input, size, &i, &identifier, err)) 237 &output, err))
158 return false;
159 if (!AppendIdentifierValue(scope, literal, identifier, &output, err))
160 return false; 238 return false;
161 } else { 239 } else {
162 output.push_back(input[i]); 240 output.push_back(input[i]);
163 } 241 }
164 } 242 }
165 return true; 243 return true;
166 } 244 }
167 245
168 std::string RemovePrefix(const std::string& str, const std::string& prefix) { 246 std::string RemovePrefix(const std::string& str, const std::string& prefix) {
169 CHECK(str.size() >= prefix.size() && 247 CHECK(str.size() >= prefix.size() &&
170 str.compare(0, prefix.size(), prefix) == 0); 248 str.compare(0, prefix.size(), prefix) == 0);
171 return str.substr(prefix.size()); 249 return str.substr(prefix.size());
172 } 250 }
173 251
174 void TrimTrailingSlash(std::string* str) { 252 void TrimTrailingSlash(std::string* str) {
175 if (!str->empty()) { 253 if (!str->empty()) {
176 DCHECK((*str)[str->size() - 1] == '/'); 254 DCHECK((*str)[str->size() - 1] == '/');
177 str->resize(str->size() - 1); 255 str->resize(str->size() - 1);
178 } 256 }
179 } 257 }
OLDNEW
« no previous file with comments | « tools/gn/parser.cc ('k') | tools/gn/string_utils_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698