Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(43)

Unified Diff: tools/gn/string_utils.cc

Issue 1268973003: Enhance GN string interpolation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Review comments fixed Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/gn/parser.cc ('k') | tools/gn/string_utils_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/gn/string_utils.cc
diff --git a/tools/gn/string_utils.cc b/tools/gn/string_utils.cc
index 1aa10812bf7b43aea66a687d5cef58a1184bc7da..6d3f87774a04be5d3ff4b6aa46f651e84da92409 100644
--- a/tools/gn/string_utils.cc
+++ b/tools/gn/string_utils.cc
@@ -5,6 +5,8 @@
#include "tools/gn/string_utils.h"
#include "tools/gn/err.h"
+#include "tools/gn/input_file.h"
+#include "tools/gn/parser.h"
#include "tools/gn/scope.h"
#include "tools/gn/token.h"
#include "tools/gn/tokenizer.h"
@@ -31,39 +33,155 @@ Err ErrInsideStringToken(const Token& token, size_t offset, size_t size,
return Err(LocationRange(begin_loc, end_loc), msg, help);
}
-// Given the character input[i] indicating the $ in a string, locates the
-// identifier and places its range in |*identifier|, and updates |*i| to
-// point to the last character consumed.
+// Notes about expression interpolation. This is based loosely on Dart but is
+// slightly less flexible. In Dart, seeing the ${ in a string is something
+// the toplevel parser knows about, and it will recurse into the block
+// treating it as a first-class {...} block. So even things like this work:
+// "hello ${"foo}"*2+"bar"}" => "hello foo}foo}bar"
+// (you can see it did not get confused by the nested strings or the nested "}"
+// inside the block).
//
-// On error returns false and sets the error.
-bool LocateInlineIdenfitier(const Token& token,
- const char* input, size_t size,
- size_t* i,
- base::StringPiece* identifier,
- Err* err) {
+// This is cool but complicates the parser for almost no benefit for this
+// non-general-purpose programming language. The main reason expressions are
+// supported here at all is to support "${scope.variable}" and "${list[0]}",
+// neither of which have any of these edge-cases.
+//
+// In this simplified approach, we search for the terminating '}' and execute
+// the result. This means we can't support any expressions with embedded '}'
+// or '"'. To keep people from getting confused about what's supported and
+// what's not, only identifier and accessor expressions are allowed (neither
+// of these run into any of these edge-cases).
+// Evaluates the text input[begin_offset, end_offset) -- the contents of a
+// ${...} interpolation -- as an expression and appends the stringified result
+// to |*output|.
+//
+// The text is tokenized and parsed out of a temporary InputFile, so every
+// error raised along the way is rewritten to blame the matching range of the
+// original string |token| instead. Only identifier and accessor expressions
+// are accepted.
+//
+// Returns true on success. On failure returns false with |*err| set.
+bool AppendInterpolatedExpression(Scope* scope,
+                                  const Token& token,
+                                  const char* input,
+                                  size_t begin_offset,
+                                  size_t end_offset,
+                                  std::string* output,
+                                  Err* err) {
+  // Length of the expression text; also the length of the range blamed in
+  // every error reported below.
+  const size_t expr_size = end_offset - begin_offset;
+
+  // Copy the expression text into a standalone input file for the tokenizer.
+  SourceFile no_source_file;  // Named local prevents the most vexing parse.
+  InputFile expr_file(no_source_file);
+  expr_file.SetContents(std::string(input + begin_offset, expr_size));
+
+  // Step 1: tokenize.
+  std::vector<Token> expr_tokens = Tokenizer::Tokenize(&expr_file, err);
+  if (err->has_error()) {
+    // The error's location refers to the temporary buffer. Re-issue it
+    // against the original string token; the location becomes less precise,
+    // but interpolations are generally not complicated.
+    *err = ErrInsideStringToken(token, begin_offset, expr_size,
+                                err->message(), err->help_text());
+    return false;
+  }
+
+  // Step 2: parse.
+  scoped_ptr<ParseNode> expr_node = Parser::ParseExpression(expr_tokens, err);
+  if (err->has_error()) {
+    // Same relocation of the error as for tokenizing.
+    *err = ErrInsideStringToken(token, begin_offset, expr_size,
+                                err->message(), err->help_text());
+    return false;
+  }
+
+  // Anything other than an identifier or an accessor is rejected.
+  if (!expr_node->AsIdentifier() && !expr_node->AsAccessor()) {
+    *err = ErrInsideStringToken(token, begin_offset, expr_size,
+        "Invalid string interpolation.",
+        "The thing inside the ${} must be an identifier ${foo},\n"
+        "a scope access ${foo.bar}, or a list access ${foo[0]}.");
+    return false;
+  }
+
+  // Step 3: evaluate.
+  Value evaluated = expr_node->Execute(scope, err);
+  if (err->has_error()) {
+    // Same relocation of the error as for tokenizing.
+    *err = ErrInsideStringToken(token, begin_offset, expr_size,
+                                err->message(), err->help_text());
+    return false;
+  }
+
+  output->append(evaluated.ToString(false));
+  return true;
+}
+
+// Looks up the identifier spelled by input[begin_offset, end_offset) in
+// |scope| and appends its stringified value to |*output|.
+//
+// Returns true on success. If the identifier is not found, returns false and
+// sets |*err| to an error located inside the original string |token|.
+bool AppendInterpolatedIdentifier(Scope* scope,
+                                  const Token& token,
+                                  const char* input,
+                                  size_t begin_offset,
+                                  size_t end_offset,
+                                  std::string* output,
+                                  Err* err) {
+  base::StringPiece name(input + begin_offset, end_offset - begin_offset);
+
+  if (const Value* found = scope->GetValue(name, true)) {
+    output->append(found->ToString(false));
+    return true;
+  }
+
+  // Lookup failed. We assume the input points inside the token, so the
+  // error offset is derived from pointer arithmetic against the token text.
+  // NOTE(review): the "- 1" presumably skips the token's opening quote --
+  // confirm against ErrInsideStringToken.
+  *err = ErrInsideStringToken(
+      token, name.data() - token.value().data() - 1, name.size(),
+      "Undefined identifier in string expansion.",
+      std::string("\"") + name + "\" is not currently in scope.");
+  return false;
+}
+
+// Handles string interpolations: $identifier and ${expression}
+//
+// |*i| is the index into |input| of the $. This will be updated to point to
+// the last character consumed on success. The token is the original string
+// to blame on failure.
+//
+// On failure, returns false and sets the error. On success, appends the
+// result of the interpolation to |*output|.
+bool AppendStringInterpolation(Scope* scope,
+ const Token& token,
+ const char* input, size_t size,
+ size_t* i,
+ std::string* output,
+ Err* err) {
size_t dollars_index = *i;
(*i)++;
if (*i == size) {
*err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.",
- "I was expecting an identifier after the $.");
+ "I was expecting an identifier or {...} after the $.");
return false;
}
- bool has_brackets;
if (input[*i] == '{') {
+ // Bracketed expression.
(*i)++;
+ size_t begin_offset = *i;
+
+ // Find the closing } and check for non-identifier chars. Don't need to
+ // bother checking for the more-restricted first character of an identifier
+ // since the {} unambiguously denotes the range, and identifiers with
+ // invalid names just won't be found later.
+ bool has_non_ident_chars = false;
+ while (*i < size && input[*i] != '}') {
+ // Set the flag when a character can NOT be part of an identifier. The
+ // predicate must be negated: without the "!" the flag is raised by
+ // ordinary identifier characters, so plain ${foo} never takes the simple
+ // identifier path and contents such as ${.} are looked up as identifiers.
+ has_non_ident_chars |= !Tokenizer::IsIdentifierContinuingChar(input[*i]);
+ (*i)++;
+ }
if (*i == size) {
- *err = ErrInsideStringToken(token, dollars_index, 2,
- "${ at end of string.",
- "I was expecting an identifier inside the ${...}.");
+ *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index,
+ "Unterminated ${...");
return false;
}
- has_brackets = true;
- } else {
- has_brackets = false;
+
+ // In the common case, the thing inside the {} will actually be a
+ // simple identifier. Avoid all the complicated parsing of accessors
+ // in this case.
+ if (!has_non_ident_chars) {
+ return AppendInterpolatedIdentifier(scope, token, input, begin_offset,
+ *i, output, err);
+ }
+ return AppendInterpolatedExpression(scope, token, input, begin_offset, *i,
+ output, err);
}
- // First char is special.
+ // Simple identifier.
+ // The first char of an identifier is more restricted.
if (!Tokenizer::IsIdentifierFirstChar(input[*i])) {
*err = ErrInsideStringToken(
token, dollars_index, *i - dollars_index + 1,
@@ -78,47 +196,9 @@ bool LocateInlineIdenfitier(const Token& token,
while (*i < size && Tokenizer::IsIdentifierContinuingChar(input[*i]))
(*i)++;
size_t end_offset = *i;
-
- // If we started with a bracket, validate that there's an ending one. Leave
- // *i pointing to the last char we consumed (backing up one).
- if (has_brackets) {
- if (*i == size) {
- *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index,
- "Unterminated ${...");
- return false;
- } else if (input[*i] != '}') {
- *err = ErrInsideStringToken(token, *i, 1, "Not an identifier in string expansion.",
- "The contents of ${...} should be an identifier. "
- "This character is out of sorts.");
- return false;
- }
- // We want to consume the bracket but also back up one, so *i is unchanged.
- } else {
- (*i)--;
- }
-
- *identifier = base::StringPiece(&input[begin_offset],
- end_offset - begin_offset);
- return true;
-}
-
-bool AppendIdentifierValue(Scope* scope,
- const Token& token,
- const base::StringPiece& identifier,
- std::string* output,
- Err* err) {
- const Value* value = scope->GetValue(identifier, true);
- if (!value) {
- // We assume the identifier points inside the token.
- *err = ErrInsideStringToken(
- token, identifier.data() - token.value().data() - 1, identifier.size(),
- "Undefined identifier in string expansion.",
- std::string("\"") + identifier + "\" is not currently in scope.");
- return false;
- }
-
- output->append(value->ToString(false));
- return true;
+ (*i)--; // Back up to mark the last character consumed.
+ return AppendInterpolatedIdentifier(scope, token, input, begin_offset,
+ end_offset, output, err);
}
} // namespace
@@ -153,10 +233,8 @@ bool ExpandStringLiteral(Scope* scope,
}
output.push_back(input[i]);
} else if (input[i] == '$') {
- base::StringPiece identifier;
- if (!LocateInlineIdenfitier(literal, input, size, &i, &identifier, err))
- return false;
- if (!AppendIdentifierValue(scope, literal, identifier, &output, err))
+ if (!AppendStringInterpolation(scope, literal, input, size, &i,
+ &output, err))
return false;
} else {
output.push_back(input[i]);
« no previous file with comments | « tools/gn/parser.cc ('k') | tools/gn/string_utils_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698