tools/gn/string_utils.cc - Issue 1268973003: Enhance GN string interpolation.

Unified Diff: tools/gn/string_utils.cc

Issue 1268973003: Enhance GN string interpolation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Review comments fixed Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/gn/string_utils.cc

diff --git a/tools/gn/string_utils.cc b/tools/gn/string_utils.cc

index 1aa10812bf7b43aea66a687d5cef58a1184bc7da..6d3f87774a04be5d3ff4b6aa46f651e84da92409 100644

--- a/tools/gn/string_utils.cc

+++ b/tools/gn/string_utils.cc

@@ -5,6 +5,8 @@

#include "tools/gn/string_utils.h"

#include "tools/gn/err.h"

+#include "tools/gn/input_file.h"

+#include "tools/gn/parser.h"

#include "tools/gn/scope.h"

#include "tools/gn/token.h"

#include "tools/gn/tokenizer.h"

@@ -31,39 +33,155 @@ Err ErrInsideStringToken(const Token& token, size_t offset, size_t size,

return Err(LocationRange(begin_loc, end_loc), msg, help);

}

-// Given the character input[i] indicating the $ in a string, locates the

-// identifier and places its range in |*identifier|, and updates |*i| to

-// point to the last character consumed.

+// Notes about expression interpolation. This is based loosly on Dart but is

+// slightly less flexible. In Dart, seeing the ${ in a string is something

+// the toplevel parser knows about, and it will recurse into the block

+// treating it as a first-class {...} block. So even things like this work:

+// "hello ${"foo}"*2+"bar"}" => "hello foo}foo}bar"

+// (you can see it did not get confused by the nested strings or the nested "}"

+// inside the block).

-// On error returns false and sets the error.

-bool LocateInlineIdenfitier(const Token& token,

- const char* input, size_t size,

- size_t* i,

- base::StringPiece* identifier,

- Err* err) {

+// This is cool but complicates the parser for almost no benefit for this

+// non-general-purpose programming language. The main reason expressions are

+// supported here at all are to support "${scope.variable}" and "${list[0]}",

+// neither of which have any of these edge-cases.

+//

+// In this simplified approach, we search for the terminating '}' and execute

+// the result. This means we can't support any expressions with embedded '}'

+// or '"'. To keep people from getting confusing about what's supported and

+// what's not, only identifier and accessor expressions are allowed (neither

+// of these run into any of these edge-cases).

+bool AppendInterpolatedExpression(Scope* scope,

+ const Token& token,

+ const char* input,

+ size_t begin_offset,

+ size_t end_offset,

+ std::string* output,

+ Err* err) {

+ SourceFile empty_source_file; // Prevent most vexing parse.

+ InputFile input_file(empty_source_file);

+ input_file.SetContents(

+ std::string(&input[begin_offset], end_offset - begin_offset));

+ // Tokenize.

+ std::vector<Token> tokens = Tokenizer::Tokenize(&input_file, err);

+ if (err->has_error()) {

+ // The error will point into our temporary buffer, rewrite it to refer

+ // to the original token. This will make the location information less

+ // precise, but generally there won't be complicated things in string

+ // interpolations.

+ *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,

+ err->message(), err->help_text());

+ return false;

+ }

+ // Parse.

+ scoped_ptr<ParseNode> node = Parser::ParseExpression(tokens, err);

+ if (err->has_error()) {

+ // Rewrite error as above.

+ *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,

+ err->message(), err->help_text());

+ return false;

+ }

+ if (!(node->AsIdentifier() || node->AsAccessor())) {

+ *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,

+ "Invalid string interpolation.",

+ "The thing inside the ${} must be an identifier ${foo},\n"

+ "a scope access ${foo.bar}, or a list access ${foo[0]}.");

+ return false;

+ }

+ // Evaluate.

+ Value result = node->Execute(scope, err);

+ if (err->has_error()) {

+ // Rewrite error as above.

+ *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,

+ err->message(), err->help_text());

+ return false;

+ }

+ output->append(result.ToString(false));

+ return true;

+bool AppendInterpolatedIdentifier(Scope* scope,

+ const Token& token,

+ const char* input,

+ size_t begin_offset,

+ size_t end_offset,

+ std::string* output,

+ Err* err) {

+ base::StringPiece identifier(&input[begin_offset],

+ end_offset - begin_offset);

+ const Value* value = scope->GetValue(identifier, true);

+ if (!value) {

+ // We assume the input points inside the token.

+ *err = ErrInsideStringToken(

+ token, identifier.data() - token.value().data() - 1, identifier.size(),

+ "Undefined identifier in string expansion.",

+ std::string("\"") + identifier + "\" is not currently in scope.");

+ return false;

+ }

+ output->append(value->ToString(false));

+ return true;

+// Handles string interpolations: $identifier and ${expression}

+//

+// |*i| is the index into |input| of the $. This will be updated to point to

+// the last character consumed on success. The token is the original string

+// to blame on failure.

+//

+// On failure, returns false and sets the error. On success, appends the

+// result of the interpolation to |*output|.

+bool AppendStringInterpolation(Scope* scope,

+ const Token& token,

+ const char* input, size_t size,

+ size_t* i,

+ std::string* output,

+ Err* err) {

size_t dollars_index = *i;

(*i)++;

if (*i == size) {

*err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.",

- "I was expecting an identifier after the $.");

+ "I was expecting an identifier or {...} after the $.");

return false;

}

- bool has_brackets;

if (input[*i] == '{') {

+ // Bracketed expression.

(*i)++;

+ size_t begin_offset = *i;

+ // Find the closing } and check for non-identifier chars. Don't need to

+ // bother checking for the more-restricted first character of an identifier

+ // since the {} unambiguously denotes the range, and identifiers with

+ // invalid names just won't be found later.

+ bool has_non_ident_chars = false;

+ while (*i < size && input[*i] != '}') {

+ has_non_ident_chars |= Tokenizer::IsIdentifierContinuingChar(input[*i]);

+ (*i)++;

+ }

if (*i == size) {

- *err = ErrInsideStringToken(token, dollars_index, 2,

- "${ at end of string.",

- "I was expecting an identifier inside the ${...}.");

+ *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index,

+ "Unterminated ${...");

return false;

}

- has_brackets = true;

- } else {

- has_brackets = false;

+ // In the common case, the thing inside the {} will actually be a

+ // simple identifier. Avoid all the complicated parsing of accessors

+ // in this case.

+ if (!has_non_ident_chars) {

+ return AppendInterpolatedIdentifier(scope, token, input, begin_offset,

+ *i, output, err);

+ }

+ return AppendInterpolatedExpression(scope, token, input, begin_offset, *i,

+ output, err);

}

- // First char is special.

+ // Simple identifier.

+ // The first char of an identifier is more restricted.

if (!Tokenizer::IsIdentifierFirstChar(input[*i])) {

*err = ErrInsideStringToken(

token, dollars_index, *i - dollars_index + 1,

@@ -78,47 +196,9 @@ bool LocateInlineIdenfitier(const Token& token,

while (*i < size && Tokenizer::IsIdentifierContinuingChar(input[*i]))

(*i)++;

size_t end_offset = *i;

- // If we started with a bracket, validate that there's an ending one. Leave

- // *i pointing to the last char we consumed (backing up one).

- if (has_brackets) {

- if (*i == size) {

- *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index,

- "Unterminated ${...");

- return false;

- } else if (input[*i] != '}') {

- *err = ErrInsideStringToken(token, *i, 1, "Not an identifier in string expansion.",

- "The contents of ${...} should be an identifier. "

- "This character is out of sorts.");

- return false;

- }

- // We want to consume the bracket but also back up one, so *i is unchanged.

- } else {

- (*i)--;

- }

- *identifier = base::StringPiece(&input[begin_offset],

- end_offset - begin_offset);

- return true;

-bool AppendIdentifierValue(Scope* scope,

- const Token& token,

- const base::StringPiece& identifier,

- std::string* output,

- Err* err) {

- const Value* value = scope->GetValue(identifier, true);

- if (!value) {

- // We assume the identifier points inside the token.

- *err = ErrInsideStringToken(

- token, identifier.data() - token.value().data() - 1, identifier.size(),

- "Undefined identifier in string expansion.",

- std::string("\"") + identifier + "\" is not currently in scope.");

- return false;

- }

- output->append(value->ToString(false));

- return true;

+ (*i)--; // Back up to mark the last character consumed.

+ return AppendInterpolatedIdentifier(scope, token, input, begin_offset,

+ end_offset, output, err);

}

} // namespace

@@ -153,10 +233,8 @@ bool ExpandStringLiteral(Scope* scope,

}

output.push_back(input[i]);

} else if (input[i] == '$') {

- base::StringPiece identifier;

- if (!LocateInlineIdenfitier(literal, input, size, &i, &identifier, err))

- return false;

- if (!AppendIdentifierValue(scope, literal, identifier, &output, err))

+ if (!AppendStringInterpolation(scope, literal, input, size, &i,

+ &output, err))

return false;

} else {

output.push_back(input[i]);

« no previous file with comments | « tools/gn/parser.cc ('k') | tools/gn/string_utils_unittest.cc » ('j') | no next file with comments »