Chromium Code Reviews| Index: tools/gn/string_utils.cc |
| diff --git a/tools/gn/string_utils.cc b/tools/gn/string_utils.cc |
| index 1aa10812bf7b43aea66a687d5cef58a1184bc7da..e20a4e42c04642668f6f6ded8fe107fc9de7edb6 100644 |
| --- a/tools/gn/string_utils.cc |
| +++ b/tools/gn/string_utils.cc |
| @@ -5,6 +5,8 @@ |
| #include "tools/gn/string_utils.h" |
| #include "tools/gn/err.h" |
| +#include "tools/gn/input_file.h" |
| +#include "tools/gn/parser.h" |
| #include "tools/gn/scope.h" |
| #include "tools/gn/token.h" |
| #include "tools/gn/tokenizer.h" |
| @@ -31,39 +33,155 @@ Err ErrInsideStringToken(const Token& token, size_t offset, size_t size, |
| return Err(LocationRange(begin_loc, end_loc), msg, help); |
| } |
| -// Given the character input[i] indicating the $ in a string, locates the |
| -// identifier and places its range in |*identifier|, and updates |*i| to |
| -// point to the last character consumed. |
| +// Notes about expression interpolation. This is based loosly on Dart but is |
| +// slightly less flexible. In Dart, seeing the ${ in a string is something |
| +// the toplevel parser knows about, and it will recurse into the block |
| +// treating it as a first-class {...} block. So even things like this work: |
| +// "hello ${"foo}"*2+"bar"}" => "hello foo}foo}bar" |
| +// (you can see it did not get confused by the nested strings or the nested "}" |
| +// inside the block). |
| // |
| -// On error returns false and sets the error. |
| -bool LocateInlineIdenfitier(const Token& token, |
| - const char* input, size_t size, |
| - size_t* i, |
| - base::StringPiece* identifier, |
| - Err* err) { |
| +// This is cool but complicates the parser for almost no benefit for this |
| +// non-general-purpose programming langhage. The main reason expressions are |
|
Dirk Pranke
2015/08/03 15:21:19
typo: "language".
|
| +// supported here at all are to support "${scope.variable}" and "${list[0]}", |
| +// neither of which have any of these edge-cases. |
| +// |
| +// In this simplified approach, we search for the termianting '}' and execute |
|
Dirk Pranke
2015/08/03 15:21:19
typo: "terminating".
|
| +// the result. This means we can't support any expressions with embedded '}' |
| +// or '"'. To keep things from getting confusing about what's supported and |
| +// what's not, only identifier and accessor expressions are allowed (neither |
| +// of these run into any of these edge-cases). |
| +bool AppendInterpolatedExpression(Scope* scope, |
| + const Token& token, |
| + const char* input, |
| + size_t begin_offset, |
| + size_t end_offset, |
| + std::string* output, |
| + Err* err) { |
| + SourceFile empty_source_file; // Prevent most vexing parse. |
| + InputFile input_file(empty_source_file); |
| + input_file.SetContents( |
| + std::string(&input[begin_offset], end_offset - begin_offset)); |
| + |
| + // Tokenize. |
| + std::vector<Token> tokens = Tokenizer::Tokenize(&input_file, err); |
| + if (err->has_error()) { |
| + // The error will point into our temporary buffer, rewrite it to refer |
| + // to the original token. This will make the location information less |
| + // precise, but generally there won't be complicated things in string |
| + // interpolations. |
| + *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset, |
| + err->message(), err->help_text()); |
| + return false; |
| + } |
| + |
| + // Parse. |
| + scoped_ptr<ParseNode> node = Parser::ParseExpression(tokens, err); |
| + if (err->has_error()) { |
| + // Rewrite error as above. |
| + *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset, |
| + err->message(), err->help_text()); |
| + return false; |
| + } |
| + if (!(node->AsIdentifier() || node->AsAccessor())) { |
| + *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset, |
| + "Invalid string interpolation.", |
| + "The thing inside the ${} must be an identifier ${foo},\n" |
| + "a scope access ${foo.bar}, or a list access ${foo[0]}."); |
| + return false; |
| + } |
| + |
| + // Evaluate. |
| + Value result = node->Execute(scope, err); |
| + if (err->has_error()) { |
| + // Rewrite error as above. |
| + *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset, |
| + err->message(), err->help_text()); |
| + return false; |
| + } |
| + |
| + output->append(result.ToString(false)); |
| + return true; |
| +} |
| + |
| +bool AppendInterpolatedIdentifier(Scope* scope, |
| + const Token& token, |
| + const char* input, |
| + size_t begin_offset, |
| + size_t end_offset, |
| + std::string* output, |
| + Err* err) { |
| + base::StringPiece identifier(&input[begin_offset], |
| + end_offset - begin_offset); |
| + const Value* value = scope->GetValue(identifier, true); |
| + if (!value) { |
| + // We assume the input points inside the token. |
| + *err = ErrInsideStringToken( |
| + token, identifier.data() - token.value().data() - 1, identifier.size(), |
| + "Undefined identifier in string expansion.", |
| + std::string("\"") + identifier + "\" is not currently in scope."); |
| + return false; |
| + } |
| + |
| + output->append(value->ToString(false)); |
| + return true; |
| +} |
| + |
| +// Handles string interpolations: $identifier and ${expression} |
| +// |
| +// |*i| is the index into |input| of the $. This will be updated to point to |
| +// the last character consumed on success. The token is the original string |
| +// to blame on failure. |
| +// |
| +// On failure, returns false and sets the error. On success, appends the |
| +// result of the interpolation to |*output|. |
| +bool AppendStringInterpolation(Scope* scope, |
| + const Token& token, |
| + const char* input, size_t size, |
| + size_t* i, |
| + std::string* output, |
| + Err* err) { |
| size_t dollars_index = *i; |
| (*i)++; |
| if (*i == size) { |
| *err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.", |
| - "I was expecting an identifier after the $."); |
| + "I was expecting an identifier or {...} after the $."); |
| return false; |
| } |
| - bool has_brackets; |
| if (input[*i] == '{') { |
| + // Bracketed expression. |
| (*i)++; |
| + size_t begin_offset = *i; |
| + |
| + // Find the closing } and check for non-identifier chars. Don't need to |
| + // bother checking for the more-restricted first character of an identifier |
| + // since the {} unambiguously denotes the range, and identifiers with |
| + // invalid names just won't be found later. |
| + bool has_non_ident_chars = false; |
| + while (*i < size && input[*i] != '}') { |
| + has_non_ident_chars |= Tokenizer::IsIdentifierContinuingChar(input[*i]); |
| + (*i)++; |
| + } |
| if (*i == size) { |
| - *err = ErrInsideStringToken(token, dollars_index, 2, |
| - "${ at end of string.", |
| - "I was expecting an identifier inside the ${...}."); |
| + *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index, |
| + "Unterminated ${..."); |
| return false; |
| } |
| - has_brackets = true; |
| - } else { |
| - has_brackets = false; |
| + |
| + // In the common case, the thing inside the {} will actually be a |
| + // simple identifier. Avoid all the complicated parsing of accessors |
| + // in this case. |
| + if (!has_non_ident_chars) { |
| + return AppendInterpolatedIdentifier(scope, token, input, begin_offset, |
| + *i, output, err); |
| + } |
| + return AppendInterpolatedExpression(scope, token, input, begin_offset, *i, |
| + output, err); |
| } |
| - // First char is special. |
| + // Simple identifier. |
| + // The first char of an identifier is more restricted. |
| if (!Tokenizer::IsIdentifierFirstChar(input[*i])) { |
| *err = ErrInsideStringToken( |
| token, dollars_index, *i - dollars_index + 1, |
| @@ -78,47 +196,9 @@ bool LocateInlineIdenfitier(const Token& token, |
| while (*i < size && Tokenizer::IsIdentifierContinuingChar(input[*i])) |
| (*i)++; |
| size_t end_offset = *i; |
| - |
| - // If we started with a bracket, validate that there's an ending one. Leave |
| - // *i pointing to the last char we consumed (backing up one). |
| - if (has_brackets) { |
| - if (*i == size) { |
| - *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index, |
| - "Unterminated ${..."); |
| - return false; |
| - } else if (input[*i] != '}') { |
| - *err = ErrInsideStringToken(token, *i, 1, "Not an identifier in string expansion.", |
| - "The contents of ${...} should be an identifier. " |
| - "This character is out of sorts."); |
| - return false; |
| - } |
| - // We want to consume the bracket but also back up one, so *i is unchanged. |
| - } else { |
| - (*i)--; |
| - } |
| - |
| - *identifier = base::StringPiece(&input[begin_offset], |
| - end_offset - begin_offset); |
| - return true; |
| -} |
| - |
| -bool AppendIdentifierValue(Scope* scope, |
| - const Token& token, |
| - const base::StringPiece& identifier, |
| - std::string* output, |
| - Err* err) { |
| - const Value* value = scope->GetValue(identifier, true); |
| - if (!value) { |
| - // We assume the identifier points inside the token. |
| - *err = ErrInsideStringToken( |
| - token, identifier.data() - token.value().data() - 1, identifier.size(), |
| - "Undefined identifier in string expansion.", |
| - std::string("\"") + identifier + "\" is not currently in scope."); |
| - return false; |
| - } |
| - |
| - output->append(value->ToString(false)); |
| - return true; |
| + (*i)--; // Back up to mark the last character consumed. |
| + return AppendInterpolatedIdentifier(scope, token, input, begin_offset, |
| + end_offset, output, err); |
| } |
| } // namespace |
| @@ -153,10 +233,8 @@ bool ExpandStringLiteral(Scope* scope, |
| } |
| output.push_back(input[i]); |
| } else if (input[i] == '$') { |
| - base::StringPiece identifier; |
| - if (!LocateInlineIdenfitier(literal, input, size, &i, &identifier, err)) |
| - return false; |
| - if (!AppendIdentifierValue(scope, literal, identifier, &output, err)) |
| + if (!AppendStringInterpolation(scope, literal, input, size, &i, |
| + &output, err)) |
| return false; |
| } else { |
| output.push_back(input[i]); |