Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "tools/gn/string_utils.h" | 5 #include "tools/gn/string_utils.h" |
| 6 | 6 |
| 7 #include "tools/gn/err.h" | 7 #include "tools/gn/err.h" |
| 8 #include "tools/gn/input_file.h" | |
| 9 #include "tools/gn/parser.h" | |
| 8 #include "tools/gn/scope.h" | 10 #include "tools/gn/scope.h" |
| 9 #include "tools/gn/token.h" | 11 #include "tools/gn/token.h" |
| 10 #include "tools/gn/tokenizer.h" | 12 #include "tools/gn/tokenizer.h" |
| 11 #include "tools/gn/value.h" | 13 #include "tools/gn/value.h" |
| 12 | 14 |
| 13 namespace { | 15 namespace { |
| 14 | 16 |
| 15 // Constructs an Err indicating a range inside a string. We assume that the | 17 // Constructs an Err indicating a range inside a string. We assume that the |
| 16 // token has quotes around it that are not counted by the offset. | 18 // token has quotes around it that are not counted by the offset. |
| 17 Err ErrInsideStringToken(const Token& token, size_t offset, size_t size, | 19 Err ErrInsideStringToken(const Token& token, size_t offset, size_t size, |
| 18 const std::string& msg, | 20 const std::string& msg, |
| 19 const std::string& help = std::string()) { | 21 const std::string& help = std::string()) { |
| 20 // The "+1" is skipping over the " at the beginning of the token. | 22 // The "+1" is skipping over the " at the beginning of the token. |
| 21 int int_offset = static_cast<int>(offset); | 23 int int_offset = static_cast<int>(offset); |
| 22 Location begin_loc(token.location().file(), | 24 Location begin_loc(token.location().file(), |
| 23 token.location().line_number(), | 25 token.location().line_number(), |
| 24 token.location().char_offset() + int_offset + 1, | 26 token.location().char_offset() + int_offset + 1, |
| 25 token.location().byte() + int_offset + 1); | 27 token.location().byte() + int_offset + 1); |
| 26 Location end_loc( | 28 Location end_loc( |
| 27 token.location().file(), | 29 token.location().file(), |
| 28 token.location().line_number(), | 30 token.location().line_number(), |
| 29 token.location().char_offset() + int_offset + 1 + static_cast<int>(size), | 31 token.location().char_offset() + int_offset + 1 + static_cast<int>(size), |
| 30 token.location().byte() + int_offset + 1 + static_cast<int>(size)); | 32 token.location().byte() + int_offset + 1 + static_cast<int>(size)); |
| 31 return Err(LocationRange(begin_loc, end_loc), msg, help); | 33 return Err(LocationRange(begin_loc, end_loc), msg, help); |
| 32 } | 34 } |
| 33 | 35 |
| 34 // Given the character input[i] indicating the $ in a string, locates the | 36 // Notes about expression interpolation. This is based loosely on Dart but is |
| 35 // identifier and places its range in |*identifier|, and updates |*i| to | 37 // slightly less flexible. In Dart, seeing the ${ in a string is something |
| 36 // point to the last character consumed. | 38 // the toplevel parser knows about, and it will recurse into the block |
| 39 // treating it as a first-class {...} block. So even things like this work: | |
| 40 // "hello ${"foo}"*2+"bar"}" => "hello foo}foo}bar" | |
| 41 // (you can see it did not get confused by the nested strings or the nested "}" | |
| 42 // inside the block). | |
| 37 // | 43 // |
| 38 // On error returns false and sets the error. | 44 // This is cool but complicates the parser for almost no benefit for this |
| 39 bool LocateInlineIdenfitier(const Token& token, | 45 // non-general-purpose programming language. The main reason expressions are |
|
Dirk Pranke
2015/08/03 15:21:19
typo: "language".
| |
| 40 const char* input, size_t size, | 46 // supported here at all is to support "${scope.variable}" and "${list[0]}", |
| 41 size_t* i, | 47 // neither of which have any of these edge-cases. |
| 42 base::StringPiece* identifier, | 48 // |
| 43 Err* err) { | 49 // In this simplified approach, we search for the terminating '}' and execute |
|
Dirk Pranke
2015/08/03 15:21:19
typo: "terminating".
| |
| 50 // the result. This means we can't support any expressions with embedded '}' | |
| 51 // or '"'. To keep things from getting confusing about what's supported and | |
| 52 // what's not, only identifier and accessor expressions are allowed (neither | |
| 53 // of these run into any of these edge-cases). | |
| 54 bool AppendInterpolatedExpression(Scope* scope, | |
| 55 const Token& token, | |
| 56 const char* input, | |
| 57 size_t begin_offset, | |
| 58 size_t end_offset, | |
| 59 std::string* output, | |
| 60 Err* err) { | |
| 61 SourceFile empty_source_file; // Prevent most vexing parse. | |
| 62 InputFile input_file(empty_source_file); | |
| 63 input_file.SetContents( | |
| 64 std::string(&input[begin_offset], end_offset - begin_offset)); | |
| 65 | |
| 66 // Tokenize. | |
| 67 std::vector<Token> tokens = Tokenizer::Tokenize(&input_file, err); | |
| 68 if (err->has_error()) { | |
| 69 // The error will point into our temporary buffer, rewrite it to refer | |
| 70 // to the original token. This will make the location information less | |
| 71 // precise, but generally there won't be complicated things in string | |
| 72 // interpolations. | |
| 73 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset, | |
| 74 err->message(), err->help_text()); | |
| 75 return false; | |
| 76 } | |
| 77 | |
| 78 // Parse. | |
| 79 scoped_ptr<ParseNode> node = Parser::ParseExpression(tokens, err); | |
| 80 if (err->has_error()) { | |
| 81 // Rewrite error as above. | |
| 82 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset, | |
| 83 err->message(), err->help_text()); | |
| 84 return false; | |
| 85 } | |
| 86 if (!(node->AsIdentifier() || node->AsAccessor())) { | |
| 87 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset, | |
| 88 "Invalid string interpolation.", | |
| 89 "The thing inside the ${} must be an identifier ${foo},\n" | |
| 90 "a scope access ${foo.bar}, or a list access ${foo[0]}."); | |
| 91 return false; | |
| 92 } | |
| 93 | |
| 94 // Evaluate. | |
| 95 Value result = node->Execute(scope, err); | |
| 96 if (err->has_error()) { | |
| 97 // Rewrite error as above. | |
| 98 *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset, | |
| 99 err->message(), err->help_text()); | |
| 100 return false; | |
| 101 } | |
| 102 | |
| 103 output->append(result.ToString(false)); | |
| 104 return true; | |
| 105 } | |
| 106 | |
| 107 bool AppendInterpolatedIdentifier(Scope* scope, | |
| 108 const Token& token, | |
| 109 const char* input, | |
| 110 size_t begin_offset, | |
| 111 size_t end_offset, | |
| 112 std::string* output, | |
| 113 Err* err) { | |
| 114 base::StringPiece identifier(&input[begin_offset], | |
| 115 end_offset - begin_offset); | |
| 116 const Value* value = scope->GetValue(identifier, true); | |
| 117 if (!value) { | |
| 118 // We assume the input points inside the token. | |
| 119 *err = ErrInsideStringToken( | |
| 120 token, identifier.data() - token.value().data() - 1, identifier.size(), | |
| 121 "Undefined identifier in string expansion.", | |
| 122 std::string("\"") + identifier + "\" is not currently in scope."); | |
| 123 return false; | |
| 124 } | |
| 125 | |
| 126 output->append(value->ToString(false)); | |
| 127 return true; | |
| 128 } | |
| 129 | |
| 130 // Handles string interpolations: $identifier and ${expression} | |
| 131 // | |
| 132 // |*i| is the index into |input| of the $. This will be updated to point to | |
| 133 // the last character consumed on success. The token is the original string | |
| 134 // to blame on failure. | |
| 135 // | |
| 136 // On failure, returns false and sets the error. On success, appends the | |
| 137 // result of the interpolation to |*output|. | |
| 138 bool AppendStringInterpolation(Scope* scope, | |
| 139 const Token& token, | |
| 140 const char* input, size_t size, | |
| 141 size_t* i, | |
| 142 std::string* output, | |
| 143 Err* err) { | |
| 44 size_t dollars_index = *i; | 144 size_t dollars_index = *i; |
| 45 (*i)++; | 145 (*i)++; |
| 46 if (*i == size) { | 146 if (*i == size) { |
| 47 *err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.", | 147 *err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.", |
| 48 "I was expecting an identifier after the $."); | 148 "I was expecting an identifier or {...} after the $."); |
| 49 return false; | 149 return false; |
| 50 } | 150 } |
| 51 | 151 |
| 52 bool has_brackets; | |
| 53 if (input[*i] == '{') { | 152 if (input[*i] == '{') { |
| 153 // Bracketed expression. | |
| 54 (*i)++; | 154 (*i)++; |
| 155 size_t begin_offset = *i; | |
| 156 | |
| 157 // Find the closing } and check for non-identifier chars. Don't need to | |
| 158 // bother checking for the more-restricted first character of an identifier | |
| 159 // since the {} unambiguously denotes the range, and identifiers with | |
| 160 // invalid names just won't be found later. | |
| 161 bool has_non_ident_chars = false; | |
| 162 while (*i < size && input[*i] != '}') { | |
| 163 has_non_ident_chars |= Tokenizer::IsIdentifierContinuingChar(input[*i]); | |
| 164 (*i)++; | |
| 165 } | |
| 55 if (*i == size) { | 166 if (*i == size) { |
| 56 *err = ErrInsideStringToken(token, dollars_index, 2, | 167 *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index, |
| 57 "${ at end of string.", | 168 "Unterminated ${..."); |
| 58 "I was expecting an identifier inside the ${...}."); | |
| 59 return false; | 169 return false; |
| 60 } | 170 } |
| 61 has_brackets = true; | 171 |
| 62 } else { | 172 // In the common case, the thing inside the {} will actually be a |
| 63 has_brackets = false; | 173 // simple identifier. Avoid all the complicated parsing of accessors |
| 174 // in this case. | |
| 175 if (!has_non_ident_chars) { | |
| 176 return AppendInterpolatedIdentifier(scope, token, input, begin_offset, | |
| 177 *i, output, err); | |
| 178 } | |
| 179 return AppendInterpolatedExpression(scope, token, input, begin_offset, *i, | |
| 180 output, err); | |
| 64 } | 181 } |
| 65 | 182 |
| 66 // First char is special. | 183 // Simple identifier. |
| 184 // The first char of an identifier is more restricted. | |
| 67 if (!Tokenizer::IsIdentifierFirstChar(input[*i])) { | 185 if (!Tokenizer::IsIdentifierFirstChar(input[*i])) { |
| 68 *err = ErrInsideStringToken( | 186 *err = ErrInsideStringToken( |
| 69 token, dollars_index, *i - dollars_index + 1, | 187 token, dollars_index, *i - dollars_index + 1, |
| 70 "$ not followed by an identifier char.", | 188 "$ not followed by an identifier char.", |
| 71 "It you want a literal $ use \"\\$\"."); | 189 "It you want a literal $ use \"\\$\"."); |
| 72 return false; | 190 return false; |
| 73 } | 191 } |
| 74 size_t begin_offset = *i; | 192 size_t begin_offset = *i; |
| 75 (*i)++; | 193 (*i)++; |
| 76 | 194 |
| 77 // Find the first non-identifier char following the string. | 195 // Find the first non-identifier char following the string. |
| 78 while (*i < size && Tokenizer::IsIdentifierContinuingChar(input[*i])) | 196 while (*i < size && Tokenizer::IsIdentifierContinuingChar(input[*i])) |
| 79 (*i)++; | 197 (*i)++; |
| 80 size_t end_offset = *i; | 198 size_t end_offset = *i; |
| 81 | 199 (*i)--; // Back up to mark the last character consumed. |
| 82 // If we started with a bracket, validate that there's an ending one. Leave | 200 return AppendInterpolatedIdentifier(scope, token, input, begin_offset, |
| 83 // *i pointing to the last char we consumed (backing up one). | 201 end_offset, output, err); |
| 84 if (has_brackets) { | |
| 85 if (*i == size) { | |
| 86 *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index, | |
| 87 "Unterminated ${..."); | |
| 88 return false; | |
| 89 } else if (input[*i] != '}') { | |
| 90 *err = ErrInsideStringToken(token, *i, 1, "Not an identifier in string expansion.", | |
| 91 "The contents of ${...} should be an identifier. " | |
| 92 "This character is out of sorts."); | |
| 93 return false; | |
| 94 } | |
| 95 // We want to consume the bracket but also back up one, so *i is unchanged. | |
| 96 } else { | |
| 97 (*i)--; | |
| 98 } | |
| 99 | |
| 100 *identifier = base::StringPiece(&input[begin_offset], | |
| 101 end_offset - begin_offset); | |
| 102 return true; | |
| 103 } | |
| 104 | |
| 105 bool AppendIdentifierValue(Scope* scope, | |
| 106 const Token& token, | |
| 107 const base::StringPiece& identifier, | |
| 108 std::string* output, | |
| 109 Err* err) { | |
| 110 const Value* value = scope->GetValue(identifier, true); | |
| 111 if (!value) { | |
| 112 // We assume the identifier points inside the token. | |
| 113 *err = ErrInsideStringToken( | |
| 114 token, identifier.data() - token.value().data() - 1, identifier.size(), | |
| 115 "Undefined identifier in string expansion.", | |
| 116 std::string("\"") + identifier + "\" is not currently in scope."); | |
| 117 return false; | |
| 118 } | |
| 119 | |
| 120 output->append(value->ToString(false)); | |
| 121 return true; | |
| 122 } | 202 } |
| 123 | 203 |
| 124 } // namespace | 204 } // namespace |
| 125 | 205 |
| 126 bool ExpandStringLiteral(Scope* scope, | 206 bool ExpandStringLiteral(Scope* scope, |
| 127 const Token& literal, | 207 const Token& literal, |
| 128 Value* result, | 208 Value* result, |
| 129 Err* err) { | 209 Err* err) { |
| 130 DCHECK(literal.type() == Token::STRING); | 210 DCHECK(literal.type() == Token::STRING); |
| 131 DCHECK(literal.value().size() > 1); // Should include quotes. | 211 DCHECK(literal.value().size() > 1); // Should include quotes. |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 146 case '$': | 226 case '$': |
| 147 output.push_back(input[i + 1]); | 227 output.push_back(input[i + 1]); |
| 148 i++; | 228 i++; |
| 149 continue; | 229 continue; |
| 150 default: // Everything else has no meaning: pass the literal. | 230 default: // Everything else has no meaning: pass the literal. |
| 151 break; | 231 break; |
| 152 } | 232 } |
| 153 } | 233 } |
| 154 output.push_back(input[i]); | 234 output.push_back(input[i]); |
| 155 } else if (input[i] == '$') { | 235 } else if (input[i] == '$') { |
| 156 base::StringPiece identifier; | 236 if (!AppendStringInterpolation(scope, literal, input, size, &i, |
| 157 if (!LocateInlineIdenfitier(literal, input, size, &i, &identifier, err)) | 237 &output, err)) |
| 158 return false; | |
| 159 if (!AppendIdentifierValue(scope, literal, identifier, &output, err)) | |
| 160 return false; | 238 return false; |
| 161 } else { | 239 } else { |
| 162 output.push_back(input[i]); | 240 output.push_back(input[i]); |
| 163 } | 241 } |
| 164 } | 242 } |
| 165 return true; | 243 return true; |
| 166 } | 244 } |
| 167 | 245 |
| 168 std::string RemovePrefix(const std::string& str, const std::string& prefix) { | 246 std::string RemovePrefix(const std::string& str, const std::string& prefix) { |
| 169 CHECK(str.size() >= prefix.size() && | 247 CHECK(str.size() >= prefix.size() && |
| 170 str.compare(0, prefix.size(), prefix) == 0); | 248 str.compare(0, prefix.size(), prefix) == 0); |
| 171 return str.substr(prefix.size()); | 249 return str.substr(prefix.size()); |
| 172 } | 250 } |
| 173 | 251 |
| 174 void TrimTrailingSlash(std::string* str) { | 252 void TrimTrailingSlash(std::string* str) { |
| 175 if (!str->empty()) { | 253 if (!str->empty()) { |
| 176 DCHECK((*str)[str->size() - 1] == '/'); | 254 DCHECK((*str)[str->size() - 1] == '/'); |
| 177 str->resize(str->size() - 1); | 255 str->resize(str->size() - 1); |
| 178 } | 256 } |
| 179 } | 257 } |
| OLD | NEW |