OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 // |
| 5 // This implements a Clang tool to rewrite all instances of |
| 6 // scoped_refptr<T>'s implicit cast to T (operator T*) to an explicit call to |
| 7 // the .get() method. |
| 8 |
| 9 #include <assert.h> |
| 10 #include <algorithm> |
| 11 #include <memory> |
| 12 #include <string> |
| 13 |
| 14 #include "clang/AST/ASTContext.h" |
| 15 #include "clang/ASTMatchers/ASTMatchers.h" |
| 16 #include "clang/ASTMatchers/ASTMatchersMacros.h" |
| 17 #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 18 #include "clang/Basic/SourceManager.h" |
| 19 #include "clang/Frontend/FrontendActions.h" |
| 20 #include "clang/Lex/Lexer.h" |
| 21 #include "clang/Tooling/CommonOptionsParser.h" |
| 22 #include "clang/Tooling/Refactoring.h" |
| 23 #include "clang/Tooling/Tooling.h" |
| 24 #include "llvm/Support/CommandLine.h" |
| 25 #include "llvm/support/TargetSelect.h" |
| 26 |
| 27 using namespace clang::ast_matchers; |
| 28 using clang::tooling::CommonOptionsParser; |
| 29 using clang::tooling::Replacement; |
| 30 using clang::tooling::Replacements; |
| 31 using llvm::StringRef; |
| 32 |
| 33 namespace clang { |
| 34 namespace ast_matchers { |
| 35 |
| 36 const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl> |
| 37 conversionDecl; |
| 38 |
| 39 AST_MATCHER(QualType, isBoolean) { |
| 40 return Node->isBooleanType(); |
| 41 } |
| 42 |
| 43 } // namespace ast_matchers |
| 44 } // namespace clang |
| 45 |
| 46 namespace { |
| 47 |
| 48 // Returns true if expr needs to be put in parens (eg: when it is an operator |
| 49 // syntactically). |
| 50 bool NeedsParens(const clang::Expr* expr) { |
| 51 if (llvm::dyn_cast<clang::UnaryOperator>(expr) || |
| 52 llvm::dyn_cast<clang::BinaryOperator>(expr) || |
| 53 llvm::dyn_cast<clang::ConditionalOperator>(expr)) { |
| 54 return true; |
| 55 } |
| 56 // Calls to an overloaded operator also need parens, except for foo(...) and |
| 57 // foo[...] expressions. |
| 58 if (const clang::CXXOperatorCallExpr* op = |
| 59 llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) { |
| 60 return op->getOperator() != clang::OO_Call && |
| 61 op->getOperator() != clang::OO_Subscript; |
| 62 } |
| 63 return false; |
| 64 } |
| 65 |
| 66 Replacement RewriteImplicitToExplicitConversion( |
| 67 const MatchFinder::MatchResult& result, |
| 68 const clang::Expr* expr) { |
| 69 clang::CharSourceRange range = clang::CharSourceRange::getTokenRange( |
| 70 result.SourceManager->getSpellingLoc(expr->getLocStart()), |
| 71 result.SourceManager->getSpellingLoc(expr->getLocEnd())); |
| 72 assert(range.isValid() && "Invalid range!"); |
| 73 |
| 74 // Handle cases where an implicit cast is being done by dereferencing a |
| 75 // pointer to a scoped_refptr<> (sadly, it happens...) |
| 76 // |
| 77 // This rewrites both "*foo" and "*(foo)" as "foo->get()". |
| 78 if (const clang::UnaryOperator* op = |
| 79 llvm::dyn_cast<clang::UnaryOperator>(expr)) { |
| 80 if (op->getOpcode() == clang::UO_Deref) { |
| 81 const clang::Expr* const sub_expr = |
| 82 op->getSubExpr()->IgnoreParenImpCasts(); |
| 83 clang::CharSourceRange sub_expr_range = |
| 84 clang::CharSourceRange::getTokenRange( |
| 85 result.SourceManager->getSpellingLoc(sub_expr->getLocStart()), |
| 86 result.SourceManager->getSpellingLoc(sub_expr->getLocEnd())); |
| 87 assert(sub_expr_range.isValid() && "Invalid subexpression range!"); |
| 88 |
| 89 std::string inner_text = clang::Lexer::getSourceText( |
| 90 sub_expr_range, *result.SourceManager, result.Context->getLangOpts()); |
| 91 assert(!inner_text.empty() && "No text for subexpression!"); |
| 92 if (NeedsParens(sub_expr)) { |
| 93 inner_text.insert(0, "("); |
| 94 inner_text.append(")"); |
| 95 } |
| 96 inner_text.append("->get()"); |
| 97 return Replacement(*result.SourceManager, range, inner_text); |
| 98 } |
| 99 } |
| 100 |
| 101 std::string text = clang::Lexer::getSourceText( |
| 102 range, *result.SourceManager, result.Context->getLangOpts()); |
| 103 assert(!text.empty() && "No text for expression!"); |
| 104 |
| 105 // Unwrap any temporaries - for example, custom iterators that return |
| 106 // scoped_refptr<T> as part of operator*. Any such iterators should also |
| 107 // be declaring a scoped_refptr<T>* operator->, per C++03 24.4.1.1 (Table 72) |
| 108 if (const clang::CXXBindTemporaryExpr* op = |
| 109 llvm::dyn_cast<clang::CXXBindTemporaryExpr>(expr)) { |
| 110 expr = op->getSubExpr(); |
| 111 } |
| 112 |
| 113 // Handle iterators (which are operator* calls, followed by implicit |
| 114 // conversions) by rewriting *it as it->get() |
| 115 if (const clang::CXXOperatorCallExpr* op = |
| 116 llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) { |
| 117 if (op->getOperator() == clang::OO_Star) { |
| 118 // Note that this doesn't rewrite **it correctly, since it should be |
| 119 // rewritten using parens, e.g. (*it)->get(). However, this shouldn't |
| 120 // happen frequently, if at all, since it would likely indicate code is |
| 121 // storing pointers to a scoped_refptr in a container. |
| 122 text.erase(0, 1); |
| 123 text.append("->get()"); |
| 124 return Replacement(*result.SourceManager, range, text); |
| 125 } |
| 126 } |
| 127 |
| 128 // The only remaining calls should be non-dereferencing calls (eg: member |
| 129 // calls), so a simple ".get()" appending should suffice. |
| 130 if (NeedsParens(expr)) { |
| 131 text.insert(0, "("); |
| 132 text.append(")"); |
| 133 } |
| 134 text.append(".get()"); |
| 135 return Replacement(*result.SourceManager, range, text); |
| 136 } |
| 137 |
| 138 Replacement RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult& result, |
| 139 clang::SourceLocation begin, |
| 140 clang::SourceLocation end) { |
| 141 clang::CharSourceRange range = clang::CharSourceRange::getTokenRange( |
| 142 result.SourceManager->getSpellingLoc(begin), |
| 143 result.SourceManager->getSpellingLoc(end)); |
| 144 assert(range.isValid() && "Invalid range!"); |
| 145 |
| 146 std::string text = clang::Lexer::getSourceText( |
| 147 range, *result.SourceManager, result.Context->getLangOpts()); |
| 148 text.erase(text.rfind('*')); |
| 149 |
| 150 std::string replacement_text("scoped_refptr<"); |
| 151 replacement_text += text; |
| 152 replacement_text += ">"; |
| 153 |
| 154 return Replacement(*result.SourceManager, range, replacement_text); |
| 155 } |
| 156 |
| 157 class GetRewriterCallback : public MatchFinder::MatchCallback { |
| 158 public: |
| 159 explicit GetRewriterCallback(Replacements* replacements) |
| 160 : replacements_(replacements) {} |
| 161 virtual void run(const MatchFinder::MatchResult& result) override; |
| 162 |
| 163 private: |
| 164 Replacements* const replacements_; |
| 165 }; |
| 166 |
| 167 void GetRewriterCallback::run(const MatchFinder::MatchResult& result) { |
| 168 const clang::Expr* arg = result.Nodes.getNodeAs<clang::Expr>("arg"); |
| 169 assert(arg && "Unexpected match! No Expr captured!"); |
| 170 replacements_->insert(RewriteImplicitToExplicitConversion(result, arg)); |
| 171 } |
| 172 |
| 173 class VarRewriterCallback : public MatchFinder::MatchCallback { |
| 174 public: |
| 175 explicit VarRewriterCallback(Replacements* replacements) |
| 176 : replacements_(replacements) {} |
| 177 virtual void run(const MatchFinder::MatchResult& result) override; |
| 178 |
| 179 private: |
| 180 Replacements* const replacements_; |
| 181 }; |
| 182 |
| 183 void VarRewriterCallback::run(const MatchFinder::MatchResult& result) { |
| 184 const clang::DeclaratorDecl* const var_decl = |
| 185 result.Nodes.getNodeAs<clang::DeclaratorDecl>("var"); |
| 186 assert(var_decl && "Unexpected match! No VarDecl captured!"); |
| 187 |
| 188 const clang::TypeSourceInfo* tsi = var_decl->getTypeSourceInfo(); |
| 189 |
| 190 // TODO(dcheng): This mishandles a case where a variable has multiple |
| 191 // declarations, e.g.: |
| 192 // |
| 193 // in .h: |
| 194 // Foo* my_global_magical_foo; |
| 195 // |
| 196 // in .cc: |
| 197 // Foo* my_global_magical_foo = CreateFoo(); |
| 198 // |
| 199 // In this case, it will only rewrite the .cc definition. Oh well. This should |
| 200 // be rare enough that these cases can be manually handled, since the style |
| 201 // guide prohibits globals of non-POD type. |
| 202 replacements_->insert(RewriteRawPtrToScopedRefptr( |
| 203 result, tsi->getTypeLoc().getBeginLoc(), tsi->getTypeLoc().getEndLoc())); |
| 204 } |
| 205 |
| 206 class FunctionRewriterCallback : public MatchFinder::MatchCallback { |
| 207 public: |
| 208 explicit FunctionRewriterCallback(Replacements* replacements) |
| 209 : replacements_(replacements) {} |
| 210 virtual void run(const MatchFinder::MatchResult& result) override; |
| 211 |
| 212 private: |
| 213 Replacements* const replacements_; |
| 214 }; |
| 215 |
| 216 void FunctionRewriterCallback::run(const MatchFinder::MatchResult& result) { |
| 217 const clang::FunctionDecl* const function_decl = |
| 218 result.Nodes.getNodeAs<clang::FunctionDecl>("fn"); |
| 219 assert(function_decl && "Unexpected match! No FunctionDecl captured!"); |
| 220 |
| 221 // If matched against an implicit conversion to a DeclRefExpr, make sure the |
| 222 // referenced declaration is of class type, e.g. the tool skips trying to |
| 223 // chase pointers/references to determine if the pointee is a scoped_refptr<T> |
| 224 // with local storage. Instead, let a human manually handle those cases. |
| 225 const clang::VarDecl* const var_decl = |
| 226 result.Nodes.getNodeAs<clang::VarDecl>("var"); |
| 227 if (var_decl && !var_decl->getTypeSourceInfo()->getType()->isClassType()) { |
| 228 return; |
| 229 } |
| 230 |
| 231 for (clang::FunctionDecl* f : function_decl->redecls()) { |
| 232 clang::SourceRange range = f->getReturnTypeSourceRange(); |
| 233 replacements_->insert( |
| 234 RewriteRawPtrToScopedRefptr(result, range.getBegin(), range.getEnd())); |
| 235 } |
| 236 } |
| 237 |
| 238 class MacroRewriterCallback : public MatchFinder::MatchCallback { |
| 239 public: |
| 240 explicit MacroRewriterCallback(Replacements* replacements) |
| 241 : replacements_(replacements) {} |
| 242 virtual void run(const MatchFinder::MatchResult& result) override; |
| 243 |
| 244 private: |
| 245 Replacements* const replacements_; |
| 246 }; |
| 247 |
| 248 void MacroRewriterCallback::run(const MatchFinder::MatchResult& result) { |
| 249 const clang::Expr* const expr = result.Nodes.getNodeAs<clang::Expr>("expr"); |
| 250 assert(expr && "Unexpected match! No Expr captured!"); |
| 251 replacements_->insert(RewriteImplicitToExplicitConversion(result, expr)); |
| 252 } |
| 253 |
| 254 } // namespace |
| 255 |
| 256 static llvm::cl::extrahelp common_help(CommonOptionsParser::HelpMessage); |
| 257 |
| 258 int main(int argc, const char* argv[]) { |
| 259 // TODO(dcheng): Clang tooling should do this itself. |
| 260 // http://llvm.org/bugs/show_bug.cgi?id=21627 |
| 261 llvm::InitializeNativeTarget(); |
| 262 llvm::InitializeNativeTargetAsmParser(); |
| 263 llvm::cl::OptionCategory category("Remove scoped_refptr conversions"); |
| 264 CommonOptionsParser options(argc, argv, category); |
| 265 clang::tooling::ClangTool tool(options.getCompilations(), |
| 266 options.getSourcePathList()); |
| 267 |
| 268 MatchFinder match_finder; |
| 269 Replacements replacements; |
| 270 |
| 271 auto is_scoped_refptr = recordDecl(isSameOrDerivedFrom("::scoped_refptr"), |
| 272 isTemplateInstantiation()); |
| 273 |
| 274 // Finds all calls to conversion operator member function. This catches calls |
| 275 // to "operator T*", "operator Testable", and "operator bool" equally. |
| 276 auto base_matcher = memberCallExpr(thisPointerType(is_scoped_refptr), |
| 277 callee(conversionDecl()), |
| 278 on(id("arg", expr()))); |
| 279 |
| 280 // The heuristic for whether or not converting a temporary is 'unsafe'. An |
| 281 // unsafe conversion is one where a temporary scoped_refptr<T> is converted to |
| 282 // another type. The matcher provides an exception for a temporary |
| 283 // scoped_refptr that is the result of an operator call. In this case, assume |
| 284 // that it's the result of an iterator dereference, and the container itself |
| 285 // retains the necessary reference, since this is a common idiom to see in |
| 286 // loop bodies. |
| 287 auto is_unsafe_temporary_conversion = |
| 288 on(bindTemporaryExpr(unless(has(operatorCallExpr())))); |
| 289 |
| 290 // Returning a scoped_refptr<T> as a T* is considered unsafe if either are |
| 291 // true: |
| 292 // - The scoped_refptr<T> is a temporary. |
| 293 // - The scoped_refptr<T> has local lifetime. |
| 294 auto returned_as_raw_ptr = hasParent( |
| 295 returnStmt(hasAncestor(id("fn", functionDecl(returns(pointerType())))))); |
| 296 // This matcher intentionally matches more than it should. For example, this |
| 297 // will match: |
| 298 // scoped_refptr<Foo>& foo = some_other_foo; |
| 299 // return foo; |
| 300 // The matcher callback filters out VarDecls that aren't a scoped_refptr<T>, |
| 301 // so those cases can be manually handled. |
| 302 auto is_local_variable = |
| 303 on(declRefExpr(to(id("var", varDecl(hasLocalStorage()))))); |
| 304 auto is_unsafe_return = |
| 305 anyOf(allOf(hasParent(implicitCastExpr(returned_as_raw_ptr)), |
| 306 is_local_variable), |
| 307 allOf(hasParent(implicitCastExpr( |
| 308 hasParent(exprWithCleanups(returned_as_raw_ptr)))), |
| 309 is_unsafe_temporary_conversion)); |
| 310 |
| 311 // This catches both user-defined conversions (eg: "operator bool") and |
| 312 // standard conversion sequence (C++03 13.3.3.1.1), such as converting a |
| 313 // pointer to a bool. |
| 314 auto implicit_to_bool = |
| 315 implicitCastExpr(hasImplicitDestinationType(isBoolean())); |
| 316 |
| 317 // Avoid converting calls to of "operator Testable" -> "bool" and calls of |
| 318 // "operator T*" -> "bool". |
| 319 auto bool_conversion_matcher = hasParent( |
| 320 expr(anyOf(implicit_to_bool, expr(hasParent(implicit_to_bool))))); |
| 321 |
| 322 auto is_logging_helper = |
| 323 functionDecl(anyOf(hasName("CheckEQImpl"), hasName("CheckNEImpl"))); |
| 324 auto is_gtest_helper = functionDecl( |
| 325 anyOf(methodDecl(ofClass(recordDecl(isSameOrDerivedFrom( |
| 326 hasName("::testing::internal::EqHelper")))), |
| 327 hasName("Compare")), |
| 328 hasName("::testing::internal::CmpHelperNE"))); |
| 329 auto is_gtest_assertion_result_ctor = constructorDecl(ofClass( |
| 330 recordDecl(isSameOrDerivedFrom(hasName("::testing::AssertionResult"))))); |
| 331 |
| 332 // Find all calls to an operator overload that are 'safe'. |
| 333 // |
| 334 // All bool conversions will be handled with the Testable trick, but that |
| 335 // can only be used once "operator T*" is removed, since otherwise it leaves |
| 336 // the call ambiguous. |
| 337 GetRewriterCallback get_callback(&replacements); |
| 338 match_finder.addMatcher( |
| 339 memberCallExpr( |
| 340 base_matcher, |
| 341 // Excluded since the conversion may be unsafe. |
| 342 unless(anyOf(is_unsafe_temporary_conversion, is_unsafe_return)), |
| 343 // Excluded since the conversion occurs inside a helper function that |
| 344 // the macro wraps. Letting this callback handle the rewrite would |
| 345 // result in an incorrect replacement that changes the helper function |
| 346 // itself. Instead, the right replacement is to rewrite the macro's |
| 347 // arguments. |
| 348 unless(hasAncestor(decl(anyOf(is_logging_helper, |
| 349 is_gtest_helper, |
| 350 is_gtest_assertion_result_ctor))))), |
| 351 &get_callback); |
| 352 |
| 353 // Find temporary scoped_refptr<T>'s being unsafely assigned to a T*. |
| 354 VarRewriterCallback var_callback(&replacements); |
| 355 auto initialized_with_temporary = ignoringImpCasts(exprWithCleanups( |
| 356 has(memberCallExpr(base_matcher, is_unsafe_temporary_conversion)))); |
| 357 match_finder.addMatcher(id("var", |
| 358 varDecl(hasInitializer(initialized_with_temporary), |
| 359 hasType(pointerType()))), |
| 360 &var_callback); |
| 361 match_finder.addMatcher( |
| 362 constructorDecl(forEachConstructorInitializer( |
| 363 allOf(withInitializer(initialized_with_temporary), |
| 364 forField(id("var", fieldDecl(hasType(pointerType()))))))), |
| 365 &var_callback); |
| 366 |
| 367 // Rewrite functions that unsafely turn a scoped_refptr<T> into a T* when |
| 368 // returning a value. |
| 369 FunctionRewriterCallback fn_callback(&replacements); |
| 370 match_finder.addMatcher(memberCallExpr(base_matcher, is_unsafe_return), |
| 371 &fn_callback); |
| 372 |
| 373 // Rewrite logging / gtest expressions that result in an implicit conversion. |
| 374 // Luckily, the matchers don't need to handle the case where one of the macro |
| 375 // arguments is NULL, such as: |
| 376 // CHECK_EQ(my_scoped_refptr, NULL) |
| 377 // because it simply doesn't compile--since NULL is actually of integral type, |
| 378 // this doesn't trigger scoped_refptr<T>'s implicit conversion. Since there is |
| 379 // no comparison overload for scoped_refptr<T> and int, this fails to compile. |
| 380 MacroRewriterCallback macro_callback(&replacements); |
| 381 // CHECK_EQ/CHECK_NE helpers. |
| 382 match_finder.addMatcher( |
| 383 callExpr(callee(is_logging_helper), |
| 384 argumentCountIs(3), |
| 385 hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))), |
| 386 hasAnyArgument(hasType(pointerType())), |
| 387 hasArgument(2, stringLiteral())), |
| 388 ¯o_callback); |
| 389 // ASSERT_EQ/ASSERT_NE/EXPECT_EQ/EXPECT_EQ, which use the same underlying |
| 390 // helper functions. Even though gtest has special handling for pointer to |
| 391 // NULL comparisons, it doesn't trigger in this case, so no special handling |
| 392 // is needed for the replacements. |
| 393 match_finder.addMatcher( |
| 394 callExpr(callee(is_gtest_helper), |
| 395 argumentCountIs(4), |
| 396 hasArgument(0, stringLiteral()), |
| 397 hasArgument(1, stringLiteral()), |
| 398 hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))), |
| 399 hasAnyArgument(hasType(pointerType()))), |
| 400 ¯o_callback); |
| 401 // ASSERT_TRUE/EXPECT_TRUE helpers. Note that this matcher doesn't need to |
| 402 // handle ASSERT_FALSE/EXPECT_FALSE, because it gets coerced to bool before |
| 403 // being passed as an argument to AssertionResult's constructor. As a result, |
| 404 // GetRewriterCallback handles this case properly since the conversion isn't |
| 405 // hidden inside AssertionResult, and the generated replacement properly |
| 406 // rewrites the macro argument. |
| 407 // However, the tool does need to handle the _TRUE counterparts, since the |
| 408 // conversion occurs inside the constructor in those cases. |
| 409 match_finder.addMatcher( |
| 410 constructExpr( |
| 411 argumentCountIs(2), |
| 412 hasArgument(0, id("expr", expr(hasType(is_scoped_refptr)))), |
| 413 hasDeclaration(is_gtest_assertion_result_ctor)), |
| 414 ¯o_callback); |
| 415 |
| 416 std::unique_ptr<clang::tooling::FrontendActionFactory> factory = |
| 417 clang::tooling::newFrontendActionFactory(&match_finder); |
| 418 int result = tool.run(factory.get()); |
| 419 if (result != 0) |
| 420 return result; |
| 421 |
| 422 // Serialization format is documented in tools/clang/scripts/run_tool.py |
| 423 llvm::outs() << "==== BEGIN EDITS ====\n"; |
| 424 for (const auto& r : replacements) { |
| 425 std::string replacement_text = r.getReplacementText().str(); |
| 426 std::replace(replacement_text.begin(), replacement_text.end(), '\n', '\0'); |
| 427 llvm::outs() << "r:::" << r.getFilePath() << ":::" << r.getOffset() << ":::" |
| 428 << r.getLength() << ":::" << replacement_text << "\n"; |
| 429 } |
| 430 llvm::outs() << "==== END EDITS ====\n"; |
| 431 |
| 432 return 0; |
| 433 } |
OLD | NEW |