OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // This clang tool finds all instances of net::NetworkTrafficAnnotationTag in |
| 6 // given source code, extracts the location info and content of annotation tags |
| 7 // (unique id and annotation text), and stores them in separate text files |
| 8 // (per instance) in the given output directory. |
| 9 |
#include <errno.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <fstream>
#include <memory>
#include <string>
#include <vector>

#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Refactoring.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Support/CommandLine.h"
| 23 |
| 24 using namespace clang::ast_matchers; |
| 25 |
| 26 namespace { |
| 27 |
| 28 // An instance of network traffic annotation usage. This can be either |
| 29 // a variable defined as NetworkTrafficAnnotationTag or a function that has |
| 30 // a variable of this type as it's input parameter. |
| 31 struct NetworkAnnotationInstance { |
| 32 NetworkAnnotationInstance() |
| 33 : variable_reference(nullptr), |
| 34 is_direct_call(false), |
| 35 transitive_parameter(false) {} |
| 36 |
| 37 // Information about where this annotation or call has happened. |
| 38 struct Location { |
| 39 Location() : line_number(-1) {} |
| 40 std::string file_path; |
| 41 int line_number; |
| 42 // Name of the function including this instance. |
| 43 std::string function_name; |
| 44 // Name of the variable that contains annotation or the function called |
| 45 // with annotation, e.g. SpellingServiceClient::CreateURLFetcher when it's |
| 46 // a function or net_annotation when it's a variable. |
| 47 std::string object_name; |
| 48 }; |
| 49 |
| 50 // Annotation content. |
| 51 struct Annotation { |
| 52 std::string unique_id; |
| 53 std::string text; |
| 54 }; |
| 55 |
| 56 Location location; |
| 57 Annotation annotation; |
| 58 |
| 59 // Possible error message (empty if no error). |
| 60 std::string error; |
| 61 // A reference to the variable containing annotation. Null if not available. |
| 62 const clang::NamedDecl* variable_reference; |
| 63 // Flag stating that parameter is directly passed to annotate function here |
| 64 // or it's through a variable. |
| 65 bool is_direct_call; |
| 66 // Flag stating that a variable is a parameter received by upper level |
| 67 // function. |
| 68 bool transitive_parameter; |
| 69 }; |
| 70 |
| 71 // Structure to collect instances of network traffic annotation usages. |
| 72 struct Collector { |
| 73 std::vector<NetworkAnnotationInstance> variable_definitions; |
| 74 std::vector<NetworkAnnotationInstance> calls; |
| 75 }; |
| 76 |
| 77 // Returns the function that includes the given token. For example, if the token |
| 78 // is variable x in the code "void foo() { int x; ... }", it returns "foo". |
| 79 std::string GetCoveringFunction(const clang::Decl* token, |
| 80 const MatchFinder::MatchResult& result); |
| 81 |
// Tells whether |token| is exactly |name| or exactly |name| qualified with the
// "net::" namespace prefix.
bool net_match(const std::string& token, const std::string& name) {
  if (token == name)
    return true;

  constexpr char kNetPrefix[] = "net::";
  constexpr std::string::size_type kNetPrefixLength = sizeof(kNetPrefix) - 1;

  // The qualified form must be the prefix immediately followed by the whole
  // name and nothing else.
  if (token.size() != kNetPrefixLength + name.size())
    return false;
  if (token.compare(0, kNetPrefixLength, kNetPrefix) != 0)
    return false;
  return token.compare(kNetPrefixLength, std::string::npos, name) == 0;
}
| 86 |
| 87 // Returns the source code of a given token, like function name, variable name, |
| 88 // string const, etc. |
| 89 std::string GetStmtText(const clang::Stmt* token, |
| 90 const clang::SourceManager& source_manager) { |
| 91 clang::LangOptions lopt; |
| 92 // Get text range. |
| 93 clang::SourceLocation start = token->getLocStart(); |
| 94 clang::SourceLocation end = token->getLocEnd(); |
| 95 |
| 96 // If it's a macro, go to definition. |
| 97 if (start.isMacroID()) |
| 98 start = source_manager.getSpellingLoc(start); |
| 99 if (end.isMacroID()) |
| 100 end = source_manager.getSpellingLoc(end); |
| 101 |
| 102 // Get the real end of the token. |
| 103 end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt); |
| 104 |
| 105 // Extract text. |
| 106 std::string output(source_manager.getCharacterData(start), |
| 107 source_manager.getCharacterData(end)); |
| 108 |
| 109 // Raw string? |
| 110 if (output == "R") { |
| 111 if (auto* c1 = clang::dyn_cast<clang::ImplicitCastExpr>(token)) { |
| 112 if (const clang::StringLiteral* c2 = |
| 113 clang::dyn_cast<clang::StringLiteral>( |
| 114 c1->getSubExprAsWritten())) { |
| 115 output = c2->getString(); |
| 116 } |
| 117 } |
| 118 } |
| 119 |
| 120 return output; |
| 121 } |
| 122 |
| 123 // Extracts unique id and annotation text of a call to |
| 124 // "DefineNetworkTrafficAnnotation" function. Sets the error text if fails. |
| 125 void GetAnnotationText(const clang::CallExpr* call_expr, |
| 126 const clang::SourceManager& source_manager, |
| 127 NetworkAnnotationInstance* instance) { |
| 128 if (net_match(GetStmtText(call_expr->getCallee(), source_manager), |
| 129 "DefineNetworkTrafficAnnotation") && |
| 130 call_expr->getNumArgs() == 2) { |
| 131 instance->annotation.unique_id = |
| 132 GetStmtText(call_expr->getArgs()[0], source_manager); |
| 133 instance->annotation.text = |
| 134 GetStmtText(call_expr->getArgs()[1], source_manager); |
| 135 instance->error = ""; |
| 136 } else { |
| 137 instance->annotation.unique_id = ""; |
| 138 instance->annotation.text = ""; |
| 139 instance->error = "Unexpected function."; |
| 140 } |
| 141 } |
| 142 |
| 143 // Returns the function that includes the given token. For example, if the token |
| 144 // is the call to function bar() in the code "void foo() { bar(); }", it returns |
| 145 // "foo". |
| 146 std::string GetCoveringFunction(const clang::Stmt* token, |
| 147 const MatchFinder::MatchResult& result) { |
| 148 clang::ASTContext::DynTypedNodeList parents = |
| 149 result.Context->getParents(*token); |
| 150 // TODO: What exactly != 1 parent mean? |
| 151 if (parents.size() == 1) { |
| 152 if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) |
| 153 return GetCoveringFunction(s, result); |
| 154 else if (const clang::Decl* d = parents[0].get<clang::Decl>()) |
| 155 return GetCoveringFunction(d, result); |
| 156 } |
| 157 return "Unknown"; |
| 158 } |
| 159 |
| 160 // Returns the function that includes the given token. For example, if the token |
| 161 // is variable x in the code "void foo() { int x; ... }", it returns "foo". |
| 162 std::string GetCoveringFunction(const clang::Decl* token, |
| 163 const MatchFinder::MatchResult& result) { |
| 164 if (auto f = clang::dyn_cast<clang::FunctionDecl>(token)) |
| 165 return f->getQualifiedNameAsString(); |
| 166 |
| 167 clang::ASTContext::DynTypedNodeList parents = |
| 168 result.Context->getParents(*token); |
| 169 // TODO: What exactly != 1 parent mean? |
| 170 if (parents.size() == 1) { |
| 171 if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) |
| 172 return GetCoveringFunction(s, result); |
| 173 else if (const clang::Decl* d = parents[0].get<clang::Decl>()) |
| 174 return GetCoveringFunction(d, result); |
| 175 } |
| 176 return "Unknown"; |
| 177 } |
| 178 |
| 179 // Finds file name and line number of the given token. |
| 180 template <class T> |
| 181 void GetLocation(const T* token, |
| 182 const MatchFinder::MatchResult& result, |
| 183 NetworkAnnotationInstance::Location* location) { |
| 184 clang::SourceLocation source_location = token->getLocStart(); |
| 185 location->file_path = result.SourceManager->getFilename(source_location); |
| 186 location->line_number = |
| 187 result.SourceManager->getSpellingLineNumber(source_location); |
| 188 } |
| 189 |
| 190 // This class implements the call back functions for AST Matchers. The matchers |
| 191 // are defined in RunMatchers function and when a pattern is found there, |
| 192 // the run function in this class is called back with information on the match |
| 193 // location and description of the match pattern. |
| 194 class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback { |
| 195 public: |
| 196 explicit NetworkAnnotationTagCallback(Collector* collector) |
| 197 : collector_(collector) {} |
| 198 ~NetworkAnnotationTagCallback() override = default; |
| 199 |
| 200 // Is called on any pattern found by ASTMathers that are defined in RunMathers |
| 201 // function. |
| 202 virtual void run(const MatchFinder::MatchResult& result) override { |
| 203 if (const clang::VarDecl* var_decl = |
| 204 result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) { |
| 205 AddVariable(var_decl, result); |
| 206 } else if (const clang::CallExpr* call_expr = |
| 207 result.Nodes.getNodeAs<clang::CallExpr>("user_function")) { |
| 208 AddFunction(call_expr, result); |
| 209 } |
| 210 } |
| 211 |
| 212 // Stores an annotation variable defintion. |
| 213 void AddVariable(const clang::VarDecl* var_decl, |
| 214 const MatchFinder::MatchResult& result) { |
| 215 NetworkAnnotationInstance instance; |
| 216 |
| 217 GetLocation(var_decl, result, &instance.location); |
| 218 instance.location.object_name = var_decl->getQualifiedNameAsString(); |
| 219 instance.variable_reference = clang::dyn_cast<clang::NamedDecl>(var_decl); |
| 220 |
| 221 // Mark it as transitive parameter if it doesn't have initialization but |
| 222 // it's a function parameter. Otherwise, extract it's content. |
| 223 if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() && |
| 224 !var_decl->isLocalVarDecl()) { |
| 225 instance.transitive_parameter = true; |
| 226 } else if (auto* init_expr = var_decl->getInit()) { |
| 227 if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr)) |
| 228 GetAnnotationText(call_expr, *result.SourceManager, &instance); |
| 229 } |
| 230 // If nothing is set, issue an error. |
| 231 if (!instance.transitive_parameter && |
| 232 instance.annotation.unique_id.empty() && instance.error.empty()) { |
| 233 instance.error = "Could not resolve variable initialization."; |
| 234 } |
| 235 |
| 236 collector_->variable_definitions.push_back(instance); |
| 237 } |
| 238 |
| 239 // Stores a function call that uses annotation variables. |
| 240 void AddFunction(const clang::CallExpr* call_expr, |
| 241 const MatchFinder::MatchResult& result) { |
| 242 NetworkAnnotationInstance instance; |
| 243 |
| 244 GetLocation(call_expr, result, &instance.location); |
| 245 instance.location.function_name = |
| 246 GetCoveringFunction(clang::dyn_cast<clang::Stmt>(call_expr), result); |
| 247 instance.location.object_name = |
| 248 call_expr->getDirectCallee()->getQualifiedNameAsString(); |
| 249 |
| 250 // Get annotation text. |
| 251 const clang::FunctionDecl* function_decl = call_expr->getDirectCallee(); |
| 252 unsigned params_count = function_decl->getNumParams(); |
| 253 unsigned args_count = call_expr->getNumArgs(); |
| 254 |
| 255 for (unsigned i = 0; i < params_count; i++) { |
| 256 if (net_match(clang::QualType::getAsString( |
| 257 function_decl->getParamDecl(i)->getType().split()), |
| 258 "NetworkTrafficAnnotationTag")) { |
| 259 if (i >= args_count) { |
| 260 instance.error = "Function missing annotation argument."; |
| 261 } else { |
| 262 // Get the argument. |
| 263 const clang::Expr* arg = call_expr->getArgs()[i]; |
| 264 |
| 265 // Is it a call to annotate function? |
| 266 if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) { |
| 267 instance.is_direct_call = true; |
| 268 GetAnnotationText(inner_call_expr, *result.SourceManager, |
| 269 &instance); |
| 270 instance.error = ""; |
| 271 } else { |
| 272 // Then it's a variable. |
| 273 instance.is_direct_call = false; |
| 274 if (auto* pure_arg = |
| 275 clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) { |
| 276 instance.variable_reference = pure_arg->getFoundDecl(); |
| 277 instance.error = ""; |
| 278 } else { |
| 279 instance.error = "Unknwon parameter type."; |
| 280 } |
| 281 } |
| 282 } |
| 283 collector_->calls.push_back(instance); |
| 284 } |
| 285 } |
| 286 } |
| 287 |
| 288 private: |
| 289 Collector* collector_; |
| 290 }; |
| 291 |
| 292 // Sets up ASTMatchers and runs clang tool to populate collector. Returns the |
| 293 // result of running the clang tool. |
| 294 int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) { |
| 295 NetworkAnnotationTagCallback call_back(collector); |
| 296 MatchFinder match_finder; |
| 297 |
| 298 // Set up a pattern to find variables defined with type |
| 299 // [net::]NetworkTrafficAnnotationTag. |
| 300 match_finder.addMatcher( |
| 301 varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")), |
| 302 hasType(asString("net::NetworkTrafficAnnotationTag")))) |
| 303 .bind("annotation_variable"), |
| 304 &call_back); |
| 305 |
| 306 // Set up a pattern to find functions that have a parameter of type |
| 307 // [net::]NetworkTrafficAnnotationTag. |
| 308 match_finder.addMatcher( |
| 309 callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf( |
| 310 hasType(asString("NetworkTrafficAnnotationTag")), |
| 311 hasType(asString("net::NetworkTrafficAnnotationTag"))))))) |
| 312 .bind("user_function"), |
| 313 &call_back); |
| 314 |
| 315 std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory = |
| 316 clang::tooling::newFrontendActionFactory(&match_finder); |
| 317 return clang_tool->run(frontend_factory.get()); |
| 318 } |
| 319 |
| 320 } // namespace |
| 321 |
| 322 int main(int argc, const char* argv[]) { |
| 323 // Find output directory. |
| 324 if (argc < 5) { |
| 325 llvm::errs() << "Temporary files directory is not specified."; |
| 326 return -1; |
| 327 } |
| 328 std::string output_dir(argv[4]); |
| 329 // Keep to consumed parameter from being passed to clang parser. |
| 330 argc = 4; |
| 331 |
| 332 llvm::cl::OptionCategory category("Network Request Audit Extractor Tool"); |
| 333 clang::tooling::CommonOptionsParser options(argc, argv, category); |
| 334 clang::tooling::ClangTool tool(options.getCompilations(), |
| 335 options.getSourcePathList()); |
| 336 Collector collector; |
| 337 |
| 338 int result = RunMatchers(&tool, &collector); |
| 339 |
| 340 if (result != 0) |
| 341 return result; |
| 342 |
| 343 // llvm::outs() << "==== BEGIN EDITS ====\n"; |
| 344 // llvm::outs() << "==== END EDITS ====\n"; |
| 345 |
| 346 // For each call, if the parameter is not generated by a direct call to |
| 347 // "DefineNetworkTrafficAnnotation", find the variable that holds the value. |
| 348 for (auto& c : collector.calls) { |
| 349 if (!c.is_direct_call) { |
| 350 // Find the variable. |
| 351 for (const auto& v : collector.variable_definitions) |
| 352 if (v.variable_reference == c.variable_reference) { |
| 353 c.annotation = v.annotation; |
| 354 c.transitive_parameter = v.transitive_parameter; |
| 355 c.error = c.error + (c.error.length() ? "\n+" : "") + v.error; |
| 356 break; |
| 357 } |
| 358 if (!c.annotation.unique_id.length()) |
| 359 c.error = "Variable not found."; |
| 360 } |
| 361 |
| 362 // If the function just receives the variable and passes it to another |
| 363 // function, ignore it, otherwise write it to file. |
| 364 if (!c.transitive_parameter) { |
| 365 std::string s = c.location.file_path; |
| 366 std::replace(s.begin(), s.end(), '/', '_'); |
| 367 std::replace(s.begin(), s.end(), '.', '_'); |
| 368 std::string file_path = output_dir + "/" + s + "(" + |
| 369 std::to_string(c.location.line_number) + ").txt"; |
| 370 |
| 371 std::ofstream output_file(file_path); |
| 372 if (output_file.is_open()) { |
| 373 output_file << c.location.file_path << "\n"; |
| 374 output_file << c.location.function_name << "\n"; |
| 375 output_file << c.location.line_number << "\n"; |
| 376 output_file << c.location.object_name << "\n"; |
| 377 output_file << c.error << "\n"; |
| 378 output_file << c.annotation.unique_id << "\n"; |
| 379 output_file << c.annotation.text << "\n"; |
| 380 output_file.close(); |
| 381 } else { |
| 382 llvm::errs() << "Could not write to file: " << file_path << " because " |
| 383 << strerror(errno) << "\n"; |
| 384 return 1; |
| 385 } |
| 386 } |
| 387 } |
| 388 |
| 389 return 0; |
| 390 } |
OLD | NEW |