Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // This clang tool finds all instances of net::NetworkTrafficAnnotationTag in | |
| 6 // given source code, extracts the location info and content of annotation tags | |
| 7 // (unique id and annotation text), and stores them in separate text files | |
| 8 // (per instance) in the given output directory. Please refer to README.md for | |
| 9 // build and usage instructions. | |
| 10 | |
| 11 #include <stdio.h> | |
| 12 #include <fstream> | |
| 13 #include <memory> | |
| 14 | |
| 15 #include "clang/ASTMatchers/ASTMatchFinder.h" | |
| 16 #include "clang/ASTMatchers/ASTMatchers.h" | |
| 17 #include "clang/Basic/SourceManager.h" | |
| 18 #include "clang/Frontend/FrontendActions.h" | |
| 19 #include "clang/Lex/Lexer.h" | |
| 20 #include "clang/Tooling/CommonOptionsParser.h" | |
| 21 #include "clang/Tooling/Refactoring.h" | |
| 22 #include "clang/Tooling/Tooling.h" | |
| 23 #include "llvm/Support/CommandLine.h" | |
| 24 | |
| 25 using namespace clang::ast_matchers; | |
| 26 | |
| 27 namespace { | |
| 28 | |
| 29 // An instance of network traffic annotation usage. This can be either | |
| 30 // a variable defined as NetworkTrafficAnnotationTag or a function that has | |
| 31 // a variable of this type as its input parameter, i.e., it can contain either | |
| 32 // of the following two 'foo' items: | |
| 33 // NetworkTrafficAnnotationTag foo = ... | |
| 34 // void foo(NetworkTrafficAnnotationTag bar) | |
| 35 struct NetworkAnnotationInstance { | |
| 36 NetworkAnnotationInstance() : variable_reference(nullptr) { | |
| 37 flag.is_direct_call = false; | |
| 38 } | |
| 39 | |
| 40 // Information about where this annotation or call has happened. | |
| 41 struct Location { | |
| 42 Location() : line_number(-1) {} | |
|
dcheng
2017/03/02 07:57:41
Consider using in-class member initializers. The c
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
| 43 std::string file_path; | |
| 44 int line_number; | |
| 45 | |
| 46 // Name of the function including this instance. E.g., in the following | |
| 47 // code, |function_name| will be 'foo': | |
| 48 // void foo() { NetworkTrafficAnnotationTag bar = ...; } | |
| 49 std::string function_name; | |
| 50 | |
| 51 // Name of the variable that contains annotation or the function called | |
| 52 // with annotation. E.g., in the following two code segments, |object_name| | |
| 53 // will be 'bar': | |
| 54 // void foo() { NetworkTrafficAnnotationTag bar = ...; } | |
| 55 // void foo() { bar(baz);} // baz is of type NetworkTrafficAnnotationTag. | |
| 56 std::string object_name; | |
| 57 }; | |
| 58 | |
| 59 // Annotation content. These are the parameters of a call to | |
| 60 // DefineNetworkTrafficAnnotation. The unique_id is an identifier for the | |
| 61 // annotation that has to be unique across the entire code base. The |text| | |
| 62 // stores a RAW string with the annotation that should be extracted. | |
| 63 struct Annotation { | |
| 64 std::string unique_id; | |
| 65 std::string text; | |
| 66 }; | |
| 67 | |
| 68 Location location; | |
| 69 Annotation annotation; | |
| 70 | |
| 71 // Possible error message (empty if no error). | |
| 72 std::string error; | |
| 73 | |
| 74 // A reference to the variable containing annotation. Null if not available. | |
| 75 const clang::NamedDecl* variable_reference; | |
| 76 | |
| 77 union { | |
| 78 // When this structure is refering to a function with a parameter of type | |
|
dcheng
2017/03/02 07:57:41
Nit: refering => referring
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
| 79 // NetworkTrafficAnnotationTag, |is_direct_call| variable is true if the | |
| 80 // parameter is generated by a direct call to DefineNetworkTrafficAnnotation | |
| 81 // and is false when the parameter is a variable. For example, in the | |
| 82 // following code segment, it is true for function 'foo' and false for | |
| 83 // function 'baz': | |
| 84 // | |
| 85 // foo(DefineNetworkTrafficAnnotation(...)) | |
| 86 // NetworkTrafficAnnotationTag bar = DefineNetworkTrafficAnnotation(...) | |
| 87 // baz(bar); | |
| 88 bool is_direct_call; | |
| 89 | |
| 90 // When this structure is referring to a variable, |transitive_parameter| is | |
| 91 // false if variable is defined in the same function and is true when it is | |
| 92 // passed to this function. For example, in the following code segment, it | |
| 93 // is true for bar, and false for baz. | |
| 94 // | |
| 95 // void foo(NetworkTrafficAnnotationTag bar) { | |
| 96 // NetworkTrafficAnnotationTag baz = DefineNetworkTrafficAnnotation(...); | |
| 97 // } | |
| 98 bool transitive_parameter; | |
| 99 } flag; | |
| 100 }; | |
| 101 | |
| 102 // Structure to collect instances of network traffic annotation usages. | |
| 103 struct Collector { | |
| 104 std::vector<NetworkAnnotationInstance> variable_definitions; | |
| 105 std::vector<NetworkAnnotationInstance> calls; | |
| 106 }; | |
| 107 | |
| 108 // Returns the function that includes the given token. For example, if the token | |
| 109 // is variable x in the code "void foo() { int x; ... }", it returns "foo". | |
| 110 std::string GetDeclarationCoveringFunction(const clang::Decl* token, | |
| 111 clang::ASTContext* context); | |
| 112 | |
| 113 // Checks if a token matches a name, with or without net:: namespace. | |
| 114 bool StripNetNamespaceMatch(const std::string& token, const std::string& name) { | |
| 115 return token == name || token == (std::string("net::") + name); | |
| 116 } | |
| 117 | |
| 118 // Returns the source code of a given token, like function name, variable name, | |
| 119 // string literal, etc. | |
| 120 std::string GetStmtText(const clang::Stmt* token, | |
| 121 const clang::SourceManager& source_manager) { | |
| 122 clang::LangOptions lopt; | |
|
dcheng
2017/03/02 07:57:41
Nit: use the LangOptions from ASTContext.
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
| 123 // Get text range. | |
| 124 clang::SourceLocation start = token->getLocStart(); | |
| 125 clang::SourceLocation end = token->getLocEnd(); | |
| 126 | |
| 127 // If it's a macro, go to definition. | |
| 128 if (start.isMacroID()) | |
| 129 start = source_manager.getSpellingLoc(start); | |
| 130 if (end.isMacroID()) | |
| 131 end = source_manager.getSpellingLoc(end); | |
| 132 | |
| 133 // Get the real end of the token. | |
| 134 end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt); | |
| 135 | |
| 136 // Extract text. | |
| 137 std::string output(source_manager.getCharacterData(start), | |
| 138 source_manager.getCharacterData(end)); | |
| 139 | |
| 140 // If |token| is a raw string literal, the above code just returns the "R" | |
| 141 // part of it. | |
| 142 if (output != "R") | |
| 143 return output; | |
| 144 | |
| 145 if (auto* literal = clang::dyn_cast<clang::StringLiteral>(token)) | |
| 146 return literal->getString(); | |
| 147 | |
| 148 if (auto* implicit_cast = clang::dyn_cast<clang::ImplicitCastExpr>(token)) { | |
| 149 if (const clang::StringLiteral* implicit_literal = | |
| 150 clang::dyn_cast<clang::StringLiteral>( | |
| 151 implicit_cast->getSubExprAsWritten())) { | |
| 152 return implicit_literal->getString(); | |
| 153 } | |
| 154 } | |
| 155 | |
| 156 return output; | |
| 157 } | |
| 158 | |
| 159 // Extracts unique id and annotation text of a call to | |
| 160 // "DefineNetworkTrafficAnnotation" function. Sets the error text if it fails. | |
| 161 void GetAnnotationText(const clang::CallExpr* call_expr, | |
| 162 const clang::SourceManager& source_manager, | |
| 163 NetworkAnnotationInstance* instance) { | |
| 164 if (StripNetNamespaceMatch( | |
| 165 GetStmtText(call_expr->getCallee(), source_manager), | |
| 166 "DefineNetworkTrafficAnnotation") && | |
| 167 call_expr->getNumArgs() == 2) { | |
| 168 instance->annotation.unique_id = | |
| 169 GetStmtText(call_expr->getArgs()[0], source_manager); | |
|
dcheng
2017/03/02 07:57:41
It'd be ideal to take advantage of matcher binding
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
| 170 instance->annotation.text = | |
| 171 GetStmtText(call_expr->getArgs()[1], source_manager); | |
| 172 instance->error = ""; | |
| 173 } else { | |
| 174 instance->annotation.unique_id = ""; | |
| 175 instance->annotation.text = ""; | |
| 176 instance->error = "Unexpected function."; | |
| 177 } | |
| 178 } | |
| 179 | |
| 180 // Returns the function that includes the given token. For example, if the token | |
| 181 // is the call to function bar() in the code "void foo() { bar(); }", it returns | |
| 182 // "foo". | |
| 183 std::string GetStatementCoveringFunction(const clang::Stmt* token, | |
| 184 clang::ASTContext* context) { | |
| 185 // Get the parent of |token| and return its covering function. | |
| 186 clang::ASTContext::DynTypedNodeList parents = context->getParents(*token); | |
| 187 | |
| 188 // TODO: What exactly does != 1 parent mean? I've not encountered any case | |
| 189 // where this value would be non-one. | |
| 190 if (parents.size() != 1) { | |
| 191 if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) | |
| 192 return GetStatementCoveringFunction(s, context); | |
| 193 else if (const clang::Decl* d = parents[0].get<clang::Decl>()) | |
| 194 return GetDeclarationCoveringFunction(d, context); | |
| 195 } | |
| 196 return "Unknown"; | |
| 197 } | |
| 198 | |
| 199 // Returns the function that includes the given token. For example, if the token | |
| 200 // is variable x in the code "void foo() { int x; ... }", it returns "foo". | |
| 201 std::string GetDeclarationCoveringFunction(const clang::Decl* token, | |
| 202 clang::ASTContext* context) { | |
| 203 // If |token| is a function declaration, return its name. | |
| 204 if (auto f = clang::dyn_cast<clang::FunctionDecl>(token)) | |
| 205 return f->getQualifiedNameAsString(); | |
| 206 | |
| 207 // As |token| is not a function declaration, get its parent and return its | |
| 208 // covering function. | |
| 209 clang::ASTContext::DynTypedNodeList parents = context->getParents(*token); | |
| 210 | |
| 211 // TODO: What exactly does != 1 parent mean? I've not encountered any case | |
| 212 // where this value would be non-one. | |
| 213 if (parents.size() == 1) { | |
| 214 if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) | |
| 215 return GetStatementCoveringFunction(s, context); | |
| 216 else if (const clang::Decl* d = parents[0].get<clang::Decl>()) | |
| 217 return GetDeclarationCoveringFunction(d, context); | |
| 218 } | |
| 219 return "Unknown"; | |
| 220 } | |
| 221 | |
| 222 // Finds file name and line number of the given token and writes it into | |
| 223 // |location|. | |
| 224 template <class T> | |
| 225 void GetLocation(const T* token, | |
| 226 const clang::SourceManager& source_manager, | |
| 227 NetworkAnnotationInstance::Location* location) { | |
| 228 clang::SourceLocation source_location = token->getLocStart(); | |
| 229 location->file_path = source_manager.getFilename(source_location); | |
| 230 location->line_number = source_manager.getSpellingLineNumber(source_location); | |
| 231 } | |
| 232 | |
| 233 // This class implements the call back functions for AST Matchers. The matchers | |
| 234 // are defined in RunMatchers function and when a pattern is found there, | |
| 235 // the run function in this class is called back with information on the match | |
| 236 // location and description of the match pattern. | |
| 237 class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback { | |
| 238 public: | |
| 239 explicit NetworkAnnotationTagCallback(Collector* collector) | |
| 240 : collector_(collector) {} | |
| 241 ~NetworkAnnotationTagCallback() override = default; | |
| 242 | |
| 243 // Is called on any pattern found by the ASTMatchers that are defined in the | |
| 244 // RunMatchers function. | |
| 245 virtual void run(const MatchFinder::MatchResult& result) override { | |
| 246 if (const clang::VarDecl* var_decl = | |
| 247 result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) { | |
| 248 AddVariable(var_decl, result); | |
| 249 } else if (const clang::CallExpr* call_expr = | |
| 250 result.Nodes.getNodeAs<clang::CallExpr>("user_function")) { | |
| 251 AddFunction(call_expr, result); | |
| 252 } | |
| 253 } | |
| 254 | |
| 255 // Stores an annotation variable definition. | |
| 256 void AddVariable(const clang::VarDecl* var_decl, | |
| 257 const MatchFinder::MatchResult& result) { | |
| 258 NetworkAnnotationInstance instance; | |
| 259 | |
| 260 GetLocation(var_decl, *result.SourceManager, &instance.location); | |
| 261 instance.location.object_name = var_decl->getQualifiedNameAsString(); | |
| 262 instance.variable_reference = clang::dyn_cast<clang::NamedDecl>(var_decl); | |
| 263 | |
| 264 // Mark the instance as transitive parameter if it doesn't have | |
| 265 // initialization in the function where it is defined and it is passed as a | |
| 266 // parameter to the function. Otherwise, extract its content. | |
| 267 if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() && | |
| 268 !var_decl->isLocalVarDecl()) { | |
| 269 instance.flag.transitive_parameter = true; | |
| 270 } else if (auto* init_expr = var_decl->getInit()) { | |
| 271 if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr)) | |
| 272 GetAnnotationText(call_expr, *result.SourceManager, &instance); | |
| 273 } | |
| 274 // If nothing is set, issue an error. | |
| 275 if (!instance.flag.transitive_parameter && | |
| 276 instance.annotation.unique_id.empty() && instance.error.empty()) { | |
| 277 instance.error = "Could not resolve variable initialization."; | |
| 278 } | |
| 279 | |
| 280 collector_->variable_definitions.push_back(instance); | |
| 281 } | |
| 282 | |
| 283 // Stores a function call that uses annotation variables. | |
| 284 void AddFunction(const clang::CallExpr* call_expr, | |
| 285 const MatchFinder::MatchResult& result) { | |
| 286 NetworkAnnotationInstance instance; | |
| 287 | |
| 288 GetLocation(call_expr, *result.SourceManager, &instance.location); | |
| 289 instance.location.function_name = GetStatementCoveringFunction( | |
| 290 clang::dyn_cast<clang::Stmt>(call_expr), result.Context); | |
| 291 instance.location.object_name = | |
| 292 call_expr->getDirectCallee()->getQualifiedNameAsString(); | |
| 293 | |
| 294 // Get annotation text. | |
| 295 const clang::FunctionDecl* function_decl = call_expr->getDirectCallee(); | |
| 296 unsigned params_count = function_decl->getNumParams(); | |
| 297 unsigned args_count = call_expr->getNumArgs(); | |
| 298 | |
| 299 for (unsigned i = 0; i < params_count; i++) { | |
| 300 if (StripNetNamespaceMatch( | |
| 301 clang::QualType::getAsString( | |
| 302 function_decl->getParamDecl(i)->getType().split()), | |
| 303 "NetworkTrafficAnnotationTag")) { | |
| 304 if (i >= args_count) { | |
| 305 instance.error = "Function missing annotation argument."; | |
| 306 } else { | |
| 307 // Get the argument. | |
| 308 const clang::Expr* arg = call_expr->getArgs()[i]; | |
| 309 | |
| 310 // Is it a call to annotate function? | |
| 311 if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) { | |
| 312 instance.flag.is_direct_call = true; | |
| 313 GetAnnotationText(inner_call_expr, *result.SourceManager, | |
| 314 &instance); | |
| 315 instance.error = ""; | |
| 316 } else { | |
| 317 // Then it's a variable. | |
| 318 instance.flag.is_direct_call = false; | |
| 319 if (auto* pure_arg = | |
| 320 clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) { | |
| 321 instance.variable_reference = pure_arg->getFoundDecl(); | |
| 322 instance.error = ""; | |
| 323 } else { | |
| 324 instance.error = "Unknwon parameter type."; | |
|
dcheng
2017/03/02 07:57:41
Nit: unknown
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
| 325 } | |
| 326 } | |
| 327 } | |
| 328 collector_->calls.push_back(instance); | |
| 329 } | |
| 330 } | |
| 331 } | |
| 332 | |
| 333 private: | |
| 334 Collector* collector_; | |
| 335 }; | |
| 336 | |
| 337 // Sets up ASTMatchers and runs clang tool to populate collector. Returns the | |
| 338 // result of running the clang tool. | |
| 339 int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) { | |
| 340 NetworkAnnotationTagCallback call_back(collector); | |
| 341 MatchFinder match_finder; | |
| 342 | |
| 343 // Set up a pattern to find variables defined with type | |
| 344 // [net::]NetworkTrafficAnnotationTag. | |
| 345 match_finder.addMatcher( | |
| 346 varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")), | |
|
dcheng
2017/03/02 07:57:41
Can you help me understand why we need both? Shoul
Ramin Halavati
2017/04/06 13:32:29
If the code has the line "using namespace net;", t
| |
| 347 hasType(asString("net::NetworkTrafficAnnotationTag")))) | |
| 348 .bind("annotation_variable"), | |
| 349 &call_back); | |
| 350 | |
| 351 // Set up a pattern to find functions that have a parameter of type | |
| 352 // [net::]NetworkTrafficAnnotationTag. | |
| 353 match_finder.addMatcher( | |
| 354 callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf( | |
| 355 hasType(asString("NetworkTrafficAnnotationTag")), | |
| 356 hasType(asString("net::NetworkTrafficAnnotationTag"))))))) | |
| 357 .bind("user_function"), | |
| 358 &call_back); | |
| 359 | |
| 360 std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory = | |
| 361 clang::tooling::newFrontendActionFactory(&match_finder); | |
| 362 return clang_tool->run(frontend_factory.get()); | |
| 363 } | |
| 364 | |
| 365 } // namespace | |
| 366 | |
| 367 int main(int argc, const char* argv[]) { | |
| 368 // Find output directory. | |
| 369 if (argc < 5) { | |
| 370 llvm::errs() << "Output files directory is not specified."; | |
| 371 return -1; | |
| 372 } | |
| 373 std::string output_dir(argv[4]); | |
| 374 | |
| 375 // Keep the consumed parameter from being passed to the clang parser. | |
|
dcheng
2017/03/02 07:57:41
Maybe just make this a proper flag?
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
| 376 argc = 4; | |
| 377 | |
| 378 llvm::cl::OptionCategory category("Network Request Audit Extractor Tool"); | |
| 379 clang::tooling::CommonOptionsParser options(argc, argv, category); | |
| 380 clang::tooling::ClangTool tool(options.getCompilations(), | |
| 381 options.getSourcePathList()); | |
| 382 Collector collector; | |
| 383 | |
| 384 int result = RunMatchers(&tool, &collector); | |
| 385 | |
| 386 if (result != 0) | |
| 387 return result; | |
| 388 | |
| 389 // For each call, if the parameter is not generated by a direct call to | |
| 390 // "DefineNetworkTrafficAnnotation", find the variable that holds the value. | |
| 391 for (NetworkAnnotationInstance& call : collector.calls) { | |
| 392 if (!call.flag.is_direct_call) { | |
| 393 // Find the variable. | |
| 394 for (NetworkAnnotationInstance& var : collector.variable_definitions) | |
| 395 if (var.variable_reference == call.variable_reference) { | |
| 396 call.annotation = var.annotation; | |
| 397 call.flag.transitive_parameter = var.flag.transitive_parameter; | |
| 398 call.error = | |
| 399 call.error + (call.error.length() ? "\n+" : "") + var.error; | |
| 400 break; | |
| 401 } | |
| 402 if (!call.annotation.unique_id.length()) | |
| 403 call.error = "Variable not found."; | |
| 404 } | |
| 405 | |
| 406 // If the function just receives the variable and passes it to another | |
| 407 // function, ignore it, otherwise write it to file. | |
| 408 if (!call.flag.transitive_parameter) { | |
| 409 std::string s = call.location.file_path; | |
| 410 std::replace(s.begin(), s.end(), '/', '_'); | |
| 411 std::replace(s.begin(), s.end(), '.', '_'); | |
| 412 std::string file_path = output_dir + "/" + s + "(" + | |
| 413 std::to_string(call.location.line_number) + | |
| 414 ").txt"; | |
| 415 | |
| 416 std::ofstream output_file(file_path); | |
|
dcheng
2017/03/02 07:57:41
How are reads/writes to this file synchronized? If
Ramin Halavati
2017/04/06 13:32:29
Comment updated in refactored source:
"For each ca
| |
| 417 if (output_file.is_open()) { | |
| 418 output_file << call.location.file_path << "\n"; | |
| 419 output_file << call.location.function_name << "\n"; | |
| 420 output_file << call.location.line_number << "\n"; | |
| 421 output_file << call.location.object_name << "\n"; | |
| 422 output_file << call.error << "\n"; | |
| 423 output_file << call.annotation.unique_id << "\n"; | |
| 424 output_file << call.annotation.text << "\n"; | |
| 425 output_file.close(); | |
| 426 } else { | |
| 427 llvm::errs() << "Could not write to file: " << file_path << " because " | |
| 428 << strerror(errno) << "\n"; | |
| 429 return 1; | |
| 430 } | |
| 431 } | |
| 432 } | |
| 433 | |
| 434 return 0; | |
| 435 } | |
| OLD | NEW |