Index: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
diff --git a/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..03b3ecdee731e29a111568d9f741c4e7bfb6e5be |
--- /dev/null |
+++ b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
@@ -0,0 +1,435 @@ |
+// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+// This clang tool finds all instances of net::NetworkTrafficAnnotationTag in |
+// given source code, extracts the location info and content of annotation tags |
+// (unique id and annotation text), and stores them in separate text files |
+// (per instance) in the given output directory. Please refer to README.md for |
+// build and usage instructions. |
+ |
+#include <errno.h> |
+#include <stdio.h> |
+#include <string.h> |
+ |
+#include <algorithm> |
+#include <fstream> |
+#include <memory> |
+#include <string> |
+#include <vector> |
+ |
+#include "clang/ASTMatchers/ASTMatchFinder.h" |
+#include "clang/ASTMatchers/ASTMatchers.h" |
+#include "clang/Basic/SourceManager.h" |
+#include "clang/Frontend/FrontendActions.h" |
+#include "clang/Lex/Lexer.h" |
+#include "clang/Tooling/CommonOptionsParser.h" |
+#include "clang/Tooling/Refactoring.h" |
+#include "clang/Tooling/Tooling.h" |
+#include "llvm/Support/CommandLine.h" |
+ |
+using namespace clang::ast_matchers; |
+ |
+namespace { |
+ |
+// An instance of network traffic annotation usage. This can be either |
+// a variable defined as NetworkTrafficAnnotationTag or a function that has |
+// a variable of this type as its input parameter, i.e., it can contain either |
+// of the following two 'foo' items: |
+//   NetworkTrafficAnnotationTag foo = ... |
+//   void foo(NetworkTrafficAnnotationTag bar) |
+struct NetworkAnnotationInstance { |
+  // Information about where this annotation or call has happened. |
+  struct Location { |
+    std::string file_path; |
+    // -1 until a real line number has been recorded. |
+    int line_number = -1; |
dcheng
2017/03/02 07:57:41
Consider using in-class member initializers. The c
Ramin Halavati
2017/04/06 13:32:29
Done.
|
+ |
+    // Name of the function including this instance. E.g., in the following |
+    // code, |function_name| will be 'foo': |
+    //   void foo() { NetworkTrafficAnnotationTag bar = ...; } |
+    std::string function_name; |
+ |
+    // Name of the variable that contains annotation or the function called |
+    // with annotation. E.g., in the following two code segments, |object_name| |
+    // will be 'bar': |
+    //   void foo() { NetworkTrafficAnnotationTag bar = ...; } |
+    //   void foo() { bar(baz); }  // baz is of type NetworkTrafficAnnotationTag. |
+    std::string object_name; |
+  }; |
+ |
+  // Annotation content. These are the parameters of a call to |
+  // DefineNetworkTrafficAnnotation. The unique_id is an identifier for the |
+  // annotation that has to be unique across the entire code base. The |text| |
+  // stores a RAW string with the annotation that should be extracted. |
+  struct Annotation { |
+    std::string unique_id; |
+    std::string text; |
+  }; |
+ |
+  // The two flags below are independent members on purpose. If they shared |
+  // storage (as members of a union would), writing |is_direct_call| would also |
+  // change the value read through |transitive_parameter|, so a call that |
+  // receives DefineNetworkTrafficAnnotation(...) directly would be mistaken |
+  // for a transitive parameter and dropped from the output. |
+  struct Flags { |
+    // When this structure is referring to a function with a parameter of type |
dcheng
2017/03/02 07:57:41
Nit: refering => referring
Ramin Halavati
2017/04/06 13:32:29
Done.
|
+    // NetworkTrafficAnnotationTag, |is_direct_call| variable is true if the |
+    // parameter is generated by a direct call to |
+    // DefineNetworkTrafficAnnotation and is false when the parameter is a |
+    // variable. For example, in the following code segment, it is true for |
+    // function 'foo' and false for function 'baz': |
+    // |
+    //   foo(DefineNetworkTrafficAnnotation(...)) |
+    //   NetworkTrafficAnnotationTag bar = DefineNetworkTrafficAnnotation(...) |
+    //   baz(bar); |
+    bool is_direct_call = false; |
+ |
+    // When this structure is referring to a variable, |transitive_parameter| |
+    // is false if variable is defined in the same function and is true when it |
+    // is passed to this function. For example, in the following code segment, |
+    // it is true for bar, and false for baz. |
+    // |
+    //   void foo(NetworkTrafficAnnotationTag bar) { |
+    //     NetworkTrafficAnnotationTag baz = |
+    //         DefineNetworkTrafficAnnotation(...); |
+    //   } |
+    bool transitive_parameter = false; |
+  }; |
+ |
+  Location location; |
+  Annotation annotation; |
+ |
+  // Possible error message (empty if no error). |
+  std::string error; |
+ |
+  // A reference to the variable containing annotation. Null if not available. |
+  const clang::NamedDecl* variable_reference = nullptr; |
+ |
+  Flags flag; |
+}; |
+ |
+// Accumulates every network traffic annotation usage found while the matchers |
+// run: annotation variables in one list, annotated function calls in another. |
+struct Collector { |
+  using InstanceList = std::vector<NetworkAnnotationInstance>; |
+ |
+  // Variables declared with the annotation tag type. |
+  InstanceList variable_definitions; |
+  // Calls to functions that take an annotation tag parameter. |
+  InstanceList calls; |
+}; |
+ |
+// Forward declaration; GetStatementCoveringFunction calls this before it is |
+// defined. Returns the name of the function enclosing |token|, or "Unknown". |
+std::string GetDeclarationCoveringFunction(const clang::Decl* token, |
+ clang::ASTContext* context); |
+ |
+// Returns true when |token| is exactly |name|, or |name| qualified with the |
+// "net::" namespace prefix. |
+bool StripNetNamespaceMatch(const std::string& token, const std::string& name) { |
+  if (token == name) |
+    return true; |
+ |
+  const std::string qualified_name = "net::" + name; |
+  return token == qualified_name; |
+} |
+ |
+// Returns the source text spelled for |token|, which may be a function name, |
+// a variable name, a string literal, etc. |
+std::string GetStmtText(const clang::Stmt* token, |
+                        const clang::SourceManager& source_manager) { |
+  clang::LangOptions lang_options; |
dcheng
2017/03/02 07:57:41
Nit: use the LangOptions from ASTContext.
Ramin Halavati
2017/04/06 13:32:29
Done.
|
+  // Resolve both ends of the token's range. A location that comes from a |
+  // macro is replaced by the location where the text is actually spelled. |
+  clang::SourceLocation begin_loc = token->getLocStart(); |
+  if (begin_loc.isMacroID()) |
+    begin_loc = source_manager.getSpellingLoc(begin_loc); |
+ |
+  clang::SourceLocation last_loc = token->getLocEnd(); |
+  if (last_loc.isMacroID()) |
+    last_loc = source_manager.getSpellingLoc(last_loc); |
+ |
+  // Move to the real end of the final token. |
+  const clang::SourceLocation end_loc = clang::Lexer::getLocForEndOfToken( |
+      last_loc, 0, source_manager, lang_options); |
+ |
+  // Copy the characters between the two locations. |
+  const char* text_begin = source_manager.getCharacterData(begin_loc); |
+  const char* text_end = source_manager.getCharacterData(end_loc); |
+  std::string spelled(text_begin, text_end); |
+ |
+  // For a raw string literal the range above only yields the "R" prefix. In |
+  // that case read the value from the literal node itself, looking through an |
+  // implicit cast if |token| is not the literal directly. |
+  if (spelled == "R") { |
+    const clang::StringLiteral* literal = |
+        clang::dyn_cast<clang::StringLiteral>(token); |
+    if (!literal) { |
+      if (const auto* cast = clang::dyn_cast<clang::ImplicitCastExpr>(token)) { |
+        literal = clang::dyn_cast<clang::StringLiteral>( |
+            cast->getSubExprAsWritten()); |
+      } |
+    } |
+    if (literal) |
+      return literal->getString(); |
+  } |
+ |
+  return spelled; |
+} |
+ |
+// Reads the unique id and the annotation text out of |call_expr| when it is a |
+// two-argument call to [net::]DefineNetworkTrafficAnnotation and stores them |
+// in |instance|. For any other call the annotation fields are cleared and |
+// |instance->error| is set. |
+void GetAnnotationText(const clang::CallExpr* call_expr, |
+                       const clang::SourceManager& source_manager, |
+                       NetworkAnnotationInstance* instance) { |
+  const std::string callee_text = |
+      GetStmtText(call_expr->getCallee(), source_manager); |
+  const bool is_annotation_definition = |
+      StripNetNamespaceMatch(callee_text, "DefineNetworkTrafficAnnotation") && |
+      call_expr->getNumArgs() == 2; |
+ |
+  if (!is_annotation_definition) { |
+    instance->annotation.unique_id = ""; |
+    instance->annotation.text = ""; |
+    instance->error = "Unexpected function."; |
+    return; |
+  } |
+ |
+  // First argument is the unique id, second is the annotation text. |
+  instance->annotation.unique_id = |
+      GetStmtText(call_expr->getArgs()[0], source_manager); |
dcheng
2017/03/02 07:57:41
It'd be ideal to take advantage of matcher binding
Ramin Halavati
2017/04/06 13:32:29
Done.
|
+  instance->annotation.text = |
+      GetStmtText(call_expr->getArgs()[1], source_manager); |
+  instance->error = ""; |
+} |
+ |
+// Returns the qualified name of the function that includes the given token. |
+// For example, if the token is the call to function bar() in the code |
+// "void foo() { bar(); }", it returns "foo". Returns "Unknown" when |token| |
+// does not have exactly one parent, or when that parent is neither a statement |
+// nor a declaration. |
+std::string GetStatementCoveringFunction(const clang::Stmt* token, |
+                                         clang::ASTContext* context) { |
+  // Get the parent of |token| and return its covering function. |
+  clang::ASTContext::DynTypedNodeList parents = context->getParents(*token); |
+ |
+  // Only walk up when there is exactly one parent. This must be '== 1': with |
+  // '!= 1' the ordinary single-parent case would fall through to "Unknown" |
+  // and an empty parent list would be indexed out of bounds. |
+  // TODO: Find out in which cases a node has a parent count other than one. |
+  if (parents.size() == 1) { |
+    if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) |
+      return GetStatementCoveringFunction(s, context); |
+    if (const clang::Decl* d = parents[0].get<clang::Decl>()) |
+      return GetDeclarationCoveringFunction(d, context); |
+  } |
+  return "Unknown"; |
+} |
+ |
+// Returns the qualified name of the function that includes the given token. |
+// For example, if the token is variable x in the code |
+// "void foo() { int x; ... }", it returns "foo". Returns "Unknown" when no |
+// enclosing function can be reached through a chain of single parents. |
+std::string GetDeclarationCoveringFunction(const clang::Decl* token, |
+                                           clang::ASTContext* context) { |
+  // A function declaration is its own answer. |
+  const auto* function = clang::dyn_cast<clang::FunctionDecl>(token); |
+  if (function) |
+    return function->getQualifiedNameAsString(); |
+ |
+  // Otherwise continue the search from the parent node. |
+  clang::ASTContext::DynTypedNodeList parent_nodes = |
+      context->getParents(*token); |
+ |
+  // TODO: Find out in which cases a node has a parent count other than one. |
+  if (parent_nodes.size() != 1) |
+    return "Unknown"; |
+ |
+  if (const clang::Stmt* parent_stmt = parent_nodes[0].get<clang::Stmt>()) |
+    return GetStatementCoveringFunction(parent_stmt, context); |
+  if (const clang::Decl* parent_decl = parent_nodes[0].get<clang::Decl>()) |
+    return GetDeclarationCoveringFunction(parent_decl, context); |
+ |
+  return "Unknown"; |
+} |
+ |
+// Looks up where |token| starts and records the file name and the spelling |
+// line number of that position in |location|. |
+template <class T> |
+void GetLocation(const T* token, |
+                 const clang::SourceManager& source_manager, |
+                 NetworkAnnotationInstance::Location* location) { |
+  const clang::SourceLocation token_start = token->getLocStart(); |
+  location->line_number = source_manager.getSpellingLineNumber(token_start); |
+  location->file_path = source_manager.getFilename(token_start); |
+} |
+ |
+// This class implements the callback functions for AST matchers. The matchers |
+// are defined in RunMatchers function and when a pattern is found there, |
+// the run function in this class is called back with information on the match |
+// location and description of the match pattern. Every match is converted to |
+// a NetworkAnnotationInstance and appended to the Collector given at |
+// construction; the Collector is not owned by this class. |
+class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback { |
+ public: |
+  explicit NetworkAnnotationTagCallback(Collector* collector) |
+      : collector_(collector) {} |
+  ~NetworkAnnotationTagCallback() override = default; |
+ |
+  // Is called on any pattern found by the AST matchers that are defined in |
+  // RunMatchers function. Dispatches on the name the node was bound to. |
+  void run(const MatchFinder::MatchResult& result) override { |
+    if (const clang::VarDecl* var_decl = |
+            result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) { |
+      AddVariable(var_decl, result); |
+    } else if (const clang::CallExpr* call_expr = |
+                   result.Nodes.getNodeAs<clang::CallExpr>("user_function")) { |
+      AddFunction(call_expr, result); |
+    } |
+  } |
+ |
+  // Stores an annotation variable definition. |
+  void AddVariable(const clang::VarDecl* var_decl, |
+                   const MatchFinder::MatchResult& result) { |
+    NetworkAnnotationInstance instance; |
+ |
+    GetLocation(var_decl, *result.SourceManager, &instance.location); |
+    instance.location.object_name = var_decl->getQualifiedNameAsString(); |
+    instance.variable_reference = var_decl; |
+ |
+    // Mark the instance as transitive parameter if it doesn't have |
+    // initialization in the function where it is defined and it is passed as a |
+    // parameter to the function. The flag is always written here so that it is |
+    // never read without having been set. |
+    const bool is_transitive_parameter = !var_decl->hasInit() && |
+                                         var_decl->isLocalVarDeclOrParm() && |
+                                         !var_decl->isLocalVarDecl(); |
+    instance.flag.transitive_parameter = is_transitive_parameter; |
+ |
+    if (!is_transitive_parameter) { |
+      // Otherwise, extract its content from the initializer, if it is a call. |
+      if (const clang::Expr* init_expr = var_decl->getInit()) { |
+        if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr)) |
+          GetAnnotationText(call_expr, *result.SourceManager, &instance); |
+      } |
+      // If nothing is set, issue an error. |
+      if (instance.annotation.unique_id.empty() && instance.error.empty()) |
+        instance.error = "Could not resolve variable initialization."; |
+    } |
+ |
+    collector_->variable_definitions.push_back(instance); |
+  } |
+ |
+  // Stores a function call that uses annotation variables. One instance is |
+  // stored for every parameter of the callee whose type is |
+  // [net::]NetworkTrafficAnnotationTag. |
+  void AddFunction(const clang::CallExpr* call_expr, |
+                   const MatchFinder::MatchResult& result) { |
+    const clang::FunctionDecl* function_decl = call_expr->getDirectCallee(); |
+    // Nothing can be extracted without knowing the callee's parameters. |
+    if (!function_decl) |
+      return; |
+ |
+    // Fields that are the same for every annotation parameter of this call. |
+    NetworkAnnotationInstance call_info; |
+    GetLocation(call_expr, *result.SourceManager, &call_info.location); |
+    call_info.location.function_name = |
+        GetStatementCoveringFunction(call_expr, result.Context); |
+    call_info.location.object_name = function_decl->getQualifiedNameAsString(); |
+ |
+    const unsigned params_count = function_decl->getNumParams(); |
+    const unsigned args_count = call_expr->getNumArgs(); |
+ |
+    for (unsigned i = 0; i < params_count; i++) { |
+      if (!StripNetNamespaceMatch( |
+              clang::QualType::getAsString( |
+                  function_decl->getParamDecl(i)->getType().split()), |
+              "NetworkTrafficAnnotationTag")) { |
+        continue; |
+      } |
+ |
+      // Start from a fresh copy so that nothing recorded for a previous |
+      // annotation parameter leaks into this one. |
+      NetworkAnnotationInstance instance = call_info; |
+ |
+      if (i >= args_count) { |
+        instance.error = "Function missing annotation argument."; |
+      } else { |
+        // Get the argument. |
+        const clang::Expr* arg = call_expr->getArgs()[i]; |
+ |
+        // Is it a call to annotate function? |
+        if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) { |
+          instance.flag.is_direct_call = true; |
+          // GetAnnotationText sets |instance.error| itself (empty on success), |
+          // so the error must not be cleared afterwards; doing so would hide |
+          // calls to anything other than DefineNetworkTrafficAnnotation. |
+          GetAnnotationText(inner_call_expr, *result.SourceManager, |
+                            &instance); |
+        } else { |
+          // Then it's a variable. |
+          instance.flag.is_direct_call = false; |
+          if (auto* pure_arg = |
+                  clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) { |
+            instance.variable_reference = pure_arg->getFoundDecl(); |
+            instance.error = ""; |
+          } else { |
+            instance.error = "Unknown parameter type."; |
dcheng
2017/03/02 07:57:41
Nit: unknown
Ramin Halavati
2017/04/06 13:32:29
Done.
|
+          } |
+        } |
+      } |
+      collector_->calls.push_back(instance); |
+    } |
+  } |
+ |
+ private: |
+  // Destination of all extracted instances. Not owned. |
+  Collector* collector_; |
+}; |
+ |
+// Registers the AST matchers for annotation variables and annotated function |
+// calls, runs |clang_tool| with them so that |collector| gets populated, and |
+// returns the result of running the clang tool. |
+int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) { |
+  // Pattern for variables defined with type |
+  // [net::]NetworkTrafficAnnotationTag. |
+  const DeclarationMatcher annotation_variable_matcher = |
+      varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")), |
dcheng
2017/03/02 07:57:41
Can you help me understand why we need both? Shoul
Ramin Halavati
2017/04/06 13:32:29
If the code has the line "using namespace net;", t
|
+                    hasType(asString("net::NetworkTrafficAnnotationTag")))) |
+          .bind("annotation_variable"); |
+ |
+  // Pattern for calls to functions that have a parameter of type |
+  // [net::]NetworkTrafficAnnotationTag. |
+  const StatementMatcher user_function_matcher = |
+      callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf( |
+                   hasType(asString("NetworkTrafficAnnotationTag")), |
+                   hasType(asString("net::NetworkTrafficAnnotationTag"))))))) |
+          .bind("user_function"); |
+ |
+  NetworkAnnotationTagCallback callback(collector); |
+  MatchFinder finder; |
+  finder.addMatcher(annotation_variable_matcher, &callback); |
+  finder.addMatcher(user_function_matcher, &callback); |
+ |
+  std::unique_ptr<clang::tooling::FrontendActionFactory> action_factory = |
+      clang::tooling::newFrontendActionFactory(&finder); |
+  return clang_tool->run(action_factory.get()); |
+} |
+ |
+} // namespace |
+ |
+// Entry point. Expects the output directory as the fifth command line token |
+// (argv[4]); the tokens before it are handed to the clang option parser. |
+// Writes one text file per annotated call into the output directory, each |
+// holding, one per line: file path, covering function, line number, called |
+// function, error text, unique id and annotation text. |
+// Returns 0 on success, -1 when the output directory is missing, the clang |
+// tool's result when it fails, and 1 when an output file cannot be written. |
+int main(int argc, const char* argv[]) { |
+  // Find output directory. |
+  if (argc < 5) { |
+    llvm::errs() << "Output files directory is not specified."; |
+    return -1; |
+  } |
+  const std::string output_dir(argv[4]); |
+ |
+  // Keep the consumed parameter from being passed to clang parser. |
dcheng
2017/03/02 07:57:41
Maybe just make this a proper flag?
Ramin Halavati
2017/04/06 13:32:29
Done.
|
+  argc = 4; |
+ |
+  llvm::cl::OptionCategory category("Network Request Audit Extractor Tool"); |
+  clang::tooling::CommonOptionsParser options(argc, argv, category); |
+  clang::tooling::ClangTool tool(options.getCompilations(), |
+                                 options.getSourcePathList()); |
+  Collector collector; |
+ |
+  const int result = RunMatchers(&tool, &collector); |
+ |
+  if (result != 0) |
+    return result; |
+ |
+  for (NetworkAnnotationInstance& call : collector.calls) { |
+    // Whether the annotation only passes through the calling function. This is |
+    // tracked in a local instead of being read back from |call.flag|: a direct |
+    // call never sets |transitive_parameter|, so reading that member for it |
+    // would not give a meaningful answer. |
+    bool is_transitive = false; |
+ |
+    // For each call, if the parameter is not generated by a direct call to |
+    // "DefineNetworkTrafficAnnotation", find the variable that holds the |
+    // value. |
+    if (!call.flag.is_direct_call) { |
+      const auto variables_end = collector.variable_definitions.end(); |
+      const auto variable = std::find_if( |
+          collector.variable_definitions.begin(), variables_end, |
+          [&call](const NetworkAnnotationInstance& candidate) { |
+            return candidate.variable_reference == call.variable_reference; |
+          }); |
+ |
+      if (variable != variables_end) { |
+        call.annotation = variable->annotation; |
+        is_transitive = variable->flag.transitive_parameter; |
+        call.error = call.error + (call.error.length() ? "\n+" : "") + |
+                     variable->error; |
+      } else if (call.error.empty()) { |
+        // Reported only when the lookup really failed and no more specific |
+        // error has been recorded for this call. |
+        call.error = "Variable not found."; |
+      } |
+    } |
+ |
+    // If the function just receives the variable and passes it to another |
+    // function, ignore it, otherwise write it to file. |
+    if (is_transitive) |
+      continue; |
+ |
+    // The output file is named after the source path (with '/' and '.' |
+    // replaced by '_') followed by the line number in parentheses. |
+    std::string s = call.location.file_path; |
+    std::replace(s.begin(), s.end(), '/', '_'); |
+    std::replace(s.begin(), s.end(), '.', '_'); |
+    const std::string file_path = output_dir + "/" + s + "(" + |
+                                  std::to_string(call.location.line_number) + |
+                                  ").txt"; |
+ |
+    std::ofstream output_file(file_path); |
dcheng
2017/03/02 07:57:41
How are reads/writes to this file synchronized? If
Ramin Halavati
2017/04/06 13:32:29
Comment updated in refactored source:
"For each ca
|
+    if (!output_file.is_open()) { |
+      llvm::errs() << "Could not write to file: " << file_path << " because " |
+                   << strerror(errno) << "\n"; |
+      return 1; |
+    } |
+ |
+    output_file << call.location.file_path << "\n"; |
+    output_file << call.location.function_name << "\n"; |
+    output_file << call.location.line_number << "\n"; |
+    output_file << call.location.object_name << "\n"; |
+    output_file << call.error << "\n"; |
+    output_file << call.annotation.unique_id << "\n"; |
+    output_file << call.annotation.text << "\n"; |
+    output_file.close(); |
+  } |
+ |
+  return 0; |
+} |