Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Unified Diff: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp

Issue 2448133006: Tool added to extract network traffic annotations. (Closed)
Patch Set: Comments addressed. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/clang/traffic_annotation_extractor/README.md ('k') | tools/traffic_annotation/auditor/BUILD.gn » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp
diff --git a/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..03b3ecdee731e29a111568d9f741c4e7bfb6e5be
--- /dev/null
+++ b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp
@@ -0,0 +1,435 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This clang tool finds all instances of net::NetworkTrafficAnnotationTag in
+// given source code, extracts the location info and content of annotation tags
+// (unique id and annotation text), and stores them in separate text files
+// (per instance) in the given output directory. Please refer to README.md for
+// build and usage instructions.
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <algorithm>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Refactoring.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace clang::ast_matchers;
+
+namespace {
+
+// An instance of network traffic annotation usage. This can be either
+// a variable defined as NetworkTrafficAnnotationTag or a function that has
+// a variable of this type as its input parameter, i.e., it can contain either
+// of the following two 'foo' items:
+//   NetworkTrafficAnnotationTag foo = ...
+//   void foo(NetworkTrafficAnnotationTag bar)
+struct NetworkAnnotationInstance {
+  // Information about where this annotation or call has happened.
+  struct Location {
dcheng 2017/03/02 07:57:41 Consider using in-class member initializers. The c
Ramin Halavati 2017/04/06 13:32:29 Done.
+    std::string file_path;
+
+    // Stays -1 until a location has been recorded.
+    int line_number = -1;
+
+    // Name of the function including this instance. E.g., in the following
+    // code, |function_name| will be 'foo':
+    //   void foo() { NetworkTrafficAnnotationTag bar = ...; }
+    std::string function_name;
+
+    // Name of the variable that contains annotation or the function called
+    // with annotation. E.g., in the following two code segments, |object_name|
+    // will be 'bar':
+    //   void foo() { NetworkTrafficAnnotationTag bar = ...; }
+    //   void foo() { bar(baz); }  // baz is of type NetworkTrafficAnnotationTag.
+    std::string object_name;
+  };
+
+  // Annotation content. These are the parameters of a call to
+  // DefineNetworkTrafficAnnotation. The unique_id is an identifier for the
+  // annotation that has to be unique across the entire code base. The |text|
+  // stores a RAW string with the annotation that should be extracted.
+  struct Annotation {
+    std::string unique_id;
+    std::string text;
+  };
+
+  Location location;
+  Annotation annotation;
+
+  // Possible error message (empty if no error).
+  std::string error;
+
+  // A reference to the variable containing annotation. Null if not available.
+  const clang::NamedDecl* variable_reference = nullptr;
+
+  // The two flags below are deliberately independent members (not a union):
+  // code that inspects |transitive_parameter| on an instance describing a call
+  // must not read back the value stored in |is_direct_call|, and vice versa.
+  struct {
+    // When this structure is referring to a function with a parameter of type
dcheng 2017/03/02 07:57:41 Nit: refering => referring
Ramin Halavati 2017/04/06 13:32:29 Done.
+    // NetworkTrafficAnnotationTag, |is_direct_call| variable is true if the
+    // parameter is generated by a direct call to DefineNetworkTrafficAnnotation
+    // and is false when the parameter is a variable. For example, in the
+    // following code segment, it is true for function 'foo' and false for
+    // function 'baz':
+    //
+    //   foo(DefineNetworkTrafficAnnotation(...))
+    //   NetworkTrafficAnnotationTag bar = DefineNetworkTrafficAnnotation(...)
+    //   baz(bar);
+    bool is_direct_call = false;
+
+    // When this structure is referring to a variable, |transitive_parameter| is
+    // false if variable is defined in the same function and is true when it is
+    // passed to this function. For example, in the following code segment, it
+    // is true for bar, and false for baz.
+    //
+    //   void foo(NetworkTrafficAnnotationTag bar) {
+    //     NetworkTrafficAnnotationTag baz = DefineNetworkTrafficAnnotation(...);
+    //   }
+    bool transitive_parameter = false;
+  } flag;
+};
+
+// Accumulates every network traffic annotation usage found by the matchers.
+struct Collector {
+ std::vector<NetworkAnnotationInstance> variable_definitions; // Appended to by AddVariable.
+ std::vector<NetworkAnnotationInstance> calls; // Appended to by AddFunction.
+};
+
+// Forward declaration: this function and GetStatementCoveringFunction call each
+// other. Returns the enclosing function, e.g. "foo" for x in "void foo() { int x; }".
+std::string GetDeclarationCoveringFunction(const clang::Decl* token,
+ clang::ASTContext* context);
+
+// Returns true when |token| is exactly |name| or |name| qualified with the
+// net:: namespace.
+bool StripNetNamespaceMatch(const std::string& token, const std::string& name) {
+  if (token == name)
+    return true;
+  const std::string qualified_name = "net::" + name;
+  return token == qualified_name;
+}
+
+// Returns the source text spanned by |token| (function name, variable name,
+// string literal, etc.), read from the buffers owned by |source_manager|.
+std::string GetStmtText(const clang::Stmt* token,
+ const clang::SourceManager& source_manager) {
+ clang::LangOptions lopt; // Default-constructed; only passed to the lexer below.
dcheng 2017/03/02 07:57:41 Nit: use the LangOptions from ASTContext.
Ramin Halavati 2017/04/06 13:32:29 Done.
+ // Start and end locations reported for the statement.
+ clang::SourceLocation start = token->getLocStart();
+ clang::SourceLocation end = token->getLocEnd();
+
+ // Locations that lie inside a macro are replaced by their spelling location.
+ if (start.isMacroID())
+ start = source_manager.getSpellingLoc(start);
+ if (end.isMacroID())
+ end = source_manager.getSpellingLoc(end);
+
+ // Move |end| to the end of the token it points at, so that token is included.
+ end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt);
+
+ // Copy the characters between the two locations.
+ std::string output(source_manager.getCharacterData(start),
+ source_manager.getCharacterData(end));
+
+ // If |token| is a raw string literal, the above code just returns the "R"
+ // part of it; any other text is final and returned as is.
+ if (output != "R")
+ return output;
+
+ if (auto* literal = clang::dyn_cast<clang::StringLiteral>(token))
+ return literal->getString();
+
+ if (auto* implicit_cast = clang::dyn_cast<clang::ImplicitCastExpr>(token)) {
+ if (const clang::StringLiteral* implicit_literal =
+ clang::dyn_cast<clang::StringLiteral>(
+ implicit_cast->getSubExprAsWritten())) {
+ return implicit_literal->getString();
+ }
+ }
+
+ return output;
+}
+
+// Fills |instance->annotation| from a two-argument call whose callee text is
+// [net::]DefineNetworkTrafficAnnotation; otherwise clears it and sets the error.
+void GetAnnotationText(const clang::CallExpr* call_expr,
+ const clang::SourceManager& source_manager,
+ NetworkAnnotationInstance* instance) {
+ if (StripNetNamespaceMatch(
+ GetStmtText(call_expr->getCallee(), source_manager),
+ "DefineNetworkTrafficAnnotation") &&
+ call_expr->getNumArgs() == 2) {
+ instance->annotation.unique_id =
+ GetStmtText(call_expr->getArgs()[0], source_manager);
dcheng 2017/03/02 07:57:41 It'd be ideal to take advantage of matcher binding
Ramin Halavati 2017/04/06 13:32:29 Done.
+ instance->annotation.text =
+ GetStmtText(call_expr->getArgs()[1], source_manager);
+ instance->error = "";
+ } else {
+ instance->annotation.unique_id = "";
+ instance->annotation.text = "";
+ instance->error = "Unexpected function.";
+ }
+}
+
+// Returns the function that includes the given token. For example, if the token
+// is the call to function bar() in the code "void foo() { bar(); }", it returns
+// "foo". Returns "Unknown" when the token does not have exactly one parent or
+// the parent is neither a statement nor a declaration.
+std::string GetStatementCoveringFunction(const clang::Stmt* token,
+                                         clang::ASTContext* context) {
+  // Get the parent of |token| and return its covering function.
+  clang::ASTContext::DynTypedNodeList parents = context->getParents(*token);
+
+  // TODO: What exactly != 1 parent mean? I've not encountered any case that
+  // this value would be non-one.
+  // Only index |parents| when it holds exactly one entry, matching
+  // GetDeclarationCoveringFunction; this also keeps parents[0] in bounds.
+  if (parents.size() == 1) {
+    if (const clang::Stmt* s = parents[0].get<clang::Stmt>())
+      return GetStatementCoveringFunction(s, context);
+    else if (const clang::Decl* d = parents[0].get<clang::Decl>())
+      return GetDeclarationCoveringFunction(d, context);
+  }
+  return "Unknown";
+}
+
+// Walks up the AST from |token| until a function declaration is reached and
+// returns that function's qualified name; e.g. for variable x in
+// "void foo() { int x; ... }" the result is "foo". "Unknown" is returned when
+// the walk cannot continue.
+std::string GetDeclarationCoveringFunction(const clang::Decl* token,
+                                           clang::ASTContext* context) {
+  // A function declaration is its own covering function.
+  if (auto function_decl = clang::dyn_cast<clang::FunctionDecl>(token))
+    return function_decl->getQualifiedNameAsString();
+
+  // Otherwise continue with the parent node of |token|.
+  clang::ASTContext::DynTypedNodeList parents = context->getParents(*token);
+
+  // TODO: What exactly != 1 parent mean? I've not encountered any case that
+  // this value would be non-one.
+  if (parents.size() != 1)
+    return "Unknown";
+
+  const auto& parent = parents[0];
+  if (const clang::Stmt* parent_stmt = parent.get<clang::Stmt>())
+    return GetStatementCoveringFunction(parent_stmt, context);
+  if (const clang::Decl* parent_decl = parent.get<clang::Decl>())
+    return GetDeclarationCoveringFunction(parent_decl, context);
+  return "Unknown";
+}
+
+// Records in |location| the file name and spelling line number of the start
+// of |token|. T only needs to provide getLocStart().
+template <class T>
+void GetLocation(const T* token,
+                 const clang::SourceManager& source_manager,
+                 NetworkAnnotationInstance::Location* location) {
+  const clang::SourceLocation token_start = token->getLocStart();
+  location->line_number = source_manager.getSpellingLineNumber(token_start);
+  location->file_path = source_manager.getFilename(token_start);
+}
+
+// This class implements the call back functions for AST Matchers. The matchers
+// are defined in RunMatchers function and when a pattern is found there,
+// the run function in this class is called back with information on the match
+// location and description of the match pattern. Every match is appended to
+// the Collector passed to the constructor, which is not owned by this class.
+class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback {
+ public:
+  explicit NetworkAnnotationTagCallback(Collector* collector)
+      : collector_(collector) {}
+  ~NetworkAnnotationTagCallback() override = default;
+
+  // Is called on any pattern found by ASTMatchers that are defined in
+  // RunMatchers function. Dispatches on the name the matched node was bound to.
+  void run(const MatchFinder::MatchResult& result) override {
+    if (const clang::VarDecl* var_decl =
+            result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) {
+      AddVariable(var_decl, result);
+    } else if (const clang::CallExpr* call_expr =
+                   result.Nodes.getNodeAs<clang::CallExpr>("user_function")) {
+      AddFunction(call_expr, result);
+    }
+  }
+
+  // Stores an annotation variable definition in
+  // |collector_->variable_definitions|.
+  void AddVariable(const clang::VarDecl* var_decl,
+                   const MatchFinder::MatchResult& result) {
+    NetworkAnnotationInstance instance;
+
+    GetLocation(var_decl, *result.SourceManager, &instance.location);
+    instance.location.object_name = var_decl->getQualifiedNameAsString();
+    instance.variable_reference = var_decl;
+
+    // This instance describes a variable, so |transitive_parameter| is the
+    // flag that applies; give it an explicit starting value before it is read.
+    instance.flag.transitive_parameter = false;
+
+    // Mark the instance as transitive parameter if it doesn't have
+    // initialization in the function where it is defined and it is passed as a
+    // parameter to the function. Otherwise, extract its content.
+    if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() &&
+        !var_decl->isLocalVarDecl()) {
+      instance.flag.transitive_parameter = true;
+    } else if (auto* init_expr = var_decl->getInit()) {
+      if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr))
+        GetAnnotationText(call_expr, *result.SourceManager, &instance);
+    }
+    // If nothing is set, issue an error.
+    if (!instance.flag.transitive_parameter &&
+        instance.annotation.unique_id.empty() && instance.error.empty()) {
+      instance.error = "Could not resolve variable initialization.";
+    }
+
+    collector_->variable_definitions.push_back(instance);
+  }
+
+  // Stores a function call that uses annotation variables. One entry is added
+  // to |collector_->calls| for every parameter of the callee whose type is
+  // [net::]NetworkTrafficAnnotationTag.
+  void AddFunction(const clang::CallExpr* call_expr,
+                   const MatchFinder::MatchResult& result) {
+    const clang::FunctionDecl* function_decl = call_expr->getDirectCallee();
+    // Nothing can be inspected without the callee's declaration.
+    if (!function_decl)
+      return;
+
+    // Location data shared by every annotation parameter of this call.
+    NetworkAnnotationInstance call_site;
+    GetLocation(call_expr, *result.SourceManager, &call_site.location);
+    call_site.location.function_name =
+        GetStatementCoveringFunction(call_expr, result.Context);
+    call_site.location.object_name = function_decl->getQualifiedNameAsString();
+
+    const unsigned params_count = function_decl->getNumParams();
+    const unsigned args_count = call_expr->getNumArgs();
+
+    for (unsigned i = 0; i < params_count; i++) {
+      if (!StripNetNamespaceMatch(
+              clang::QualType::getAsString(
+                  function_decl->getParamDecl(i)->getType().split()),
+              "NetworkTrafficAnnotationTag")) {
+        continue;
+      }
+
+      // Start from a clean copy so that the annotation, error or variable
+      // reference of a previous parameter of the same call is not carried
+      // over into this entry.
+      NetworkAnnotationInstance instance = call_site;
+
+      if (i >= args_count) {
+        instance.error = "Function missing annotation argument.";
+      } else {
+        // Get the argument.
+        const clang::Expr* arg = call_expr->getArgs()[i];
+
+        // Is it a call to annotate function?
+        if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) {
+          instance.flag.is_direct_call = true;
+          // GetAnnotationText sets |instance.error| itself (empty on success);
+          // it must not be cleared afterwards or a failure would be lost.
+          GetAnnotationText(inner_call_expr, *result.SourceManager, &instance);
+        } else {
+          // Then it's a variable.
+          instance.flag.is_direct_call = false;
+          if (auto* pure_arg =
+                  clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) {
+            instance.variable_reference = pure_arg->getFoundDecl();
+          } else {
+            instance.error = "Unknown parameter type.";
dcheng 2017/03/02 07:57:41 Nit: unknown
Ramin Halavati 2017/04/06 13:32:29 Done.
+          }
+        }
+      }
+      collector_->calls.push_back(instance);
+    }
+  }
+
+ private:
+  // Destination of all collected instances; not owned.
+  Collector* collector_;
+};
+
+// Registers the two matchers below with a callback that fills |collector|, runs
+// |clang_tool| with them, and returns the value returned by clang_tool->run().
+int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) {
+ NetworkAnnotationTagCallback call_back(collector);
+ MatchFinder match_finder;
+
+ // Variable declarations whose type is spelled with or without the net::
+ // qualifier; bound as "annotation_variable" for the callback.
+ match_finder.addMatcher(
+ varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")),
dcheng 2017/03/02 07:57:41 Can you help me understand why we need both? Shoul
Ramin Halavati 2017/04/06 13:32:29 If the code has the line "using namespace net;", t
+ hasType(asString("net::NetworkTrafficAnnotationTag"))))
+ .bind("annotation_variable"),
+ &call_back);
+
+ // Calls to functions that have at least one parameter of that type, again
+ // with either spelling; bound as "user_function" for the callback.
+ match_finder.addMatcher(
+ callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf(
+ hasType(asString("NetworkTrafficAnnotationTag")),
+ hasType(asString("net::NetworkTrafficAnnotationTag")))))))
+ .bind("user_function"),
+ &call_back);
+
+ std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory =
+ clang::tooling::newFrontendActionFactory(&match_finder);
+ return clang_tool->run(frontend_factory.get());
+}
+
+} // namespace
+
+// Runs the matchers over the given sources and writes one text file per
+// collected call into the output directory. Returns -1 when the output
+// directory is missing from the command line, the clang tool's result when it
+// is non-zero, 1 when an output file cannot be opened, and 0 otherwise.
+int main(int argc, const char* argv[]) {
+  // The output directory is read from argv[4]; the entries before it are
+  // handed to the clang options parser.
+  if (argc < 5) {
+    llvm::errs() << "Output files directory is not specified.";
+    return -1;
+  }
+  std::string output_dir(argv[4]);
+
+  // Keep the consumed parameter from being passed to clang parser.
dcheng 2017/03/02 07:57:41 Maybe just make this a proper flag?
Ramin Halavati 2017/04/06 13:32:29 Done.
+  argc = 4;
+
+  llvm::cl::OptionCategory category("Network Request Audit Extractor Tool");
+  clang::tooling::CommonOptionsParser options(argc, argv, category);
+  clang::tooling::ClangTool tool(options.getCompilations(),
+                                 options.getSourcePathList());
+  Collector collector;
+
+  int result = RunMatchers(&tool, &collector);
+
+  if (result != 0)
+    return result;
+
+  for (NetworkAnnotationInstance& call : collector.calls) {
+    // Whether the annotation merely passes through the enclosing function.
+    // This is tracked in a local rather than in |call.flag|: for a call only
+    // |is_direct_call| is meaningful, and reading |transitive_parameter| from
+    // it would yield the direct-call value and skip every direct call.
+    bool is_transitive = false;
+
+    // For each call, if the parameter is not generated by a direct call to
+    // "DefineNetworkTrafficAnnotation", find the variable that holds the value.
+    if (!call.flag.is_direct_call) {
+      bool variable_found = false;
+      for (const NetworkAnnotationInstance& var :
+           collector.variable_definitions) {
+        if (var.variable_reference != call.variable_reference)
+          continue;
+        call.annotation = var.annotation;
+        is_transitive = var.flag.transitive_parameter;
+        if (!call.error.empty())
+          call.error += "\n+";
+        call.error += var.error;
+        variable_found = true;
+        break;
+      }
+      // Report a missing variable only when that is really what happened and
+      // no more specific error has been recorded for this call already.
+      if (!variable_found && call.error.empty())
+        call.error = "Variable not found.";
+    }
+
+    // If the function just receives the variable and passes it to another
+    // function, ignore it, otherwise write it to file.
+    if (is_transitive)
+      continue;
+
+    // The file name is derived from the source path and line number.
+    std::string s = call.location.file_path;
+    std::replace(s.begin(), s.end(), '/', '_');
+    std::replace(s.begin(), s.end(), '.', '_');
+    std::string file_path = output_dir + "/" + s + "(" +
+                            std::to_string(call.location.line_number) +
+                            ").txt";
+
+    std::ofstream output_file(file_path);
dcheng 2017/03/02 07:57:41 How are reads/writes to this file synchronized? If
Ramin Halavati 2017/04/06 13:32:29 Comment updated in refactored source: "For each ca
+    if (!output_file.is_open()) {
+      llvm::errs() << "Could not write to file: " << file_path << " because "
+                   << strerror(errno) << "\n";
+      return 1;
+    }
+    output_file << call.location.file_path << "\n";
+    output_file << call.location.function_name << "\n";
+    output_file << call.location.line_number << "\n";
+    output_file << call.location.object_name << "\n";
+    output_file << call.error << "\n";
+    output_file << call.annotation.unique_id << "\n";
+    output_file << call.annotation.text << "\n";
+    output_file.close();
+  }
+
+  return 0;
+}
« no previous file with comments | « tools/clang/traffic_annotation_extractor/README.md ('k') | tools/traffic_annotation/auditor/BUILD.gn » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698