 Chromium Code Reviews
 Chromium Code Reviews Issue 2448133006:
  Tool added to extract network traffic annotations.  (Closed)
    
  
    Issue 2448133006:
  Tool added to extract network traffic annotations.  (Closed) 
  | Index: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp | 
| diff --git a/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..03b3ecdee731e29a111568d9f741c4e7bfb6e5be | 
| --- /dev/null | 
| +++ b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp | 
| @@ -0,0 +1,435 @@ | 
| +// Copyright 2016 The Chromium Authors. All rights reserved. | 
| +// Use of this source code is governed by a BSD-style license that can be | 
| +// found in the LICENSE file. | 
| + | 
| +// This clang tool finds all instances of net::NetworkTrafficAnnotationTag in | 
| +// given source code, extracts the location info and content of annotation tags | 
| +// (unique id and annotation text), and stores them in separate text files | 
| +// (per instance) in the given output directory. Please refer to README.md for | 
| +// build and usage instructions. | 
| + | 
| +#include <stdio.h> | 
| +#include <fstream> | 
| +#include <memory> | 
| + | 
| +#include "clang/ASTMatchers/ASTMatchFinder.h" | 
| +#include "clang/ASTMatchers/ASTMatchers.h" | 
| +#include "clang/Basic/SourceManager.h" | 
| +#include "clang/Frontend/FrontendActions.h" | 
| +#include "clang/Lex/Lexer.h" | 
| +#include "clang/Tooling/CommonOptionsParser.h" | 
| +#include "clang/Tooling/Refactoring.h" | 
| +#include "clang/Tooling/Tooling.h" | 
| +#include "llvm/Support/CommandLine.h" | 
| + | 
| +using namespace clang::ast_matchers; | 
| + | 
| +namespace { | 
| + | 
// An instance of network traffic annotation usage. This can be either
// a variable defined as NetworkTrafficAnnotationTag or a function that has
// a variable of this type as its input parameter, i.e., it can contain either
// of the following two 'foo' items:
//   NetworkTrafficAnnotationTag foo = ...
//   void foo(NetworkTrafficAnnotationTag bar)
//
// All members carry in-class initializers, so a default-constructed instance
// has a line number of -1, no variable reference and both flags cleared.
struct NetworkAnnotationInstance {
  // Information about where this annotation or call has happened.
  struct Location {
    std::string file_path;
    int line_number = -1;

    // Name of the function including this instance. E.g., in the following
    // code, |function_name| will be 'foo':
    //   void foo() { NetworkTrafficAnnotationTag bar = ...; }
    std::string function_name;

    // Name of the variable that contains annotation or the function called
    // with annotation. E.g., in the following two code segments, |object_name|
    // will be 'bar':
    //   void foo() { NetworkTrafficAnnotationTag bar = ...; }
    //   void foo() { bar(baz);} // baz is of type NetworkTrafficAnnotationTag.
    std::string object_name;
  };

  // Annotation content. These are the parameters of a call to
  // DefineNetworkTrafficAnnotation. The unique_id is an identifier for the
  // annotation that has to be unique across the entire code base. The |text|
  // stores a RAW string with the annotation that should be extracted.
  struct Annotation {
    std::string unique_id;
    std::string text;
  };

  Location location;
  Annotation annotation;

  // Possible error message (empty if no error).
  std::string error;

  // A reference to the variable containing annotation. Null if not available.
  const clang::NamedDecl* variable_reference = nullptr;

  // The two flags are deliberately separate fields and not members of a
  // union: main() consults both |is_direct_call| and |transitive_parameter|
  // on the same call instance, so they must not share storage (otherwise a
  // direct call would also read as a transitive parameter).
  struct Flags {
    // When this structure is referring to a function with a parameter of type
    // NetworkTrafficAnnotationTag, |is_direct_call| is true if the parameter
    // is generated by a direct call to DefineNetworkTrafficAnnotation and is
    // false when the parameter is a variable. For example, in the following
    // code segment, it is true for function 'foo' and false for function
    // 'baz':
    //
    //   foo(DefineNetworkTrafficAnnotation(...))
    //   NetworkTrafficAnnotationTag bar = DefineNetworkTrafficAnnotation(...)
    //   baz(bar);
    bool is_direct_call = false;

    // When this structure is referring to a variable, |transitive_parameter|
    // is false if the variable is defined in the same function and is true
    // when it is passed to this function. For example, in the following code
    // segment, it is true for bar, and false for baz.
    //
    //   void foo(NetworkTrafficAnnotationTag bar) {
    //     NetworkTrafficAnnotationTag baz = DefineNetworkTrafficAnnotation(...);
    //   }
    bool transitive_parameter = false;
  } flag;
};
| + | 
| +// Structure to collect instances of network traffic annotation usages. | 
| +struct Collector { | 
| + std::vector<NetworkAnnotationInstance> variable_definitions; | 
| + std::vector<NetworkAnnotationInstance> calls; | 
| +}; | 
| + | 
| +// Returns the function that includes the given token. For example, if the token | 
| +// is variable x in the code "void foo() { int x; ... }", it returns "foo". | 
| +std::string GetDeclarationCoveringFunction(const clang::Decl* token, | 
| + clang::ASTContext* context); | 
| + | 
// Returns true when |token| spells |name| exactly, either unqualified or with
// a leading "net::" qualifier.
bool StripNetNamespaceMatch(const std::string& token, const std::string& name) {
  if (token == name)
    return true;

  const std::string qualified_name = "net::" + name;
  return token == qualified_name;
}
| + | 
| +// Returns the source code of a given token, like function name, variable name, | 
| +// string literal, etc. | 
| +std::string GetStmtText(const clang::Stmt* token, | 
| + const clang::SourceManager& source_manager) { | 
| + clang::LangOptions lopt; | 
| 
dcheng
2017/03/02 07:57:41
Nit: use the LangOptions from ASTContext.
 
Ramin Halavati
2017/04/06 13:32:29
Done.
 | 
| + // Get text range. | 
| + clang::SourceLocation start = token->getLocStart(); | 
| + clang::SourceLocation end = token->getLocEnd(); | 
| + | 
| + // If it's a macro, go to definition. | 
| + if (start.isMacroID()) | 
| + start = source_manager.getSpellingLoc(start); | 
| + if (end.isMacroID()) | 
| + end = source_manager.getSpellingLoc(end); | 
| + | 
| + // Get the real end of the token. | 
| + end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt); | 
| + | 
| + // Extract text. | 
| + std::string output(source_manager.getCharacterData(start), | 
| + source_manager.getCharacterData(end)); | 
| + | 
| + // If |token| is a raw string literal, the above code just returns the "R" | 
| + // part of it. | 
| + if (output != "R") | 
| + return output; | 
| + | 
| + if (auto* literal = clang::dyn_cast<clang::StringLiteral>(token)) | 
| + return literal->getString(); | 
| + | 
| + if (auto* implicit_cast = clang::dyn_cast<clang::ImplicitCastExpr>(token)) { | 
| + if (const clang::StringLiteral* implicit_literal = | 
| + clang::dyn_cast<clang::StringLiteral>( | 
| + implicit_cast->getSubExprAsWritten())) { | 
| + return implicit_literal->getString(); | 
| + } | 
| + } | 
| + | 
| + return output; | 
| +} | 
| + | 
| +// Extracts unique id and annotation text of a call to | 
| +// "DefineNetworkTrafficAnnotation" function. Sets the error text if fails. | 
| +void GetAnnotationText(const clang::CallExpr* call_expr, | 
| + const clang::SourceManager& source_manager, | 
| + NetworkAnnotationInstance* instance) { | 
| + if (StripNetNamespaceMatch( | 
| + GetStmtText(call_expr->getCallee(), source_manager), | 
| + "DefineNetworkTrafficAnnotation") && | 
| + call_expr->getNumArgs() == 2) { | 
| + instance->annotation.unique_id = | 
| + GetStmtText(call_expr->getArgs()[0], source_manager); | 
| 
dcheng
2017/03/02 07:57:41
It'd be ideal to take advantage of matcher binding
 
Ramin Halavati
2017/04/06 13:32:29
Done.
 | 
| + instance->annotation.text = | 
| + GetStmtText(call_expr->getArgs()[1], source_manager); | 
| + instance->error = ""; | 
| + } else { | 
| + instance->annotation.unique_id = ""; | 
| + instance->annotation.text = ""; | 
| + instance->error = "Unexpected function."; | 
| + } | 
| +} | 
| + | 
| +// Returns the function that includes the given token. For example, if the token | 
| +// is the call to function bar() in the code "void foo() { bar(); }", it returns | 
| +// "foo". | 
| +std::string GetStatementCoveringFunction(const clang::Stmt* token, | 
| + clang::ASTContext* context) { | 
| + // Get the parent of |token| and return its covering function. | 
| + clang::ASTContext::DynTypedNodeList parents = context->getParents(*token); | 
| + | 
| + // TODO: What exactly != 1 parent mean? I've not encountered any case that | 
| + // this value would be non-one. | 
| + if (parents.size() != 1) { | 
| + if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) | 
| + return GetStatementCoveringFunction(s, context); | 
| + else if (const clang::Decl* d = parents[0].get<clang::Decl>()) | 
| + return GetDeclarationCoveringFunction(d, context); | 
| + } | 
| + return "Unknown"; | 
| +} | 
| + | 
| +// Returns the function that includes the given token. For example, if the token | 
| +// is variable x in the code "void foo() { int x; ... }", it returns "foo". | 
| +std::string GetDeclarationCoveringFunction(const clang::Decl* token, | 
| + clang::ASTContext* context) { | 
| + // If |token| is a function declaration, return its name. | 
| + if (auto f = clang::dyn_cast<clang::FunctionDecl>(token)) | 
| + return f->getQualifiedNameAsString(); | 
| + | 
| + // As |token| is not a function declaration, get its parent and return its | 
| + // covering function. | 
| + clang::ASTContext::DynTypedNodeList parents = context->getParents(*token); | 
| + | 
| + // TODO: What exactly != 1 parent mean? I've not encountered any case that | 
| + // this value would be non-one. | 
| + if (parents.size() == 1) { | 
| + if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) | 
| + return GetStatementCoveringFunction(s, context); | 
| + else if (const clang::Decl* d = parents[0].get<clang::Decl>()) | 
| + return GetDeclarationCoveringFunction(d, context); | 
| + } | 
| + return "Unknown"; | 
| +} | 
| + | 
| +// Finds file name and line number of the given token and writes it into | 
| +// |location|. | 
| +template <class T> | 
| +void GetLocation(const T* token, | 
| + const clang::SourceManager& source_manager, | 
| + NetworkAnnotationInstance::Location* location) { | 
| + clang::SourceLocation source_location = token->getLocStart(); | 
| + location->file_path = source_manager.getFilename(source_location); | 
| + location->line_number = source_manager.getSpellingLineNumber(source_location); | 
| +} | 
| + | 
| +// This class implements the call back functions for AST Matchers. The matchers | 
| +// are defined in RunMatchers function and when a pattern is found there, | 
| +// the run function in this class is called back with information on the match | 
| +// location and description of the match pattern. | 
| +class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback { | 
| + public: | 
| + explicit NetworkAnnotationTagCallback(Collector* collector) | 
| + : collector_(collector) {} | 
| + ~NetworkAnnotationTagCallback() override = default; | 
| + | 
| + // Is called on any pattern found by ASTMathers that are defined in RunMathers | 
| + // function. | 
| + virtual void run(const MatchFinder::MatchResult& result) override { | 
| + if (const clang::VarDecl* var_decl = | 
| + result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) { | 
| + AddVariable(var_decl, result); | 
| + } else if (const clang::CallExpr* call_expr = | 
| + result.Nodes.getNodeAs<clang::CallExpr>("user_function")) { | 
| + AddFunction(call_expr, result); | 
| + } | 
| + } | 
| + | 
| + // Stores an annotation variable defintion. | 
| + void AddVariable(const clang::VarDecl* var_decl, | 
| + const MatchFinder::MatchResult& result) { | 
| + NetworkAnnotationInstance instance; | 
| + | 
| + GetLocation(var_decl, *result.SourceManager, &instance.location); | 
| + instance.location.object_name = var_decl->getQualifiedNameAsString(); | 
| + instance.variable_reference = clang::dyn_cast<clang::NamedDecl>(var_decl); | 
| + | 
| + // Mark the instance as transitive parameter if it doesn't have | 
| + // initialization in the function where it is defined and it is passed as a | 
| + // parameter to the function. Otherwise, extract its content. | 
| + if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() && | 
| + !var_decl->isLocalVarDecl()) { | 
| + instance.flag.transitive_parameter = true; | 
| + } else if (auto* init_expr = var_decl->getInit()) { | 
| + if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr)) | 
| + GetAnnotationText(call_expr, *result.SourceManager, &instance); | 
| + } | 
| + // If nothing is set, issue an error. | 
| + if (!instance.flag.transitive_parameter && | 
| + instance.annotation.unique_id.empty() && instance.error.empty()) { | 
| + instance.error = "Could not resolve variable initialization."; | 
| + } | 
| + | 
| + collector_->variable_definitions.push_back(instance); | 
| + } | 
| + | 
| + // Stores a function call that uses annotation variables. | 
| + void AddFunction(const clang::CallExpr* call_expr, | 
| + const MatchFinder::MatchResult& result) { | 
| + NetworkAnnotationInstance instance; | 
| + | 
| + GetLocation(call_expr, *result.SourceManager, &instance.location); | 
| + instance.location.function_name = GetStatementCoveringFunction( | 
| + clang::dyn_cast<clang::Stmt>(call_expr), result.Context); | 
| + instance.location.object_name = | 
| + call_expr->getDirectCallee()->getQualifiedNameAsString(); | 
| + | 
| + // Get annotation text. | 
| + const clang::FunctionDecl* function_decl = call_expr->getDirectCallee(); | 
| + unsigned params_count = function_decl->getNumParams(); | 
| + unsigned args_count = call_expr->getNumArgs(); | 
| + | 
| + for (unsigned i = 0; i < params_count; i++) { | 
| + if (StripNetNamespaceMatch( | 
| + clang::QualType::getAsString( | 
| + function_decl->getParamDecl(i)->getType().split()), | 
| + "NetworkTrafficAnnotationTag")) { | 
| + if (i >= args_count) { | 
| + instance.error = "Function missing annotation argument."; | 
| + } else { | 
| + // Get the argument. | 
| + const clang::Expr* arg = call_expr->getArgs()[i]; | 
| + | 
| + // Is it a call to annotate function? | 
| + if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) { | 
| + instance.flag.is_direct_call = true; | 
| + GetAnnotationText(inner_call_expr, *result.SourceManager, | 
| + &instance); | 
| + instance.error = ""; | 
| + } else { | 
| + // Then it's a variable. | 
| + instance.flag.is_direct_call = false; | 
| + if (auto* pure_arg = | 
| + clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) { | 
| + instance.variable_reference = pure_arg->getFoundDecl(); | 
| + instance.error = ""; | 
| + } else { | 
| + instance.error = "Unknwon parameter type."; | 
| 
dcheng
2017/03/02 07:57:41
Nit: unknown
 
Ramin Halavati
2017/04/06 13:32:29
Done.
 | 
| + } | 
| + } | 
| + } | 
| + collector_->calls.push_back(instance); | 
| + } | 
| + } | 
| + } | 
| + | 
| + private: | 
| + Collector* collector_; | 
| +}; | 
| + | 
| +// Sets up ASTMatchers and runs clang tool to populate collector. Returns the | 
| +// result of running the clang tool. | 
| +int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) { | 
| + NetworkAnnotationTagCallback call_back(collector); | 
| + MatchFinder match_finder; | 
| + | 
| + // Set up a pattern to find variables defined with type | 
| + // [net::]NetworkTrafficAnnotationTag. | 
| + match_finder.addMatcher( | 
| + varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")), | 
| 
dcheng
2017/03/02 07:57:41
Can you help me understand why we need both? Shoul
 
Ramin Halavati
2017/04/06 13:32:29
If the code has the line "using namespace net;", t
 | 
| + hasType(asString("net::NetworkTrafficAnnotationTag")))) | 
| + .bind("annotation_variable"), | 
| + &call_back); | 
| + | 
| + // Set up a pattern to find functions that have a parameter of type | 
| + // [net::]NetworkTrafficAnnotationTag. | 
| + match_finder.addMatcher( | 
| + callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf( | 
| + hasType(asString("NetworkTrafficAnnotationTag")), | 
| + hasType(asString("net::NetworkTrafficAnnotationTag"))))))) | 
| + .bind("user_function"), | 
| + &call_back); | 
| + | 
| + std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory = | 
| + clang::tooling::newFrontendActionFactory(&match_finder); | 
| + return clang_tool->run(frontend_factory.get()); | 
| +} | 
| + | 
| +} // namespace | 
| + | 
| +int main(int argc, const char* argv[]) { | 
| + // Find output directory. | 
| + if (argc < 5) { | 
| + llvm::errs() << "Output files directory is not specified."; | 
| + return -1; | 
| + } | 
| + std::string output_dir(argv[4]); | 
| + | 
| + // Keep to consumed parameter from being passed to clang parser. | 
| 
dcheng
2017/03/02 07:57:41
Maybe just make this a proper flag?
 
Ramin Halavati
2017/04/06 13:32:29
Done.
 | 
| + argc = 4; | 
| + | 
| + llvm::cl::OptionCategory category("Network Request Audit Extractor Tool"); | 
| + clang::tooling::CommonOptionsParser options(argc, argv, category); | 
| + clang::tooling::ClangTool tool(options.getCompilations(), | 
| + options.getSourcePathList()); | 
| + Collector collector; | 
| + | 
| + int result = RunMatchers(&tool, &collector); | 
| + | 
| + if (result != 0) | 
| + return result; | 
| + | 
| + // For each call, if the parameter is not generated by a direct call to | 
| + // "DefineNetworkTrafficAnnotation", find the variable that holds the value. | 
| + for (NetworkAnnotationInstance& call : collector.calls) { | 
| + if (!call.flag.is_direct_call) { | 
| + // Find the variable. | 
| + for (NetworkAnnotationInstance& var : collector.variable_definitions) | 
| + if (var.variable_reference == call.variable_reference) { | 
| + call.annotation = var.annotation; | 
| + call.flag.transitive_parameter = var.flag.transitive_parameter; | 
| + call.error = | 
| + call.error + (call.error.length() ? "\n+" : "") + var.error; | 
| + break; | 
| + } | 
| + if (!call.annotation.unique_id.length()) | 
| + call.error = "Variable not found."; | 
| + } | 
| + | 
| + // If the function just receives the variable and passes it to another | 
| + // function, ignore it, otherwise write it to file. | 
| + if (!call.flag.transitive_parameter) { | 
| + std::string s = call.location.file_path; | 
| + std::replace(s.begin(), s.end(), '/', '_'); | 
| + std::replace(s.begin(), s.end(), '.', '_'); | 
| + std::string file_path = output_dir + "/" + s + "(" + | 
| + std::to_string(call.location.line_number) + | 
| + ").txt"; | 
| + | 
| + std::ofstream output_file(file_path); | 
| 
dcheng
2017/03/02 07:57:41
How are reads/writes to this file synchronized? If
 
Ramin Halavati
2017/04/06 13:32:29
Comment updated in refactored source:
"For each ca
 | 
| + if (output_file.is_open()) { | 
| + output_file << call.location.file_path << "\n"; | 
| + output_file << call.location.function_name << "\n"; | 
| + output_file << call.location.line_number << "\n"; | 
| + output_file << call.location.object_name << "\n"; | 
| + output_file << call.error << "\n"; | 
| + output_file << call.annotation.unique_id << "\n"; | 
| + output_file << call.annotation.text << "\n"; | 
| + output_file.close(); | 
| + } else { | 
| + llvm::errs() << "Could not write to file: " << file_path << " because " | 
| + << strerror(errno) << "\n"; | 
| + return 1; | 
| + } | 
| + } | 
| + } | 
| + | 
| + return 0; | 
| +} |