Chromium Code Reviews| Index: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
| diff --git a/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..ea773af06269618ea2f3265a3e325a1c08d06e2a |
| --- /dev/null |
| +++ b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
| @@ -0,0 +1,393 @@ |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +// This clang tool finds all instances of net::NetworkTrafficAnnotationTag in |
| +// given source code, extracts the location info and content of annotation tags |
| +// (unique id and annotation text), and stores them in separate text files |
| +// (per instance) in the given output directory. |
|
battre
2017/02/28 18:25:11
you don't specify the parameters of this binary an
Ramin Halavati
2017/04/06 13:32:28
This binary is not designed for direct calling and
|
| + |
| +#include <stdio.h> |
| +#include <algorithm> |
| +#include <cerrno> |
| +#include <cstring> |
| +#include <fstream> |
| +#include <memory> |
| +#include <string> |
| +#include <vector> |
| + |
| +#include "clang/ASTMatchers/ASTMatchFinder.h" |
| +#include "clang/ASTMatchers/ASTMatchers.h" |
| +#include "clang/Basic/SourceManager.h" |
| +#include "clang/Frontend/FrontendActions.h" |
| +#include "clang/Lex/Lexer.h" |
| +#include "clang/Tooling/CommonOptionsParser.h" |
| +#include "clang/Tooling/Refactoring.h" |
| +#include "clang/Tooling/Tooling.h" |
| +#include "llvm/Support/CommandLine.h" |
| + |
| +using namespace clang::ast_matchers; |
| + |
| +namespace { |
| + |
| +// An instance of network traffic annotation usage. This can be either |
| +// a variable defined as NetworkTrafficAnnotationTag or a function that has |
| +// a variable of this type as its input parameter. |

battre
2017/02/28 18:25:12
Can you add examples of these two cases? I think t
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| +struct NetworkAnnotationInstance { |
| + NetworkAnnotationInstance() |
| + : variable_reference(nullptr), |
| + is_direct_call(false), |
| + transitive_parameter(false) {} |
| + |
| + // Information about where this annotation or call has happened. |
| + struct Location { |
| + Location() : line_number(-1) {} |
| + std::string file_path; |
| + int line_number; |
| + // Qualified name of the enclosing function (only filled in for calls). |

battre
2017/02/28 18:25:11
What does this mean?
battre
2017/02/28 18:25:12
Nit (also below): newline before //
Ramin Halavati
2017/04/06 13:32:28
Done.
Ramin Halavati
2017/04/06 13:32:29
Done.
|
| + std::string function_name; |
| + // Qualified name of the function that is called with the annotation, or of |
| + // the variable that holds it, e.g. SpellingServiceClient::CreateURLFetcher |
| + // for a function or net_annotation for a variable. |

battre
2017/02/28 18:25:11
I don't understand this either.
Ramin Halavati
2017/04/06 13:32:29
Done.
|
| + std::string object_name; |
| + }; |
| + |
| + // Annotation content: the two arguments of DefineNetworkTrafficAnnotation. |

battre
2017/02/28 18:25:12
// Annotation content: These are the parameters of
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + struct Annotation { |
| + std::string unique_id; |
| + std::string text; |
| + }; |
| + |
| + Location location; |
| + Annotation annotation; |
| + |
| + // Possible error message (empty if no error). |
| + std::string error; |

battre
2017/02/28 18:25:11
extra linebreaks (see above)
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + // A reference to the variable containing annotation. Null if not available. |
| + const clang::NamedDecl* variable_reference; |
| + // Flag stating that the annotation argument is written as a call expression |

battre
2017/02/28 18:25:12
what is "parameter" here? I think this is an unres
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + // at the call site (true) or is passed through a variable (false). |
| + bool is_direct_call; |
| + // Flag stating that the variable is a parameter of the enclosing function |
| + // (it has no initializer of its own). |

battre
2017/02/28 18:25:12
Please give an example as well.
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + bool transitive_parameter; |
| +}; |
| + |
| +// Collects the matched annotation usages: variable definitions and calls. |
| +struct Collector { |
| + std::vector<NetworkAnnotationInstance> variable_definitions; |
| + std::vector<NetworkAnnotationInstance> calls; |
| +}; |
| + |
| +// Forward declaration of the Decl overload that is defined further below. For |
| +// variable x in the code "void foo() { int x; ... }", it returns "foo". |
| +std::string GetCoveringFunction(const clang::Decl* token, |
| + const MatchFinder::MatchResult& result); |
| + |
| +// Returns true if |token| equals |name| or "net::" followed by |name|. |
| +bool net_match(const std::string& token, const std::string& name) { |

battre
2017/02/28 18:25:12
I think that the style guide requires CamelCase he
Ramin Halavati
2017/04/06 13:32:28
How about "StripNetNamespaceMatch"?
|
| + return token == name || token == (std::string("net::") + name); |
| +} |
| + |
| +// Returns the source text of a given token, like a function name, variable |
| +// name, string literal, etc. |

battre
2017/02/28 18:25:12
string literal? What's a string const?
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| +std::string GetStmtText(const clang::Stmt* token, |
| + const clang::SourceManager& source_manager) { |
| + clang::LangOptions lopt; |
| + // Get text range. |
| + clang::SourceLocation start = token->getLocStart(); |
| + clang::SourceLocation end = token->getLocEnd(); |
| + |
| + // For locations inside a macro expansion, switch to the spelling location. |
| + if (start.isMacroID()) |
| + start = source_manager.getSpellingLoc(start); |
| + if (end.isMacroID()) |
| + end = source_manager.getSpellingLoc(end); |
| + |
| + // Move |end| past the last token so that the range covers it entirely. |
| + end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt); |
| + |
| + // Extract text. |
| + std::string output(source_manager.getCharacterData(start), |
| + source_manager.getCharacterData(end)); |
| + |
| + // A lone "R" is presumably a raw string prefix; then use the literal's value. |

battre
2017/02/28 18:25:12
Can you explain the following branches better?
Ramin Halavati
2017/04/06 13:32:29
Done.
|
| + if (output != "R") |
| + return output; |
| + |
| + if (auto* c1 = clang::dyn_cast<clang::StringLiteral>(token)) |

battre
2017/02/28 18:25:12
why "c"? if (auto* literal = ...) ?
similarly bel
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + return c1->getString(); |
| + |
| + if (auto* c2 = clang::dyn_cast<clang::ImplicitCastExpr>(token)) { |
| + if (const clang::StringLiteral* c3 = |
| + clang::dyn_cast<clang::StringLiteral>(c2->getSubExprAsWritten())) { |
| + return c3->getString(); |
| + } |
| + } |
| + |
| + return output; |
| +} |
| + |
| +// Fills |instance| with the unique id and the annotation text that are passed |
| +// to a "DefineNetworkTrafficAnnotation" call. If |call_expr| is not such a call |
| +// with exactly two arguments, both fields are emptied and an error is stored. |
| +void GetAnnotationText(const clang::CallExpr* call_expr, |
| + const clang::SourceManager& source_manager, |
| + NetworkAnnotationInstance* instance) { |
| + const bool is_annotation_call = |
| + net_match(GetStmtText(call_expr->getCallee(), source_manager), |
| + "DefineNetworkTrafficAnnotation") && |
| + call_expr->getNumArgs() == 2; |
| + |
| + // Anything else is reported as an error and leaves no annotation content. |
| + if (!is_annotation_call) { |
| + instance->annotation.unique_id.clear(); |
| + instance->annotation.text.clear(); |
| + instance->error = "Unexpected function."; |
| + return; |
| + } |
| + |
| + // First argument is the unique id, second one is the annotation text. |
| + const clang::Expr* const* arguments = call_expr->getArgs(); |
| + instance->annotation.unique_id = GetStmtText(arguments[0], source_manager); |
| + instance->annotation.text = GetStmtText(arguments[1], source_manager); |
| + instance->error.clear(); |
| +} |
| + |
| +// Returns the function that includes the given statement. For example, if the |
| +// token is the call to function bar() in the code "void foo() { bar(); }", it |
| +// returns "foo". Returns "Unknown" if the parent chain cannot be followed. |
| +std::string GetCoveringFunction(const clang::Stmt* token, |

battre
2017/02/28 18:25:12
This overloading is discouraged: https://google.gi
Ramin Halavati
2017/04/06 13:32:29
Done.
|
| + const MatchFinder::MatchResult& result) { |
| + clang::ASTContext::DynTypedNodeList parents = |
| + result.Context->getParents(*token); |
| + // TODO: Find out what it means when a node does not have exactly one parent. |

battre
2017/02/28 18:25:11
Can you resolve this TODO?
Ramin Halavati
2017/04/06 13:32:28
I haven't found a clue yet. I am hoping that clang
|
| + if (parents.size() == 1) { |
| + if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) |
| + return GetCoveringFunction(s, result); |
| + else if (const clang::Decl* d = parents[0].get<clang::Decl>()) |
| + return GetCoveringFunction(d, result); |
| + } |
| + return "Unknown"; |
| +} |
| + |
| +// Returns the qualified name of |token| if it is a function, otherwise the |
| +// function that includes it, e.g. "foo" for x in "void foo() { int x; ... }". |
| +std::string GetCoveringFunction(const clang::Decl* token, |
| + const MatchFinder::MatchResult& result) { |
| + if (auto f = clang::dyn_cast<clang::FunctionDecl>(token)) |
| + return f->getQualifiedNameAsString(); |

battre
2017/02/28 18:25:12
This path is not documented in the function commen
Ramin Halavati
2017/04/06 13:32:29
Done.
|
| + |
| + clang::ASTContext::DynTypedNodeList parents = |
| + result.Context->getParents(*token); |
| + // TODO: Find out what it means when a node does not have exactly one parent. |
| + if (parents.size() == 1) { |
| + if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) |
| + return GetCoveringFunction(s, result); |
| + else if (const clang::Decl* d = parents[0].get<clang::Decl>()) |
| + return GetCoveringFunction(d, result); |
| + } |
| + return "Unknown"; |
| +} |
| + |
| +// Finds file name and line number of |token| and writes them into |location|. |

battre
2017/02/28 18:25:11
... and writes it into |location|.
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| +template <class T> |
| +void GetLocation(const T* token, |
| + const MatchFinder::MatchResult& result, |

battre
2017/02/28 18:25:12
Do you want to pass the SourceManager instead of t
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + NetworkAnnotationInstance::Location* location) { |
| + clang::SourceLocation source_location = token->getLocStart(); |
| + location->file_path = result.SourceManager->getFilename(source_location); |
| + location->line_number = |
| + result.SourceManager->getSpellingLineNumber(source_location); |
| +} |
| + |
| +// This class implements the call back functions for AST Matchers. The matchers |
| +// are defined in RunMatchers function and when a pattern is found there, |
| +// the run function in this class is called back with information on the match |
| +// location and description of the match pattern. Every match is stored as a |
| +// NetworkAnnotationInstance in the collector that is given to the constructor. |
| +class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback { |
| + public: |
| + explicit NetworkAnnotationTagCallback(Collector* collector) |
| + : collector_(collector) {} |
| + ~NetworkAnnotationTagCallback() override = default; |
| + |
| + // Is called on any pattern found by ASTMatchers that are defined in |
| + // RunMatchers function. Dispatches on the name the matched node is bound to. |
| + void run(const MatchFinder::MatchResult& result) override { |
| + if (const clang::VarDecl* var_decl = |
| + result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) { |
| + AddVariable(var_decl, result); |
| + } else if (const clang::CallExpr* call_expr = |
| + result.Nodes.getNodeAs<clang::CallExpr>("user_function")) { |
| + AddFunction(call_expr, result); |
| + } |
| + } |
| + |
| + // Stores an annotation variable definition. |
| + void AddVariable(const clang::VarDecl* var_decl, |
| + const MatchFinder::MatchResult& result) { |
| + NetworkAnnotationInstance instance; |
| + |
| + GetLocation(var_decl, result, &instance.location); |
| + instance.location.object_name = var_decl->getQualifiedNameAsString(); |
| + instance.variable_reference = clang::dyn_cast<clang::NamedDecl>(var_decl); |
| + |
| + // Mark the variable as a transitive parameter if it has no initializer but |

battre
2017/02/28 18:25:11
what is "it"? Can you give it a name?
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + // is a function parameter. Otherwise, extract its content. |
| + if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() && |
| + !var_decl->isLocalVarDecl()) { |
| + instance.transitive_parameter = true; |
| + } else if (auto* init_expr = var_decl->getInit()) { |
| + if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr)) |
| + GetAnnotationText(call_expr, *result.SourceManager, &instance); |

battre
2017/02/28 18:25:12
I think that here is some inconsistency: GetLocati
Ramin Halavati
2017/04/06 13:32:28
AddVariable can be changed, but AddFunction requir
|
| + } |
| + // If nothing is set, issue an error. |
| + if (!instance.transitive_parameter && |
| + instance.annotation.unique_id.empty() && instance.error.empty()) { |
| + instance.error = "Could not resolve variable initialization."; |
| + } |
| + |
| + collector_->variable_definitions.push_back(instance); |
| + } |
| + |
| + // Stores a function call that uses annotation variables. One instance is |
| + // stored for every parameter of type [net::]NetworkTrafficAnnotationTag. |
| + void AddFunction(const clang::CallExpr* call_expr, |
| + const MatchFinder::MatchResult& result) { |
| + const clang::FunctionDecl* function_decl = call_expr->getDirectCallee(); |
| + // Without a direct callee there is no parameter list to inspect. |
| + if (!function_decl) |
| + return; |
| + |
| + NetworkAnnotationInstance instance; |
| + |
| + GetLocation(call_expr, result, &instance.location); |
| + instance.location.function_name = |
| + GetCoveringFunction(clang::dyn_cast<clang::Stmt>(call_expr), result); |
| + instance.location.object_name = function_decl->getQualifiedNameAsString(); |
| + |
| + // Get annotation text. |
| + unsigned params_count = function_decl->getNumParams(); |
| + unsigned args_count = call_expr->getNumArgs(); |
| + |
| + for (unsigned i = 0; i < params_count; i++) { |
| + if (net_match(clang::QualType::getAsString( |
| + function_decl->getParamDecl(i)->getType().split()), |
| + "NetworkTrafficAnnotationTag")) { |
| + if (i >= args_count) { |
| + instance.error = "Function missing annotation argument."; |
| + } else { |
| + // Get the argument. |
| + const clang::Expr* arg = call_expr->getArgs()[i]; |
| + |
| + // Is it a call to annotate function? |
| + if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) { |
| + instance.is_direct_call = true; |
| + // GetAnnotationText sets or clears |instance.error| itself. Its |
| + // verdict is kept (as in AddVariable) so that a call to an unexpected |
| + // function is reported instead of being silently dropped. |
| + GetAnnotationText(inner_call_expr, *result.SourceManager, |
| + &instance); |
| + } else { |
| + // Then it's a variable. |
| + instance.is_direct_call = false; |
| + if (auto* pure_arg = |
| + clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) { |
| + instance.variable_reference = pure_arg->getFoundDecl(); |
| + instance.error = ""; |
| + } else { |
| + instance.error = "Unknwon parameter type."; |
| + } |
| + } |
| + } |
| + collector_->calls.push_back(instance); |
| + } |
| + } |
| + } |
| + |
| + private: |
| + Collector* collector_; |
| +}; |
| + |
| +// Registers the AST matchers for annotation variables and for calls to |
| +// functions that take an annotation, then runs |clang_tool| so that the matches |
| +// are stored in |collector|. Returns the result of running the clang tool. |
| +int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) { |
| + NetworkAnnotationTagCallback callback(collector); |
| + |
| + // Pattern for variables defined with type [net::]NetworkTrafficAnnotationTag. |
| + DeclarationMatcher variable_matcher = |
| + varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")), |
| + hasType(asString("net::NetworkTrafficAnnotationTag")))) |
| + .bind("annotation_variable"); |
| + |
| + // Pattern for calls to functions that have a parameter of type |
| + // [net::]NetworkTrafficAnnotationTag. |
| + StatementMatcher call_matcher = |
| + callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf( |
| + hasType(asString("NetworkTrafficAnnotationTag")), |
| + hasType(asString("net::NetworkTrafficAnnotationTag"))))))) |
| + .bind("user_function"); |
| + |
| + // Both patterns report to the same callback, in the original order. |
| + MatchFinder finder; |
| + finder.addMatcher(variable_matcher, &callback); |
| + finder.addMatcher(call_matcher, &callback); |
| + |
| + std::unique_ptr<clang::tooling::FrontendActionFactory> factory = |
| + clang::tooling::newFrontendActionFactory(&finder); |
| + return clang_tool->run(factory.get()); |
| +} |
| + |
| +} // namespace |
| + |
| +// Entry point. The fifth command line argument (argv[4]) is the directory that |
| +// receives one text file per annotated call; only the first four arguments are |
| +// handed to the clang options parser. Returns 0 on success, the result of the |
| +// clang tool if it fails, and 1 if an output file cannot be written. |
| +int main(int argc, const char* argv[]) { |
| + // Find output directory. |
| + if (argc < 5) { |
| + llvm::errs() << "Temporary files directory is not specified."; |
| + return -1; |
| + } |
| + std::string output_dir(argv[4]); |

battre
2017/02/28 18:25:11
I think this parameter is not required by the read
Ramin Halavati
2017/04/06 13:32:28
I don't understand, elaborate please.
|
| + // Keep the consumed parameter from being passed to clang parser. |
| + argc = 4; |

battre
2017/02/28 18:25:12
Wouldn't it be cleaner introduce a new parameter -
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + |
| + llvm::cl::OptionCategory category("Network Request Audit Extractor Tool"); |
| + clang::tooling::CommonOptionsParser options(argc, argv, category); |

battre
2017/02/28 18:25:12
CommonParserOptions supports a help messages https
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + clang::tooling::ClangTool tool(options.getCompilations(), |
| + options.getSourcePathList()); |
| + Collector collector; |
| + |
| + int result = RunMatchers(&tool, &collector); |
| + |
| + if (result != 0) |
| + return result; |
| + |
| + // llvm::outs() << "==== BEGIN EDITS ====\n"; |
| + // llvm::outs() << "==== END EDITS ====\n"; |

battre
2017/02/28 18:25:12
Can this be deleted?
Ramin Halavati
2017/04/06 13:32:28
It was a requirement in previous version of clang
|
| + |
| + // For each call, if the parameter is not generated by a direct call to |
| + // "DefineNetworkTrafficAnnotation", find the variable that holds the value. |
| + for (NetworkAnnotationInstance& call : collector.calls) { |

battre
2017/02/28 18:25:11
I think that for (NetworkAnnotationInstance& call
Ramin Halavati
2017/04/06 13:32:28
Done.
|
| + if (!call.is_direct_call) { |
| + // Find the definition of the variable that is passed to this call. |
| + const auto definition = std::find_if( |
| + collector.variable_definitions.begin(), |
| + collector.variable_definitions.end(), |
| + [&call](const NetworkAnnotationInstance& variable) { |
| + return variable.variable_reference == call.variable_reference; |
| + }); |
| + if (definition != collector.variable_definitions.end()) { |
| + call.annotation = definition->annotation; |
| + call.transitive_parameter = definition->transitive_parameter; |
| + call.error = |
| + call.error + (call.error.length() ? "\n+" : "") + definition->error; |
| + } else if (call.error.empty()) { |
| + // Only report a missing variable when no definition matched and nothing |
| + // more specific was recorded, so that real errors are not overwritten. |
| + call.error = "Variable not found."; |
| + } |
| + } |
| + |
| + // If the function just receives the variable and passes it to another |
| + // function, ignore it, otherwise write it to file. |
| + if (!call.transitive_parameter) { |
| + // The output file name is the source path with '/' and '.' replaced by '_', |
| + // followed by the line number in parentheses. |
| + std::string flattened_path = call.location.file_path; |
| + std::replace(flattened_path.begin(), flattened_path.end(), '/', '_'); |
| + std::replace(flattened_path.begin(), flattened_path.end(), '.', '_'); |
| + std::string file_path = output_dir + "/" + flattened_path + "(" + |
| + std::to_string(call.location.line_number) + ").txt"; |
| + |
| + std::ofstream output_file(file_path); |
| + if (output_file.is_open()) { |
| + output_file << call.location.file_path << "\n"; |
| + output_file << call.location.function_name << "\n"; |
| + output_file << call.location.line_number << "\n"; |
| + output_file << call.location.object_name << "\n"; |
| + output_file << call.error << "\n"; |
| + output_file << call.annotation.unique_id << "\n"; |
| + output_file << call.annotation.text << "\n"; |
| + output_file.close(); |
| + } else { |
| + llvm::errs() << "Could not write to file: " << file_path << " because " |
| + << strerror(errno) << "\n"; |
| + return 1; |
| + } |
| + } |
| + } |
| + |
| + return 0; |
| +} |