Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(191)

Unified Diff: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp

Issue 2448133006: Tool added to extract network traffic annotations. (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp
diff --git a/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7bb565a3760b38884f69182c9abf7093397f38ed
--- /dev/null
+++ b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp
@@ -0,0 +1,376 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <memory>
+#include <stdio.h>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Refactoring.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/CommandLine.h"
+//--> TODO: #include "../../traffic_annotation/traffic_annotation.pb.h"
battre 2016/10/26 14:29:18 remove?
Ramin Halavati 2016/10/27 09:40:08 I think if we want to add protobuf to clang tool i
+
+using namespace clang::ast_matchers;
+using clang::tooling::CommonOptionsParser;
+
+namespace {
+
+// Structure to collect instances.
battre 2016/10/26 14:29:19 instances of what? Can you add more comments to th
Ramin Halavati 2016/10/27 09:40:08 Done.
+typedef struct Collector {  // Accumulates annotation variable definitions and annotated calls recorded by the match callback.
battre 2016/10/26 14:29:18 I think that in C++ we don't need to "typedef stru
Ramin Halavati 2016/10/27 09:40:10 Done.
+ typedef struct Instance {  // One recorded variable definition or function call.
+ Instance() {
battre 2016/10/26 14:29:18 Can you use an initializer list? Instance() : is_
Ramin Halavati 2016/10/27 09:40:08 Done.
+ is_direct_call = false;
+ transitive_parameter = false;
+ variable_reference = NULL;
battre 2016/10/26 14:29:19 nullptr
Ramin Halavati 2016/10/27 09:40:09 Done.
+ }  // Note: location.line_number is not initialized by this constructor.
+
+ // Information about where this annotation or call has happened.
+ struct Location {
+ std::string file_name;  // File name as reported by the SourceManager.
+ int line_number;
battre 2016/10/26 14:29:19 line number is not default initialized.
Ramin Halavati 2016/10/27 09:40:09 Done.
+ // Name of the function including this instance.
+ std::string function_name;
+ // Name of the variable that contains annotation or the function called
+ // with annotation.
+ std::string object_name;
+ };
+
+ // Annotation content
battre 2016/10/26 14:29:18 Comments should end with period. (please also chec
Ramin Halavati 2016/10/27 09:40:09 Done.
+ struct Annotation {
+ std::string unique_id;  // Source text of the first DefineNetworkTrafficAnnotation argument.
+ std::string text;  // Source text of the second DefineNetworkTrafficAnnotation argument.
+ };
+
+ Location location;
+ Annotation annotation;
+
+ // Possible error (empty if no error)
+ std::string error;
+ // A reference to the variable containing annotation. Null if not available.
+ const clang::NamedDecl* variable_reference;
+ // Flag stating that parameter is directly passed to annotate function here
+ // or it's through a variable.
+ bool is_direct_call;
+ // Flag stating that a variable is a parameter received by upper level
+ // function.
+ bool transitive_parameter;
+ } Instance;
+
+ std::vector<Instance> variable_definitions, calls;
battre 2016/10/26 14:29:19 one variable definition per line.
Ramin Halavati 2016/10/27 09:40:09 Done.
+} Collector;  // variable_definitions is filled by AddVariable, calls by AddFunction.
+
+// Returns the function that includes the given clang::Decl (forward declaration; the clang::Stmt overload, which is defined first, calls it).
+std::string GetCoveringFunction(const clang::Decl* token,
+ const MatchFinder::MatchResult& result);
+
+// Returns true when |token| is |name| itself or |name| qualified with "net::".
+bool net_match(const std::string& token, const std::string& name) {
+  return token.compare(name) == 0 || token.compare("net::" + name) == 0;
+}
+
+// Returns the source text covered by |token|. When that text is just "R" (treated here as a raw string literal) and |token| is an implicit cast of a string literal, the contents of the literal are returned instead.
+std::string GetStmtText(const clang::Stmt* token,
+ const clang::SourceManager& source_manager) {
+ clang::LangOptions lopt;  // Default language options, passed to the Lexer below.
+ // Get text range
+ clang::SourceLocation start = token->getLocStart();
+ clang::SourceLocation end = token->getLocEnd();
+
+ // If it's a macro, go to definition.
+ if (start.isMacroID())
+ start = source_manager.getSpellingLoc(start);
+ if (end.isMacroID())
+ end = source_manager.getSpellingLoc(end);
+
+ // Get the real end of the token.
+ end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt);
+
+ // Extract text: the characters between the two character-data pointers.
+ std::string output = std::string(source_manager.getCharacterData(start),
+ source_manager.getCharacterData(end) -
+ source_manager.getCharacterData(start));
battre 2016/10/26 14:29:19 can you also write std::string output(source_manag
Ramin Halavati 2016/10/27 09:40:09 Done.
+
+ // Raw string?
+ if (output == "R")
battre 2016/10/26 14:29:19 {} around multiline blocks. Same in the next line.
Ramin Halavati 2016/10/27 09:40:09 Done.
+ if (auto* c1 = clang::dyn_cast<clang::ImplicitCastExpr>(token))
+ if (const clang::StringLiteral* c2 =
+ clang::dyn_cast<clang::StringLiteral>(
+ c1->getSubExprAsWritten())) {
+ output = c2->getString();  // Replace the bare "R" with the literal contents.
+ }
+
+ return output;
+}
+
+// Fills |instance| from a call to "DefineNetworkTrafficAnnotation": stores the
+// source text of its two arguments, or an error for any other kind of call.
+void GetAnnotationText(const clang::CallExpr* call_expr,
+                       const clang::SourceManager& source_manager,
+                       Collector::Instance* instance) {
+  const bool is_annotation_call =
+      net_match(GetStmtText(call_expr->getCallee(), source_manager),
+                "DefineNetworkTrafficAnnotation") &&
+      call_expr->getNumArgs() == 2;
+  if (!is_annotation_call) {
+    instance->annotation.unique_id.clear();
+    instance->annotation.text.clear();
+    instance->error = "Unexpected function.";
+    return;
+  }
+  instance->annotation.unique_id =
+      GetStmtText(call_expr->getArg(0), source_manager);
+  instance->annotation.text = GetStmtText(call_expr->getArg(1), source_manager);
+  instance->error.clear();
+}
+
+// Returns the function enclosing |token| by following its single AST parent.
+std::string GetCoveringFunction(const clang::Stmt* token,
+                                const MatchFinder::MatchResult& result) {
+  auto parents = result.Context->getParents(*token);
+  if (parents.size() != 1)
+    return "Unknown";
+  if (const auto* parent_stmt = parents[0].get<clang::Stmt>())
+    return GetCoveringFunction(parent_stmt, result);
+  if (const auto* parent_decl = parents[0].get<clang::Decl>())
+    return GetCoveringFunction(parent_decl, result);
+  return "Unknown";
+}
+
+// Returns the qualified name of |token| when it is a function declaration; otherwise retries on its single AST parent and gives "Unknown" if that is not possible.
+std::string GetCoveringFunction(const clang::Decl* token,
+ const MatchFinder::MatchResult& result) {
+ if (auto f = clang::dyn_cast<clang::FunctionDecl>(token))
+ return f->getQualifiedNameAsString();
+
+ auto parents = result.Context->getParents(*token);
+ if (parents.size() == 1) {
battre 2016/10/26 14:29:18 Add a comment what having a single parent means?
Ramin Halavati 2016/10/27 09:40:09 I did not find a conclusive description on this an
+ if (const clang::Stmt* s = parents[0].get<clang::Stmt>())
+ return GetCoveringFunction(s, result);
+ else if (const clang::Decl* d = parents[0].get<clang::Decl>())
+ return GetCoveringFunction(d, result);
+ }
+ return "Unknown";  // Zero or several parents, or a parent that is neither Stmt nor Decl.
+}
+
+// Stores the file name and spelling line number of the start of |token| in |location|; the other Location fields are not modified.
+template <class T>  // T must provide getLocStart(); used in this file with clang::VarDecl and clang::CallExpr.
+void GetOccuranceLocation(const T* token,
battre 2016/10/26 14:29:19 typo: occurr*e*nce What's the difference between
Ramin Halavati 2016/10/27 09:40:09 Renamed it to GetLocation as it can be called gene
+ const MatchFinder::MatchResult& result,
+ Collector::Instance::Location* location) {
+ clang::SourceLocation source_location = token->getLocStart();
+ location->file_name = result.SourceManager->getFilename(source_location);
+ location->line_number =
+ result.SourceManager->getSpellingLineNumber(source_location);
+}
+
+// Match callback that turns each bound AST node into a Collector::Instance:
+// "annotation_variable" matches are appended to Collector::variable_definitions
+// and "user_function" matches are appended to Collector::calls.
+class TheCallback : public MatchFinder::MatchCallback {
battre 2016/10/26 14:29:19 Description of the responsibility of this class?
Ramin Halavati 2016/10/27 09:40:08 Done.
+ public:
+ explicit TheCallback(Collector* collector) : collector_(collector) {}
battre 2016/10/26 14:29:18 ~TheCallback() override = default;
Ramin Halavati 2016/10/27 09:40:09 Done.
+
+ // Dispatches a match to AddVariable or AddFunction by its bound node name.
+ virtual void run(const MatchFinder::MatchResult& result) override {
+ if (const clang::VarDecl* var_decl =
+ result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable"))
battre 2016/10/26 14:29:19 also {} if the is statement spans more than one li
Ramin Halavati 2016/10/27 09:40:09 Done.
+ AddVariable(var_decl, result);
+ else if (const clang::CallExpr* call_expr =
+ result.Nodes.getNodeAs<clang::CallExpr>("user_function"))
+ AddFunction(call_expr, result);
+ }
+
+ // Stores an annotation variable definition in the Collector.
+ // A parameter without an initializer is flagged as transitive; a variable
+ // initialized by a call gets its annotation (or error) from
+ // GetAnnotationText; anything else is recorded with an error.
+ void AddVariable(const clang::VarDecl* var_decl,
+ const MatchFinder::MatchResult& result) {
+ Collector::Instance instance;
+
+ GetOccuranceLocation(var_decl, result, &instance.location);
+ instance.location.object_name = var_decl->getQualifiedNameAsString();
+ instance.variable_reference = clang::dyn_cast<clang::NamedDecl>(var_decl);
+
+ // Get annotation text.
+ // If it doesn't have initialization, but it's a parameter, store it.
+ if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() &&
+ !var_decl->isLocalVarDecl()) {
+ instance.transitive_parameter = true;
+ } else if (auto* init_expr = var_decl->getInit()) {
+ if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr))
+ GetAnnotationText(call_expr, *result.SourceManager, &instance);
+ }
+ // If nothing is set, issue an error.
+ if (!instance.transitive_parameter &&
+ instance.annotation.unique_id.empty() && instance.error.empty())
battre 2016/10/26 14:29:19 {}
Ramin Halavati 2016/10/27 09:40:09 Done.
+ instance.error = "Could not resolve variable initialization.";
+
+ collector_->variable_definitions.push_back(instance);
+ }
+
+ // Stores a function call that should be monitored.
+ // One Instance is appended for every parameter of the callee whose type is
+ // NetworkTrafficAnnotationTag (with or without the net:: qualifier).
+ void AddFunction(const clang::CallExpr* call_expr,
+ const MatchFinder::MatchResult& result) {
+ Collector::Instance instance;
+
+ GetOccuranceLocation(call_expr, result, &instance.location);
+ instance.location.function_name =
+ GetCoveringFunction(clang::dyn_cast<clang::Stmt>(call_expr), result);
+ instance.location.object_name =
+ call_expr->getDirectCallee()->getQualifiedNameAsString();
+
+ // Get annotation text.
+ const clang::FunctionDecl* function_decl = call_expr->getDirectCallee();
+ unsigned params_count = function_decl->getNumParams();
+ unsigned args_count = call_expr->getNumArgs();
+
+ for (unsigned i = 0; i < params_count; i++) {
+ if (net_match(clang::QualType::getAsString(
+ function_decl->getParamDecl(i)->getType().split()),
+ "NetworkTrafficAnnotationTag")) {
+ if (i >= args_count) {
+ instance.error = "Function missing annotation argument.";
+ } else {
+ // Get the argument.
+ const clang::Expr* arg = call_expr->getArgs()[i];
+
+ // Is it a call to annotate function?
+ if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) {
+ instance.is_direct_call = true;
+ // GetAnnotationText sets |error| itself (empty on success). It must
+ // not be cleared afterwards, or a call to some other function would
+ // be recorded with neither an annotation nor an error.
+ GetAnnotationText(inner_call_expr, *result.SourceManager,
+ &instance);
+ } else {
+ // Then it's a variable.
+ instance.is_direct_call = false;
+ if (auto* pure_arg =
+ clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) {
+ instance.variable_reference = pure_arg->getFoundDecl();
+ instance.error = "";
+ } else {
+ instance.error = "Unknwon parameter type.";
+ }
+ }
+ }
+ collector_->calls.push_back(instance);
+ }
+ }
+ }
+
+ private:
+ // TODO store pointer
battre 2016/10/26 14:29:19 Delete the todo?
Ramin Halavati 2016/10/27 09:40:09 Done.
+ Collector* collector_;  // Not owned; receives every recorded Instance.
+};
+
+// NRA class keeps the call back function and sets the matchers.
battre 2016/10/26 14:29:18 Please don't use abbreviations.
Ramin Halavati 2016/10/27 09:40:08 Done. The class is totally removed and replaced by
+class NetworkRequestAuditor {
battre 2016/10/26 14:29:19 Rename this to NetworkAnnotationTagExtractor? Then
Ramin Halavati 2016/10/27 09:40:09 The class is totally removed and replaced by a sin
+ public:
+ explicit NetworkRequestAuditor(Collector* collector)
+ : the_callback_(collector) {}  // The callback records its results into |collector|.
+
+ void SetupMatchers(MatchFinder* match_finder) {  // Registers both matchers with the_callback_ as their handler.
+ // Find variables defined as Annotation
battre 2016/10/26 14:29:19 . What does "Find variables defined as Annotation
Ramin Halavati 2016/10/27 09:40:08 Comments changed.
+ match_finder->addMatcher(
+ varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")),
+ hasType(asString("net::NetworkTrafficAnnotationTag"))))
+ .bind("annotation_variable"),  // Handled by TheCallback::AddVariable.
+ &the_callback_);
+
+ // Find calls to functions that have a parameter of type NetworkTrafficAnnotationTag (bare or net:: qualified).
+ match_finder->addMatcher(
+ callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf(
+ hasType(asString("NetworkTrafficAnnotationTag")),
+ hasType(asString("net::NetworkTrafficAnnotationTag")))))))
+ .bind("user_function"),  // Handled by TheCallback::AddFunction.
+ &the_callback_);
+ }
+
+ private:
+ TheCallback the_callback_;  // Receives every match of the two matchers registered in SetupMatchers.
+};
+
+} // namespace
+
+// Runs the matchers over the given sources, resolves annotations that are
+// passed through variables, and writes one text file per annotated call into
+// the directory given by the "output_dir=" argument. Returns 0 on success, 1
+// when the output directory is missing or a file cannot be written, or the
+// non-zero result of ClangTool::run.
+int main(int argc, const char* argv[]) {
+ llvm::cl::OptionCategory category("Network Request Audit Extractor Tool");
+ CommonOptionsParser options(argc, argv, category);
+ clang::tooling::ClangTool tool(options.getCompilations(),
+ options.getSourcePathList());
+
+ Collector collector;
+ NetworkRequestAuditor auditor(&collector);
+ MatchFinder match_finder;
+ auditor.SetupMatchers(&match_finder);
+
+ // Find output folder
+ const std::string kOutputSpecifier("output_dir=");
+ std::string output_dir;
+ for (int i = 0; i < argc; i++) {
+ if (!strncmp(argv[i], kOutputSpecifier.c_str(),
+ kOutputSpecifier.length())) {
battre 2016/10/26 14:29:19 if (argv[i] == kOutputSpecifier) does the same thi
Ramin Halavati 2016/10/27 09:40:08 argv[i] parameter includes the specifier and the p
+ output_dir = argv[i] + kOutputSpecifier.length();
+ break;
+ }
+ }
+
+ if (output_dir == "") {
+ llvm::errs() << "Temporary files directory is not specified.";
+ return 1;
+ }
+
+ std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory =
+ clang::tooling::newFrontendActionFactory(&match_finder);
+ int result = tool.run(frontend_factory.get());
+
+ if (result != 0)
+ return result;
+
+ llvm::outs() << "==== BEGIN EDITS ====\n";
+ llvm::outs() << "==== END EDITS ====\n";
+
+ // For each call, if the parameter is not generated by a direct call to
+ // "DefineNetworkTrafficAnnotation", find the variable that holds the value.
+ for (auto& c : collector.calls) {
battre 2016/10/26 14:29:19 const auto& c?
Ramin Halavati 2016/10/27 09:40:09 the annotation field of iterators may be changed 5
+ if (!c.is_direct_call) {
+ // Find the variable.
+ bool variable_found = false;
+ for (auto& v : collector.variable_definitions)
battre 2016/10/26 14:29:19 {} const auto& ?
Ramin Halavati 2016/10/27 09:40:09 Done.
+ if (v.variable_reference == c.variable_reference) {
+ c.annotation = v.annotation;
+ c.transitive_parameter = v.transitive_parameter;
+ c.error = c.error + (c.error.length() ? "\n+" : "") + v.error;
+ variable_found = true;
+ break;
+ }
+ // Report a missing definition only when the lookup really failed and no
+ // earlier error is recorded; testing for an empty unique_id here would
+ // discard the error merged from the variable definition above.
+ if (!variable_found && c.error.empty())
+ c.error = "Variable not found.";
+ }
+
+ // If the function just receives the variable and passes it to another
+ // function, ignore it, otherwise write it to file.
+ if (!c.transitive_parameter) {
+ std::string s = c.location.file_name;
+ std::replace(s.begin(), s.end(), '/', '_');
+ std::replace(s.begin(), s.end(), '.', '_');
+ char file_name[1000];
+
+ snprintf(file_name, sizeof(file_name), "%s/%s(%i).txt",
+ output_dir.c_str(), s.c_str(), c.location.line_number);
battre 2016/10/26 14:29:18 std::string filename = output_dir + "/" + s + "("
Ramin Halavati 2016/10/27 09:40:09 Done.
+
+ FILE* file = fopen(file_name, "wt");
battre 2016/10/26 14:29:18 how about staying C++ here and using std::ofstream
Ramin Halavati 2016/10/27 09:40:08 Done.
+ if (file) {
+ fprintf(file, "%s\n", c.location.file_name.c_str());
+ fprintf(file, "%s\n", c.location.function_name.c_str());
+ fprintf(file, "%i\n", c.location.line_number);
+ fprintf(file, "%s\n", c.location.object_name.c_str());
+ fprintf(file, "%s\n", c.error.c_str());
+ fprintf(file, "%s\n", c.annotation.unique_id.c_str());
+ fprintf(file, "%s", c.annotation.text.c_str());
+ fclose(file);
+ } else {
+ llvm::errs() << "Could not write to file: " << file_name << " because "
+ << strerror(errno) << "\n";
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}

Powered by Google App Engine
This is Rietveld 408576698