Index: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
diff --git a/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..7bb565a3760b38884f69182c9abf7093397f38ed |
--- /dev/null |
+++ b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
@@ -0,0 +1,376 @@ |
+// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include <errno.h> |
+#include <stdio.h> |
+#include <string.h> |
+ |
+#include <algorithm> |
+#include <memory> |
+#include <string> |
+#include <vector> |
+ |
+#include "clang/ASTMatchers/ASTMatchFinder.h" |
+#include "clang/ASTMatchers/ASTMatchers.h" |
+#include "clang/Basic/SourceManager.h" |
+#include "clang/Frontend/FrontendActions.h" |
+#include "clang/Lex/Lexer.h" |
+#include "clang/Tooling/CommonOptionsParser.h" |
+#include "clang/Tooling/Refactoring.h" |
+#include "clang/Tooling/Tooling.h" |
+#include "llvm/Support/CommandLine.h" |
+//--> TODO: #include "../../traffic_annotation/traffic_annotation.pb.h" |
battre
2016/10/26 14:29:18
remove?
Ramin Halavati
2016/10/27 09:40:08
I think if we want to add protobuf to clang tool i
|
+ |
+using namespace clang::ast_matchers; |
+using clang::tooling::CommonOptionsParser; |
+ |
+namespace { |
+ |
+// Accumulates what the AST matchers find: every variable of the annotation |
+// tag type and every call to a function that takes such a tag as a |
+// parameter. Filled by TheCallback, read back in main(). |
battre
2016/10/26 14:29:19
instances of what? Can you add more comments to th
Ramin Halavati
2016/10/27 09:40:08
Done.
|
+typedef struct Collector { |
battre
2016/10/26 14:29:18
I think that in C++ we don't need to "typedef stru
Ramin Halavati
2016/10/27 09:40:10
Done.
|
+ // One recorded occurrence: either an annotation variable definition or a |
+ // call that passes an annotation. |
+ typedef struct Instance { |
+ Instance() { |
battre
2016/10/26 14:29:18
Can you use an initializer list?
Instance() : is_
Ramin Halavati
2016/10/27 09:40:08
Done.
|
+ is_direct_call = false; |
+ transitive_parameter = false; |
+ variable_reference = NULL; |
battre
2016/10/26 14:29:19
nullptr
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+ } |
+ |
+ // Information about where this annotation or call has happened. |
+ struct Location { |
+ std::string file_name; |
+ // Line of the occurrence. Only assigned by GetOccuranceLocation; the |
+ // Instance constructor above does not give it a value. |
+ int line_number; |
battre
2016/10/26 14:29:19
line number is not default initialized.
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+ // Name of the function including this instance. |
+ std::string function_name; |
+ // Name of the variable that contains annotation or the function called |
+ // with annotation. |
+ std::string object_name; |
+ }; |
+ |
+ // Annotation content: the two arguments of DefineNetworkTrafficAnnotation. |
battre
2016/10/26 14:29:18
Comments should end with period. (please also chec
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+ struct Annotation { |
+ std::string unique_id; |
+ std::string text; |
+ }; |
+ |
+ Location location; |
+ Annotation annotation; |
+ |
+ // Possible error (empty if no error). |
+ std::string error; |
+ // A reference to the variable containing annotation. Null if not available. |
+ const clang::NamedDecl* variable_reference; |
+ // True when the annotation argument of a call is itself a call expression; |
+ // false when the argument is passed through a variable. |
+ bool is_direct_call; |
+ // True when the variable is a function parameter without initializer, i.e. |
+ // the annotation is received from the caller of the covering function. |
+ bool transitive_parameter; |
+ } Instance; |
+ |
+ // variable_definitions is appended to by TheCallback::AddVariable, calls by |
+ // TheCallback::AddFunction. |
+ std::vector<Instance> variable_definitions, calls; |
battre
2016/10/26 14:29:19
one variable definition per line.
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+} Collector; |
+ |
+// Returns the function that includes the given clang::Decl. Declared ahead of |
+// its definition because the clang::Stmt overload calls it. |
+std::string GetCoveringFunction(const clang::Decl* token, |
+ const MatchFinder::MatchResult& result); |
+ |
+// Returns true when |token| is |name| itself or |name| qualified with the |
+// "net::" namespace prefix. |
+bool net_match(const std::string& token, const std::string& name) { |
+  if (token == name) |
+    return true; |
+  const std::string qualified_name = "net::" + name; |
+  return token == qualified_name; |
+} |
+ |
+// Returns the source text of a given statement (or subclass of clang::Stmt), |
+// read from the character data of |source_manager|. |
+std::string GetStmtText(const clang::Stmt* token, |
+ const clang::SourceManager& source_manager) { |
+ // NOTE(review): default-constructed language options are passed to the lexer |
+ // below, not those of the translation unit -- confirm this is intended. |
+ clang::LangOptions lopt; |
+ // Get text range. |
+ clang::SourceLocation start = token->getLocStart(); |
+ clang::SourceLocation end = token->getLocEnd(); |
+ |
+ // If it's a macro, go to definition. |
+ if (start.isMacroID()) |
+ start = source_manager.getSpellingLoc(start); |
+ if (end.isMacroID()) |
+ end = source_manager.getSpellingLoc(end); |
+ |
+ // Get the real end of the token. |
+ end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt); |
+ |
+ // Extract text: the characters between the two locations. |
+ std::string output = std::string(source_manager.getCharacterData(start), |
+ source_manager.getCharacterData(end) - |
+ source_manager.getCharacterData(start)); |
battre
2016/10/26 14:29:19
can you also write
std::string output(source_manag
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+ |
+ // Raw string? When only "R" was extracted, presumably just the prefix of a |
+ // raw string literal was captured; take the value from the StringLiteral |
+ // under the implicit cast instead. |
+ if (output == "R") |
battre
2016/10/26 14:29:19
{} around multiline blocks. Same in the next line.
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+ if (auto* c1 = clang::dyn_cast<clang::ImplicitCastExpr>(token)) |
+ if (const clang::StringLiteral* c2 = |
+ clang::dyn_cast<clang::StringLiteral>( |
+ c1->getSubExprAsWritten())) { |
+ output = c2->getString(); |
+ } |
+ |
+ return output; |
+} |
+ |
+// Fills |instance| from |call_expr|. When the callee text is |
+// "DefineNetworkTrafficAnnotation" (with or without "net::") and the call has |
+// exactly two arguments, their source text becomes the annotation's unique id |
+// and text and the error is cleared. Otherwise both annotation fields are |
+// emptied and the error is set. |
+void GetAnnotationText(const clang::CallExpr* call_expr, |
+                       const clang::SourceManager& source_manager, |
+                       Collector::Instance* instance) { |
+  const std::string callee_text = |
+      GetStmtText(call_expr->getCallee(), source_manager); |
+  const bool is_annotation_call = |
+      net_match(callee_text, "DefineNetworkTrafficAnnotation") && |
+      call_expr->getNumArgs() == 2; |
+ |
+  if (!is_annotation_call) { |
+    instance->annotation.unique_id.clear(); |
+    instance->annotation.text.clear(); |
+    instance->error = "Unexpected function."; |
+    return; |
+  } |
+ |
+  const clang::Expr* const* arguments = call_expr->getArgs(); |
+  instance->annotation.unique_id = GetStmtText(arguments[0], source_manager); |
+  instance->annotation.text = GetStmtText(arguments[1], source_manager); |
+  instance->error.clear(); |
+} |
+ |
+// Returns the qualified name of the function that includes the given |
+// clang::Stmt, or "Unknown" when the statement does not have exactly one |
+// parent or the parent is neither a statement nor a declaration. |
+std::string GetCoveringFunction(const clang::Stmt* token, |
+                                const MatchFinder::MatchResult& result) { |
+  auto parent_nodes = result.Context->getParents(*token); |
+  if (parent_nodes.size() != 1) |
+    return "Unknown"; |
+ |
+  // Walk one level up; the overload is picked by the kind of the parent. |
+  if (const clang::Stmt* parent_stmt = parent_nodes[0].get<clang::Stmt>()) |
+    return GetCoveringFunction(parent_stmt, result); |
+  if (const clang::Decl* parent_decl = parent_nodes[0].get<clang::Decl>()) |
+    return GetCoveringFunction(parent_decl, result); |
+  return "Unknown"; |
+} |
+ |
+// Returns the qualified name of the function that includes the given |
+// clang::Decl, or "Unknown" when no enclosing function is reached. |
+std::string GetCoveringFunction(const clang::Decl* token, |
+ const MatchFinder::MatchResult& result) { |
+ // A function declaration is its own covering function; recursion ends here. |
+ if (auto f = clang::dyn_cast<clang::FunctionDecl>(token)) |
+ return f->getQualifiedNameAsString(); |
+ |
+ // Otherwise continue with the parent, but only when there is exactly one. |
+ auto parents = result.Context->getParents(*token); |
+ if (parents.size() == 1) { |
battre
2016/10/26 14:29:18
Add a comment what having a single parent means?
Ramin Halavati
2016/10/27 09:40:09
I did not find a conclusive description on this an
|
+ if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) |
+ return GetCoveringFunction(s, result); |
+ else if (const clang::Decl* d = parents[0].get<clang::Decl>()) |
+ return GetCoveringFunction(d, result); |
+ } |
+ return "Unknown"; |
+} |
+ |
+// Stores the file name and the spelling line number of the start of |token| |
+// into |location|. T must provide getLocStart(); the other fields of |
+// |location| are left untouched. |
+template <class T> |
+void GetOccuranceLocation(const T* token, |
battre
2016/10/26 14:29:19
typo: occurr*e*nce
What's the difference between
Ramin Halavati
2016/10/27 09:40:09
Renamed it to GetLocation as it can be called gene
|
+ const MatchFinder::MatchResult& result, |
+ Collector::Instance::Location* location) { |
+ clang::SourceLocation source_location = token->getLocStart(); |
+ location->file_name = result.SourceManager->getFilename(source_location); |
+ location->line_number = |
+ result.SourceManager->getSpellingLineNumber(source_location); |
+} |
+ |
+// Match callback that records every node bound by the matchers into the |
+// Collector: nodes bound as "annotation_variable" are stored in |
+// |variable_definitions|, nodes bound as "user_function" in |calls|. |
+class TheCallback : public MatchFinder::MatchCallback { |
battre
2016/10/26 14:29:19
Description of the responsibility of this class?
Ramin Halavati
2016/10/27 09:40:08
Done.
|
+ public: |
+  explicit TheCallback(Collector* collector) : collector_(collector) {} |
battre
2016/10/26 14:29:18
~TheCallback() override = default;
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+ |
+  // Dispatches a match to AddVariable or AddFunction by the bound node name. |
+  virtual void run(const MatchFinder::MatchResult& result) override { |
+    if (const clang::VarDecl* var_decl = |
+            result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) |
battre
2016/10/26 14:29:19
also {} if the is statement spans more than one li
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+      AddVariable(var_decl, result); |
+    else if (const clang::CallExpr* call_expr = |
+                 result.Nodes.getNodeAs<clang::CallExpr>("user_function")) |
+      AddFunction(call_expr, result); |
+  } |
+ |
+  // Stores an annotation variable definition in the Collector. |
+  void AddVariable(const clang::VarDecl* var_decl, |
+                   const MatchFinder::MatchResult& result) { |
+    Collector::Instance instance; |
+ |
+    GetOccuranceLocation(var_decl, result, &instance.location); |
+    instance.location.object_name = var_decl->getQualifiedNameAsString(); |
+    instance.variable_reference = clang::dyn_cast<clang::NamedDecl>(var_decl); |
+ |
+    // Get annotation text. |
+    // If it doesn't have initialization, but it's a parameter, store it. |
+    if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() && |
+        !var_decl->isLocalVarDecl()) { |
+      instance.transitive_parameter = true; |
+    } else if (auto* init_expr = var_decl->getInit()) { |
+      if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr)) |
+        GetAnnotationText(call_expr, *result.SourceManager, &instance); |
+    } |
+    // If nothing is set, issue an error. |
+    if (!instance.transitive_parameter && |
+        instance.annotation.unique_id.empty() && instance.error.empty()) |
battre
2016/10/26 14:29:19
{}
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+      instance.error = "Could not resolve variable initialization."; |
+ |
+    collector_->variable_definitions.push_back(instance); |
+  } |
+ |
+  // Stores a function call that should be monitored. One entry is added to |
+  // |calls| for every parameter of the callee whose type is the annotation |
+  // tag. |
+  void AddFunction(const clang::CallExpr* call_expr, |
+                   const MatchFinder::MatchResult& result) { |
+    Collector::Instance instance; |
+ |
+    GetOccuranceLocation(call_expr, result, &instance.location); |
+    instance.location.function_name = |
+        GetCoveringFunction(clang::dyn_cast<clang::Stmt>(call_expr), result); |
+ |
+    // Get annotation text. |
+    const clang::FunctionDecl* function_decl = call_expr->getDirectCallee(); |
+    instance.location.object_name = function_decl->getQualifiedNameAsString(); |
+    unsigned params_count = function_decl->getNumParams(); |
+    unsigned args_count = call_expr->getNumArgs(); |
+ |
+    for (unsigned i = 0; i < params_count; i++) { |
+      if (net_match(clang::QualType::getAsString( |
+                        function_decl->getParamDecl(i)->getType().split()), |
+                    "NetworkTrafficAnnotationTag")) { |
+        if (i >= args_count) { |
+          instance.error = "Function missing annotation argument."; |
+        } else { |
+          // Get the argument. |
+          const clang::Expr* arg = call_expr->getArgs()[i]; |
+ |
+          // Is it a call to annotate function? |
+          if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) { |
+            instance.is_direct_call = true; |
+            // GetAnnotationText sets |error| itself (empty on success); it |
+            // must not be reset here or "Unexpected function." would be lost. |
+            GetAnnotationText(inner_call_expr, *result.SourceManager, |
+                              &instance); |
+          } else { |
+            // Then it's a variable. |
+            instance.is_direct_call = false; |
+            if (auto* pure_arg = |
+                    clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) { |
+              instance.variable_reference = pure_arg->getFoundDecl(); |
+              instance.error = ""; |
+            } else { |
+              instance.error = "Unknwon parameter type."; |
+            } |
+          } |
+        } |
+        collector_->calls.push_back(instance); |
+      } |
+    } |
+  } |
+ |
+ private: |
+  // Not owned. Receives the recorded instances. |
battre
2016/10/26 14:29:19
Delete the todo?
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+  Collector* collector_; |
+}; |
+ |
+// NetworkRequestAuditor owns the match callback and registers the AST |
+// matchers that feed it. |
battre
2016/10/26 14:29:18
Please don't use abbreviations.
Ramin Halavati
2016/10/27 09:40:08
Done. The class is totally removed and replaced by
|
+class NetworkRequestAuditor { |
battre
2016/10/26 14:29:19
Rename this to NetworkAnnotationTagExtractor? Then
Ramin Halavati
2016/10/27 09:40:09
The class is totally removed and replaced by a sin
|
+ public: |
+ explicit NetworkRequestAuditor(Collector* collector) |
+ : the_callback_(collector) {} |
+ |
+ // Adds both matchers to |match_finder|, each reporting to the callback owned |
+ // by this object. |
+ void SetupMatchers(MatchFinder* match_finder) { |
+ // Find variable declarations whose type is NetworkTrafficAnnotationTag, |
+ // spelled with or without the net:: prefix. |
battre
2016/10/26 14:29:19
.
What does "Find variables defined as Annotation
Ramin Halavati
2016/10/27 09:40:08
Comments changed.
|
+ match_finder->addMatcher( |
+ varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")), |
+ hasType(asString("net::NetworkTrafficAnnotationTag")))) |
+ .bind("annotation_variable"), |
+ &the_callback_); |
+ |
+ // Find calls to functions that have at least one parameter of type |
+ // NetworkTrafficAnnotationTag. |
+ match_finder->addMatcher( |
+ callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf( |
+ hasType(asString("NetworkTrafficAnnotationTag")), |
+ hasType(asString("net::NetworkTrafficAnnotationTag"))))))) |
+ .bind("user_function"), |
+ &the_callback_); |
+ } |
+ |
+ private: |
+ TheCallback the_callback_; |
+}; |
+ |
+} // namespace |
+ |
+// Tool entry point. Runs the matchers over the given sources and writes one |
+// text file per recorded, non-transitive call into the directory given by an |
+// "output_dir=<path>" command line argument. Returns 1 when the output |
+// directory is missing or a file cannot be opened, the status of the clang |
+// run when that is non-zero, and 0 otherwise. |
+int main(int argc, const char* argv[]) { |
+  llvm::cl::OptionCategory category("Network Request Audit Extractor Tool"); |
+  CommonOptionsParser options(argc, argv, category); |
+  clang::tooling::ClangTool tool(options.getCompilations(), |
+                                 options.getSourcePathList()); |
+ |
+  Collector collector; |
+  NetworkRequestAuditor auditor(&collector); |
+  MatchFinder match_finder; |
+  auditor.SetupMatchers(&match_finder); |
+ |
+  // Find output folder: the first argument that starts with the specifier. |
+  const std::string kOutputSpecifier("output_dir="); |
+  std::string output_dir; |
+  for (int i = 0; i < argc; i++) { |
+    if (!strncmp(argv[i], kOutputSpecifier.c_str(), |
+                 kOutputSpecifier.length())) { |
battre
2016/10/26 14:29:19
if (argv[i] == kOutputSpecifier) does the same thi
Ramin Halavati
2016/10/27 09:40:08
argv[i] parameter includes the specifier and the p
|
+      output_dir = argv[i] + kOutputSpecifier.length(); |
+      break; |
+    } |
+  } |
+ |
+  if (output_dir.empty()) { |
+    llvm::errs() << "Temporary files directory is not specified.\n"; |
+    return 1; |
+  } |
+ |
+  std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory = |
+      clang::tooling::newFrontendActionFactory(&match_finder); |
+  int result = tool.run(frontend_factory.get()); |
+ |
+  if (result != 0) |
+    return result; |
+ |
+  llvm::outs() << "==== BEGIN EDITS ====\n"; |
+  llvm::outs() << "==== END EDITS ====\n"; |
+ |
+  // For each call, if the parameter is not generated by a direct call to |
+  // "DefineNetworkTrafficAnnotation", find the variable that holds the value. |
+  for (auto& c : collector.calls) { |
battre
2016/10/26 14:29:19
const auto& c?
Ramin Halavati
2016/10/27 09:40:09
the annotation field of iterators may be changed 5
|
+    if (!c.is_direct_call) { |
+      // Find the variable. |
+      bool variable_found = false; |
+      for (const auto& v : collector.variable_definitions) { |
battre
2016/10/26 14:29:19
{}
const auto& ?
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+        if (v.variable_reference == c.variable_reference) { |
+          c.annotation = v.annotation; |
+          c.transitive_parameter = v.transitive_parameter; |
+          c.error = c.error + (c.error.length() ? "\n+" : "") + v.error; |
+          variable_found = true; |
+          break; |
+        } |
+      } |
+      // Report a missing variable only when the lookup really failed, and do |
+      // not overwrite an error that was already recorded for this call. |
+      if (!variable_found && c.error.empty()) |
+        c.error = "Variable not found."; |
+    } |
+ |
+    // If the function just receives the variable and passes it to another |
+    // function, ignore it, otherwise write it to file. |
+    if (!c.transitive_parameter) { |
+      std::string s = c.location.file_name; |
+      std::replace(s.begin(), s.end(), '/', '_'); |
+      std::replace(s.begin(), s.end(), '.', '_'); |
+ |
+      // Built as std::string so that long paths are never truncated. |
+      const std::string file_name = output_dir + "/" + s + "(" + |
+                                    std::to_string(c.location.line_number) + |
+                                    ").txt"; |
battre
2016/10/26 14:29:18
std::string filename = output_dir + "/" + s + "("
Ramin Halavati
2016/10/27 09:40:09
Done.
|
+ |
+      FILE* file = fopen(file_name.c_str(), "wt"); |
battre
2016/10/26 14:29:18
how about staying C++ here and using std::ofstream
Ramin Halavati
2016/10/27 09:40:08
Done.
|
+      if (file) { |
+        fprintf(file, "%s\n", c.location.file_name.c_str()); |
+        fprintf(file, "%s\n", c.location.function_name.c_str()); |
+        fprintf(file, "%i\n", c.location.line_number); |
+        fprintf(file, "%s\n", c.location.object_name.c_str()); |
+        fprintf(file, "%s\n", c.error.c_str()); |
+        fprintf(file, "%s\n", c.annotation.unique_id.c_str()); |
+        fprintf(file, "%s", c.annotation.text.c_str()); |
+        fclose(file); |
+      } else { |
+        llvm::errs() << "Could not write to file: " << file_name << " because " |
+                     << strerror(errno) << "\n"; |
+        return 1; |
+      } |
+    } |
+  } |
+ |
+  return 0; |
+} |