Index: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
diff --git a/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..b96d971be27eefe5086987f8c67ba678a0335847 |
--- /dev/null |
+++ b/tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp |
@@ -0,0 +1,245 @@ |
+// Copyright 2017 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+// This clang tool finds all instances of net::DefineNetworkTrafficAnnotation in |
+// given source code, extracts the location info and content of annotation tags |
+// (unique id and annotation text), and stores them in separate text files |
+// (per instance) in the given output directory. Please refer to README.md for |
+// build and usage instructions. |
+ |
+#include <stdio.h> |
+ |
+#include <algorithm> |
+#include <fstream> |
+#include <memory> |
+#include <string> |
+#include <vector> |
+ |
+#include "clang/ASTMatchers/ASTMatchFinder.h" |
+#include "clang/ASTMatchers/ASTMatchers.h" |
+#include "clang/Basic/SourceManager.h" |
+#include "clang/Frontend/FrontendActions.h" |
+#include "clang/Lex/Lexer.h" |
+#include "clang/Tooling/CommonOptionsParser.h" |
+#include "clang/Tooling/Refactoring.h" |
+#include "clang/Tooling/Tooling.h" |
+#include "llvm/Support/CommandLine.h" |
+ |
+using namespace clang::ast_matchers; |
+ |
+namespace { |
+ |
+// Describes one call to net::DefineNetworkTrafficAnnotation: where the call is |
+// located and which arguments it was given. |
+struct NetworkAnnotationInstance { |
+  // Position of the call in the source code. |
+  struct Location { |
+    // Path of the file that contains the call. |
+    std::string file_path; |
+    // Line of the call; defaults to -1 until a value is assigned. |
+    int line_number{-1}; |
+ |
+    // Name of the function that makes the call. For instance, for |
+    //   void foo() { NetworkTrafficAnnotationTag bar = |
+    //                    net::DefineNetworkTrafficAnnotation(...); } |
+    // |function_name| is 'foo'. |
+    std::string function_name; |
+  }; |
+ |
+  // The arguments passed to net::DefineNetworkTrafficAnnotation. |unique_id| |
+  // identifies the annotation and has to be unique across the entire code |
+  // base; |text| is the raw annotation string that should be extracted. |
+  struct Annotation { |
+    std::string unique_id; |
+    std::string text; |
+  }; |
+ |
+  Location location; |
+  Annotation annotation; |
+}; |
+ |
+using Collector = std::vector<NetworkAnnotationInstance>; |
+ |
+// Returns the qualified name of the function that contains the given |
+// declaration, or "Unknown" if no enclosing function is found. For example, if |
+// |token| is variable x in the code "void foo() { int x; ... }", it returns |
+// "foo". Declared ahead of its definition because it and |
+// GetStatementCoveringFunction call each other. |
+// The name is returned by value: getQualifiedNameAsString() yields a temporary |
+// std::string, so handing it out as an llvm::StringRef would dangle. |
+std::string GetDeclarationCoveringFunction(const clang::Decl* token, |
dcheng
2017/04/11 07:26:42
It's usually nicer to express things as matchers w
Ramin Halavati
2017/04/11 09:15:38
WOW! That was wonderful.
|
+                                           clang::ASTContext* context); |
+ |
+// Returns the qualified name of the function that contains the given |
+// statement, or "Unknown" if no enclosing function is found. For example, if |
+// |token| is the call to function bar() in the code "void foo() { bar(); }", |
+// it returns "foo". |
+std::string GetStatementCoveringFunction(const clang::Stmt* token, |
+                                         clang::ASTContext* context) { |
+  // A statement is never a function itself, so go straight to its parent. |
+  // Only the first reported parent is followed. |
+  clang::ASTContext::DynTypedNodeList parents = context->getParents(*token); |
+ |
+  // If a parent is found, continue the search from it; the parent may be |
+  // either a statement or a declaration. |
+  if (parents.size()) { |
+    if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) |
+      return GetStatementCoveringFunction(s, context); |
+    if (const clang::Decl* d = parents[0].get<clang::Decl>()) |
+      return GetDeclarationCoveringFunction(d, context); |
+  } |
+ |
+  return "Unknown"; |
+} |
+ |
+// See the forward declaration above: walks up from |token| until a function |
+// declaration is reached and returns its qualified name by value. |
+std::string GetDeclarationCoveringFunction(const clang::Decl* token, |
+                                           clang::ASTContext* context) { |
+  // If |token| is a function declaration, the search ends here. |
+  if (const auto* f = clang::dyn_cast<clang::FunctionDecl>(token)) |
+    return f->getQualifiedNameAsString(); |
+ |
+  // As |token| is not a function declaration, get its parent and return its |
+  // covering function. Only the first reported parent is followed. |
+  clang::ASTContext::DynTypedNodeList parents = context->getParents(*token); |
+ |
+  if (parents.size()) { |
+    if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) |
+      return GetStatementCoveringFunction(s, context); |
+    if (const clang::Decl* d = parents[0].get<clang::Decl>()) |
+      return GetDeclarationCoveringFunction(d, context); |
+  } |
+ |
+  return "Unknown"; |
dcheng
2017/04/11 07:26:42
Btw, no need to explicit construct StringRef here.
Ramin Halavati
2017/04/11 09:15:38
Done.
|
+} |
+ |
+// Match callback for the AST matchers that are registered in the RunMatchers |
+// function. Whenever the registered pattern is found, run() is called back |
+// with the matched nodes; it records the location and the annotation arguments |
+// of the match in the collector handed to the constructor. |
+class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback { |
+ public: |
+  explicit NetworkAnnotationTagCallback(Collector* collector) |
+      : collector_(collector) {} |
+  ~NetworkAnnotationTagCallback() override = default; |
+ |
+  // Called for every pattern found by the ASTMatchers that are defined in the |
+  // RunMatchers function. |
+  void run(const MatchFinder::MatchResult& result) override { |
+    const auto* call_expr = |
+        result.Nodes.getNodeAs<clang::CallExpr>("definition_function"); |
+    const auto* unique_id = |
+        result.Nodes.getNodeAs<clang::StringLiteral>("unique_id"); |
+    const auto* annotation_text = |
+        result.Nodes.getNodeAs<clang::StringLiteral>("annotation_text"); |
+ |
+    // Nothing is recorded unless all three nodes are available. |
+    if (!call_expr || !unique_id || !annotation_text) |
dcheng
2017/04/11 07:26:42
Let's assert since we should expect all these to b
Ramin Halavati
2017/04/11 09:15:38
Done.
|
+      return; |
+ |
+    NetworkAnnotationInstance instance; |
+    instance.annotation.unique_id = unique_id->getString(); |
+    instance.annotation.text = annotation_text->getString(); |
+ |
+    // The call expression viewed through its Stmt base class. |
+    const clang::Stmt* call_stmt = call_expr; |
+    NetworkAnnotationInstance::Location& location = instance.location; |
+ |
+    // Get annotation location. |
+    SetFileAndLine(result, call_expr->getLocStart(), &location); |
+    location.function_name = |
+        GetStatementCoveringFunction(call_stmt, result.Context); |
+ |
+    // If DefineNetworkTrafficAnnotation is used in form of a macro, an empty |
+    // file_path is returned. In that case the location of the parent node is |
+    // used instead, with a placeholder path if there is no parent statement. |
+    if (location.file_path.empty()) { |
dcheng
2017/04/11 07:26:42
Can we do something with getImmediateMacroCallerLo
Ramin Halavati
2017/04/11 09:15:38
Done.
|
+      location.file_path = "unknown_file_path"; |
+      clang::ASTContext::DynTypedNodeList parents = |
+          result.Context->getParents(*call_stmt); |
+      if (parents.size()) { |
+        if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) |
+          SetFileAndLine(result, s->getLocStart(), &location); |
+      } |
+    } |
+ |
+    // Remove leading "../"s from file path. |
+    while (location.file_path.length() > 3 && |
+           location.file_path.compare(0, 3, "../") == 0) { |
dcheng
2017/04/11 07:26:42
I would be cautious of doing this, as Windows will
Ramin Halavati
2017/04/11 09:15:38
I did the replacement, but I am not sure if I got
|
+      location.file_path.erase(0, 3); |
+    } |
+ |
+    collector_->push_back(instance); |
+  } |
+ |
+ private: |
+  // Stores the file name and the spelling line number of |source_location| |
+  // into |location|. |
+  static void SetFileAndLine(const MatchFinder::MatchResult& result, |
+                             clang::SourceLocation source_location, |
+                             NetworkAnnotationInstance::Location* location) { |
+    location->file_path = result.SourceManager->getFilename(source_location); |
+    location->line_number = |
+        result.SourceManager->getSpellingLineNumber(source_location); |
+  } |
+ |
+  Collector* collector_; |
+}; |
+ |
+// Registers the annotation matcher, runs |clang_tool| with it so that every |
+// match is reported to a NetworkAnnotationTagCallback writing into |
+// |collector|, and returns the result of running the clang tool. |
+int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) { |
+  NetworkAnnotationTagCallback call_back(collector); |
+  MatchFinder match_finder; |
+ |
+  // The function being looked for: [net::]DefineNetworkTrafficAnnotation. |
+  const auto annotation_function = functionDecl( |
+      anyOf(hasName("DefineNetworkTrafficAnnotation"), |
+            hasName("net::DefineNetworkTrafficAnnotation"))); |
+ |
+  // A call to that function whose first two arguments are string literals. |
+  // The bound names are the ones the callback looks up. |
+  const auto annotation_call = |
+      callExpr(hasDeclaration(annotation_function), |
+               hasArgument(0, stringLiteral().bind("unique_id")), |
+               hasArgument(1, stringLiteral().bind("annotation_text"))) |
+          .bind("definition_function"); |
+ |
+  match_finder.addMatcher(annotation_call, &call_back); |
+ |
+  const std::unique_ptr<clang::tooling::FrontendActionFactory> factory = |
+      clang::tooling::newFrontendActionFactory(&match_finder); |
+  return clang_tool->run(factory.get()); |
+} |
+ |
+} // namespace |
+ |
+// Option category handed to CommonOptionsParser in main(). |
+static llvm::cl::OptionCategory ToolCategory( |
+ "traffic_annotation_extractor: Extract traffic annotation texts"); |
+// Extra help text: the standard help message of CommonOptionsParser. |
+static llvm::cl::extrahelp CommonHelp( |
+ clang::tooling::CommonOptionsParser::HelpMessage); |
+// Extra help text describing the tool-specific -output-directory option. |
+static llvm::cl::extrahelp MoreHelp( |
+ "\n -output-directory=<string> - The directory in which the output " |
+ "files are written.\n"); |
+// Directory that main() prefixes to the name of every output file. |
+// NOTE(review): this option is not attached to ToolCategory; presumably that |
+// is why it is described manually in MoreHelp -- confirm. |
+static llvm::cl::opt<std::string> OutputDirectory( |
+ "output-directory", |
+ llvm::cl::desc("The directory in which output files are written.\n")); |
+ |
+// Entry point: parses the command line, runs the matchers over the given |
+// sources and writes one text file per collected annotation into the output |
+// directory. Returns the clang tool's result if it is non-zero, 1 if an output |
+// file cannot be written, and 0 otherwise. |
+int main(int argc, const char* argv[]) { |
+  clang::tooling::CommonOptionsParser options(argc, argv, ToolCategory); |
+  clang::tooling::ClangTool tool(options.getCompilations(), |
+                                 options.getSourcePathList()); |
+  Collector collector; |
+ |
+  int result = RunMatchers(&tool, &collector); |
+ |
+  if (result != 0) |
+    return result; |
+ |
+  // For each call to "DefineNetworkTrafficAnnotation", write annotation text |
dcheng
2017/04/11 07:26:42
Btw, I'm curious if we still need to do this: for
Ramin Halavati
2017/04/11 09:15:38
Thanks, Done!
|
+  // and relevant meta data into a separate file. The filename is uniquely |
+  // generated using the file path and filename of the code including the call |
+  // and its line number. |
dcheng
2017/04/11 07:26:42
Nit: it's -> its
Ramin Halavati
2017/04/11 09:15:38
Done.
|
+  for (const NetworkAnnotationInstance& call : collector) { |
+    // Flatten the source path into a single file name component. |
+    std::string s = call.location.file_path; |
+    std::replace(s.begin(), s.end(), '/', '_'); |
+    std::replace(s.begin(), s.end(), '.', '_'); |
+    const std::string file_path = OutputDirectory.getValue() + "/" + s + "(" + |
+                                  std::to_string(call.location.line_number) + |
+                                  ").txt"; |
+ |
+    // Output on a stream that failed to open is a no-op, so a single check of |
+    // the stream state after close() covers open, write and flush failures. |
+    std::ofstream output_file(file_path); |
+    output_file << call.location.file_path << "\n"; |
+    output_file << call.location.function_name << "\n"; |
+    output_file << call.location.line_number << "\n"; |
+    output_file << call.annotation.unique_id << "\n"; |
+    output_file << call.annotation.text << "\n"; |
+    output_file.close(); |
+ |
+    if (output_file.fail()) { |
+      llvm::errs() << "Could not write to file: " << file_path << "\n"; |
+      return 1; |
+    } |
+  } |
+ |
+  return 0; |
+} |