Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Side by Side Diff: tools/clang/traffic_annotation_extractor/traffic_annotation_extractor.cpp

Issue 2448133006: Tool added to extract network traffic annotations. (Closed)
Patch Set: Comments addressed. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // This clang tool finds all instances of net::NetworkTrafficAnnotationTag in
6 // given source code, extracts the location info and content of annotation tags
7 // (unique id and annotation text), and stores them in separate text files
8 // (per instance) in the given output directory. Please refer to README.md for
9 // build and usage instructions.
10
#include <errno.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <fstream>
#include <memory>
#include <string>
#include <vector>

#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Refactoring.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Support/CommandLine.h"
24
25 using namespace clang::ast_matchers;
26
27 namespace {
28
29 // An instance of network traffic annotation usage. This can be either
30 // a variable defined as NetworkTrafficAnnotationTag or a function that has
31 // a variable of this type as it's input parameter, i.e., it can contain either
32 // of the following two 'foo' items:
33 // NetworkTrafficAnnotationTag foo = ...
34 // void foo(NetworkTrafficAnnotationTag bar)
35 struct NetworkAnnotationInstance {
36 NetworkAnnotationInstance() : variable_reference(nullptr) {
37 flag.is_direct_call = false;
38 }
39
40 // Information about where this annotation or call has happened.
41 struct Location {
42 Location() : line_number(-1) {}
dcheng 2017/03/02 07:57:41 Consider using in-class member initializers. The c
Ramin Halavati 2017/04/06 13:32:29 Done.
43 std::string file_path;
44 int line_number;
45
46 // Name of the function including this instance. E.g., in the following
47 // code, |function_name| will be 'foo':
48 // void foo() { NetworkTrafficAnnotationTag bar = ...; }
49 std::string function_name;
50
51 // Name of the variable that contains annotation or the function called
52 // with annotation. E.g., in the following two code segments, |object_name|
53 // will be 'bar':
54 // void foo() { NetworkTrafficAnnotationTag bar = ...; }
55 // void foo() { bar(baz);} // baz is of type NetworkTrafficAnnotationTag.
56 std::string object_name;
57 };
58
59 // Annotation content. These are the parameters of a call to
60 // DefineNetworkTrafficAnnotation. The unique_id is an identifier for the
61 // annotation that has to be unique across the entire code base. The |text|
62 // stores a RAW string with the annotation that should be extracted.
63 struct Annotation {
64 std::string unique_id;
65 std::string text;
66 };
67
68 Location location;
69 Annotation annotation;
70
71 // Possible error message (empty if no error).
72 std::string error;
73
74 // A reference to the variable containing annotation. Null if not available.
75 const clang::NamedDecl* variable_reference;
76
77 union {
78 // When this structure is refering to a function with a parameter of type
dcheng 2017/03/02 07:57:41 Nit: refering => referring
Ramin Halavati 2017/04/06 13:32:29 Done.
79 // NetworkTrafficAnnotationTag, |is_direct_call| variable is true if the
80 // parameter is generated by a direct call to DefineNetworkTrafficAnnotation
81 // and is false when the paramter is a variable. For example, in the
82 // following code segment, it is true for function 'foo'' and false for
83 // function 'baz':
84 //
85 // foo(DefineNetworkTrafficAnnotation(...))
86 // NetworkTrafficAnnotationTag bar = DefineNetworkTrafficAnnotation(...)
87 // baz(bar);
88 bool is_direct_call;
89
90 // When this structure is refering to a variable, |transitive_parameter| is
91 // false if variable is defined in the same function and is true when it is
92 // passed to this function. For example, in the following code segment, it
93 // is true for bar, and false for baz.
94 //
95 // void foo(NetworkTrafficAnnotationTag bar) {
96 // NetworkTrafficAnnotationTag baz = DefineNetworkTrafficAnnotation(...);
97 // }
98 bool transitive_parameter;
99 } flag;
100 };
101
102 // Structure to collect instances of network traffic annotation usages.
103 struct Collector {
104 std::vector<NetworkAnnotationInstance> variable_definitions;
105 std::vector<NetworkAnnotationInstance> calls;
106 };
107
108 // Returns the function that includes the given token. For example, if the token
109 // is variable x in the code "void foo() { int x; ... }", it returns "foo".
110 std::string GetDeclarationCoveringFunction(const clang::Decl* token,
111 clang::ASTContext* context);
112
// Checks if a token matches a name, with or without net:: namespace, i.e.
// returns true when |token| equals |name| or equals "net::" followed by
// |name|. The comparison is done in place so that no temporary string is
// allocated per call.
bool StripNetNamespaceMatch(const std::string& token, const std::string& name) {
  static const char kNetPrefix[] = "net::";
  const std::string::size_type prefix_length = sizeof(kNetPrefix) - 1;

  if (token == name)
    return true;

  // The size check comes first so the substring comparisons below can never
  // be asked for a position past the end of |token|.
  return token.size() == prefix_length + name.size() &&
         token.compare(0, prefix_length, kNetPrefix) == 0 &&
         token.compare(prefix_length, std::string::npos, name) == 0;
}
117
118 // Returns the source code of a given token, like function name, variable name,
119 // string literal, etc.
120 std::string GetStmtText(const clang::Stmt* token,
121 const clang::SourceManager& source_manager) {
122 clang::LangOptions lopt;
dcheng 2017/03/02 07:57:41 Nit: use the LangOptions from ASTContext.
Ramin Halavati 2017/04/06 13:32:29 Done.
123 // Get text range.
124 clang::SourceLocation start = token->getLocStart();
125 clang::SourceLocation end = token->getLocEnd();
126
127 // If it's a macro, go to definition.
128 if (start.isMacroID())
129 start = source_manager.getSpellingLoc(start);
130 if (end.isMacroID())
131 end = source_manager.getSpellingLoc(end);
132
133 // Get the real end of the token.
134 end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt);
135
136 // Extract text.
137 std::string output(source_manager.getCharacterData(start),
138 source_manager.getCharacterData(end));
139
140 // If |token| is a raw string literal, the above code just returns the "R"
141 // part of it.
142 if (output != "R")
143 return output;
144
145 if (auto* literal = clang::dyn_cast<clang::StringLiteral>(token))
146 return literal->getString();
147
148 if (auto* implicit_cast = clang::dyn_cast<clang::ImplicitCastExpr>(token)) {
149 if (const clang::StringLiteral* implicit_literal =
150 clang::dyn_cast<clang::StringLiteral>(
151 implicit_cast->getSubExprAsWritten())) {
152 return implicit_literal->getString();
153 }
154 }
155
156 return output;
157 }
158
159 // Extracts unique id and annotation text of a call to
160 // "DefineNetworkTrafficAnnotation" function. Sets the error text if fails.
161 void GetAnnotationText(const clang::CallExpr* call_expr,
162 const clang::SourceManager& source_manager,
163 NetworkAnnotationInstance* instance) {
164 if (StripNetNamespaceMatch(
165 GetStmtText(call_expr->getCallee(), source_manager),
166 "DefineNetworkTrafficAnnotation") &&
167 call_expr->getNumArgs() == 2) {
168 instance->annotation.unique_id =
169 GetStmtText(call_expr->getArgs()[0], source_manager);
dcheng 2017/03/02 07:57:41 It'd be ideal to take advantage of matcher binding
Ramin Halavati 2017/04/06 13:32:29 Done.
170 instance->annotation.text =
171 GetStmtText(call_expr->getArgs()[1], source_manager);
172 instance->error = "";
173 } else {
174 instance->annotation.unique_id = "";
175 instance->annotation.text = "";
176 instance->error = "Unexpected function.";
177 }
178 }
179
180 // Returns the function that includes the given token. For example, if the token
181 // is the call to function bar() in the code "void foo() { bar(); }", it returns
182 // "foo".
183 std::string GetStatementCoveringFunction(const clang::Stmt* token,
184 clang::ASTContext* context) {
185 // Get the parent of |token| and return its covering function.
186 clang::ASTContext::DynTypedNodeList parents = context->getParents(*token);
187
188 // TODO: What exactly != 1 parent mean? I've not encountered any case that
189 // this value would be non-one.
190 if (parents.size() != 1) {
191 if (const clang::Stmt* s = parents[0].get<clang::Stmt>())
192 return GetStatementCoveringFunction(s, context);
193 else if (const clang::Decl* d = parents[0].get<clang::Decl>())
194 return GetDeclarationCoveringFunction(d, context);
195 }
196 return "Unknown";
197 }
198
199 // Returns the function that includes the given token. For example, if the token
200 // is variable x in the code "void foo() { int x; ... }", it returns "foo".
201 std::string GetDeclarationCoveringFunction(const clang::Decl* token,
202 clang::ASTContext* context) {
203 // If |token| is a function declaration, return its name.
204 if (auto f = clang::dyn_cast<clang::FunctionDecl>(token))
205 return f->getQualifiedNameAsString();
206
207 // As |token| is not a function declaration, get its parent and return its
208 // covering function.
209 clang::ASTContext::DynTypedNodeList parents = context->getParents(*token);
210
211 // TODO: What exactly != 1 parent mean? I've not encountered any case that
212 // this value would be non-one.
213 if (parents.size() == 1) {
214 if (const clang::Stmt* s = parents[0].get<clang::Stmt>())
215 return GetStatementCoveringFunction(s, context);
216 else if (const clang::Decl* d = parents[0].get<clang::Decl>())
217 return GetDeclarationCoveringFunction(d, context);
218 }
219 return "Unknown";
220 }
221
222 // Finds file name and line number of the given token and writes it into
223 // |location|.
224 template <class T>
225 void GetLocation(const T* token,
226 const clang::SourceManager& source_manager,
227 NetworkAnnotationInstance::Location* location) {
228 clang::SourceLocation source_location = token->getLocStart();
229 location->file_path = source_manager.getFilename(source_location);
230 location->line_number = source_manager.getSpellingLineNumber(source_location);
231 }
232
233 // This class implements the call back functions for AST Matchers. The matchers
234 // are defined in RunMatchers function and when a pattern is found there,
235 // the run function in this class is called back with information on the match
236 // location and description of the match pattern.
237 class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback {
238 public:
239 explicit NetworkAnnotationTagCallback(Collector* collector)
240 : collector_(collector) {}
241 ~NetworkAnnotationTagCallback() override = default;
242
243 // Is called on any pattern found by ASTMathers that are defined in RunMathers
244 // function.
245 virtual void run(const MatchFinder::MatchResult& result) override {
246 if (const clang::VarDecl* var_decl =
247 result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) {
248 AddVariable(var_decl, result);
249 } else if (const clang::CallExpr* call_expr =
250 result.Nodes.getNodeAs<clang::CallExpr>("user_function")) {
251 AddFunction(call_expr, result);
252 }
253 }
254
255 // Stores an annotation variable defintion.
256 void AddVariable(const clang::VarDecl* var_decl,
257 const MatchFinder::MatchResult& result) {
258 NetworkAnnotationInstance instance;
259
260 GetLocation(var_decl, *result.SourceManager, &instance.location);
261 instance.location.object_name = var_decl->getQualifiedNameAsString();
262 instance.variable_reference = clang::dyn_cast<clang::NamedDecl>(var_decl);
263
264 // Mark the instance as transitive parameter if it doesn't have
265 // initialization in the function where it is defined and it is passed as a
266 // parameter to the function. Otherwise, extract its content.
267 if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() &&
268 !var_decl->isLocalVarDecl()) {
269 instance.flag.transitive_parameter = true;
270 } else if (auto* init_expr = var_decl->getInit()) {
271 if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr))
272 GetAnnotationText(call_expr, *result.SourceManager, &instance);
273 }
274 // If nothing is set, issue an error.
275 if (!instance.flag.transitive_parameter &&
276 instance.annotation.unique_id.empty() && instance.error.empty()) {
277 instance.error = "Could not resolve variable initialization.";
278 }
279
280 collector_->variable_definitions.push_back(instance);
281 }
282
283 // Stores a function call that uses annotation variables.
284 void AddFunction(const clang::CallExpr* call_expr,
285 const MatchFinder::MatchResult& result) {
286 NetworkAnnotationInstance instance;
287
288 GetLocation(call_expr, *result.SourceManager, &instance.location);
289 instance.location.function_name = GetStatementCoveringFunction(
290 clang::dyn_cast<clang::Stmt>(call_expr), result.Context);
291 instance.location.object_name =
292 call_expr->getDirectCallee()->getQualifiedNameAsString();
293
294 // Get annotation text.
295 const clang::FunctionDecl* function_decl = call_expr->getDirectCallee();
296 unsigned params_count = function_decl->getNumParams();
297 unsigned args_count = call_expr->getNumArgs();
298
299 for (unsigned i = 0; i < params_count; i++) {
300 if (StripNetNamespaceMatch(
301 clang::QualType::getAsString(
302 function_decl->getParamDecl(i)->getType().split()),
303 "NetworkTrafficAnnotationTag")) {
304 if (i >= args_count) {
305 instance.error = "Function missing annotation argument.";
306 } else {
307 // Get the argument.
308 const clang::Expr* arg = call_expr->getArgs()[i];
309
310 // Is it a call to annotate function?
311 if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) {
312 instance.flag.is_direct_call = true;
313 GetAnnotationText(inner_call_expr, *result.SourceManager,
314 &instance);
315 instance.error = "";
316 } else {
317 // Then it's a variable.
318 instance.flag.is_direct_call = false;
319 if (auto* pure_arg =
320 clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) {
321 instance.variable_reference = pure_arg->getFoundDecl();
322 instance.error = "";
323 } else {
324 instance.error = "Unknwon parameter type.";
dcheng 2017/03/02 07:57:41 Nit: unknown
Ramin Halavati 2017/04/06 13:32:29 Done.
325 }
326 }
327 }
328 collector_->calls.push_back(instance);
329 }
330 }
331 }
332
333 private:
334 Collector* collector_;
335 };
336
337 // Sets up ASTMatchers and runs clang tool to populate collector. Returns the
338 // result of running the clang tool.
339 int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) {
340 NetworkAnnotationTagCallback call_back(collector);
341 MatchFinder match_finder;
342
343 // Set up a pattern to find variables defined with type
344 // [net::]NetworkTrafficAnnotationTag.
345 match_finder.addMatcher(
346 varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")),
dcheng 2017/03/02 07:57:41 Can you help me understand why we need both? Shoul
Ramin Halavati 2017/04/06 13:32:29 If the code has the line "using namespace net;", t
347 hasType(asString("net::NetworkTrafficAnnotationTag"))))
348 .bind("annotation_variable"),
349 &call_back);
350
351 // Set up a pattern to find functions that have a parameter of type
352 // [net::]NetworkTrafficAnnotationTag.
353 match_finder.addMatcher(
354 callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf(
355 hasType(asString("NetworkTrafficAnnotationTag")),
356 hasType(asString("net::NetworkTrafficAnnotationTag")))))))
357 .bind("user_function"),
358 &call_back);
359
360 std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory =
361 clang::tooling::newFrontendActionFactory(&match_finder);
362 return clang_tool->run(frontend_factory.get());
363 }
364
365 } // namespace
366
367 int main(int argc, const char* argv[]) {
368 // Find output directory.
369 if (argc < 5) {
370 llvm::errs() << "Output files directory is not specified.";
371 return -1;
372 }
373 std::string output_dir(argv[4]);
374
375 // Keep to consumed parameter from being passed to clang parser.
dcheng 2017/03/02 07:57:41 Maybe just make this a proper flag?
Ramin Halavati 2017/04/06 13:32:29 Done.
376 argc = 4;
377
378 llvm::cl::OptionCategory category("Network Request Audit Extractor Tool");
379 clang::tooling::CommonOptionsParser options(argc, argv, category);
380 clang::tooling::ClangTool tool(options.getCompilations(),
381 options.getSourcePathList());
382 Collector collector;
383
384 int result = RunMatchers(&tool, &collector);
385
386 if (result != 0)
387 return result;
388
389 // For each call, if the parameter is not generated by a direct call to
390 // "DefineNetworkTrafficAnnotation", find the variable that holds the value.
391 for (NetworkAnnotationInstance& call : collector.calls) {
392 if (!call.flag.is_direct_call) {
393 // Find the variable.
394 for (NetworkAnnotationInstance& var : collector.variable_definitions)
395 if (var.variable_reference == call.variable_reference) {
396 call.annotation = var.annotation;
397 call.flag.transitive_parameter = var.flag.transitive_parameter;
398 call.error =
399 call.error + (call.error.length() ? "\n+" : "") + var.error;
400 break;
401 }
402 if (!call.annotation.unique_id.length())
403 call.error = "Variable not found.";
404 }
405
406 // If the function just receives the variable and passes it to another
407 // function, ignore it, otherwise write it to file.
408 if (!call.flag.transitive_parameter) {
409 std::string s = call.location.file_path;
410 std::replace(s.begin(), s.end(), '/', '_');
411 std::replace(s.begin(), s.end(), '.', '_');
412 std::string file_path = output_dir + "/" + s + "(" +
413 std::to_string(call.location.line_number) +
414 ").txt";
415
416 std::ofstream output_file(file_path);
dcheng 2017/03/02 07:57:41 How are reads/writes to this file synchronized? If
Ramin Halavati 2017/04/06 13:32:29 Comment updated in refactored source: "For each ca
417 if (output_file.is_open()) {
418 output_file << call.location.file_path << "\n";
419 output_file << call.location.function_name << "\n";
420 output_file << call.location.line_number << "\n";
421 output_file << call.location.object_name << "\n";
422 output_file << call.error << "\n";
423 output_file << call.annotation.unique_id << "\n";
424 output_file << call.annotation.text << "\n";
425 output_file.close();
426 } else {
427 llvm::errs() << "Could not write to file: " << file_path << " because "
428 << strerror(errno) << "\n";
429 return 1;
430 }
431 }
432 }
433
434 return 0;
435 }
OLDNEW
« no previous file with comments | « tools/clang/traffic_annotation_extractor/README.md ('k') | tools/traffic_annotation/auditor/BUILD.gn » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698