OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // This clang tool finds all instances of net::NetworkTrafficAnnotationTag in | |
6 // given source code, extracts the location info and content of annotation tags | |
7 // (unique id and annotation text), and stores them in separate text files | |
8 // (per instance) in the given output directory. Please refer to README.md for | |
9 // build and usage instructions. | |
10 | |
11 #include <stdio.h> | |
12 #include <fstream> | |
13 #include <memory> | |
14 | |
15 #include "clang/ASTMatchers/ASTMatchFinder.h" | |
16 #include "clang/ASTMatchers/ASTMatchers.h" | |
17 #include "clang/Basic/SourceManager.h" | |
18 #include "clang/Frontend/FrontendActions.h" | |
19 #include "clang/Lex/Lexer.h" | |
20 #include "clang/Tooling/CommonOptionsParser.h" | |
21 #include "clang/Tooling/Refactoring.h" | |
22 #include "clang/Tooling/Tooling.h" | |
23 #include "llvm/Support/CommandLine.h" | |
24 | |
25 using namespace clang::ast_matchers; | |
26 | |
27 namespace { | |
28 | |
// An instance of network traffic annotation usage. This can be either
// a variable defined as NetworkTrafficAnnotationTag or a function that has
// a variable of this type as its input parameter, i.e., it can contain either
// of the following two 'foo' items:
//   NetworkTrafficAnnotationTag foo = ...
//   void foo(NetworkTrafficAnnotationTag bar)
struct NetworkAnnotationInstance {
  // Information about where this annotation or call has happened.
  struct Location {
    std::string file_path;

    // -1 until a real line number has been recorded.
    int line_number = -1;

    // Name of the function including this instance. E.g., in the following
    // code, |function_name| will be 'foo':
    //   void foo() { NetworkTrafficAnnotationTag bar = ...; }
    std::string function_name;

    // Name of the variable that contains annotation or the function called
    // with annotation. E.g., in the following two code segments, |object_name|
    // will be 'bar':
    //   void foo() { NetworkTrafficAnnotationTag bar = ...; }
    //   void foo() { bar(baz); }  // baz is of type NetworkTrafficAnnotationTag.
    std::string object_name;
  };

  // Annotation content. These are the parameters of a call to
  // DefineNetworkTrafficAnnotation. The unique_id is an identifier for the
  // annotation that has to be unique across the entire code base. The |text|
  // stores a RAW string with the annotation that should be extracted.
  struct Annotation {
    std::string unique_id;
    std::string text;
  };

  // Both flags are stored independently. They used to share storage in a
  // union, which made |transitive_parameter| read back as true for every
  // instance whose |is_direct_call| had been set, and vice versa.
  struct Flags {
    // When this structure is referring to a function with a parameter of type
    // NetworkTrafficAnnotationTag, |is_direct_call| is true if the parameter
    // is generated by a direct call to DefineNetworkTrafficAnnotation and is
    // false when the parameter is a variable. For example, in the following
    // code segment, it is true for function 'foo' and false for function
    // 'baz':
    //
    //   foo(DefineNetworkTrafficAnnotation(...))
    //   NetworkTrafficAnnotationTag bar = DefineNetworkTrafficAnnotation(...)
    //   baz(bar);
    bool is_direct_call = false;

    // When this structure is referring to a variable, |transitive_parameter|
    // is false if the variable is defined in the same function and is true
    // when it is passed to this function. For example, in the following code
    // segment, it is true for bar, and false for baz.
    //
    //   void foo(NetworkTrafficAnnotationTag bar) {
    //     NetworkTrafficAnnotationTag baz = DefineNetworkTrafficAnnotation(...);
    //   }
    bool transitive_parameter = false;
  };

  Location location;
  Annotation annotation;

  // Possible error message (empty if no error).
  std::string error;

  // A reference to the variable containing annotation. Null if not available.
  const clang::NamedDecl* variable_reference = nullptr;

  Flags flag;
};
101 | |
102 // Structure to collect instances of network traffic annotation usages. | |
103 struct Collector { | |
104 std::vector<NetworkAnnotationInstance> variable_definitions; | |
105 std::vector<NetworkAnnotationInstance> calls; | |
106 }; | |
107 | |
108 // Returns the function that includes the given token. For example, if the token | |
109 // is variable x in the code "void foo() { int x; ... }", it returns "foo". | |
110 std::string GetDeclarationCoveringFunction(const clang::Decl* token, | |
111 clang::ASTContext* context); | |
112 | |
// Returns true when |token| is exactly |name|, or |name| qualified with the
// "net::" namespace prefix.
bool StripNetNamespaceMatch(const std::string& token, const std::string& name) {
  if (token == name)
    return true;

  const std::string qualified_name = "net::" + name;
  return token == qualified_name;
}
117 | |
118 // Returns the source code of a given token, like function name, variable name, | |
119 // string literal, etc. | |
120 std::string GetStmtText(const clang::Stmt* token, | |
121 const clang::SourceManager& source_manager) { | |
122 clang::LangOptions lopt; | |
dcheng
2017/03/02 07:57:41
Nit: use the LangOptions from ASTContext.
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
123 // Get text range. | |
124 clang::SourceLocation start = token->getLocStart(); | |
125 clang::SourceLocation end = token->getLocEnd(); | |
126 | |
127 // If it's a macro, go to definition. | |
128 if (start.isMacroID()) | |
129 start = source_manager.getSpellingLoc(start); | |
130 if (end.isMacroID()) | |
131 end = source_manager.getSpellingLoc(end); | |
132 | |
133 // Get the real end of the token. | |
134 end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt); | |
135 | |
136 // Extract text. | |
137 std::string output(source_manager.getCharacterData(start), | |
138 source_manager.getCharacterData(end)); | |
139 | |
140 // If |token| is a raw string literal, the above code just returns the "R" | |
141 // part of it. | |
142 if (output != "R") | |
143 return output; | |
144 | |
145 if (auto* literal = clang::dyn_cast<clang::StringLiteral>(token)) | |
146 return literal->getString(); | |
147 | |
148 if (auto* implicit_cast = clang::dyn_cast<clang::ImplicitCastExpr>(token)) { | |
149 if (const clang::StringLiteral* implicit_literal = | |
150 clang::dyn_cast<clang::StringLiteral>( | |
151 implicit_cast->getSubExprAsWritten())) { | |
152 return implicit_literal->getString(); | |
153 } | |
154 } | |
155 | |
156 return output; | |
157 } | |
158 | |
159 // Extracts unique id and annotation text of a call to | |
160 // "DefineNetworkTrafficAnnotation" function. Sets the error text if fails. | |
161 void GetAnnotationText(const clang::CallExpr* call_expr, | |
162 const clang::SourceManager& source_manager, | |
163 NetworkAnnotationInstance* instance) { | |
164 if (StripNetNamespaceMatch( | |
165 GetStmtText(call_expr->getCallee(), source_manager), | |
166 "DefineNetworkTrafficAnnotation") && | |
167 call_expr->getNumArgs() == 2) { | |
168 instance->annotation.unique_id = | |
169 GetStmtText(call_expr->getArgs()[0], source_manager); | |
dcheng
2017/03/02 07:57:41
It'd be ideal to take advantage of matcher binding
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
170 instance->annotation.text = | |
171 GetStmtText(call_expr->getArgs()[1], source_manager); | |
172 instance->error = ""; | |
173 } else { | |
174 instance->annotation.unique_id = ""; | |
175 instance->annotation.text = ""; | |
176 instance->error = "Unexpected function."; | |
177 } | |
178 } | |
179 | |
180 // Returns the function that includes the given token. For example, if the token | |
181 // is the call to function bar() in the code "void foo() { bar(); }", it returns | |
182 // "foo". | |
183 std::string GetStatementCoveringFunction(const clang::Stmt* token, | |
184 clang::ASTContext* context) { | |
185 // Get the parent of |token| and return its covering function. | |
186 clang::ASTContext::DynTypedNodeList parents = context->getParents(*token); | |
187 | |
188 // TODO: What exactly != 1 parent mean? I've not encountered any case that | |
189 // this value would be non-one. | |
190 if (parents.size() != 1) { | |
191 if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) | |
192 return GetStatementCoveringFunction(s, context); | |
193 else if (const clang::Decl* d = parents[0].get<clang::Decl>()) | |
194 return GetDeclarationCoveringFunction(d, context); | |
195 } | |
196 return "Unknown"; | |
197 } | |
198 | |
199 // Returns the function that includes the given token. For example, if the token | |
200 // is variable x in the code "void foo() { int x; ... }", it returns "foo". | |
201 std::string GetDeclarationCoveringFunction(const clang::Decl* token, | |
202 clang::ASTContext* context) { | |
203 // If |token| is a function declaration, return its name. | |
204 if (auto f = clang::dyn_cast<clang::FunctionDecl>(token)) | |
205 return f->getQualifiedNameAsString(); | |
206 | |
207 // As |token| is not a function declaration, get its parent and return its | |
208 // covering function. | |
209 clang::ASTContext::DynTypedNodeList parents = context->getParents(*token); | |
210 | |
211 // TODO: What exactly != 1 parent mean? I've not encountered any case that | |
212 // this value would be non-one. | |
213 if (parents.size() == 1) { | |
214 if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) | |
215 return GetStatementCoveringFunction(s, context); | |
216 else if (const clang::Decl* d = parents[0].get<clang::Decl>()) | |
217 return GetDeclarationCoveringFunction(d, context); | |
218 } | |
219 return "Unknown"; | |
220 } | |
221 | |
222 // Finds file name and line number of the given token and writes it into | |
223 // |location|. | |
224 template <class T> | |
225 void GetLocation(const T* token, | |
226 const clang::SourceManager& source_manager, | |
227 NetworkAnnotationInstance::Location* location) { | |
228 clang::SourceLocation source_location = token->getLocStart(); | |
229 location->file_path = source_manager.getFilename(source_location); | |
230 location->line_number = source_manager.getSpellingLineNumber(source_location); | |
231 } | |
232 | |
233 // This class implements the call back functions for AST Matchers. The matchers | |
234 // are defined in RunMatchers function and when a pattern is found there, | |
235 // the run function in this class is called back with information on the match | |
236 // location and description of the match pattern. | |
237 class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback { | |
238 public: | |
239 explicit NetworkAnnotationTagCallback(Collector* collector) | |
240 : collector_(collector) {} | |
241 ~NetworkAnnotationTagCallback() override = default; | |
242 | |
243 // Is called on any pattern found by ASTMathers that are defined in RunMathers | |
244 // function. | |
245 virtual void run(const MatchFinder::MatchResult& result) override { | |
246 if (const clang::VarDecl* var_decl = | |
247 result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) { | |
248 AddVariable(var_decl, result); | |
249 } else if (const clang::CallExpr* call_expr = | |
250 result.Nodes.getNodeAs<clang::CallExpr>("user_function")) { | |
251 AddFunction(call_expr, result); | |
252 } | |
253 } | |
254 | |
255 // Stores an annotation variable defintion. | |
256 void AddVariable(const clang::VarDecl* var_decl, | |
257 const MatchFinder::MatchResult& result) { | |
258 NetworkAnnotationInstance instance; | |
259 | |
260 GetLocation(var_decl, *result.SourceManager, &instance.location); | |
261 instance.location.object_name = var_decl->getQualifiedNameAsString(); | |
262 instance.variable_reference = clang::dyn_cast<clang::NamedDecl>(var_decl); | |
263 | |
264 // Mark the instance as transitive parameter if it doesn't have | |
265 // initialization in the function where it is defined and it is passed as a | |
266 // parameter to the function. Otherwise, extract its content. | |
267 if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() && | |
268 !var_decl->isLocalVarDecl()) { | |
269 instance.flag.transitive_parameter = true; | |
270 } else if (auto* init_expr = var_decl->getInit()) { | |
271 if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr)) | |
272 GetAnnotationText(call_expr, *result.SourceManager, &instance); | |
273 } | |
274 // If nothing is set, issue an error. | |
275 if (!instance.flag.transitive_parameter && | |
276 instance.annotation.unique_id.empty() && instance.error.empty()) { | |
277 instance.error = "Could not resolve variable initialization."; | |
278 } | |
279 | |
280 collector_->variable_definitions.push_back(instance); | |
281 } | |
282 | |
283 // Stores a function call that uses annotation variables. | |
284 void AddFunction(const clang::CallExpr* call_expr, | |
285 const MatchFinder::MatchResult& result) { | |
286 NetworkAnnotationInstance instance; | |
287 | |
288 GetLocation(call_expr, *result.SourceManager, &instance.location); | |
289 instance.location.function_name = GetStatementCoveringFunction( | |
290 clang::dyn_cast<clang::Stmt>(call_expr), result.Context); | |
291 instance.location.object_name = | |
292 call_expr->getDirectCallee()->getQualifiedNameAsString(); | |
293 | |
294 // Get annotation text. | |
295 const clang::FunctionDecl* function_decl = call_expr->getDirectCallee(); | |
296 unsigned params_count = function_decl->getNumParams(); | |
297 unsigned args_count = call_expr->getNumArgs(); | |
298 | |
299 for (unsigned i = 0; i < params_count; i++) { | |
300 if (StripNetNamespaceMatch( | |
301 clang::QualType::getAsString( | |
302 function_decl->getParamDecl(i)->getType().split()), | |
303 "NetworkTrafficAnnotationTag")) { | |
304 if (i >= args_count) { | |
305 instance.error = "Function missing annotation argument."; | |
306 } else { | |
307 // Get the argument. | |
308 const clang::Expr* arg = call_expr->getArgs()[i]; | |
309 | |
310 // Is it a call to annotate function? | |
311 if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) { | |
312 instance.flag.is_direct_call = true; | |
313 GetAnnotationText(inner_call_expr, *result.SourceManager, | |
314 &instance); | |
315 instance.error = ""; | |
316 } else { | |
317 // Then it's a variable. | |
318 instance.flag.is_direct_call = false; | |
319 if (auto* pure_arg = | |
320 clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) { | |
321 instance.variable_reference = pure_arg->getFoundDecl(); | |
322 instance.error = ""; | |
323 } else { | |
324 instance.error = "Unknwon parameter type."; | |
dcheng
2017/03/02 07:57:41
Nit: unknown
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
325 } | |
326 } | |
327 } | |
328 collector_->calls.push_back(instance); | |
329 } | |
330 } | |
331 } | |
332 | |
333 private: | |
334 Collector* collector_; | |
335 }; | |
336 | |
337 // Sets up ASTMatchers and runs clang tool to populate collector. Returns the | |
338 // result of running the clang tool. | |
339 int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) { | |
340 NetworkAnnotationTagCallback call_back(collector); | |
341 MatchFinder match_finder; | |
342 | |
343 // Set up a pattern to find variables defined with type | |
344 // [net::]NetworkTrafficAnnotationTag. | |
345 match_finder.addMatcher( | |
346 varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")), | |
dcheng
2017/03/02 07:57:41
Can you help me understand why we need both? Shoul
Ramin Halavati
2017/04/06 13:32:29
If the code has the line "using namespace net;", t
| |
347 hasType(asString("net::NetworkTrafficAnnotationTag")))) | |
348 .bind("annotation_variable"), | |
349 &call_back); | |
350 | |
351 // Set up a pattern to find functions that have a parameter of type | |
352 // [net::]NetworkTrafficAnnotationTag. | |
353 match_finder.addMatcher( | |
354 callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf( | |
355 hasType(asString("NetworkTrafficAnnotationTag")), | |
356 hasType(asString("net::NetworkTrafficAnnotationTag"))))))) | |
357 .bind("user_function"), | |
358 &call_back); | |
359 | |
360 std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory = | |
361 clang::tooling::newFrontendActionFactory(&match_finder); | |
362 return clang_tool->run(frontend_factory.get()); | |
363 } | |
364 | |
365 } // namespace | |
366 | |
367 int main(int argc, const char* argv[]) { | |
368 // Find output directory. | |
369 if (argc < 5) { | |
370 llvm::errs() << "Output files directory is not specified."; | |
371 return -1; | |
372 } | |
373 std::string output_dir(argv[4]); | |
374 | |
375 // Keep to consumed parameter from being passed to clang parser. | |
dcheng
2017/03/02 07:57:41
Maybe just make this a proper flag?
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
376 argc = 4; | |
377 | |
378 llvm::cl::OptionCategory category("Network Request Audit Extractor Tool"); | |
379 clang::tooling::CommonOptionsParser options(argc, argv, category); | |
380 clang::tooling::ClangTool tool(options.getCompilations(), | |
381 options.getSourcePathList()); | |
382 Collector collector; | |
383 | |
384 int result = RunMatchers(&tool, &collector); | |
385 | |
386 if (result != 0) | |
387 return result; | |
388 | |
389 // For each call, if the parameter is not generated by a direct call to | |
390 // "DefineNetworkTrafficAnnotation", find the variable that holds the value. | |
391 for (NetworkAnnotationInstance& call : collector.calls) { | |
392 if (!call.flag.is_direct_call) { | |
393 // Find the variable. | |
394 for (NetworkAnnotationInstance& var : collector.variable_definitions) | |
395 if (var.variable_reference == call.variable_reference) { | |
396 call.annotation = var.annotation; | |
397 call.flag.transitive_parameter = var.flag.transitive_parameter; | |
398 call.error = | |
399 call.error + (call.error.length() ? "\n+" : "") + var.error; | |
400 break; | |
401 } | |
402 if (!call.annotation.unique_id.length()) | |
403 call.error = "Variable not found."; | |
404 } | |
405 | |
406 // If the function just receives the variable and passes it to another | |
407 // function, ignore it, otherwise write it to file. | |
408 if (!call.flag.transitive_parameter) { | |
409 std::string s = call.location.file_path; | |
410 std::replace(s.begin(), s.end(), '/', '_'); | |
411 std::replace(s.begin(), s.end(), '.', '_'); | |
412 std::string file_path = output_dir + "/" + s + "(" + | |
413 std::to_string(call.location.line_number) + | |
414 ").txt"; | |
415 | |
416 std::ofstream output_file(file_path); | |
dcheng
2017/03/02 07:57:41
How are reads/writes to this file synchronized? If
Ramin Halavati
2017/04/06 13:32:29
Comment updated in refactored source:
"For each ca
| |
417 if (output_file.is_open()) { | |
418 output_file << call.location.file_path << "\n"; | |
419 output_file << call.location.function_name << "\n"; | |
420 output_file << call.location.line_number << "\n"; | |
421 output_file << call.location.object_name << "\n"; | |
422 output_file << call.error << "\n"; | |
423 output_file << call.annotation.unique_id << "\n"; | |
424 output_file << call.annotation.text << "\n"; | |
425 output_file.close(); | |
426 } else { | |
427 llvm::errs() << "Could not write to file: " << file_path << " because " | |
428 << strerror(errno) << "\n"; | |
429 return 1; | |
430 } | |
431 } | |
432 } | |
433 | |
434 return 0; | |
435 } | |
OLD | NEW |