OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // This clang tool finds all instances of net::NetworkTrafficAnnotationTag in | |
6 // given source code, extracts the location info and content of annotation tags | |
7 // (unique id and annotation text), and stores them in separate text files | |
8 // (per instance) in the given output directory. | |
battre
2017/02/28 18:25:11
you don't specify the parameters of this binary an
Ramin Halavati
2017/04/06 13:32:28
This binary is not designed for direct calling and
| |
9 | |
10 #include <stdio.h> | |
11 #include <fstream> | |
12 #include <memory> | |
13 | |
14 #include "clang/ASTMatchers/ASTMatchFinder.h" | |
15 #include "clang/ASTMatchers/ASTMatchers.h" | |
16 #include "clang/Basic/SourceManager.h" | |
17 #include "clang/Frontend/FrontendActions.h" | |
18 #include "clang/Lex/Lexer.h" | |
19 #include "clang/Tooling/CommonOptionsParser.h" | |
20 #include "clang/Tooling/Refactoring.h" | |
21 #include "clang/Tooling/Tooling.h" | |
22 #include "llvm/Support/CommandLine.h" | |
23 | |
24 using namespace clang::ast_matchers; | |
25 | |
26 namespace { | |
27 | |
28 // An instance of network traffic annotation usage. This can be either | |
29 // a variable defined as NetworkTrafficAnnotationTag or a function that has | |
30 // a variable of this type as its input parameter. | |
battre
2017/02/28 18:25:12
Can you add examples of these two cases? I think t
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
31 struct NetworkAnnotationInstance { | |
32 NetworkAnnotationInstance() | |
33 : variable_reference(nullptr), | |
34 is_direct_call(false), | |
35 transitive_parameter(false) {} | |
36 | |
37 // Information about where this annotation or call has happened. | |
38 struct Location { | |
39 Location() : line_number(-1) {} | |
40 std::string file_path; | |
41 int line_number; | |
42 // Name of the function including this instance. | |
battre
2017/02/28 18:25:11
What does this mean?
battre
2017/02/28 18:25:12
Nit (also below): newline before //
Ramin Halavati
2017/04/06 13:32:28
Done.
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
43 std::string function_name; | |
44 // Name of the variable that contains annotation or the function called | |
45 // with annotation, e.g. SpellingServiceClient::CreateURLFetcher when it's | |
46 // a function or net_annotation when it's a variable. | |
battre
2017/02/28 18:25:11
I don't understand this either.
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
47 std::string object_name; | |
48 }; | |
49 | |
50 // Annotation content. | |
battre
2017/02/28 18:25:12
// Annotation content: These are the parameters of
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
51 struct Annotation { | |
52 std::string unique_id; | |
53 std::string text; | |
54 }; | |
55 | |
56 Location location; | |
57 Annotation annotation; | |
58 | |
59 // Possible error message (empty if no error). | |
60 std::string error; | |
battre
2017/02/28 18:25:11
extra linebreaks (see above)
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
61 // A reference to the variable containing annotation. Null if not available. | |
62 const clang::NamedDecl* variable_reference; | |
63 // Flag stating that parameter is directly passed to annotate function here | |
battre
2017/02/28 18:25:12
what is "parameter" here? I think this is an unres
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
64 // or it's through a variable. | |
65 bool is_direct_call; | |
66 // Flag stating that a variable is a parameter received by upper level | |
67 // function. | |
battre
2017/02/28 18:25:12
Please give an example as well.
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
68 bool transitive_parameter; | |
69 }; | |
70 | |
71 // Structure to collect instances of network traffic annotation usages. | |
72 struct Collector { | |
73 std::vector<NetworkAnnotationInstance> variable_definitions; | |
74 std::vector<NetworkAnnotationInstance> calls; | |
75 }; | |
76 | |
77 // Returns the function that includes the given token. For example, if the token | |
78 // is variable x in the code "void foo() { int x; ... }", it returns "foo". | |
79 std::string GetCoveringFunction(const clang::Decl* token, | |
80 const MatchFinder::MatchResult& result); | |
81 | |
82 // Checks if a token matches a name, with or without net:: namespace. | |
83 bool net_match(const std::string& token, const std::string& name) { | |
battre
2017/02/28 18:25:12
I think that the style guide requires CamelCase he
Ramin Halavati
2017/04/06 13:32:28
How about "StripNetNamespaceMatch"?
| |
84 return token == name || token == (std::string("net::") + name); | |
85 } | |
86 | |
87 // Returns the source code of a given token, like function name, variable name, | |
88 // string const, etc. | |
battre
2017/02/28 18:25:12
string literal? What's a string const?
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
89 std::string GetStmtText(const clang::Stmt* token, | |
90 const clang::SourceManager& source_manager) { | |
91 clang::LangOptions lopt; | |
92 // Get text range. | |
93 clang::SourceLocation start = token->getLocStart(); | |
94 clang::SourceLocation end = token->getLocEnd(); | |
95 | |
96 // If it's a macro, go to definition. | |
97 if (start.isMacroID()) | |
98 start = source_manager.getSpellingLoc(start); | |
99 if (end.isMacroID()) | |
100 end = source_manager.getSpellingLoc(end); | |
101 | |
102 // Get the real end of the token. | |
103 end = clang::Lexer::getLocForEndOfToken(end, 0, source_manager, lopt); | |
104 | |
105 // Extract text. | |
106 std::string output(source_manager.getCharacterData(start), | |
107 source_manager.getCharacterData(end)); | |
108 | |
109 // Raw string? | |
battre
2017/02/28 18:25:12
Can you explain the following branches better?
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
110 if (output != "R") | |
111 return output; | |
112 | |
113 if (auto* c1 = clang::dyn_cast<clang::StringLiteral>(token)) | |
battre
2017/02/28 18:25:12
why "c"? if (auto* literal = ...) ?
similarly bel
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
114 return c1->getString(); | |
115 | |
116 if (auto* c2 = clang::dyn_cast<clang::ImplicitCastExpr>(token)) { | |
117 if (const clang::StringLiteral* c3 = | |
118 clang::dyn_cast<clang::StringLiteral>(c2->getSubExprAsWritten())) { | |
119 return c3->getString(); | |
120 } | |
121 } | |
122 | |
123 return output; | |
124 } | |
125 | |
126 // Extracts unique id and annotation text of a call to | |
127 // "DefineNetworkTrafficAnnotation" function. Sets the error text if fails. | |
128 void GetAnnotationText(const clang::CallExpr* call_expr, | |
129 const clang::SourceManager& source_manager, | |
130 NetworkAnnotationInstance* instance) { | |
131 if (net_match(GetStmtText(call_expr->getCallee(), source_manager), | |
132 "DefineNetworkTrafficAnnotation") && | |
133 call_expr->getNumArgs() == 2) { | |
134 instance->annotation.unique_id = | |
135 GetStmtText(call_expr->getArgs()[0], source_manager); | |
136 instance->annotation.text = | |
137 GetStmtText(call_expr->getArgs()[1], source_manager); | |
138 instance->error = ""; | |
139 } else { | |
140 instance->annotation.unique_id = ""; | |
141 instance->annotation.text = ""; | |
142 instance->error = "Unexpected function."; | |
143 } | |
144 } | |
145 | |
146 // Returns the function that includes the given token. For example, if the token | |
147 // is the call to function bar() in the code "void foo() { bar(); }", it returns | |
148 // "foo". | |
149 std::string GetCoveringFunction(const clang::Stmt* token, | |
battre
2017/02/28 18:25:12
This overloading is discouraged: https://google.gi
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
150 const MatchFinder::MatchResult& result) { | |
151 clang::ASTContext::DynTypedNodeList parents = | |
152 result.Context->getParents(*token); | |
153 // TODO: What exactly does != 1 parent mean? | |
battre
2017/02/28 18:25:11
Can you resolve this TODO?
Ramin Halavati
2017/04/06 13:32:28
I haven't found a clue yet. I am hoping that clang
| |
154 if (parents.size() == 1) { | |
155 if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) | |
156 return GetCoveringFunction(s, result); | |
157 else if (const clang::Decl* d = parents[0].get<clang::Decl>()) | |
158 return GetCoveringFunction(d, result); | |
159 } | |
160 return "Unknown"; | |
161 } | |
162 | |
163 // Returns the function that includes the given token. For example, if the token | |
164 // is variable x in the code "void foo() { int x; ... }", it returns "foo". | |
165 std::string GetCoveringFunction(const clang::Decl* token, | |
166 const MatchFinder::MatchResult& result) { | |
167 if (auto f = clang::dyn_cast<clang::FunctionDecl>(token)) | |
168 return f->getQualifiedNameAsString(); | |
battre
2017/02/28 18:25:12
This path is not documented in the function commen
Ramin Halavati
2017/04/06 13:32:29
Done.
| |
169 | |
170 clang::ASTContext::DynTypedNodeList parents = | |
171 result.Context->getParents(*token); | |
172 // TODO: What exactly does != 1 parent mean? | |
173 if (parents.size() == 1) { | |
174 if (const clang::Stmt* s = parents[0].get<clang::Stmt>()) | |
175 return GetCoveringFunction(s, result); | |
176 else if (const clang::Decl* d = parents[0].get<clang::Decl>()) | |
177 return GetCoveringFunction(d, result); | |
178 } | |
179 return "Unknown"; | |
180 } | |
181 | |
182 // Finds file name and line number of the given token. | |
battre
2017/02/28 18:25:11
... and writes it into |location|.
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
183 template <class T> | |
184 void GetLocation(const T* token, | |
185 const MatchFinder::MatchResult& result, | |
battre
2017/02/28 18:25:12
Do you want to pass the SourceManager instead of t
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
186 NetworkAnnotationInstance::Location* location) { | |
187 clang::SourceLocation source_location = token->getLocStart(); | |
188 location->file_path = result.SourceManager->getFilename(source_location); | |
189 location->line_number = | |
190 result.SourceManager->getSpellingLineNumber(source_location); | |
191 } | |
192 | |
193 // This class implements the call back functions for AST Matchers. The matchers | |
194 // are defined in RunMatchers function and when a pattern is found there, | |
195 // the run function in this class is called back with information on the match | |
196 // location and description of the match pattern. | |
197 class NetworkAnnotationTagCallback : public MatchFinder::MatchCallback { | |
198 public: | |
199 explicit NetworkAnnotationTagCallback(Collector* collector) | |
200 : collector_(collector) {} | |
201 ~NetworkAnnotationTagCallback() override = default; | |
202 | |
203 // Is called on any pattern found by the ASTMatchers that are defined in the | |
204 // RunMatchers function. | |
205 virtual void run(const MatchFinder::MatchResult& result) override { | |
206 if (const clang::VarDecl* var_decl = | |
207 result.Nodes.getNodeAs<clang::VarDecl>("annotation_variable")) { | |
208 AddVariable(var_decl, result); | |
209 } else if (const clang::CallExpr* call_expr = | |
210 result.Nodes.getNodeAs<clang::CallExpr>("user_function")) { | |
211 AddFunction(call_expr, result); | |
212 } | |
213 } | |
214 | |
215 // Stores an annotation variable definition. | |
216 void AddVariable(const clang::VarDecl* var_decl, | |
217 const MatchFinder::MatchResult& result) { | |
218 NetworkAnnotationInstance instance; | |
219 | |
220 GetLocation(var_decl, result, &instance.location); | |
221 instance.location.object_name = var_decl->getQualifiedNameAsString(); | |
222 instance.variable_reference = clang::dyn_cast<clang::NamedDecl>(var_decl); | |
223 | |
224 // Mark it as transitive parameter if it doesn't have initialization but | |
battre
2017/02/28 18:25:11
what is "it"? Can you give it a name?
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
225 // it's a function parameter. Otherwise, extract its content. | |
226 if (!var_decl->hasInit() && var_decl->isLocalVarDeclOrParm() && | |
227 !var_decl->isLocalVarDecl()) { | |
228 instance.transitive_parameter = true; | |
229 } else if (auto* init_expr = var_decl->getInit()) { | |
230 if (auto* call_expr = clang::dyn_cast<clang::CallExpr>(init_expr)) | |
231 GetAnnotationText(call_expr, *result.SourceManager, &instance); | |
battre
2017/02/28 18:25:12
I think that here is some inconsistency: GetLocati
Ramin Halavati
2017/04/06 13:32:28
AddVariable can be changed, but AddFunction requir
| |
232 } | |
233 // If nothing is set, issue an error. | |
234 if (!instance.transitive_parameter && | |
235 instance.annotation.unique_id.empty() && instance.error.empty()) { | |
236 instance.error = "Could not resolve variable initialization."; | |
237 } | |
238 | |
239 collector_->variable_definitions.push_back(instance); | |
240 } | |
241 | |
242 // Stores a function call that uses annotation variables. | |
243 void AddFunction(const clang::CallExpr* call_expr, | |
244 const MatchFinder::MatchResult& result) { | |
245 NetworkAnnotationInstance instance; | |
246 | |
247 GetLocation(call_expr, result, &instance.location); | |
248 instance.location.function_name = | |
249 GetCoveringFunction(clang::dyn_cast<clang::Stmt>(call_expr), result); | |
250 instance.location.object_name = | |
251 call_expr->getDirectCallee()->getQualifiedNameAsString(); | |
252 | |
253 // Get annotation text. | |
254 const clang::FunctionDecl* function_decl = call_expr->getDirectCallee(); | |
255 unsigned params_count = function_decl->getNumParams(); | |
256 unsigned args_count = call_expr->getNumArgs(); | |
257 | |
258 for (unsigned i = 0; i < params_count; i++) { | |
259 if (net_match(clang::QualType::getAsString( | |
260 function_decl->getParamDecl(i)->getType().split()), | |
261 "NetworkTrafficAnnotationTag")) { | |
262 if (i >= args_count) { | |
263 instance.error = "Function missing annotation argument."; | |
264 } else { | |
265 // Get the argument. | |
266 const clang::Expr* arg = call_expr->getArgs()[i]; | |
267 | |
268 // Is it a call to annotate function? | |
269 if (auto* inner_call_expr = clang::dyn_cast<clang::CallExpr>(arg)) { | |
270 instance.is_direct_call = true; | |
271 GetAnnotationText(inner_call_expr, *result.SourceManager, | |
272 &instance); | |
273 instance.error = ""; | |
274 } else { | |
275 // Then it's a variable. | |
276 instance.is_direct_call = false; | |
277 if (auto* pure_arg = | |
278 clang::dyn_cast<clang::DeclRefExpr>(arg->IgnoreCasts())) { | |
279 instance.variable_reference = pure_arg->getFoundDecl(); | |
280 instance.error = ""; | |
281 } else { | |
282 instance.error = "Unknwon parameter type."; | |
283 } | |
284 } | |
285 } | |
286 collector_->calls.push_back(instance); | |
287 } | |
288 } | |
289 } | |
290 | |
291 private: | |
292 Collector* collector_; | |
293 }; | |
294 | |
295 // Sets up ASTMatchers and runs clang tool to populate collector. Returns the | |
296 // result of running the clang tool. | |
297 int RunMatchers(clang::tooling::ClangTool* clang_tool, Collector* collector) { | |
298 NetworkAnnotationTagCallback call_back(collector); | |
299 MatchFinder match_finder; | |
300 | |
301 // Set up a pattern to find variables defined with type | |
302 // [net::]NetworkTrafficAnnotationTag. | |
303 match_finder.addMatcher( | |
304 varDecl(anyOf(hasType(asString("NetworkTrafficAnnotationTag")), | |
305 hasType(asString("net::NetworkTrafficAnnotationTag")))) | |
306 .bind("annotation_variable"), | |
307 &call_back); | |
308 | |
309 // Set up a pattern to find functions that have a parameter of type | |
310 // [net::]NetworkTrafficAnnotationTag. | |
311 match_finder.addMatcher( | |
312 callExpr(hasDeclaration(functionDecl(hasAnyParameter(anyOf( | |
313 hasType(asString("NetworkTrafficAnnotationTag")), | |
314 hasType(asString("net::NetworkTrafficAnnotationTag"))))))) | |
315 .bind("user_function"), | |
316 &call_back); | |
317 | |
318 std::unique_ptr<clang::tooling::FrontendActionFactory> frontend_factory = | |
319 clang::tooling::newFrontendActionFactory(&match_finder); | |
320 return clang_tool->run(frontend_factory.get()); | |
321 } | |
322 | |
323 } // namespace | |
324 | |
325 int main(int argc, const char* argv[]) { | |
326 // Find output directory. | |
327 if (argc < 5) { | |
328 llvm::errs() << "Temporary files directory is not specified."; | |
329 return -1; | |
330 } | |
331 std::string output_dir(argv[4]); | |
battre
2017/02/28 18:25:11
I think this parameter is not required by the read
Ramin Halavati
2017/04/06 13:32:28
I don't understand, elaborate please.
| |
332 // Keep the consumed parameter from being passed to the clang parser. | |
333 argc = 4; | |
battre
2017/02/28 18:25:12
Wouldn't it be cleaner introduce a new parameter -
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
334 | |
335 llvm::cl::OptionCategory category("Network Request Audit Extractor Tool"); | |
336 clang::tooling::CommonOptionsParser options(argc, argv, category); | |
battre
2017/02/28 18:25:12
CommonParserOptions supports a help messages https
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
337 clang::tooling::ClangTool tool(options.getCompilations(), | |
338 options.getSourcePathList()); | |
339 Collector collector; | |
340 | |
341 int result = RunMatchers(&tool, &collector); | |
342 | |
343 if (result != 0) | |
344 return result; | |
345 | |
346 // llvm::outs() << "==== BEGIN EDITS ====\n"; | |
347 // llvm::outs() << "==== END EDITS ====\n"; | |
battre
2017/02/28 18:25:12
Can this be deleted?
Ramin Halavati
2017/04/06 13:32:28
It was a requirement in previous version of clang
| |
348 | |
349 // For each call, if the parameter is not generated by a direct call to | |
350 // "DefineNetworkTrafficAnnotation", find the variable that holds the value. | |
351 for (auto& c : collector.calls) { | |
battre
2017/02/28 18:25:11
I think that for (NetworkAnnotationInstance& call
Ramin Halavati
2017/04/06 13:32:28
Done.
| |
352 if (!c.is_direct_call) { | |
353 // Find the variable. | |
354 for (const auto& v : collector.variable_definitions) | |
355 if (v.variable_reference == c.variable_reference) { | |
356 c.annotation = v.annotation; | |
357 c.transitive_parameter = v.transitive_parameter; | |
358 c.error = c.error + (c.error.length() ? "\n+" : "") + v.error; | |
359 break; | |
360 } | |
361 if (!c.annotation.unique_id.length()) | |
362 c.error = "Variable not found."; | |
363 } | |
364 | |
365 // If the function just receives the variable and passes it to another | |
366 // function, ignore it, otherwise write it to file. | |
367 if (!c.transitive_parameter) { | |
368 std::string s = c.location.file_path; | |
369 std::replace(s.begin(), s.end(), '/', '_'); | |
370 std::replace(s.begin(), s.end(), '.', '_'); | |
371 std::string file_path = output_dir + "/" + s + "(" + | |
372 std::to_string(c.location.line_number) + ").txt"; | |
373 | |
374 std::ofstream output_file(file_path); | |
375 if (output_file.is_open()) { | |
376 output_file << c.location.file_path << "\n"; | |
377 output_file << c.location.function_name << "\n"; | |
378 output_file << c.location.line_number << "\n"; | |
379 output_file << c.location.object_name << "\n"; | |
380 output_file << c.error << "\n"; | |
381 output_file << c.annotation.unique_id << "\n"; | |
382 output_file << c.annotation.text << "\n"; | |
383 output_file.close(); | |
384 } else { | |
385 llvm::errs() << "Could not write to file: " << file_path << " because " | |
386 << strerror(errno) << "\n"; | |
387 return 1; | |
388 } | |
389 } | |
390 } | |
391 | |
392 return 0; | |
393 } | |
OLD | NEW |