OLD | NEW |
---|---|
(Empty) | |
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import logging | |
6 import os | |
7 from threading import Lock, Thread | |
8 | |
9 import blame | |
10 import component_dictionary | |
11 import crash_utils | |
12 import git_repository_parser | |
13 import match_set | |
14 import svn_repository_parser | |
15 import utils | |
16 | |
17 | |
18 LINE_CHANGE_PRIORITY = 1 | |
19 FILE_CHANGE_PRIORITY = 2 | |
20 logging.basicConfig(filename='errors.log', level=logging.WARNING, | |
stgao
2014/08/14 18:25:42
Let's move this out.
logging setup should be done
jeun
2014/08/14 20:54:12
Done.
| |
21 filemode='w+') | |
22 URL_MAP = crash_utils.ParseURLsFromConfig('urls.config') | |
23 | |
24 | |
25 def GenerateMatchEntry( | |
26 matches, revisions, cl, file_path, file_name, function, component_path, | |
27 component_name, crashed_lines, stack_frame_indices, file_action, | |
28 repository_parser): | |
29 """Generates a match object and adds it to the match set. | |
30 | |
31 Args: | |
32 matches: A matchset object, a map from CL to a match object. | |
33 revisions: The dictionary mapping cl's number to the revision information, | |
34 as returned from a function ParseChangelog in parser. | |
35 cl: The SVN revision number or git hash. | |
36 file_path: The path of the file. | |
37 file_name: The name of the file that this CL might be the culprit cl. | |
38 function: The function that caused an crash. | |
39 component_path: The path of the component this file is from. | |
40 component_name: The name of the component the file is from. | |
41 crashed_lines: The list of the lines in the file that caused the crash. | |
42 stack_frame_indices: The list of positions of this file within a stack. | |
43 file_action: Whether file is modified, added or deleted. | |
44 repository_parser: The parser object to parse line diff. | |
45 """ | |
46 # Check if this CL should be avoided. | |
stgao
2014/08/14 18:25:42
avoided -> ignored?
jeun
2014/08/14 20:54:13
Done.
| |
47 with matches.matches_lock: | |
48 if cl in matches.cls_to_ignore: | |
49 return | |
50 | |
51 # If this CL is not already identified as suspected, create a new entry. | |
52 if cl not in matches.matches: | |
53 revision = revisions[cl] | |
54 match = match_set.Match(revision, component_name) | |
55 message = revisions[cl]['message'] | |
56 # TODO(jeun): Don't hold lock while issuing http request. | |
57 match.ParseMessage(message, matches.codereview_api_url) | |
58 | |
59 # If this match is a revert, add to the set of CLs to be avoided. | |
60 if match.is_reverted: | |
61 matches.cls_to_ignore.add(cl) | |
62 | |
63 # If this match has info on what CL it is reverted from, add that CL. | |
64 if match.revert_of: | |
65 matches.cls_to_ignore.add(match.revert_of) | |
66 | |
67 # Return because we do not need to look at this CL anymore. | |
68 return | |
69 | |
70 # Add this CL to the set of matches. | |
71 matches.matches[cl] = match | |
72 | |
73 # Else, bring in the existing result. | |
74 else: | |
75 match = matches.matches[cl] | |
76 | |
77 # Parse line diff information for this file. | |
78 (diff_url, changed_line_numbers, changed_line_contents) = ( | |
79 repository_parser.ParseLineDiff( | |
80 file_path, component_path, file_action, cl)) | |
81 | |
82 # Ignore this match if the component is not supported for svn. | |
83 if not diff_url: | |
84 return | |
85 | |
86 # Find the intersection between the lines that this file crashed on and | |
87 # the changed lines. | |
88 (line_intersection, stack_frame_index_intersection) = ( | |
89 crash_utils.Intersection( | |
90 crashed_lines, stack_frame_indices, changed_line_numbers)) | |
91 | |
92 # Find the minimum distance between the changed lines and crashed lines. | |
93 min_distance = crash_utils.FindMinLineDistance(crashed_lines, | |
94 changed_line_numbers) | |
95 | |
96 # Check whether this CL changes the crashed lines or not. | |
97 if line_intersection: | |
98 priority = LINE_CHANGE_PRIORITY | |
99 else: | |
100 priority = FILE_CHANGE_PRIORITY | |
101 | |
102 # Add the parsed information to the object. | |
103 with matches.matches_lock: | |
104 match.line_of_crash.append(line_intersection) | |
105 match.files.append(file_name) | |
106 | |
107 # Update the min distance only if it is less than the current one. | |
108 if min_distance < match.min_distance: | |
109 match.min_distance = min_distance | |
110 | |
111 # If this CL does not change the crashed line, all occurrence of this | |
112 # file in the stack has the same priority. | |
113 if not stack_frame_index_intersection: | |
114 stack_frame_index_intersection = stack_frame_indices | |
115 match.stack_frame_indices.append(stack_frame_index_intersection) | |
116 match.file_urls.append(diff_url) | |
117 | |
118 # Only record the highest rank of this CL. | |
119 if priority < match.rank: | |
120 match.rank = priority | |
121 match.priorities.append(priority) | |
122 match.function.append(function) | |
123 | |
124 | |
125 def FindMatch(revisions, file_map, file_dict, component_path, component_name, | |
126 repository_parser, codereview_api_url): | |
127 """Finds a CL that modifies file in the stacktrace. | |
128 | |
129 Args: | |
130 revisions: A dictionary mapping revision number to the CL information. | |
131 file_map: A dictionary mapping file to the revision that modifies it. | |
132 file_dict: A dictionary mapping file to its occurrence in stacktrace. | |
133 component_path: The path of the component to search for. | |
134 component_name: The name of the component to search for. | |
135 repository_parser: The parser object to parse the line diff. | |
136 codereview_api_url: A code review url to retrieve data from. | |
137 | |
138 Returns: | |
139 Matches, a set of match objects. | |
140 """ | |
141 matches = match_set.MatchSet(codereview_api_url) | |
142 threads = [] | |
143 | |
144 # For all files in stacktrace, if it is not header file, check against | |
145 # all CLs to see if any CL changes this file. If so, add to the set of | |
146 # suspected CLs. | |
147 for file_name in file_dict: | |
148 # Ignore header file. | |
149 if file_name.endswith('.h'): | |
150 continue | |
151 | |
152 # If the file in the stacktrace is not changed in any commits, continue. | |
153 if file_name not in file_map: | |
154 continue | |
155 | |
156 path_dict = file_dict.GetPathDic(file_name) | |
157 # There can be two files that are in different path but have same | |
158 # names, so also has to check path. | |
159 for path in path_dict: | |
160 crashed_lines = file_dict.GetCrashedLineNumbers(path) | |
161 stack_frame_nums = file_dict.GetCrashStackFrameindex(path) | |
162 function = file_dict.GetCrashFunction(path) | |
163 | |
164 # Iterate through the revisions that this file is changed. | |
165 for (cl, file_action, file_path) in file_map[file_name]: | |
166 | |
stgao
2014/08/14 18:25:42
This empty line could be removed.
jeun
2014/08/14 20:54:13
Done.
| |
167 # If path of two files are different or if a commit deletes a file, | |
168 # continue. | |
169 if not crash_utils.GuessIfSameSubPath(path, file_path) or ( | |
170 file_action == 'D'): | |
171 continue | |
172 | |
173 # Create a new thread for adding this CL. | |
174 match_thread = Thread( | |
175 target=GenerateMatchEntry, | |
176 args=[matches, revisions, cl, file_path, file_name, function, | |
177 component_path, component_name, crashed_lines, | |
178 stack_frame_nums, file_action, | |
179 repository_parser]) | |
180 threads.append(match_thread) | |
181 match_thread.start() | |
182 | |
183 # Make sure all threads finish before returning. | |
184 for match_thread in threads: | |
185 match_thread.join() | |
186 | |
187 # Remove CLs that are revert. | |
188 matches.RemoveReverts() | |
189 | |
190 return matches | |
191 | |
192 | |
193 def FindMatchForComponent(component_path, file_dict, changelog, | |
194 callstack_priority, results, results_lock): | |
195 """Parses changelog and finds suspected CLs for a given component. | |
196 | |
197 Args: | |
198 component_path: The path of component to look for the culprit CL. | |
199 file_dict: A dictionary mapping file to its occurrence in stackframe. | |
200 changelog: The parsed changelog for this component. | |
201 callstack_priority: The priority of this call stack, 0 if from crash stack, | |
202 1 if from freed, 2 if from previously allocated. | |
203 results: A dictionary to store the result. | |
204 results_lock: A lock that guards result_map. | |
stgao
2014/08/14 18:25:42
result_map is not clear.
jeun
2014/08/14 20:54:12
Done.
| |
205 """ | |
206 (repository_parser, component_name, revisions, file_to_revision_map) = \ | |
207 changelog | |
208 | |
209 # Find match for this component. | |
210 codereview_api_url = URL_MAP['codereview']['review_url'] | |
211 component_result = FindMatch( | |
212 revisions, file_to_revision_map, file_dict, component_path, | |
213 component_name, repository_parser, codereview_api_url) | |
214 matches = component_result.matches | |
215 | |
216 # For all the match results in a dictionary, add to the list so that it | |
217 # can be sorted. | |
218 with results_lock: | |
219 for cl in matches: | |
220 match = matches[cl] | |
221 results.append((callstack_priority, cl, match)) | |
222 | |
223 | |
224 def FindMatchForCallstack( | |
225 callstack, components, component_to_changelog_map, results, | |
226 results_lock): | |
227 """Finds culprit cl for a stack within a stacktrace. | |
228 | |
229 For each components to look for, create new thread that computes the matches | |
230 and join the results at the end. | |
231 Args: | |
232 callstack: A callstack in a stacktrace to find the result for. | |
233 components: A set of components to look for. | |
234 component_to_changelog_map: A map from component to its parsed changelog. | |
235 results: A list to aggregrate results from all stacktraces. | |
236 results_lock: A lock that guards all_results. | |
stgao
2014/08/14 18:25:42
all_result is not clear.
jeun
2014/08/14 20:54:13
Done.
| |
237 """ | |
238 # Create component dictionary from the component and call stack. | |
239 component_dict = component_dictionary.ComponentDictionary(callstack, | |
240 components) | |
241 callstack_priority = callstack.priority | |
242 | |
243 # Iterate through all components and create new thread for each component. | |
244 threads = [] | |
245 for component_path in component_dict: | |
246 file_dict = component_dict.GetFileDict(component_path) | |
247 | |
248 if component_path not in component_to_changelog_map: | |
249 continue | |
250 changelog = component_to_changelog_map[component_path] | |
251 | |
252 t = Thread( | |
253 target=FindMatchForComponent, | |
254 args=[component_path, file_dict, changelog, | |
255 callstack_priority, results, results_lock]) | |
256 threads.append(t) | |
257 t.start() | |
258 | |
259 # Join the threads before returning. | |
260 for t in threads: | |
261 t.join() | |
262 | |
263 | |
264 def FindMatchForStacktrace(stacktrace, components, regression_dict): | |
265 """Finds the culprit CL for stacktrace. | |
266 | |
267 The passed stacktrace is either from release build stacktrace | |
268 or debug build stacktrace. | |
269 | |
270 Args: | |
271 stacktrace: A list of parsed stacks within a stacktrace. | |
272 components: A set of components to look for. | |
273 regression_dict: A dictionary mapping component path to its regression. | |
274 | |
275 Returns: | |
276 A list of match results from all stacks. | |
277 """ | |
278 # A list to aggregate results from the whole stacktrace. | |
279 results = [] | |
280 results_lock = Lock() | |
281 | |
282 # Parse changelog for each of the components and store it. | |
283 svn_parser = svn_repository_parser.SVNParser(URL_MAP['svn']) | |
284 git_parser = git_repository_parser.GitParser(regression_dict, URL_MAP['git']) | |
285 | |
286 component_to_changelog_map = {} | |
287 for component_path in components: | |
288 component_object = regression_dict[component_path] | |
289 range_start = component_object['old_revision'] | |
290 range_end = component_object['new_revision'] | |
291 if range_start == '0': | |
292 continue | |
293 | |
294 component_name = regression_dict[component_path]['name'] | |
295 | |
296 # Check if the regression is git hash or a revision number, and create | |
297 # appropriate parser object. | |
298 is_git = utils.IsGitHash(range_start) | |
299 if is_git: | |
300 repository_parser = git_parser | |
301 else: | |
302 repository_parser = svn_parser | |
303 | |
304 # Parse revisions and changed files in regression. | |
305 (revisions, file_to_revision_map) = repository_parser.ParseChangelog( | |
306 component_path, range_start, range_end) | |
307 | |
308 if not (revisions and file_to_revision_map): | |
309 continue | |
310 | |
311 component_to_changelog_map[component_path] = (repository_parser, | |
312 component_name, | |
313 revisions, | |
314 file_to_revision_map) | |
315 | |
316 # Create separate threads for each of the call stack in the stacktrace. | |
317 threads = [] | |
318 for callstack in stacktrace.stack_list: | |
319 t = Thread( | |
320 target=FindMatchForCallstack, | |
321 args=[callstack, components, component_to_changelog_map, | |
322 results, results_lock]) | |
323 threads.append(t) | |
324 t.start() | |
325 | |
326 # Join the threads before returning. | |
327 for t in threads: | |
328 t.join() | |
329 | |
330 return results | |
331 | |
332 | |
333 def SortAndFilterMatches(matches): | |
334 """Filters the list of potential culprit CLs to remove noise. | |
335 | |
336 Args: | |
337 matches: A list containing match results. | |
338 | |
339 Returns: | |
340 Filtered match results. | |
341 """ | |
342 new_matches = [] | |
343 line_changed = False | |
344 in_crash_state = False | |
345 highest_priority_stack = crash_utils.INFINITY | |
346 | |
347 # Sort the matches in specific order. | |
348 matches.sort(key=lambda (stack_index, cl, match): | |
349 (match.rank, stack_index, | |
350 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices, | |
351 match.priorities), | |
352 -len(match.files), match.min_distance)) | |
353 | |
354 # Iterate through the matches to find out what results are significant. | |
355 for stack_index, cl, match in matches: | |
356 | |
stgao
2014/08/14 18:25:42
This empty line could be removed.
jeun
2014/08/14 20:54:12
Done.
| |
357 # Check if the current match changes crashed line. | |
358 is_line_change = (match.rank == LINE_CHANGE_PRIORITY) | |
359 | |
360 # Check which thread this match is from. | |
361 current_stack = stack_index | |
362 if current_stack < highest_priority_stack: | |
363 highest_priority_stack = current_stack | |
364 | |
365 # Check if current match changes a file that occurs in crash state. | |
366 flattened_stack_frame_indices = [i for sl in match.stack_frame_indices | |
367 for i in sl] | |
368 current_in_crash_state = min(flattened_stack_frame_indices) < 5 | |
369 | |
370 # If current match does not change crashed lines but there are matches | |
371 # that do so, or if current match is not in crash state but there are | |
372 # matchtes in it, or if there are other matches that came from higher | |
373 # priority stack, then this match and anything lower than this should | |
374 # be ignored. | |
stgao
2014/08/14 18:25:42
Could this comment turned into bullet point?
jeun
2014/08/14 20:54:13
Done.
| |
375 if (line_changed and not is_line_change) or ( | |
376 in_crash_state and not current_in_crash_state) or ( | |
377 current_stack > highest_priority_stack): | |
378 break | |
379 | |
380 # Update the variables. | |
381 if is_line_change: | |
382 line_changed = True | |
383 if current_in_crash_state: | |
384 in_crash_state = True | |
385 | |
386 # Add current match to the filtered result. | |
387 new_matches.append((stack_index, cl, match)) | |
388 | |
389 return new_matches | |
390 | |
391 | |
392 def GenerateReasonForMatches(matches): | |
393 """Generates a reason that a match (CL) is a culprit cl. | |
394 | |
395 Args: | |
396 matches: A list of match objects. | |
397 """ | |
398 # Iterate through the matches in the list. | |
399 for i, _, match in matches: | |
400 reason = [] | |
stgao
2014/08/14 18:25:42
reasons?
jeun
2014/08/14 20:54:12
I would call it a reason because it is a list of s
| |
401 | |
402 # Zip the files in the match by the reason they are suspected | |
403 # (how the file is modified) and sort it in specific order. | |
404 match_by_priority = zip( | |
405 match.priorities, match.line_of_crash, match.files, | |
406 match.file_urls) | |
407 | |
408 match_by_priority.sort( | |
409 key=lambda (priority, crashed_line_number, file_name, file_url): | |
410 (priority, len(file_name))) | |
411 | |
412 # Iterate through the sorted match. | |
413 for i in range(len(match_by_priority)): | |
414 (priority, crashed_line_number, file_name, file_url) = \ | |
stgao
2014/08/14 18:25:42
file_name or file_path? Same the one for above in
jeun
2014/08/14 20:54:13
It is file_name.
| |
415 match_by_priority[i] | |
416 | |
417 # If the file in the match is a line change, append a explanation. | |
418 if priority == LINE_CHANGE_PRIORITY: | |
419 reason.append( | |
420 ' Line %s of file %s which caused the crash according ' | |
421 'to the stacktrace, is changed in this cl.\n' % | |
422 (crash_utils.PrettifyList(crashed_line_number), | |
423 crash_utils.PrettifyFiles([(file_name, file_url)]))) | |
424 | |
425 else: | |
426 # Get all the files that are not line change. | |
427 rest_of_the_files = match_by_priority[i:] | |
428 | |
429 # Take care of single/multiple files in string. | |
430 if len(rest_of_the_files) == 1: | |
431 file_string = ' File %s is changed in this cl.\n' | |
432 else: | |
433 file_string = ' Files %s are changed in this cl.\n' | |
434 | |
435 # Create a list of file name and its url, and prettify the list. | |
436 file_name_with_url = [(file_name, file_url) | |
437 for (_, _, file_name, file_url) | |
438 in rest_of_the_files] | |
439 pretty_file_name_url = crash_utils.PrettifyFiles(file_name_with_url) | |
440 | |
441 # Add the reason, break because we took care of the rest of the files. | |
442 reason.append(file_string % pretty_file_name_url) | |
443 break | |
444 | |
445 # Set the reason as string. | |
446 match.reason = ''.join(reason) | |
447 | |
448 | |
449 def CombineMatches(matches): | |
450 """Combine possible duplicates in matches. | |
451 | |
452 Args: | |
453 matches: A list of matches object, along with its callstack priority and | |
454 CL it is from. | |
455 Returns: | |
456 A combined list of matches. | |
457 """ | |
458 combined_matches = [] | |
459 | |
460 for stack_index, cl, match in matches: | |
461 found_match = None | |
462 | |
463 # Iterate through the list of combined matches. | |
464 for _, cl_combined, match_combined in combined_matches: | |
465 # Check for if current match is already higher up in the result. | |
466 if cl == cl_combined: | |
467 found_match = match_combined | |
468 | |
469 # If current match is not already in, add it to the list of matches. | |
470 if not found_match: | |
471 combined_matches.append((stack_index, cl, match)) | |
472 continue | |
473 | |
474 # Combine the reason if the current match is already in there. | |
475 found_match.reason += match.reason | |
476 | |
477 return combined_matches | |
478 | |
479 | |
480 def FilterAndGenerateReasonForMatches(result): | |
481 """A wrapper function. | |
482 | |
483 It generates reasons for the matches and returns string representation | |
484 of filtered results. | |
485 | |
486 Args: | |
487 result: A list of match objects. | |
488 | |
489 Returns: | |
490 A string representation of filtered results. | |
491 """ | |
492 new_result = SortAndFilterMatches(result) | |
493 GenerateReasonForMatches(new_result) | |
494 combined_matches = CombineMatches(new_result) | |
495 return crash_utils.MatchListToResultList(combined_matches) | |
496 | |
497 | |
498 def ParseCrashComponents(main_stack, top_n_frames=3): | |
499 """Parses the crashing component. | |
500 | |
501 Args: | |
502 main_stack: Main stack from the stacktrace. | |
503 top_n_frames: A number of frames to regard as crashing frame. | |
504 Returns: | |
505 A set of components. | |
506 """ | |
507 frame_list = main_stack.GetTopNFrames(top_n_frames) | |
508 components = set() | |
509 | |
510 for frame in frame_list: | |
511 components.add(frame.component_path) | |
512 | |
513 return components | |
514 | |
515 | |
516 def GenerateAndFilterBlameList(callstack, crash_revision_dict, regression_dict): | |
517 """A wrapper function. | |
518 | |
519 Finds blame information for stack and returns string representation. | |
520 | |
521 Args: | |
522 callstack: A callstack to find the blame information. | |
523 crash_revision_dict: A dictionary mapping component to its crash revision. | |
524 regression_dict: A dictionary mapping component to its regression. | |
525 | |
526 Returns: | |
527 A list of blame results. | |
528 """ | |
529 # Setup parser objects to use for parsing blame information. | |
530 svn_parser = svn_repository_parser.SVNParser(URL_MAP['svn']) | |
531 git_parser = git_repository_parser.GitParser(regression_dict, URL_MAP['git']) | |
532 parsers = [svn_parser, git_parser] | |
533 | |
534 # Create and generate the blame objects from the callstack. | |
535 blame_list = blame.BlameList() | |
536 blame_list.FindBlame(callstack, crash_revision_dict, regression_dict, | |
537 parsers) | |
538 | |
539 blame_list.FilterAndSortBlameList() | |
540 return crash_utils.BlameListToResultList(blame_list) | |
541 | |
542 | |
543 def FindItForCrash(stacktrace_list, | |
544 callstack, | |
545 parsed_regression, | |
546 parsed_crash_revision): | |
547 """Finds the culprit CL from the list of stacktrace. | |
548 | |
549 Args: | |
550 stacktrace_list: A list of stacktraces to look for, in the order of | |
551 decreasing significance. | |
552 callstack: A callstack object to show blame information for, if there are | |
553 no results for all stacktraces in the stacktrace_list. | |
554 parsed_regression: A parsed regression information as a result of parsing | |
555 DEPS file. | |
556 parsed_crash_revision: A parsed crash revision information. | |
557 | |
558 Returns: | |
559 A list of result objects. | |
560 """ | |
561 # If regression information is not available, return blame information. | |
562 if not parsed_regression: | |
563 return GenerateAndFilterBlameList(callstack, parsed_crash_revision, | |
564 parsed_regression) | |
565 | |
566 for stacktrace in stacktrace_list: | |
567 | |
stgao
2014/08/14 18:25:42
This empty line could be removed.
jeun
2014/08/14 20:54:12
Done.
| |
568 # Check the next stacktrace if current one is empty. | |
569 if not stacktrace.stack_list: | |
570 continue | |
571 | |
572 # Get the crash stack for this stacktrace, and extract crashing components | |
573 # from it. | |
574 main_stack = stacktrace.GetCrashStack() | |
575 components = ParseCrashComponents(main_stack, parsed_crash_revision) | |
576 | |
577 result_for_stacktrace = FindMatchForStacktrace(stacktrace, components, | |
578 parsed_regression) | |
579 | |
580 # If the result is empty, check the next stacktrace. Else, return the | |
581 # filtered result. | |
582 if not result_for_stacktrace: | |
583 continue | |
584 | |
585 return FilterAndGenerateReasonForMatches(result_for_stacktrace) | |
586 | |
587 # If not match is found, return the blame information for the input | |
588 # callstack. | |
589 return GenerateAndFilterBlameList(callstack, parsed_crash_revision, | |
590 parsed_regression) | |
591 | |
OLD | NEW |