OLD | NEW |
---|---|
(Empty) | |
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import os | |
6 from threading import Lock, Thread | |
7 | |
8 import blame | |
9 from common import utils | |
10 import component_dictionary | |
11 import crash_utils | |
12 import git_repository_parser | |
13 import match_set | |
14 import svn_repository_parser | |
15 | |
16 | |
17 LINE_CHANGE_PRIORITY = 1 | |
18 FILE_CHANGE_PRIORITY = 2 | |
19 _THIS_DIR = os.path.abspath(os.path.dirname(__file__)) | |
20 CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR, | |
21 'config.ini')) | |
22 | |
23 | |
24 def GenerateMatchEntry( | |
25 matches, revision_info, revision_number, file_path, function, | |
26 component_path, component_name, crashed_line_numbers, stack_frame_indices, | |
27 file_change_type, repository_parser): | |
28 """Generates a match object and adds it to the match set. | |
29 | |
30 Args: | |
31 matches: A matchset object, a map from CL to a match object. | |
32 revision_info: The revision information, a map from fields (message, | |
33 changed files, etc) to its values. | |
34 revision_number: The SVN revision number or git hash. | |
35 file_path: The path of the file. | |
36 function: The function that caused an crash. | |
37 component_path: The path of the component this file is from. | |
38 component_name: The name of the component the file is from. | |
39 crashed_line_numbers: The list of the lines in the file that caused | |
40 the crash. | |
41 stack_frame_indices: The list of positions of this file within a stack. | |
42 file_change_type: Whether file is modified, added or deleted. | |
43 repository_parser: The parser object to parse line diff. | |
44 """ | |
45 # Check if this CL should be ignored. | |
46 with matches.matches_lock: | |
47 if revision_number in matches.cls_to_ignore: | |
48 return | |
49 | |
50 # If this CL is not already identified as suspected, create a new entry. | |
51 if revision_number not in matches.matches: | |
52 match = match_set.Match(revision_info, component_name) | |
53 message = revision_info['message'] | |
54 # TODO(jeun): Don't hold lock while issuing http request. | |
55 match.ParseMessage(message, matches.codereview_api_url) | |
56 | |
57 # If this match is a revert, add to the set of CLs to be ignored. | |
58 if match.is_revert: | |
59 matches.cls_to_ignore.add(revision_number) | |
60 | |
61 # If this match has info on what CL it is reverted from, add that CL. | |
62 if match.revert_of: | |
63 matches.cls_to_ignore.add(match.revert_of) | |
64 | |
65 return | |
66 | |
67 matches.matches[revision_number] = match | |
68 | |
69 else: | |
70 match = matches.matches[revision_number] | |
71 | |
72 (diff_url, changed_line_numbers, changed_line_contents) = ( | |
73 repository_parser.ParseLineDiff( | |
74 file_path, component_path, file_change_type, revision_number)) | |
75 | |
76 # Ignore this match if the component is not supported for svn. | |
77 if not diff_url: | |
78 return | |
79 | |
80 # Find the intersection between the lines that this file crashed on and | |
81 # the changed lines. | |
82 (line_number_intersection, stack_frame_index_intersection) = ( | |
83 crash_utils.Intersection( | |
84 crashed_line_numbers, stack_frame_indices, changed_line_numbers)) | |
85 | |
86 # Find the minimum distance between the changed lines and crashed lines. | |
87 min_distance = crash_utils.FindMinLineDistance(crashed_line_numbers, | |
88 changed_line_numbers) | |
89 | |
90 # Check whether this CL changes the crashed lines or not. | |
91 if line_number_intersection: | |
92 priority = LINE_CHANGE_PRIORITY | |
93 else: | |
94 priority = FILE_CHANGE_PRIORITY | |
95 | |
96 # Add the parsed information to the object. | |
97 with matches.matches_lock: | |
98 match.crashed_line_numbers.append(line_number_intersection) | |
99 | |
100 file_name = file_path.split('/')[-1] | |
101 match.changed_files.append(file_name) | |
102 | |
103 # Update the min distance only if it is less than the current one. | |
104 if min_distance < match.min_distance: | |
105 match.min_distance = min_distance | |
106 | |
107 # If this CL does not change the crashed line, all occurrence of this | |
108 # file in the stack has the same priority. | |
109 if not stack_frame_index_intersection: | |
110 stack_frame_index_intersection = stack_frame_indices | |
111 match.stack_frame_indices.append(stack_frame_index_intersection) | |
112 match.changed_file_urls.append(diff_url) | |
113 match.priorities.append(priority) | |
114 match.function_list.append(function) | |
115 | |
116 | |
117 def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info, | |
118 component_path, component_name, repository_parser, | |
119 codereview_api_url): | |
120 """Finds a CL that modifies file in the stacktrace. | |
121 | |
122 Args: | |
123 revisions_info_map: A dictionary mapping revision number to the CL | |
124 information. | |
125 file_to_revision_info: A dictionary mapping file to the revision that | |
126 modifies it. | |
127 file_to_crash_info: A dictionary mapping file to its occurrence in | |
128 stacktrace. | |
129 component_path: The path of the component to search for. | |
130 component_name: The name of the component to search for. | |
131 repository_parser: The parser object to parse the line diff. | |
132 codereview_api_url: A code review url to retrieve data from. | |
133 | |
134 Returns: | |
135 Matches, a set of match objects. | |
136 """ | |
137 matches = match_set.MatchSet(codereview_api_url) | |
138 threads = [] | |
139 | |
140 # Iterate through the crashed files in the stacktrace. | |
141 for crashed_file_path in file_to_crash_info: | |
142 # Ignore header file. | |
143 if crashed_file_path.endswith('.h'): | |
144 continue | |
145 | |
146 # If the file in the stacktrace is not changed in any commits, continue. | |
147 for changed_file_path in file_to_revision_info: | |
148 changed_file_name = changed_file_path.split('/')[-1] | |
149 crashed_file_name = crashed_file_path.split('/')[-1] | |
150 | |
151 if changed_file_name != crashed_file_name: | |
152 continue | |
153 | |
154 if not crash_utils.GuessIfSameSubPath( | |
155 changed_file_path, crashed_file_path): | |
156 continue | |
157 | |
158 crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers( | |
159 crashed_file_path) | |
160 stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices( | |
161 crashed_file_path) | |
162 functions = file_to_crash_info.GetCrashFunctions(crashed_file_path) | |
163 | |
164 # Iterate through the CLs that this file path is changed. | |
165 for (cl, file_change_type) in file_to_revision_info[changed_file_path]: | |
166 # If the file change is delete, ignore this CL. | |
167 if file_change_type == 'D': | |
168 continue | |
169 | |
170 revision = revisions_info_map[cl] | |
171 | |
172 match_thread = Thread( | |
173 target=GenerateMatchEntry, | |
174 args=[matches, revision, cl, changed_file_path, functions, | |
175 component_path, component_name, crashed_line_numbers, | |
176 stack_frame_nums, file_change_type, | |
177 repository_parser]) | |
178 threads.append(match_thread) | |
179 match_thread.start() | |
180 | |
181 for match_thread in threads: | |
182 match_thread.join() | |
183 | |
184 matches.RemoveRevertedCLs() | |
185 | |
186 return matches | |
187 | |
188 | |
189 def FindMatchForComponent(component_path, file_to_crash_info, changelog, | |
190 callstack_priority, results, results_lock): | |
191 """Parses changelog and finds suspected CLs for a given component. | |
192 | |
193 Args: | |
194 component_path: The path of component to look for the culprit CL. | |
195 file_to_crash_info: A dictionary mapping file to its occurrence in | |
196 stackframe. | |
197 changelog: The parsed changelog for this component. | |
198 callstack_priority: The priority of this call stack, 0 if from crash stack, | |
199 1 if from freed, 2 if from previously allocated. | |
200 results: A dictionary to store the result. | |
201 results_lock: A lock that guards results. | |
202 """ | |
203 (repository_parser, component_name, revisions, file_to_revision_map) = \ | |
204 changelog | |
205 | |
206 # Find match for this component. | |
207 codereview_api_url = CONFIG['codereview']['review_url'] | |
208 component_result = FindMatch( | |
209 revisions, file_to_revision_map, file_to_crash_info, component_path, | |
210 component_name, repository_parser, codereview_api_url) | |
211 matches = component_result.matches | |
212 | |
213 # For all the match results in a dictionary, add to the list so that it | |
214 # can be sorted. | |
215 with results_lock: | |
216 for cl in matches: | |
217 match = matches[cl] | |
218 results.append((callstack_priority, cl, match)) | |
219 | |
220 | |
221 def FindMatchForCallstack( | |
222 callstack, components, component_to_changelog_map, results, | |
223 results_lock): | |
224 """Finds culprit cl for a stack within a stacktrace. | |
225 | |
226 For each components to look for, create new thread that computes the matches | |
227 and join the results at the end. | |
228 | |
229 Args: | |
230 callstack: A callstack in a stacktrace to find the result for. | |
231 components: A set of components to look for. | |
232 component_to_changelog_map: A map from component to its parsed changelog. | |
233 results: A list to aggregrate results from all stacktraces. | |
234 results_lock: A lock that guards results. | |
235 """ | |
236 # Create component dictionary from the component and call stack. | |
237 component_dict = component_dictionary.ComponentDictionary(callstack, | |
238 components) | |
239 callstack_priority = callstack.priority | |
240 | |
241 # Iterate through all components and create new thread for each component. | |
242 threads = [] | |
243 for component_path in component_dict: | |
244 # If the component to consider in this callstack is not in the parsed list | |
245 # of components, ignore this one. | |
246 if component_path not in component_to_changelog_map: | |
247 continue | |
248 | |
249 changelog = component_to_changelog_map[component_path] | |
250 file_to_crash_info = component_dict.GetFileDict(component_path) | |
251 t = Thread( | |
252 target=FindMatchForComponent, | |
253 args=[component_path, file_to_crash_info, changelog, | |
254 callstack_priority, results, results_lock]) | |
255 threads.append(t) | |
256 t.start() | |
257 | |
258 for t in threads: | |
259 t.join() | |
260 | |
261 | |
262 def FindMatchForStacktrace(stacktrace, components, | |
263 component_to_regression_dict): | |
264 """Finds the culprit CL for stacktrace. | |
265 | |
266 The passed stacktrace is either from release build stacktrace | |
267 or debug build stacktrace. | |
268 | |
269 Args: | |
270 stacktrace: A list of parsed stacks within a stacktrace. | |
271 components: A set of components to look for. | |
272 component_to_regression_dict: A dictionary mapping component path to | |
273 its regression. | |
274 | |
275 Returns: | |
276 A list of match results from all stacks. | |
277 """ | |
278 # A list to aggregate results from all the callstacks in the stacktrace. | |
279 results = [] | |
280 results_lock = Lock() | |
281 | |
282 # Setup parsers. | |
283 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn']) | |
284 git_parser = git_repository_parser.GitParser(component_to_regression_dict, | |
285 CONFIG['git']) | |
286 | |
287 # Create a cache of parsed revisions. | |
288 component_to_changelog_map = {} | |
289 for component_path in components: | |
290 component_object = component_to_regression_dict[component_path] | |
291 range_start = component_object['old_revision'] | |
292 range_end = component_object['new_revision'] | |
293 | |
294 # If range start is 0, the range is too large and the crash has been | |
295 # introduced the archived build, so ignore this case. | |
296 if range_start == '0': | |
297 continue | |
298 | |
299 component_name = component_to_regression_dict[component_path]['name'] | |
300 | |
301 is_git = utils.IsGitHash(range_start) | |
302 if is_git: | |
303 repository_parser = git_parser | |
304 else: | |
305 repository_parser = svn_parser | |
306 | |
307 (revisions, file_to_revision_map) = repository_parser.ParseChangelog( | |
308 component_path, range_start, range_end) | |
309 | |
310 # If the returned map from ParseChangeLog is empty, we don't need to look | |
311 # further because either the parsing failed or the changelog is empty. | |
312 if not (revisions and file_to_revision_map): | |
313 continue | |
314 | |
315 component_to_changelog_map[component_path] = (repository_parser, | |
316 component_name, | |
317 revisions, | |
318 file_to_revision_map) | |
319 | |
320 # Create separate threads for each of the call stack in the stacktrace. | |
321 threads = [] | |
322 for callstack in stacktrace.stack_list: | |
323 t = Thread( | |
324 target=FindMatchForCallstack, | |
325 args=[callstack, components, component_to_changelog_map, | |
326 results, results_lock]) | |
327 threads.append(t) | |
328 t.start() | |
329 | |
330 for t in threads: | |
331 t.join() | |
332 | |
333 return results | |
334 | |
335 | |
336 def SortMatchesFunction(match_with_stack_priority): | |
337 """A function to sort the match triple. | |
338 | |
339 Currently, it sorts the list by: | |
340 1) The highest priority file change in the CL (changing crashed line is | |
341 higher priority than just changing the file). | |
342 2) The callstack this match is computed (crash stack, freed, allocation). | |
343 3) The minimum stack frame number of the changed file in the match. | |
344 4) The number of files this CL changes (higher the better). | |
345 5) The minimum distance between the lines that the CL changes and crashed | |
346 lines. | |
347 | |
348 Args: | |
349 match_with_stack_priority: A match object, with the CL it is from and what | |
350 callstack it is from. | |
351 | |
352 Returns: | |
353 A sort key. | |
354 """ | |
355 (stack_priority, _, match) = match_with_stack_priority | |
356 | |
357 return (min(match.priorities), | |
358 stack_priority, | |
359 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices, | |
360 match.priorities), | |
361 -len(match.changed_files), match.min_distance) | |
362 | |
363 | |
364 def SortAndFilterMatches(matches, num_important_frames=5): | |
365 """Filters the list of potential culprit CLs to remove noise. | |
366 | |
367 Args: | |
368 matches: A list containing match results. | |
369 num_important_frames: A number of frames on the top of the frame to Check | |
370 for when filtering the results. A match with a file | |
371 that is in top num_important_frames of the stacktrace | |
372 is regarded more probable then others. | |
373 | |
374 Returns: | |
375 Filtered match results. | |
376 """ | |
377 new_matches = [] | |
378 line_changed = False | |
379 is_important_frame = False | |
380 highest_priority_stack = crash_utils.INFINITY | |
381 | |
382 matches.sort(key=SortMatchesFunction) | |
383 | |
384 # Iterate through the matches to find out what results are significant. | |
385 for stack_priority, cl, match in matches: | |
386 # Check if the current match changes crashed line. | |
387 is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY) | |
388 | |
389 # Check which stack this match is from, and finds the highest priority | |
390 # callstack up to this point. | |
391 current_stack = stack_priority | |
392 if current_stack < highest_priority_stack: | |
393 highest_priority_stack = current_stack | |
394 | |
395 # Check if current match changes a file that occurs in crash state. | |
396 flattened_stack_frame_indices = [frame for frame_indices in | |
397 match.stack_frame_indices | |
398 for frame in frame_indices] | |
399 current_is_important = ( | |
400 min(flattened_stack_frame_indices) < num_important_frames) | |
401 | |
402 # This match and anything lower than this should be ignored if: | |
403 # - Current match does not change crashed lines but there are matches | |
404 # that do so. | |
405 # - Current match is not in crash state but there are matches in it. | |
406 # - There are other matches that came from higher priority stack. | |
407 if (line_changed and not is_line_change) or ( | |
408 is_important_frame and not current_is_important) or ( | |
409 current_stack > highest_priority_stack): | |
410 break | |
411 | |
412 # Update the variables. | |
413 if is_line_change: | |
414 line_changed = True | |
415 if current_is_important: | |
416 is_important_frame = True | |
417 | |
418 # Add current match to the filtered result. | |
419 new_matches.append((stack_priority, cl, match)) | |
420 | |
421 return new_matches | |
422 | |
423 | |
424 def GenerateReasonForMatches(matches): | |
425 """Generates a reason that a match (CL) is a culprit cl. | |
426 | |
427 Args: | |
428 matches: A list of match objects. | |
429 """ | |
430 # Iterate through the matches in the list. | |
431 for i, _, match in matches: | |
432 reason = [] | |
433 | |
434 # Zip the files in the match by the reason they are suspected | |
435 # (how the file is modified). | |
436 match_by_priority = zip( | |
437 match.priorities, match.crashed_line_numbers, match.changed_files, | |
438 match.changed_file_urls) | |
439 | |
440 # Sort the zipped changed files in the match by their priority so that the | |
441 # changed lines comes first in the reason. | |
442 match_by_priority.sort( | |
443 key=lambda (priority, crashed_line_number, file_name, url): priority) | |
444 | |
445 # Iterate through the sorted match. | |
446 for i in range(len(match_by_priority)): | |
447 (priority, crashed_line_number, file_name, file_url) = \ | |
448 match_by_priority[i] | |
449 | |
450 # If the file in the match is a line change, append a explanation. | |
451 if priority == LINE_CHANGE_PRIORITY: | |
452 reason.append( | |
453 'Line %s of file %s which potentially caused the crash ' | |
454 'according to the stacktrace, is changed in this cl.\n' % | |
455 (crash_utils.PrettifyList(crashed_line_number), | |
456 crash_utils.PrettifyFiles([(file_name, file_url)]))) | |
457 | |
458 else: | |
459 # Get all the files that are not line change. | |
460 rest_of_the_files = match_by_priority[i:] | |
461 | |
462 if len(rest_of_the_files) == 1: | |
463 file_string = 'File %s is changed in this cl.\n' | |
464 else: | |
465 file_string = 'Files %s are changed in this cl.\n' | |
466 | |
467 # Create a list of file name and its url, and prettify the list. | |
468 file_name_with_url = [(file_name, file_url) | |
469 for (_, _, file_name, file_url) | |
470 in rest_of_the_files] | |
471 pretty_file_name_url = crash_utils.PrettifyFiles(file_name_with_url) | |
472 | |
473 # Add the reason, break because we took care of the rest of the files. | |
474 reason.append(file_string % pretty_file_name_url) | |
475 break | |
476 | |
477 # Set the reason as string. | |
478 match.reason = ''.join(reason) | |
479 | |
480 | |
481 def CombineMatches(matches): | |
482 """Combine possible duplicates in matches. | |
483 | |
484 Args: | |
485 matches: A list of matches object, along with its callstack priority and | |
486 CL it is from. | |
487 Returns: | |
488 A combined list of matches. | |
489 """ | |
490 combined_matches = [] | |
491 | |
492 for stack_index, cl, match in matches: | |
493 found_match = None | |
494 | |
495 # Iterate through the list of combined matches. | |
496 for _, cl_combined, match_combined in combined_matches: | |
497 # Check for if current CL is already higher up in the result. | |
498 if cl == cl_combined: | |
499 found_match = match_combined | |
500 break | |
501 | |
502 # If current match is not already in, add it to the list of matches. | |
503 if not found_match: | |
504 combined_matches.append((stack_index, cl, match)) | |
505 continue | |
506 | |
507 # Combine the reason if the current match is already in there. | |
508 found_match.reason += match.reason | |
509 | |
510 return combined_matches | |
511 | |
512 | |
513 def FilterAndGenerateReasonForMatches(result): | |
514 """A wrapper function. | |
515 | |
516 It generates reasons for the matches and returns string representation | |
517 of filtered results. | |
518 | |
519 Args: | |
520 result: A list of match objects. | |
521 | |
522 Returns: | |
523 A string representation of filtered results. | |
524 """ | |
525 new_result = SortAndFilterMatches(result) | |
526 GenerateReasonForMatches(new_result) | |
527 combined_matches = CombineMatches(new_result) | |
528 return crash_utils.MatchListToResultList(combined_matches) | |
529 | |
530 | |
531 def ParseCrashComponents(main_stack, top_n_frames=3): | |
Martin Barbella
2014/08/19 19:52:12
I discussed this with Shuotao a bit and we think i
jeun
2014/08/19 21:37:43
Done.
| |
532 """Parses the crashing component. | |
533 | |
534 Crashing components is a component that top_n_frames of the stacktrace is | |
535 from. | |
536 | |
537 Args: | |
538 main_stack: Main stack from the stacktrace. | |
539 top_n_frames: A number of frames to regard as crashing frame. | |
540 | |
541 Returns: | |
542 A set of components. | |
543 """ | |
544 frame_list = main_stack.GetTopNFrames(top_n_frames) | |
545 components = set() | |
546 | |
547 for frame in frame_list: | |
548 components.add(frame.component_path) | |
549 | |
550 return components | |
551 | |
552 | |
553 def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict, | |
554 component_to_regression_dict): | |
555 """A wrapper function. | |
556 | |
557 Finds blame information for stack and returns string representation. | |
558 | |
559 Args: | |
560 callstack: A callstack to find the blame information. | |
561 component_to_crash_revision_dict: A dictionary mapping component to its | |
562 crash revision. | |
563 component_to_regression_dict: A dictionary mapping component to its | |
564 regression. | |
565 | |
566 Returns: | |
567 A list of blame results. | |
568 """ | |
569 # Setup parser objects to use for parsing blame information. | |
570 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn']) | |
571 git_parser = git_repository_parser.GitParser(component_to_regression_dict, | |
572 CONFIG['git']) | |
573 parsers = {} | |
574 parsers['svn'] = svn_parser | |
575 parsers['git'] = git_parser | |
576 | |
577 # Create and generate the blame objects from the callstack. | |
578 blame_list = blame.BlameList() | |
579 blame_list.FindBlame(callstack, component_to_crash_revision_dict, | |
580 component_to_regression_dict, | |
581 parsers) | |
582 | |
583 blame_list.FilterAndSortBlameList() | |
584 return crash_utils.BlameListToResultList(blame_list) | |
585 | |
586 | |
587 def FindItForCrash(stacktrace_list, | |
588 callstack, | |
589 component_to_regression_dict, | |
590 component_to_crash_revision_dict): | |
591 """Finds the culprit CL from the list of stacktrace. | |
592 | |
593 Args: | |
594 stacktrace_list: A list of stacktraces to look for, in the order of | |
595 decreasing significance. | |
596 callstack: A callstack object to show blame information for, if there are | |
597 no results for all stacktraces in the stacktrace_list. | |
598 component_to_regression_dict: A parsed regression information as a | |
599 result of parsing DEPS file. | |
600 component_to_crash_revision_dict: A parsed crash revision information. | |
601 | |
602 Returns: | |
603 A list of result objects, with the message how the result is created. | |
604 """ | |
605 # If regression information is not available, return blame information. | |
606 if not component_to_regression_dict: | |
607 return_message = ( | |
608 'Regression information is not available. The result is ' | |
609 'the blame information.') | |
610 result = GenerateAndFilterBlameList(callstack, | |
611 component_to_crash_revision_dict, | |
612 component_to_regression_dict) | |
613 return (return_message, result) | |
614 | |
615 for stacktrace in stacktrace_list: | |
616 # Check the next stacktrace if current one is empty. | |
617 if not stacktrace.stack_list: | |
618 continue | |
619 | |
620 # Get the crash stack for this stacktrace, and extract crashing components | |
621 # from it. | |
622 main_stack = stacktrace.GetCrashStack() | |
623 components = ParseCrashComponents(main_stack) | |
624 | |
625 result_for_stacktrace = FindMatchForStacktrace( | |
626 stacktrace, components, component_to_regression_dict) | |
627 | |
628 # If the result is empty, check the next stacktrace. Else, return the | |
629 # filtered result. | |
630 if not result_for_stacktrace: | |
631 continue | |
632 | |
633 return_message = ( | |
634 'The result is a list of CLs that change the crashed files.') | |
635 result = FilterAndGenerateReasonForMatches(result_for_stacktrace) | |
636 return (return_message, result) | |
637 | |
638 # If no match is found, return the blame information for the input | |
639 # callstack. | |
640 return_message = ( | |
641 'There are no CLs that change the crashed files. The result is the ' | |
642 'blame information.') | |
643 result = GenerateAndFilterBlameList( | |
644 callstack, component_to_crash_revision_dict, | |
645 component_to_regression_dict) | |
646 return (return_message, result) | |
647 | |
OLD | NEW |