Chromium Code Reviews

Side by Side Diff: tools/findit/findit_for_crash.py

Issue 478763003: [Findit] Bug fixing and implemented some feature requests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed code review comments and removed all references to logging. Created 6 years, 4 months ago
1 # Copyright 2014 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import os 5 import os
6 from threading import Lock, Thread 6 from threading import Lock, Thread
7 7
8 import blame 8 import blame
9 from common import utils 9 from common import utils
10 import component_dictionary 10 import component_dictionary
11 import crash_utils 11 import crash_utils
(...skipping 60 matching lines...)
72 (diff_url, changed_line_numbers, changed_line_contents) = ( 72 (diff_url, changed_line_numbers, changed_line_contents) = (
73 repository_parser.ParseLineDiff( 73 repository_parser.ParseLineDiff(
74 file_path, component_path, file_change_type, revision_number)) 74 file_path, component_path, file_change_type, revision_number))
75 75
76 # Ignore this match if the component is not supported for svn. 76 # Ignore this match if the component is not supported for svn.
77 if not diff_url: 77 if not diff_url:
78 return 78 return
79 79
80 # Find the intersection between the lines that this file crashed on and 80 # Find the intersection between the lines that this file crashed on and
81 # the changed lines. 81 # the changed lines.
82 (line_number_intersection, stack_frame_index_intersection) = ( 82 (line_number_intersection, stack_frame_index_intersection, functions) = (
83 crash_utils.Intersection( 83 crash_utils.Intersection(
84 crashed_line_numbers, stack_frame_indices, changed_line_numbers)) 84 crashed_line_numbers, stack_frame_indices, changed_line_numbers,
85 function))
85 86
86 # Find the minimum distance between the changed lines and crashed lines. 87 # Find the minimum distance between the changed lines and crashed lines.
87 min_distance = crash_utils.FindMinLineDistance(crashed_line_numbers, 88 (min_distance, min_crashed_line, min_changed_line) = \
88 changed_line_numbers) 89 crash_utils.FindMinLineDistance(crashed_line_numbers,
90 changed_line_numbers)
89 91
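
For context on the two crash_utils helpers called just above: the sketches below show roughly what Intersection and FindMinLineDistance appear to compute, inferred from their call sites in this diff. The bodies are simplified assumptions (flat lists of line numbers), not the actual crash_utils implementations.

def intersection(crashed_lines, stack_frame_indices, changed_lines, functions):
  # Keep only the crashed lines this CL also changed, together with the stack
  # frame index and function name that belong to each kept line.
  kept_lines, kept_frames, kept_functions = [], [], []
  for line, frame, function in zip(crashed_lines, stack_frame_indices,
                                   functions):
    if line in changed_lines:
      kept_lines.append(line)
      kept_frames.append(frame)
      kept_functions.append(function)
  return (kept_lines, kept_frames, kept_functions)


def find_min_line_distance(crashed_lines, changed_lines):
  # Return the smallest gap between any crashed line and any changed line,
  # plus the pair of lines that achieves it.
  best = (float('inf'), None, None)
  for crashed in crashed_lines:
    for changed in changed_lines:
      distance = abs(crashed - changed)
      if distance < best[0]:
        best = (distance, crashed, changed)
  return best
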
90 # Check whether this CL changes the crashed lines or not. 92 # Check whether this CL changes the crashed lines or not.
91 if line_number_intersection: 93 if line_number_intersection:
92 priority = LINE_CHANGE_PRIORITY 94 priority = LINE_CHANGE_PRIORITY
93 else: 95 else:
94 priority = FILE_CHANGE_PRIORITY 96 priority = FILE_CHANGE_PRIORITY
95 97
96 # Add the parsed information to the object. 98 # Add the parsed information to the object.
97 with matches.matches_lock: 99 with matches.matches_lock:
98 match.crashed_line_numbers.append(line_number_intersection) 100 match.crashed_line_numbers.append(line_number_intersection)
99 101
100 file_name = file_path.split('/')[-1] 102 file_name = file_path.split('/')[-1]
101 match.changed_files.append(file_name) 103 match.changed_files.append(file_name)
102 104
103 # Update the min distance only if it is less than the current one. 105 # Update the min distance only if it is less than the current one.
104 if min_distance < match.min_distance: 106 if min_distance < match.min_distance:
105 match.min_distance = min_distance 107 match.min_distance = min_distance
108 match.min_distance_info = (file_name, min_crashed_line, min_changed_line)
106 109
107 # If this CL does not change the crashed line, all occurrences of this 110 # If this CL does not change the crashed line, all occurrences of this
108 # file in the stack have the same priority. 111 # file in the stack have the same priority.
109 if not stack_frame_index_intersection: 112 if not stack_frame_index_intersection:
110 stack_frame_index_intersection = stack_frame_indices 113 stack_frame_index_intersection = stack_frame_indices
114 functions = function
111 match.stack_frame_indices.append(stack_frame_index_intersection) 115 match.stack_frame_indices.append(stack_frame_index_intersection)
112 match.changed_file_urls.append(diff_url) 116 match.changed_file_urls.append(diff_url)
113 match.priorities.append(priority) 117 match.priorities.append(priority)
114 match.function_list.append(function) 118 match.function_list.append(functions)
115 119
116 120
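
The function above runs on worker threads (see FindMatch below) and appends its findings to a match object shared between them, guarded by matches.matches_lock. A minimal sketch of that shared structure follows; the field names are taken from the code above, but the container classes themselves are assumptions rather than the module's real match-set implementation.

from threading import Lock


class Match(object):
  # One potential culprit CL; each list gets one entry per matching file,
  # appended while the caller holds the shared lock.
  def __init__(self):
    self.crashed_line_numbers = []
    self.changed_files = []
    self.changed_file_urls = []
    self.stack_frame_indices = []
    self.function_list = []
    self.priorities = []
    self.min_distance = float('inf')
    self.min_distance_info = None
    self.reason = ''


class MatchSet(object):
  # Shared across the threads started in FindMatch; matches_lock serializes
  # updates to the Match objects above.
  def __init__(self):
    self.matches = {}
    self.matches_lock = Lock()
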
117 def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info, 121 def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info,
118 component_path, component_name, repository_parser, 122 component_path, component_name, repository_parser,
119 codereview_api_url): 123 codereview_api_url):
120 """Finds a CL that modifies file in the stacktrace. 124 """Finds a CL that modifies file in the stacktrace.
121 125
122 Args: 126 Args:
123 revisions_info_map: A dictionary mapping revision number to the CL 127 revisions_info_map: A dictionary mapping revision number to the CL
124 information. 128 information.
(...skipping 13 matching lines...)
138 threads = [] 142 threads = []
139 143
140 # Iterate through the crashed files in the stacktrace. 144 # Iterate through the crashed files in the stacktrace.
141 for crashed_file_path in file_to_crash_info: 145 for crashed_file_path in file_to_crash_info:
142 # Ignore header file. 146 # Ignore header file.
143 if crashed_file_path.endswith('.h'): 147 if crashed_file_path.endswith('.h'):
144 continue 148 continue
145 149
146 # If the file in the stacktrace is not changed in any commits, continue. 150 # If the file in the stacktrace is not changed in any commits, continue.
147 for changed_file_path in file_to_revision_info: 151 for changed_file_path in file_to_revision_info:
148 changed_file_name = changed_file_path.split('/')[-1] 152 changed_file_name = changed_file_path.split('/')[-1].lower()
149 crashed_file_name = crashed_file_path.split('/')[-1] 153 crashed_file_name = crashed_file_path.split('/')[-1].lower()
150
151 if changed_file_name != crashed_file_name: 154 if changed_file_name != crashed_file_name:
152 continue 155 continue
153 156
154 if not crash_utils.GuessIfSameSubPath( 157 if not crash_utils.GuessIfSameSubPath(
155 changed_file_path, crashed_file_path): 158 changed_file_path.lower(), crashed_file_path.lower()):
156 continue 159 continue
157 160
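
The path check above first compares the bare file names case-insensitively and then asks crash_utils.GuessIfSameSubPath whether the two paths plausibly name the same file. That heuristic is not shown in this diff; a minimal assumed version would compare the trailing path components.

def guess_if_same_sub_path(changed_path, crashed_path):
  # Assumed heuristic: treat the two paths as the same file when the shorter
  # path matches the tail of the longer one, ignoring case. The real
  # crash_utils.GuessIfSameSubPath may differ.
  changed_parts = changed_path.lower().split('/')
  crashed_parts = crashed_path.lower().split('/')
  tail = min(len(changed_parts), len(crashed_parts))
  return changed_parts[-tail:] == crashed_parts[-tail:]
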
158 crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers( 161 crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers(
159 crashed_file_path) 162 crashed_file_path)
160 stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices( 163 stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices(
161 crashed_file_path) 164 crashed_file_path)
162 functions = file_to_crash_info.GetCrashFunctions(crashed_file_path) 165 functions = file_to_crash_info.GetCrashFunctions(crashed_file_path)
163 166
164 # Iterate through the CLs in which this file path is changed. 167 # Iterate through the CLs in which this file path is changed.
165 for (cl, file_change_type) in file_to_revision_info[changed_file_path]: 168 for (cl, file_change_type) in file_to_revision_info[changed_file_path]:
(...skipping 183 matching lines...)
349 match_with_stack_priority: A match object, with the CL it is from and what 352 match_with_stack_priority: A match object, with the CL it is from and what
350 callstack it is from. 353 callstack it is from.
351 354
352 Returns: 355 Returns:
353 A sort key. 356 A sort key.
354 """ 357 """
355 (stack_priority, _, match) = match_with_stack_priority 358 (stack_priority, _, match) = match_with_stack_priority
356 359
357 return (min(match.priorities), 360 return (min(match.priorities),
358 stack_priority, 361 stack_priority,
362 match.min_distance,
359 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices, 363 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices,
360 match.priorities), 364 match.priorities),
361 -len(match.changed_files), match.min_distance) 365 -len(match.changed_files))
362 366
363 367
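
This patch moves match.min_distance ahead of the frame number in the sort key, so how close a CL's edit is to the crashed line now matters more than how high the file sits in the stack. Below is a small usage sketch of the resulting ordering; the dictionaries stand in for the real Match objects and for the FindMinStackFrameNumber result, so the field names here are illustrative only.

def sort_key(match_with_stack_priority):
  stack_priority, _cl, match = match_with_stack_priority
  return (min(match['priorities']),        # line changes before file changes
          stack_priority,                  # crash stack before other stacks
          match['min_distance'],           # edits nearer the crashed line
          match['min_frame'],              # then higher stack frames
          -len(match['changed_files']))    # then more files changed

matches = [
    ('cl_a', (0, None, {'priorities': [1], 'min_distance': 0,
                        'min_frame': 2, 'changed_files': ['a.cc']})),
    ('cl_b', (0, None, {'priorities': [1], 'min_distance': 7,
                        'min_frame': 0, 'changed_files': ['b.cc']})),
]
matches.sort(key=lambda item: sort_key(item[1]))
# cl_a now sorts first: both CLs touch a crashed file on the crash stack, but
# cl_a edits the crashed line itself (distance 0), which outweighs cl_b's
# higher stack frame.
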
364 def SortAndFilterMatches(matches, num_important_frames=5): 368 def SortAndFilterMatches(matches, num_important_frames=5):
365 """Filters the list of potential culprit CLs to remove noise. 369 """Filters the list of potential culprit CLs to remove noise.
366 370
367 Args: 371 Args:
368 matches: A list containing match results. 372 matches: A list containing match results.
369 num_important_frames: The number of frames at the top of the stacktrace to 373 num_important_frames: The number of frames at the top of the stacktrace to
370 check when filtering the results. A match with a file 374 check when filtering the results. A match with a file
371 that is in the top num_important_frames of the stacktrace 375 that is in the top num_important_frames of the stacktrace
372 is regarded as more probable than others. 376 is regarded as more probable than others.
373 377
374 Returns: 378 Returns:
375 Filtered match results. 379 Filtered match results.
376 """ 380 """
377 new_matches = [] 381 new_matches = []
378 line_changed = False 382 line_changed = False
379 is_important_frame = False 383 is_important_frame = False
380 highest_priority_stack = crash_utils.INFINITY 384 highest_priority_stack = crash_utils.INFINITY
381 matches.sort(key=SortMatchesFunction) 385 matches.sort(key=SortMatchesFunction)
382
383 # Iterate through the matches to find out what results are significant. 386 # Iterate through the matches to find out what results are significant.
384 for stack_priority, cl, match in matches: 387 for stack_priority, cl, match in matches:
385 # Check if the current match changes the crashed line. 388 # Check if the current match changes the crashed line.
386 is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY) 389 is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY)
387 390
388 # Check which stack this match is from, and find the highest priority 391 # Check which stack this match is from, and find the highest priority
389 # callstack up to this point. 392 # callstack up to this point.
390 current_stack = stack_priority 393 current_stack = stack_priority
391 if current_stack < highest_priority_stack: 394 if current_stack < highest_priority_stack:
392 highest_priority_stack = current_stack 395 highest_priority_stack = current_stack
(...skipping 34 matching lines...)
427 matches: A list of match objects. 430 matches: A list of match objects.
428 """ 431 """
429 # Iterate through the matches in the list. 432 # Iterate through the matches in the list.
430 for i, _, match in matches: 433 for i, _, match in matches:
431 reason = [] 434 reason = []
432 435
433 # Zip the files in the match by the reason they are suspected 436 # Zip the files in the match by the reason they are suspected
434 # (how the file is modified). 437 # (how the file is modified).
435 match_by_priority = zip( 438 match_by_priority = zip(
436 match.priorities, match.crashed_line_numbers, match.changed_files, 439 match.priorities, match.crashed_line_numbers, match.changed_files,
437 match.changed_file_urls) 440 match.stack_frame_indices, match.function_list)
438 441
439 # Sort the zipped changed files in the match by their priority so that the 442 # Sort the zipped changed files in the match by their priority so that the
440 # changed lines come first in the reason. 443 # changed lines come first in the reason.
441 match_by_priority.sort( 444 match_by_priority.sort(
442 key=lambda (priority, crashed_line_number, file_name, url): priority) 445 key=lambda (priority, crashed_line_numbers, file_name,
446 stack_frame_indices, function_list): priority)
443 447
444 # Iterate through the sorted match. 448 # Iterate through the sorted match.
445 for i in range(len(match_by_priority)): 449 for i in range(len(match_by_priority)):
446 (priority, crashed_line_number, file_name, file_url) = \ 450 (priority, crashed_line_numbers, file_name, stack_frame_indices,
447 match_by_priority[i] 451 function_list) = match_by_priority[i]
448 452
449 # If the file in the match is a line change, append an explanation. 453 # If the file in the match is a line change, append an explanation.
450 if priority == LINE_CHANGE_PRIORITY: 454 if priority == LINE_CHANGE_PRIORITY:
455 crashed_line_numbers = [crashed_line_number
456 for lines in crashed_line_numbers
457 for crashed_line_number in lines]
451 reason.append( 458 reason.append(
452 'Line %s of file %s which potentially caused the crash ' 459 'Line %s of file %s which potentially caused the crash '
453 'according to the stacktrace, is changed in this cl.\n' % 460 'according to the stacktrace, is changed in this cl'
454 (crash_utils.PrettifyList(crashed_line_number), 461 ' (From stack frame %s, function %s).' %
455 crash_utils.PrettifyFiles([(file_name, file_url)]))) 462 (crash_utils.PrettifyList(crashed_line_numbers),
463 file_name,
464 crash_utils.PrettifyList(stack_frame_indices),
465 crash_utils.PrettifyList(function_list)))
456 466
457 else: 467 else:
458 # Get all the files that are not line change. 468 # Get all the files that are not line change.
459 rest_of_the_files = match_by_priority[i:] 469 rest_of_the_files = match_by_priority[i:]
460 470
461 if len(rest_of_the_files) == 1: 471 if len(rest_of_the_files) == 1:
462 file_string = 'File %s is changed in this cl.\n' 472 file_string = 'File %s is changed in this cl '
463 else: 473 else:
464 file_string = 'Files %s are changed in this cl.\n' 474 file_string = 'Files %s are changed in this cl '
465 475
466 # Create a list of file name and its url, and prettify the list. 476 # Create a list of file names, and prettify the list.
467 file_name_with_url = [(file_name, file_url) 477 file_names = [
468 for (_, _, file_name, file_url) 478 file_name for (_, _, file_name, _, _) in rest_of_the_files]
469 in rest_of_the_files] 479 pretty_file_names = crash_utils.PrettifyList(file_names)
470 pretty_file_name_url = crash_utils.PrettifyFiles(file_name_with_url)
471 480
472 # Add the reason, break because we took care of the rest of the files. 481 # Add the reason, break because we took care of the rest of the files.
473 reason.append(file_string % pretty_file_name_url) 482 file_string += ('(From stack frames %s, functions %s)' %
483 (crash_utils.PrettifyList(stack_frame_indices),
484 crash_utils.PrettifyList(function_list)))
485 reason.append(file_string % pretty_file_names)
474 break 486 break
475 487
476 # Set the reason as a string. 488 # Set the reason as a string.
477 match.reason = ''.join(reason) 489 match.reason = '\n'.join(reason)
478 490
479 491
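
A portability note on the sort in GenerateReasonForMatches above: the lambda uses a tuple parameter, a Python 2-only form that PEP 3113 removed, and match_by_priority comes from zip(), which returns a list only in Python 2. That is fine for this Python 2 tool; for reference, a self-contained, Python 3 compatible sketch of the same zip-and-sort (with hypothetical field values) looks like this.

from collections import namedtuple

# Stand-in for one Match object from the loop above (hypothetical values).
Match = namedtuple('Match', ['priorities', 'crashed_line_numbers',
                             'changed_files', 'stack_frame_indices',
                             'function_list'])
match = Match([2, 1], [[], [42]], ['a.cc', 'b.cc'], [[3], [0]],
              [['Foo'], ['Bar']])

# zip() must be materialized into a list before sorting, and the lambda
# indexes the tuple instead of unpacking it in the parameter list.
match_by_priority = list(zip(
    match.priorities, match.crashed_line_numbers, match.changed_files,
    match.stack_frame_indices, match.function_list))
match_by_priority.sort(key=lambda entry: entry[0])
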
480 def CombineMatches(matches): 492 def CombineMatches(matches):
481 """Combine possible duplicates in matches. 493 """Combine possible duplicates in matches.
482 494
483 Args: 495 Args:
484 matches: A list of matches object, along with its callstack priority and 496 matches: A list of matches object, along with its callstack priority and
485 CL it is from. 497 CL it is from.
486 Returns: 498 Returns:
487 A combined list of matches. 499 A combined list of matches.
(...skipping 10 matching lines...)
498 found_match = match_combined 510 found_match = match_combined
499 break 511 break
500 512
501 # If the current match is not already in the list, add it to the matches. 513 # If the current match is not already in the list, add it to the matches.
502 if not found_match: 514 if not found_match:
503 combined_matches.append((stack_index, cl, match)) 515 combined_matches.append((stack_index, cl, match))
504 continue 516 continue
505 517
506 # Combine the reasons if the current match is already in the list. 518 # Combine the reasons if the current match is already in the list.
507 found_match.reason += match.reason 519 found_match.reason += match.reason
520 if match.min_distance < found_match.min_distance:
521 found_match.min_distance = match.min_distance
522 found_match.min_distance_info = match.min_distance_info
523
524 for stack_index, cl, match in combined_matches:
525 if match.min_distance_info:
526 file_name, min_crashed_line, min_changed_line = match.min_distance_info
527 match.reason += \
528 ('\nMinimum distance from crashed line to changed line: %d. '
529 '(File: %s, Crashed on: %d, Changed: %d).' %
530 (match.min_distance, file_name, min_crashed_line, min_changed_line))
508 531
509 return combined_matches 532 return combined_matches
510 533
511 534
512 def FilterAndGenerateReasonForMatches(result): 535 def FilterAndGenerateReasonForMatches(result):
513 """A wrapper function. 536 """A wrapper function.
514 537
515 It generates reasons for the matches and returns string representation 538 It generates reasons for the matches and returns string representation
516 of filtered results. 539 of filtered results.
517 540
(...skipping 10 matching lines...)
528 551
529 552
530 def ParseCrashComponents(main_stack): 553 def ParseCrashComponents(main_stack):
531 """Parses the crashing component. 554 """Parses the crashing component.
532 555
533 A crashing component is a component that the top frames of the stacktrace 556 A crashing component is a component that the top frames of the stacktrace
534 are from. 557 are from.
535 558
536 Args: 559 Args:
537 main_stack: Main stack from the stacktrace. 560 main_stack: Main stack from the stacktrace.
538 top_n_frames: A number of frames to regard as crashing frame.
539 561
540 Returns: 562 Returns:
541 A set of components. 563 A set of components.
542 """ 564 """
543 components = set() 565 components = set()
544 566
545 for frame in main_stack.frame_list: 567 for frame in main_stack.frame_list:
546 components.add(frame.component_path) 568 components.add(frame.component_path)
547 569
548 return components 570 return components
549 571
550 572
551 def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict, 573 def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict,
552 component_to_regression_dict): 574 component_to_regression_dict):
553 """A wrapper function. 575 """A wrapper function.
554 576
555 Finds blame information for stack and returns string representation. 577 Finds blame information for stack and returns string representation.
556 578
557 Args: 579 Args:
558 callstack: A callstack to find the blame information. 580 callstack: A callstack to find the blame information.
559 component_to_crash_revision_dict: A dictionary mapping component to its 581 component_to_crash_revision_dict: A dictionary mapping component to its
560 crash revision. 582 crash revision.
561 component_to_regression_dict: A dictionary mapping component to its 583 component_to_regression_dict: A dictionary mapping component to its
562 regression. 584 regression.
563 585
564 Returns: 586 Returns:
565 A list of blame results. 587 A list of blame results.
566 """ 588 """
589 if component_to_regression_dict:
590 parsed_deps = component_to_regression_dict
591 else:
592 parsed_deps = component_to_crash_revision_dict
593
567 # Setup parser objects to use for parsing blame information. 594 # Setup parser objects to use for parsing blame information.
568 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn']) 595 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
569 git_parser = git_repository_parser.GitParser(component_to_regression_dict, 596 git_parser = git_repository_parser.GitParser(parsed_deps, CONFIG['git'])
570 CONFIG['git'])
571 parsers = {} 597 parsers = {}
572 parsers['svn'] = svn_parser 598 parsers['svn'] = svn_parser
573 parsers['git'] = git_parser 599 parsers['git'] = git_parser
574 600
575 # Create and generate the blame objects from the callstack. 601 # Create and generate the blame objects from the callstack.
576 blame_list = blame.BlameList() 602 blame_list = blame.BlameList()
577 blame_list.FindBlame(callstack, component_to_crash_revision_dict, 603 blame_list.FindBlame(callstack, component_to_crash_revision_dict,
578 component_to_regression_dict, 604 component_to_regression_dict,
579 parsers) 605 parsers)
580 606
(...skipping 14 matching lines...)
595 no results for all stacktraces in the stacktrace_list. 621 no results for all stacktraces in the stacktrace_list.
596 component_to_regression_dict: A parsed regression information as a 622 component_to_regression_dict: A parsed regression information as a
597 result of parsing DEPS file. 623 result of parsing DEPS file.
598 component_to_crash_revision_dict: A parsed crash revision information. 624 component_to_crash_revision_dict: A parsed crash revision information.
599 625
600 Returns: 626 Returns:
601 A list of result objects, with a message describing how the result is created. 627 A list of result objects, with a message describing how the result is created.
602 """ 628 """
603 # If regression information is not available, return blame information. 629 # If regression information is not available, return blame information.
604 if not component_to_regression_dict: 630 if not component_to_regression_dict:
605 return_message = (
606 'Regression information is not available. The result is '
607 'the blame information.')
608 result = GenerateAndFilterBlameList(callstack, 631 result = GenerateAndFilterBlameList(callstack,
609 component_to_crash_revision_dict, 632 component_to_crash_revision_dict,
610 component_to_regression_dict) 633 component_to_regression_dict)
634 if result:
635 return_message = (
636 'Regression information is not available. The result is '
637 'the blame information.')
638 else:
639 return_message = ('Findit could not find any suspected CLs.')
640
611 return (return_message, result) 641 return (return_message, result)
612 642
613 for stacktrace in stacktrace_list: 643 for stacktrace in stacktrace_list:
614 # Check the next stacktrace if the current one is empty. 644 # Check the next stacktrace if the current one is empty.
615 if not stacktrace.stack_list: 645 if not stacktrace.stack_list:
616 continue 646 continue
617 647
618 # Get the crash stack for this stacktrace, and extract crashing components 648 # Get the crash stack for this stacktrace, and extract crashing components
619 # from it. 649 # from it.
620 main_stack = stacktrace.GetCrashStack() 650 main_stack = stacktrace.GetCrashStack()
621 components = ParseCrashComponents(main_stack) 651 components = ParseCrashComponents(main_stack)
622 652
623 result_for_stacktrace = FindMatchForStacktrace( 653 result_for_stacktrace = FindMatchForStacktrace(
624 stacktrace, components, component_to_regression_dict) 654 stacktrace, components, component_to_regression_dict)
655 filtered_result = FilterAndGenerateReasonForMatches(result_for_stacktrace)
625 656
626 # If the result is empty, check the next stacktrace. Else, return the 657 # If the result is empty, check the next stacktrace. Else, return the
627 # filtered result. 658 # filtered result.
628 if not result_for_stacktrace: 659 if not filtered_result:
629 continue 660 continue
630 661
631 return_message = ( 662 return_message = (
632 'The result is a list of CLs that change the crashed files.') 663 'The result is a list of CLs that change the crashed files.')
633 result = FilterAndGenerateReasonForMatches(result_for_stacktrace) 664 return (return_message, filtered_result)
634 return (return_message, result)
635 665
636 # If no match is found, return the blame information for the input 666 # If no match is found, return the blame information for the input
637 # callstack. 667 # callstack.
638 return_message = (
639 'There are no CLs that change the crashed files. The result is the '
640 'blame information.')
641 result = GenerateAndFilterBlameList( 668 result = GenerateAndFilterBlameList(
642 callstack, component_to_crash_revision_dict, 669 callstack, component_to_crash_revision_dict,
643 component_to_regression_dict) 670 component_to_regression_dict)
671
672 if result:
673 return_message = (
674 'No CL in the regression changes the crashed files. The result is '
675 'the blame information.')
676
677 # When Findit could not find any CL that changes a file in the stacktrace, or
678 # if it cannot get any blame information, return a message saying that no
679 # results are available.
680 else:
681 return_message = ('Findit could not find any suspected CLs.')
682
644 return (return_message, result) 683 return (return_message, result)
645 684