tools/findit/findit_for_crash.py - Issue 478763003: [Findit] Bug fixing and implemented some feature requests.

Side by Side Diff: tools/findit/findit_for_crash.py

Issue 478763003: [Findit] Bug fixing and implemented some feature requests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: fixed a bug in intersection Created 6 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2014 The Chromium Authors. All rights reserved.	1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import os	5 import os

6 from threading import Lock, Thread	6 from threading import Lock, Thread

7	7

8 import blame	8 import blame

9 from common import utils	9 from common import utils

10 import component_dictionary	10 import component_dictionary

11 import crash_utils	11 import crash_utils

12 import git_repository_parser	12 import git_repository_parser

13 import match_set	13 import match_set

14 import svn_repository_parser	14 import svn_repository_parser

	15 import svn_repository_parser
	stgao 2014/08/22 06:50:54: duplicate
	jeun 2014/08/22 22:58:43 (in reply to Shuotao's comment of 2014/08/22 06:50:54, "duplicate"): Done.
15	16

16	17

17 LINE_CHANGE_PRIORITY = 1	18 LINE_CHANGE_PRIORITY = 1

18 FILE_CHANGE_PRIORITY = 2	19 FILE_CHANGE_PRIORITY = 2

19 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))	20 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))

20 CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR,	21 CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR,

21 'config.ini'))	22 'config.ini'))

22	23

23	24

24 def GenerateMatchEntry(	25 def GenerateMatchEntry(

(...skipping 113 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
138 threads = []	139 threads = []

139	140

140 # Iterate through the crashed files in the stacktrace.	141 # Iterate through the crashed files in the stacktrace.

141 for crashed_file_path in file_to_crash_info:	142 for crashed_file_path in file_to_crash_info:

142 # Ignore header file.	143 # Ignore header file.

143 if crashed_file_path.endswith('.h'):	144 if crashed_file_path.endswith('.h'):

144 continue	145 continue

145	146

146 # If the file in the stacktrace is not changed in any commits, continue.	147 # If the file in the stacktrace is not changed in any commits, continue.

147 for changed_file_path in file_to_revision_info:	148 for changed_file_path in file_to_revision_info:

148 changed_file_name = changed_file_path.split('/')[-1]	149 changed_file_name = changed_file_path.split('/')[-1].lower()
	stgao 2014/08/22 06:50:54: use os.path.basename instead.
	jeun 2014/08/22 22:58:43 (in reply to Shuotao's comment of 2014/08/22 06:50:54, "use os.path.basename instead."): Changed to split('/') so that it would also work on a Windows machine.
149 crashed_file_name = crashed_file_path.split('/')[-1]	150 crashed_file_name = crashed_file_path.split('/')[-1].lower()

150

151 if changed_file_name != crashed_file_name:	151 if changed_file_name != crashed_file_name:

152 continue	152 continue

153	153

154 if not crash_utils.GuessIfSameSubPath(	154 if not crash_utils.GuessIfSameSubPath(

155 changed_file_path, crashed_file_path):	155 changed_file_path.lower(), crashed_file_path.lower()):

156 continue	156 continue

157	157

158 crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers(	158 crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers(

159 crashed_file_path)	159 crashed_file_path)

160 stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices(	160 stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices(

161 crashed_file_path)	161 crashed_file_path)

162 functions = file_to_crash_info.GetCrashFunctions(crashed_file_path)	162 functions = file_to_crash_info.GetCrashFunctions(crashed_file_path)

163	163

164 # Iterate through the CLs that this file path is changed.	164 # Iterate through the CLs that this file path is changed.

165 for (cl, file_change_type) in file_to_revision_info[changed_file_path]:	165 for (cl, file_change_type) in file_to_revision_info[changed_file_path]:

(...skipping 183 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
349 match_with_stack_priority: A match object, with the CL it is from and what	349 match_with_stack_priority: A match object, with the CL it is from and what

350 callstack it is from.	350 callstack it is from.

351	351

352 Returns:	352 Returns:

353 A sort key.	353 A sort key.

354 """	354 """

355 (stack_priority, _, match) = match_with_stack_priority	355 (stack_priority, _, match) = match_with_stack_priority

356	356

357 return (min(match.priorities),	357 return (min(match.priorities),

358 stack_priority,	358 stack_priority,

	359 match.min_distance,
	stgao 2014/08/22 06:50:53: Why do we make this change?
	jeun 2014/08/22 22:58:43 (in reply to Shuotao's comment of 2014/08/22 06:50:53, "Why we do this change?"): Abhishek suggested it.
359 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices,	360 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices,

360 match.priorities),	361 match.priorities))

361 -len(match.changed_files), match.min_distance)

362	362

363	363

364 def SortAndFilterMatches(matches, num_important_frames=5):	364 def SortAndFilterMatches(matches, num_important_frames=5):

365 """Filters the list of potential culprit CLs to remove noise.	365 """Filters the list of potential culprit CLs to remove noise.

366	366

367 Args:	367 Args:

368 matches: A list containing match results.	368 matches: A list containing match results.

369 num_important_frames: A number of frames on the top of the frame to Check	369 num_important_frames: A number of frames on the top of the frame to Check

370 for when filtering the results. A match with a file	370 for when filtering the results. A match with a file

371 that is in top num_important_frames of the stacktrace	371 that is in top num_important_frames of the stacktrace

372 is regarded more probable then others.	372 is regarded more probable then others.

373	373

374 Returns:	374 Returns:

375 Filtered match results.	375 Filtered match results.

376 """	376 """

377 new_matches = []	377 new_matches = []

378 line_changed = False	378 line_changed = False

379 is_important_frame = False	379 is_important_frame = False

380 highest_priority_stack = crash_utils.INFINITY	380 highest_priority_stack = crash_utils.INFINITY

381 matches.sort(key=SortMatchesFunction)	381 matches.sort(key=SortMatchesFunction)

382

383 # Iterate through the matches to find out what results are significant.	382 # Iterate through the matches to find out what results are significant.

384 for stack_priority, cl, match in matches:	383 for stack_priority, cl, match in matches:

385 # Check if the current match changes crashed line.	384 # Check if the current match changes crashed line.

386 is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY)	385 is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY)

387	386

388 # Check which stack this match is from, and finds the highest priority	387 # Check which stack this match is from, and finds the highest priority

389 # callstack up to this point.	388 # callstack up to this point.

390 current_stack = stack_priority	389 current_stack = stack_priority

391 if current_stack < highest_priority_stack:	390 if current_stack < highest_priority_stack:

392 highest_priority_stack = current_stack	391 highest_priority_stack = current_stack

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
426 Args:	425 Args:

427 matches: A list of match objects.	426 matches: A list of match objects.

428 """	427 """

429 # Iterate through the matches in the list.	428 # Iterate through the matches in the list.

430 for i, _, match in matches:	429 for i, _, match in matches:

431 reason = []	430 reason = []

432	431

433 # Zip the files in the match by the reason they are suspected	432 # Zip the files in the match by the reason they are suspected

434 # (how the file is modified).	433 # (how the file is modified).

435 match_by_priority = zip(	434 match_by_priority = zip(

436 match.priorities, match.crashed_line_numbers, match.changed_files,	435 match.priorities, match.crashed_line_numbers, match.changed_files)

437 match.changed_file_urls)

438	436

439 # Sort the zipped changed files in the match by their priority so that the	437 # Sort the zipped changed files in the match by their priority so that the

440 # changed lines comes first in the reason.	438 # changed lines comes first in the reason.

441 match_by_priority.sort(	439 match_by_priority.sort(

442 key=lambda (priority, crashed_line_number, file_name, url): priority)	440 key=lambda (priority, crashed_line_numbers, file_name): priority)

443	441

444 # Iterate through the sorted match.	442 # Iterate through the sorted match.

445 for i in range(len(match_by_priority)):	443 for i in range(len(match_by_priority)):

446 (priority, crashed_line_number, file_name, file_url) = \	444 (priority, crashed_line_numbers, file_name) = match_by_priority[i]

447 match_by_priority[i]

448	445

449 # If the file in the match is a line change, append a explanation.	446 # If the file in the match is a line change, append a explanation.

450 if priority == LINE_CHANGE_PRIORITY:	447 if priority == LINE_CHANGE_PRIORITY:

	448 crashed_line_numbers = [crashed_line_number

	449 for lines in crashed_line_numbers

	450 for crashed_line_number in lines]

451 reason.append(	451 reason.append(

452 'Line %s of file %s which potentially caused the crash '	452 'Line %s of file %s which potentially caused the crash '

453 'according to the stacktrace, is changed in this cl.\n' %	453 'according to the stacktrace, is changed in this cl.\n' %

454 (crash_utils.PrettifyList(crashed_line_number),	454 (crash_utils.PrettifyList(crashed_line_numbers),

455 crash_utils.PrettifyFiles([(file_name, file_url)])))	455 file_name))

456	456

457 else:	457 else:

458 # Get all the files that are not line change.	458 # Get all the files that are not line change.

459 rest_of_the_files = match_by_priority[i:]	459 rest_of_the_files = match_by_priority[i:]

460	460

461 if len(rest_of_the_files) == 1:	461 if len(rest_of_the_files) == 1:

462 file_string = 'File %s is changed in this cl.\n'	462 file_string = 'File %s is changed in this cl.\n'

463 else:	463 else:

464 file_string = 'Files %s are changed in this cl.\n'	464 file_string = 'Files %s are changed in this cl.\n'

465	465

466 # Create a list of file name and its url, and prettify the list.	466 # Create a list of file names, and prettify the list.

467 file_name_with_url = [(file_name, file_url)	467 file_names = [file_name
	Martin Barbella 2014/08/22 02:24:14: Nit: just include all of this as a continuation on the next line (move file_name down with the rest).
	jeun 2014/08/22 22:58:43 (in reply to mbarbella's comment of 2014/08/22 02:24:14, "Nit: just include all of this as a continuation on the next line (move file_name down with the rest)"): Done.
468 for (_, _, file_name, file_url)	468 for (_, _, file_name) in rest_of_the_files]

469 in rest_of_the_files]	469 pretty_file_names = crash_utils.PrettifyList(file_names)

470 pretty_file_name_url = crash_utils.PrettifyFiles(file_name_with_url)

471	470

472 # Add the reason, break because we took care of the rest of the files.	471 # Add the reason, break because we took care of the rest of the files.

473 reason.append(file_string % pretty_file_name_url)	472 reason.append(file_string % pretty_file_names)

474 break	473 break

475	474

476 # Set the reason as string.	475 # Set the reason as string.

477 match.reason = ''.join(reason)	476 match.reason = ''.join(reason)

478	477

479	478

480 def CombineMatches(matches):	479 def CombineMatches(matches):

481 """Combine possible duplicates in matches.	480 """Combine possible duplicates in matches.

482	481

483 Args:	482 Args:

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
528	527

529	528

530 def ParseCrashComponents(main_stack):	529 def ParseCrashComponents(main_stack):

531 """Parses the crashing component.	530 """Parses the crashing component.

532	531

533 Crashing components is a component that top_n_frames of the stacktrace is	532 Crashing components is a component that top_n_frames of the stacktrace is

534 from.	533 from.

535	534

536 Args:	535 Args:

537 main_stack: Main stack from the stacktrace.	536 main_stack: Main stack from the stacktrace.

538 top_n_frames: A number of frames to regard as crashing frame.

539	537

540 Returns:	538 Returns:

541 A set of components.	539 A set of components.

542 """	540 """

543 components = set()	541 components = set()

544	542

545 for frame in main_stack.frame_list:	543 for frame in main_stack.frame_list:

546 components.add(frame.component_path)	544 components.add(frame.component_path)

547	545

548 return components	546 return components

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
595 no results for all stacktraces in the stacktrace_list.	593 no results for all stacktraces in the stacktrace_list.

596 component_to_regression_dict: A parsed regression information as a	594 component_to_regression_dict: A parsed regression information as a

597 result of parsing DEPS file.	595 result of parsing DEPS file.

598 component_to_crash_revision_dict: A parsed crash revision information.	596 component_to_crash_revision_dict: A parsed crash revision information.

599	597

600 Returns:	598 Returns:

601 A list of result objects, with the message how the result is created.	599 A list of result objects, with the message how the result is created.

602 """	600 """

603 # If regression information is not available, return blame information.	601 # If regression information is not available, return blame information.

604 if not component_to_regression_dict:	602 if not component_to_regression_dict:

605 return_message = (

606 'Regression information is not available. The result is '

607 'the blame information.')

608 result = GenerateAndFilterBlameList(callstack,	603 result = GenerateAndFilterBlameList(callstack,

609 component_to_crash_revision_dict,	604 component_to_crash_revision_dict,

610 component_to_regression_dict)	605 component_to_regression_dict)

	606 if result:

	607 return_message = (

	608 'Regression information is not available. The result is '

	609 'the blame information.')

	610 else:

	611 return_message = ('Findit could not find any suspected CLs.')
	stgao 2014/08/22 06:50:54: Please add a comment on why we would reach this case.
	jeun 2014/08/22 22:58:43 (in reply to Shuotao's comment of 2014/08/22 06:50:54, "Please add a comment on why we would reach this case."): Done.
	612

611 return (return_message, result)	613 return (return_message, result)

612	614

613 for stacktrace in stacktrace_list:	615 for stacktrace in stacktrace_list:

614 # Check the next stacktrace if current one is empty.	616 # Check the next stacktrace if current one is empty.

615 if not stacktrace.stack_list:	617 if not stacktrace.stack_list:

616 continue	618 continue

617	619

618 # Get the crash stack for this stacktrace, and extract crashing components	620 # Get the crash stack for this stacktrace, and extract crashing components

619 # from it.	621 # from it.

620 main_stack = stacktrace.GetCrashStack()	622 main_stack = stacktrace.GetCrashStack()

621 components = ParseCrashComponents(main_stack)	623 components = ParseCrashComponents(main_stack)

622	624

623 result_for_stacktrace = FindMatchForStacktrace(	625 result_for_stacktrace = FindMatchForStacktrace(

624 stacktrace, components, component_to_regression_dict)	626 stacktrace, components, component_to_regression_dict)

	627 filtered_result = FilterAndGenerateReasonForMatches(result_for_stacktrace)

625	628

626 # If the result is empty, check the next stacktrace. Else, return the	629 # If the result is empty, check the next stacktrace. Else, return the

627 # filtered result.	630 # filtered result.

628 if not result_for_stacktrace:	631 if not filtered_result:

629 continue	632 continue

630	633

631 return_message = (	634 return_message = (

632 'The result is a list of CLs that change the crashed files.')	635 'The result is a list of CLs that change the crashed files.')

633 result = FilterAndGenerateReasonForMatches(result_for_stacktrace)	636 return (return_message, filtered_result)

634 return (return_message, result)

635	637

636 # If no match is found, return the blame information for the input	638 # If no match is found, return the blame information for the input

637 # callstack.	639 # callstack.

638 return_message = (

639 'There are no CLs that change the crashed files. The result is the '

640 'blame information.')

641 result = GenerateAndFilterBlameList(	640 result = GenerateAndFilterBlameList(

642 callstack, component_to_crash_revision_dict,	641 callstack, component_to_crash_revision_dict,

643 component_to_regression_dict)	642 component_to_regression_dict)

	643

	644 if result:

	645 return_message = (

	646 'No CL in the regression changes the crashed files. The result is '

	647 'the blame information.')

	648 else:

	649 return_message = ('Findit could not find any suspected CLs.')

	650

644 return (return_message, result)	651 return (return_message, result)

645	652

OLD	NEW

« tools/findit/findit_for_clusterfuzz.py ('K') | « tools/findit/findit_for_clusterfuzz.py ('k') | tools/findit/git_repository_parser.py » ('j') | tools/findit/git_repository_parser.py » ('J')