Index: tools/findit/crash_utils.py
diff --git a/tools/findit/crash_utils.py b/tools/findit/crash_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..32cf7abc742488ab703cf13c7b522c923bd926cd
--- /dev/null
+++ b/tools/findit/crash_utils.py
@@ -0,0 +1,463 @@
+# Copyright (c) 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import cgi
+import ConfigParser
+import json
+import logging
+import os
+import time
+import urllib2
+
+from result import Result
+
+
+INFINITY = float('inf')
+
+
+def ParseURLsFromConfig(file_name):
+  """Parses URLs from the config file.
+
+  The file should be in python config format, where each svn section is
+  named "svn:component_path"; git URLs and the codereview URL have sections
+  of their own. Each svn section should contain changelog_url, revision_url,
+  diff_url and blame_url.
+
+  Args:
+    file_name: The name of the file that contains URL information.
+
+  Returns:
+    A dictionary that maps repository type to its URLs. For svn, it maps the
+    key 'svn' to another dictionary, which maps each component path to the
+    URLs as explained above. For git, it maps directly to the URLs. The key
+    'codereview' maps to the codereview API URL.
+  """
+  config = ConfigParser.ConfigParser()
+
+  # Get the absolute path of the config file and read it. ConfigParser.read
+  # returns the list of files successfully parsed; if it is empty, the config
+  # file is missing, so return None.
+  config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
+                                  file_name)
+  if not config.read(config_file_path):
+    logging.error('Config file with URLs does not exist.')
+    return None
+
+  # Iterate through the sections of the config file.
+  repository_type_to_url_map = {}
+  for section in config.sections():
+    # These two do not need another layer of dictionary, so add their options
+    # directly and go to the next section.
+    if section == 'git' or section == 'codereview':
+      for option in config.options(section):
+        if section not in repository_type_to_url_map:
+          repository_type_to_url_map[section] = {}
+
+        url = config.get(section, option)
+        repository_type_to_url_map[section][option] = url
+
+      continue
+
+    # Get the repository type and component path from the section name.
+    repository_type_and_component = section.split(':')
+    repository_type = repository_type_and_component[0]
+    component_path = repository_type_and_component[1]
+
+    # Add 'svn' as the key, if it is not already there.
+    if repository_type not in repository_type_to_url_map:
+      repository_type_to_url_map[repository_type] = {}
+    url_map_for_repository = repository_type_to_url_map[repository_type]
+
+    # Add the component path under 'svn', if it is not already there.
+    if component_path not in url_map_for_repository:
+      url_map_for_repository[component_path] = {}
+    type_to_url = url_map_for_repository[component_path]
+
+    # Add all URLs of this section to the map.
+    for option in config.options(section):
+      url = config.get(section, option)
+      type_to_url[option] = url
+
+  return repository_type_to_url_map
+
+
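+# As a rough illustration of the config layout ParseURLsFromConfig expects
+# (the component path and URLs are made up, and the option names under
+# [git] and [codereview] are only illustrative; the svn option names follow
+# the docstring above):
+#
+#   [svn:src/third_party/skia/]
+#   changelog_url = http://example.com/changelog?range=%s:%s
+#   revision_url = http://example.com/revision?id=%s
+#   diff_url = http://example.com/diff?id=%s
+#   blame_url = http://example.com/blame?file=%s
+#
+#   [git]
+#   repository_url = http://example.com/repo.git
+#
+#   [codereview]
+#   review_url = http://example.com/api/%s
+
+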
+def NormalizePathLinux(path, parsed_deps):
+  """Normalizes a linux path.
+
+  Args:
+    path: A string representing a path.
+    parsed_deps: A map from component path to a dictionary containing that
+      component's name and repository URL.
+
+  Returns:
+    A tuple containing the path of the component this file is in (e.g.
+    blink, skia, etc.), the component's name, and the path within that
+    component's repository.
+  """
+  # First normalize the path by retrieving the absolute path.
+  normalized_path = os.path.abspath(path)
+
+  # Iterate through all component paths in the parsed DEPS, in decreasing
+  # order of the length of the file path.
+  for component_path in sorted(parsed_deps,
+                               key=(lambda path: -len(path))):
+    # New_path is the component path with 'src/' removed.
+    new_path = component_path
+    if new_path.startswith('src/') and new_path != 'src/':
+      new_path = new_path[len('src/'):]
+
+    # If this path is part of the file path, the file must be from this
+    # component.
+    if new_path in normalized_path:
+
+      # Currently does not support googlecode.
+      if 'googlecode' in parsed_deps[component_path]['repository']:
+        return (None, '', '')
+
+      # Normalize the path by stripping everything up to and including the
+      # component's relative path.
+      normalized_path = normalized_path.split(new_path)[1]
+
+      # Add 'src/' or 'Source/' at the front of the normalized path, depending
+      # on what prefix the component path uses, if neither prefix is already
+      # present.
+      if not (normalized_path.startswith('src/') or
+              normalized_path.startswith('Source/')):
+
+        if (new_path.lower().endswith('src/') or
+            new_path.lower().endswith('source/')):
+          normalized_path = new_path.split('/')[-2] + '/' + normalized_path
+
+        else:
+          normalized_path = 'src/' + normalized_path
+
+      component_name = parsed_deps[component_path]['name']
+
+      return (component_path, component_name, normalized_path)
+
+  # If the path does not match any component, default to chromium.
+  return ('src/', 'chromium', normalized_path)
+
+
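+# For illustration (the DEPS entry and file path below are made up): given
+# parsed_deps = {'src/third_party/WebKit/Source/':
+#                {'name': 'blink', 'repository': 'http://example.com'}},
+# NormalizePathLinux('/b/src/third_party/WebKit/Source/core/dom/Node.cpp',
+#                    parsed_deps)
+# returns ('src/third_party/WebKit/Source/', 'blink',
+#          'Source/core/dom/Node.cpp').
+
+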
+def SplitRange(regression):
+  """Splits a range as retrieved from clusterfuzz.
+
+  Args:
+    regression: A string in the format 'r1234:r5678'.
+
+  Returns:
+    A list containing the two revision numbers as strings, for example
+    ['1234', '5678'], or None if the range is not available.
+  """
+  if not regression:
+    return None
+
+  revisions = regression.split(':')
+
+  # If regression information is not available, return None.
+  if len(revisions) != 2:
+    return None
+
+  # Strip 'r' from both the start and end of the range.
+  range_start = revisions[0].lstrip('r')
+  range_end = revisions[1].lstrip('r')
+
+  return [range_start, range_end]
+
+
+def LoadJSON(json_string):
+  """Loads a JSON object from a string.
+
+  Args:
+    json_string: A string to get the object from.
+
+  Returns:
+    A JSON object if the string represents one, None otherwise.
+  """
+  try:
+    data = json.loads(json_string)
+  except ValueError:
+    data = None
+
+  return data
+
+
+def GetDataFromURL(url, retries=10, sleep_time=0.1, timeout=10):
+  """Retrieves raw data from a URL, retrying on failure.
+
+  Args:
+    url: URL to get data from.
+    retries: Number of times to retry the connection.
+    sleep_time: Time in seconds to wait before retrying the connection.
+    timeout: Time in seconds to wait before timing out.
+
+  Returns:
+    The raw data, or None if retrieval fails 'retries' times.
+  """
+  for _ in range(retries):
+    # Retrieve data from the URL.
+    try:
+      data = urllib2.urlopen(url, timeout=timeout)
+
+      # If retrieval is successful, return the data.
+      if data:
+        return data.read()
+
+    # If retrieval fails, retry after sleep_time seconds.
+    except (urllib2.URLError, IOError):
+      time.sleep(sleep_time)
+      continue
+
+  # Return None if reading data from the URL failed 'retries' times.
+  return None
+
+
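+# A typical call (the URL is made up) checks for the None that is returned
+# once all retries are exhausted:
+#
+#   data = GetDataFromURL('http://example.com/changelog?range=1234:5678')
+#   if data is None:
+#     logging.error('Failed to retrieve changelog.')
+
+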
+def FindMinLineDistance(crashed_line_list, changed_line_numbers):
+  """Calculates how far the changed lines are from the crashed lines.
+
+  Finds the minimum distance between the lines that the file crashed on
+  and the lines that the CL changed in the file. For example, if the file
+  crashed on line 200 and the CL changed lines 203, 204 and 205, the
+  function returns 3.
+
+  Args:
+    crashed_line_list: A list of lines that the file crashed on.
+    changed_line_numbers: A list of lines that the CL changed in the file.
+
+  Returns:
+    The minimum distance. If either of the input lists is empty,
+    it returns inf.
+  """
+  min_distance = INFINITY
+
+  for line in crashed_line_list:
+    for changed_line in changed_line_numbers:
+      # Find the current distance and update the minimum if the current
+      # distance is smaller.
+      current_distance = abs(line - changed_line)
+      if current_distance < min_distance:
+        min_distance = current_distance
+
+  return min_distance
+
+
+def GuessIfSameSubPath(path1, path2):
+  """Guesses if two paths represent the same path.
+
+  Compares the names of the path components (by split('/')), and checks
+  whether at least 3 of them match, or all of them for paths with fewer
+  than 3 components.
+
+  Args:
+    path1: First path.
+    path2: Second path to compare.
+
+  Returns:
+    True if they are thought to be the same path, False otherwise.
+  """
+  path1 = path1.split('/')
+  path2 = path2.split('/')
+
+  intersection = set(path1).intersection(set(path2))
+  return len(intersection) >= (min(3, min(len(path1), len(path2))))
+
+
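+# For example (made-up paths), 'content/browser/renderer_host' and
+# 'src/content/browser/renderer_host/render_widget.cc' share the three
+# components 'content', 'browser' and 'renderer_host', so GuessIfSameSubPath
+# returns True for them.
+
+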
+def FindMinStackFrameNumber(stack_frame_indices, priorities):
+  """Finds the minimum stack frame number among the highest-priority files.
+
+  Args:
+    stack_frame_indices: A list of lists containing stack positions.
+    priorities: A list of priorities, one for each file.
+
+  Returns:
+    Inf if stack_frame_indices is empty, the minimum stack number otherwise.
+  """
+  # Get the indices of the highest priority (i.e. the lowest priority number).
+  highest_priority = min(priorities)
+  highest_priority_indices = []
+  for i in range(len(priorities)):
+    if priorities[i] == highest_priority:
+      highest_priority_indices.append(i)
+
+  # Gather the list of stack frame numbers for the files that change the
+  # crashed lines.
+  flattened = []
+  for i in highest_priority_indices:
+    flattened += stack_frame_indices[i]
+
+  # If no stack frame information is available, return inf. Else, return min.
+  if not flattened:
+    return INFINITY
+  else:
+    return min(flattened)
+
+
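+# A small worked example (made-up values): with priorities [1, 2, 1] and
+# stack_frame_indices [[3, 5], [0], [2]], the highest priority is 1 (files 0
+# and 2), their frame numbers flatten to [3, 5, 2], and the function
+# returns 2.
+
+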
+def AddHyperlink(text, link):
+  """Returns a string with an HTML link tag.
+
+  Args:
+    text: A string to add a link to.
+    link: A link to add to the string.
+
+  Returns:
+    A string with the hyperlink added.
+  """
+  sanitized_link = cgi.escape(link, quote=True)
+  sanitized_text = cgi.escape(str(text))
+  return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text)
+
+
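+# For example (made-up inputs),
+# AddHyperlink('a<b', 'http://example.com/?q="1"') returns
+# '<a href="http://example.com/?q=&quot;1&quot;">a&lt;b</a>', with both the
+# link and the text HTML-escaped.
+
+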
+def PrettifyList(l):
+  """Returns a string representation of a list.
+
+  It separates the elements with commas and removes the brackets.
+
+  Args:
+    l: A list to prettify.
+
+  Returns:
+    A string representation of the list.
+  """
+  return str(l)[1:-1]
+
+
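+# For example, PrettifyList([1, 2, 3]) returns '1, 2, 3'.
+
+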
+def PrettifyFiles(file_list):
+  """Returns a string representation of a list of file names.
+
+  Args:
+    file_list: A list of tuples, (file_name, file_url).
+
+  Returns:
+    A string representation of file names with their urls.
+  """
+  ret = ['\n']
+  for file_name, file_url in file_list:
+    ret.append('    %s\n' % AddHyperlink(file_name, file_url))
+  return ''.join(ret)
+
+
+def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,
+                 line_range=3):
+  """Finds the overlap between changed lines and crashed lines.
+
+  Finds the intersection of the lines that caused the crash and the lines
+  that the CL changed in the file. A changed line counts as intersecting if
+  it falls within 'line_range' lines before a crashed line.
+
+  Args:
+    crashed_line_list: A list of lines that the file crashed on.
+    stack_frame_index: A list of positions in the stack for each of the lines.
+    changed_line_numbers: A list of lines that the CL changed in the file.
+    line_range: Number of lines to look backwards from crashed lines.
+
+  Returns:
+    line_intersection: Intersection between crashed_line_list and
+                       changed_line_numbers.
+    stack_frame_index_intersection: Stack number for each of the
+                                    intersections.
+  """
+  line_intersection = []
+  stack_frame_index_intersection = []
+
+  # Iterate through the crashed lines and their occurrences in the stack.
+  for line, frame_index in zip(crashed_line_list, stack_frame_index):
+    # Also check the previous 'line_range' lines.
+    line_minus_n = range(line - line_range, line + 1)
+
+    for changed_line in changed_line_numbers:
+      # If the CL does not change this crashed line, check the next line.
+      if changed_line not in line_minus_n:
+        continue
+
+      # If the changed line is exactly the crashed line, add that line.
+      if line in changed_line_numbers:
+        intersected_line = line
+
+      # If the changed line is within 'line_range' lines of the crashed line,
+      # add the changed line.
+      else:
+        intersected_line = changed_line
+
+      # Avoid adding the same line twice.
+      if intersected_line not in line_intersection:
+        line_intersection.append(intersected_line)
+        stack_frame_index_intersection.append(frame_index)
+
+      break
+
+  return (line_intersection, stack_frame_index_intersection)
+
+
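+# A small worked example (made-up values): with crashed_line_list [10, 25],
+# stack_frame_index [0, 3], changed_line_numbers [8, 30] and the default
+# line_range of 3, changed line 8 falls within 3 lines before crashed line
+# 10 and no changed line is near line 25, so the function returns ([8], [0]).
+
+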
+def MatchListToResultList(matches):
+  """Converts a list of matches to a list of Result objects.
+
+  Args:
+    matches: A list of match objects, along with their stack priorities and
+      revision numbers/git hashes.
+
+  Returns:
+    A list of Result objects.
+  """
+  result_list = []
+
+  for _, cl, match in matches:
+    suspected_cl = cl
+    revision_url = match.url
+    component_name = match.component_name
+    author = match.author
+    reason = match.reason
+    review_url = match.review_url
+    reviewers = match.reviewers
+    # For matches, line content does not exist.
+    line_content = None
+
+    result = Result(suspected_cl, revision_url, component_name, author, reason,
+                    review_url, reviewers, line_content)
+    result_list.append(result)
+
+  return result_list
+
+
+def BlameListToResultList(blame_list):
+  """Converts a blame list to a list of Result objects.
+
+  Args:
+    blame_list: A list of blame objects.
+
+  Returns:
+    A list of Result objects.
+  """
+  result_list = []
+
+  for blame in blame_list:
+    suspected_cl = blame.revision
+    revision_url = blame.url
+    component_name = blame.component_name
+    author = blame.author
+    reason = (
+        'The CL changes line %s of file %s from stack %d.' %
+        (blame.line_number, blame.file, blame.stack_frame_index))
+    # Blame objects do not have a review url or reviewers.
+    review_url = None
+    reviewers = None
+    line_content = blame.content
+
+    result = Result(suspected_cl, revision_url, component_name, author, reason,
+                    review_url, reviewers, line_content)
+    result_list.append(result)
+
+  return result_list
+
+
+def PrettifyResultList(result_list):
+  """Prints a string representation of the result list."""
+  for result in result_list:
+    print '-----------------------'
+    print 'Suspected CL: ', AddHyperlink(result.suspected_cl,
+                                         result.suspected_cl_revision_url)
+    print 'Component: ', result.component_name
+    print 'Author: ', result.author
+    print 'Reason:\n', result.reason
+    if result.review_url:
+      print 'Review URL: ', result.review_url
+    if result.reviewers:
+      print 'Reviewers: ', PrettifyList(result.reviewers)
+    if result.line_content:
+      print 'Line content: ', result.line_content
+    print '-----------------------'