Chromium Code Reviews| Index: tools/findit/crash_utils.py |
| diff --git a/tools/findit/crash_utils.py b/tools/findit/crash_utils.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..4c44fa8fd7902732a4caf1600f3ff40f5c2f3eb4 |
| --- /dev/null |
| +++ b/tools/findit/crash_utils.py |
| @@ -0,0 +1,291 @@ |
| +# Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import json |
| +import time |
| +import urllib |
| + |
| + |
def NormalizePathLinux(path):
  """Maps a linux path to a component and a path inside that component.

  Args:
    path: A string representing a path.

  Returns:
    A tuple (component, normalized_path). component starts as 'blink' when
    path contains 'WebKit/' and 'chromium' otherwise, and is overridden to
    'v8' when the partially trimmed path contains 'src/v8/'.
    normalized_path is the trimmed path, prefixed with 'src/' unless it
    already starts with 'src/' or 'Source/'.
  """
  build_root = '/b/build/slave/ASAN_Release__symbolized_/build/'

  # TODO(jeun): Integrate with parsing DEPS file.
  webkit_pieces = path.split('WebKit/')
  if len(webkit_pieces) > 1:
    # Keep everything after the first 'WebKit/'; later 'WebKit/' separators
    # are dropped by the join.
    component = 'blink'
    trimmed = ''.join(webkit_pieces[1:])
  else:
    component = 'chromium'
    trimmed = path

  # Drop the build bot's checkout directory when the path starts with it.
  if trimmed.startswith(build_root):
    trimmed = trimmed.split(build_root)[1]

  # For each marker, in this order, keep only the segment that directly
  # follows its first occurrence.
  for marker in ('../../', 'src/v8/', './'):
    if marker not in trimmed:
      continue
    if marker == 'src/v8/':
      component = 'v8'
    trimmed = trimmed.split(marker)[1]

  if not trimmed.startswith(('src/', 'Source/')):
    trimmed = 'src/' + trimmed

  return (component, trimmed)
| + |
| + |
def SplitRange(regression):
  """Splits a regression range as retrieved from clusterfuzz.

  Args:
    regression: A string in format 'r1234:r5678'.

  Returns:
    A list containing the two revisions as strings, each with a single
    leading 'r' removed, for example ['1234', '5678']. None if the input
    does not consist of exactly two ':'-separated parts.
  """
  revisions = regression.split(':')

  # Anything other than exactly two parts means there is no usable
  # regression information.
  if len(revisions) != 2:
    return None

  # Drop one leading 'r', as in the 'r10000' format.
  return [rev[1:] if rev.startswith('r') else rev for rev in revisions]
| + |
| + |
def LoadJSON(json_string):
  """Decodes a JSON string, returning None when it cannot be parsed.

  Args:
    json_string: A string to decode.

  Returns:
    The decoded value, or None if json.loads raises ValueError. Note that
    the JSON literal 'null' also decodes to None.
  """
  try:
    return json.loads(json_string)
  except ValueError:
    return None
| + |
| + |
def GetDataFromURL(url, retry=10, sleep_time=0.1):
  """Retrieves raw data from a URL, retrying on I/O errors.

  Args:
    url: URL to get data from.
    retry: Maximum number of attempts to make. With a value of 0 or less no
        request is made and None is returned.
    sleep_time: Time in seconds to wait between a failed attempt and the
        next one.

  Returns:
    The raw data read from the URL, or None if every attempt failed.
  """
  for attempt in range(retry):
    try:
      response = urllib.urlopen(url)
      try:
        # Reading inside the try block means a failure while downloading the
        # body is retried just like a failure to connect.
        return response.read()
      finally:
        # Always release the connection, even if read() raised.
        response.close()
    except IOError:
      # Wait before the next attempt; there is nothing to wait for after
      # the last one.
      if attempt + 1 < retry:
        time.sleep(sleep_time)

  return None
| + |
| + |
def FindMinLineDistance(crashed_line_list, changed_line_numbers):
  """Calculates how close the changed lines are to the crashed lines.

  Finds the minimum distance between the lines that the file crashed on
  and the lines that the file changed. For example, if the file crashed on
  line 200 and the CL changes line 203, 204 and 205, the function returns 3.

  Args:
    crashed_line_list: A list of lines that the file crashed on.
    changed_line_numbers: A list of lines that the file changed.

  Returns:
    The minimum distance. If either of the input lists is empty,
    it returns inf.
  """
  # Distance between every (crashed line, changed line) pair.
  distances = [abs(crashed_line - changed_line)
               for crashed_line in crashed_line_list
               for changed_line in changed_line_numbers]

  if not distances:
    return float('inf')
  return min(distances)
| + |
| + |
def GuessIfSamePath(path1, path2):
  """Guesses whether two paths refer to the same file.

  Splits both paths on '/' and counts the distinct names they have in
  common. The paths are considered the same when that count reaches 3, or
  the number of parts in the shorter path if that is smaller.

  Args:
    path1: First path.
    path2: Second path to compare.

  Returns:
    True if they are thought to be the same path, False otherwise.
  """
  parts1 = path1.split('/')
  parts2 = path2.split('/')

  shared_names = set(parts1) & set(parts2)
  required_matches = min(3, len(parts1), len(parts2))
  return len(shared_names) >= required_matches
| + |
| + |
def FindMinStackFrameNum(stack_frame_index, priorities):
  """Finds the minimum stack frame number among the highest priority files.

  Args:
    stack_frame_index: A list of lists of stack frame numbers, one inner
        list per file.
    priorities: A list of priorities, one per file, in the same order as
        stack_frame_index. A lower number means a higher priority.

  Returns:
    The smallest stack frame number among the files that share the highest
    priority, or inf if there are no files or those files have no stack
    frame information.
  """
  # min() raises ValueError on an empty sequence, so handle "no files" here.
  if not priorities:
    return float('inf')

  highest_priority = min(priorities)

  # Gather the stack frame numbers of every file with the highest priority.
  # zip() stops at the shorter list, so a missing stack_frame_index entry is
  # treated as "no information" instead of raising IndexError.
  frame_numbers = []
  for priority, frames in zip(priorities, stack_frame_index):
    if priority == highest_priority:
      frame_numbers.extend(frames)

  if not frame_numbers:
    return float('inf')
  return min(frame_numbers)
| + |
| + |
def AddHyperlink(to_add, link):
  """Wraps a string in an HTML anchor tag.

  Both arguments are HTML-escaped before being interpolated, so neither the
  link text nor the URL can break out of the generated tag.

  Args:
    to_add: A string to use as the link text.
    link: A URL to use as the href attribute.

  Returns:
    A string of the form '<a href="link">to_add</a>'.
  """
  # Imported locally so this function does not rely on the module's
  # top-level import list.
  from xml.sax.saxutils import escape

  # The href value sits inside double quotes, so '"' has to be escaped in
  # addition to the '&', '<' and '>' that escape() handles by default.
  escaped_link = escape(link, {'"': '&quot;'})
  escaped_text = escape(to_add)
  return '<a href="%s">%s</a>' % (escaped_link, escaped_text)
|
Martin Barbella
2014/08/06 21:12:24
Depending on how this is used, this is scary. Use
jeun
2014/08/06 23:36:25
added cgi.escape to link and changed the closing t
|
| + |
| + |
def PrettifyList(l):
  """Returns a string representation of a list without the brackets.

  The result is str(l) with its first and last characters removed, so
  [1, 2, 3] becomes '1, 2, 3'.

  Args:
    l: A list to prettify.

  Returns:
    A string representation of the list.
  """
  text = str(l)
  # Slice off the opening and closing bracket.
  return text[1:len(text) - 1]
| + |
| + |
def PrettifyFiles(file_list):
  """Returns a string representation of a list of file names.

  The result starts with a newline, followed by one line per file holding
  the file name hyperlinked to its url.

  Args:
    file_list: A list of tuple, (file_name, file_url).

  Returns:
    A string representation of file names with their urls.
  """
  linked_lines = [' %s\n' % AddHyperlink(file_name, file_url)
                  for file_name, file_url in file_list]
  return '\n' + ''.join(linked_lines)
| + |
| + |
def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers):
  """Finds the overlap between changed lines and crashed lines.

  For every crashed line, looks for a changed line that is either the
  crashed line itself or one of the 3 lines before it. If the crashed line
  was changed, the crashed line is recorded; otherwise the first changed
  line found in that window is recorded. Each line is recorded at most once.

  Args:
    crashed_line_list: A list of lines that the file crashed on.
    stack_frame_index: A list of positions in stack for each of the lines.
    changed_line_numbers: A list of lines that the file changed.

  Returns:
    A tuple (line_intersection, stack_frame_index_intersection):
    line_intersection: Intersection between crashed_line_list and
        changed_line_numbers.
    stack_frame_index_intersection: Stack number for each of the
        intersections.
  """
  line_intersection = []
  stack_frame_index_intersection = []

  # Walk the crashed lines together with their positions in the stack.
  for crashed_line, frame_index in zip(crashed_line_list, stack_frame_index):
    # The crashed line and the 3 lines before it.
    window = range(crashed_line - 3, crashed_line + 1)

    if crashed_line in changed_line_numbers:
      # The CL changes exactly the crashed line.
      matched_line = crashed_line
    else:
      # Otherwise take the first changed line inside the window, if any.
      nearby_changes = [changed_line for changed_line in changed_line_numbers
                        if changed_line in window]
      if not nearby_changes:
        continue
      matched_line = nearby_changes[0]

    # Avoid adding the same line twice.
    if matched_line not in line_intersection:
      line_intersection.append(matched_line)
      stack_frame_index_intersection.append(frame_index)

  return (line_intersection, stack_frame_index_intersection)