Index: tools/findit/crash_utils.py |
diff --git a/tools/findit/crash_utils.py b/tools/findit/crash_utils.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..e6cf3787200df7a3fa7962ac61cf479f2787875f |
--- /dev/null |
+++ b/tools/findit/crash_utils.py |
@@ -0,0 +1,290 @@ |
+# Copyright (c) 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import json |
+import time |
+import urllib |
+ |
+ |
def NormalizePathLinux(path):
  """Normalizes a linux path from a crash stack.

  Strips known build-machine and relative prefixes from the path and works
  out which component the file belongs to.

  Args:
    path: A string representing a path.

  Returns:
    A tuple (component, normalized_path). component is one of 'blink',
    'v8' or 'chromium'; normalized_path is the path inside that component's
    repository and always starts with 'src/' or 'Source/'.
  """
  asan_build_root = '/b/build/slave/ASAN_Release__symbolized_/build/'

  # Every strip below splits only once: splitting on all occurrences and
  # taking element [1] would drop everything after a second occurrence of
  # the marker (e.g. '.../WebKit/Source/WebKit/x.cpp' would become 'Source/').

  # TODO(jeun): Integrate with parsing DEPS file.
  if 'WebKit/' in path:
    component = 'blink'
    normalized_path = path.split('WebKit/', 1)[1]
  else:
    component = 'chromium'
    normalized_path = path

  # Drop the ASAN builder's checkout directory.
  if normalized_path.startswith(asan_build_root):
    normalized_path = normalized_path[len(asan_build_root):]

  # Drop everything up to and including the first '../../'.
  if '../../' in normalized_path:
    normalized_path = normalized_path.split('../../', 1)[1]

  # Files under src/v8/ belong to the v8 component.
  if 'src/v8/' in normalized_path:
    component = 'v8'
    normalized_path = normalized_path.split('src/v8/', 1)[1]

  # Drop everything up to and including the first './'.
  if './' in normalized_path:
    normalized_path = normalized_path.split('./', 1)[1]

  # Make sure the result is rooted at 'src/' unless it is a 'Source/' path.
  if not normalized_path.startswith(('src/', 'Source/')):
    normalized_path = 'src/' + normalized_path

  return (component, normalized_path)
+ |
+ |
def SplitRange(regression):
  """Splits a regression range as retrieved from clusterfuzz.

  Args:
    regression: A string in the format 'r1234:r5678'. The leading 'r' on
      either side is optional.

  Returns:
    A list with the two revisions as strings and without the leading 'r',
    for example ['1234', '5678']. Returns None if the input does not consist
    of exactly two ':'-separated parts.
  """
  revisions = regression.split(':')

  # Anything other than exactly "start:end" carries no usable range.
  if len(revisions) != 2:
    return None

  # Drop a single leading 'r' (as in 'r10000') from each side.
  return [
      revision[1:] if revision.startswith('r') else revision
      for revision in revisions
  ]
+ |
+ |
def LoadJSON(json_string):
  """Loads a JSON object from a string, or returns None.

  Args:
    json_string: A string to get the object from. None is tolerated so that
      the result of a failed download can be passed in directly.

  Returns:
    The decoded JSON value if json_string holds valid JSON, None otherwise.
    Note that the valid JSON document 'null' also decodes to None.
  """
  try:
    return json.loads(json_string)
  except (ValueError, TypeError):
    # ValueError: the string is not valid JSON.
    # TypeError: the input is not a string at all (e.g. None).
    return None
+ |
+ |
def GetDataFromURL(url, retries=10, sleep_time=0.1):
  """Retrieves raw data from a URL, retrying on failure.

  Args:
    url: URL to get data from.
    retries: Maximum number of attempts to open the URL.
    sleep_time: Seconds to wait between two attempts after an IOError.

  Returns:
    The raw data read from the URL, or None if every attempt failed.
  """
  for attempt in range(retries):
    try:
      response = urllib.urlopen(url)
    except IOError:
      # Wait before the next attempt; there is nothing to wait for after
      # the last one.
      if attempt < retries - 1:
        time.sleep(sleep_time)
      continue

    if response:
      # Always release the connection, even if reading fails.
      try:
        return response.read()
      finally:
        response.close()

  return None
+ |
+ |
def FindMinLineDistance(crashed_line_list, changed_line_numbers):
  """Calculates how close the changed lines are to the crashed lines.

  Looks at every (crashed line, changed line) pair and returns the smallest
  absolute difference. For example, if the file crashed on line 200 and the
  CL changes lines 203, 204 and 205, the function returns 3.

  Args:
    crashed_line_list: List of lines that the file crashed on.
    changed_line_numbers: List of lines that the file changed.

  Returns:
    The minimum distance. If either of the input lists is empty, returns inf.
  """
  gaps = [
      abs(crashed_line - changed_line)
      for crashed_line in crashed_line_list
      for changed_line in changed_line_numbers
  ]

  # No pairs to compare means no distance information at all.
  if not gaps:
    return float('inf')

  return min(gaps)
+ |
+ |
def GuessIfSamePath(path1, path2):
  """Guesses if two paths represent the same path.

  Splits both paths on '/' and counts the distinct component names they
  share, regardless of position. The paths are considered the same if they
  share at least 3 names, or all the distinct names of the shorter path if
  it has fewer than 3.

  Args:
    path1: First path.
    path2: Second path to compare.

  Returns:
    True if they are thought to be the same path, False otherwise.
  """
  components1 = set(path1.split('/'))
  components2 = set(path2.split('/'))
  shared = components1 & components2

  # The threshold is based on distinct names, the same unit as the shared
  # count. Using raw component counts would make a path with a repeated
  # name (e.g. 'a/a') fail to match even itself.
  required = min(3, len(components1), len(components2))
  return len(shared) >= required
+ |
+ |
def FindMinStackFrameNum(stack_frame_index, priorities):
  """Finds the minimum stack frame number among the highest priority files.

  Args:
    stack_frame_index: A list of lists of stack positions, one inner list
      per file.
    priorities: A list with the priority of each file, parallel to
      stack_frame_index. A lower number means a higher priority.

  Returns:
    The smallest stack frame number among the files that share the highest
    priority, or inf if there is no stack frame information (including when
    the inputs are empty).
  """
  # min() raises on an empty sequence, so handle the empty case up front.
  if not priorities:
    return float('inf')

  # The highest priority is the lowest priority number.
  highest_priority = min(priorities)

  # Gather the stack frame numbers of every file with the highest priority.
  candidate_frames = []
  for index, priority in enumerate(priorities):
    if priority == highest_priority:
      candidate_frames.extend(stack_frame_index[index])

  # If no stack frame information is available, return inf.
  if not candidate_frames:
    return float('inf')

  return min(candidate_frames)
+ |
+ |
def AddHyperlink(to_add, link):
  """Returns a string wrapped in an HTML link tag.

  Neither argument is escaped; callers must pass text that is already safe
  to embed in HTML.

  Args:
    to_add: A string to add the link to (the visible link text).
    link: The URL the text should point to.

  Returns:
    A string of the form '<a href="link">to_add</a>'.
  """
  # The closing tag must be '</a>'; a backslash does not close the element.
  return '<a href="%s">%s</a>' % (link, to_add)
+ |
+ |
def PrettifyList(l):
  """Returns a string representation of a list of ints, without brackets.

  Args:
    l: An int list to prettify.

  Returns:
    The str() form of the list with its first and last character (the
    surrounding brackets) removed, e.g. '1, 2, 3'.
  """
  rendered = str(l)
  # Cut off the opening and the closing bracket.
  return rendered[1:len(rendered) - 1]
+ |
+ |
def PrettifyFiles(file_list):
  """Returns a string representation of a list of file names.

  Args:
    file_list: A list of tuples, (file_name, file_url).

  Returns:
    A string that starts with a newline, followed by one hyperlinked file
    name per line.
  """
  linked_rows = [
      ' %s\n' % AddHyperlink(file_name, file_url)
      for file_name, file_url in file_list
  ]
  return '\n' + ''.join(linked_rows)
+ |
+ |
def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers):
  """Finds the overlap between changed lines and crashed lines.

  For every crashed line, checks whether the CL changes that line or one of
  the 3 lines directly before it. If the crashed line itself is changed, the
  crashed line is recorded; otherwise the first changed line found in that
  window is recorded. Each line is recorded at most once, together with the
  stack position of the crashed line that produced it.

  Args:
    crashed_line_list: List of lines that the file crashed on.
    stack_frame_index: List of positions in stack for each of the lines.
    changed_line_numbers: List of lines that the file changed.

  Returns:
    A tuple (line_intersection, stack_frame_index_intersection):
      line_intersection: Intersection between crashed_line_list and
        changed_line_numbers.
      stack_frame_index_intersection: Stack number for each of the
        intersections.
  """
  matched_lines = []
  matched_frames = []

  # Walk the crashed lines together with their position in the stack.
  for crashed_line, frame_number in zip(crashed_line_list, stack_frame_index):
    # The crashed line itself plus the 3 lines before it.
    window = range(crashed_line - 3, crashed_line + 1)

    # Find the first changed line that falls inside the window.
    first_hit = None
    found = False
    for changed_line in changed_line_numbers:
      if changed_line in window:
        first_hit = changed_line
        found = True
        break

    # The CL does not touch this crashed line's neighbourhood.
    if not found:
      continue

    # Prefer the crashed line itself when the CL changes it exactly.
    if crashed_line in changed_line_numbers:
      candidate = crashed_line
    else:
      candidate = first_hit

    # Avoid adding the same line twice.
    if candidate not in matched_lines:
      matched_lines.append(candidate)
      matched_frames.append(frame_number)

  return (matched_lines, matched_frames)