Index: tools/findit/crash_utils.py |
diff --git a/tools/findit/crash_utils.py b/tools/findit/crash_utils.py |
index 42a17ce2cb341e7e4202a8a4aaef8261ee52e6ea..d5401f4b5f8fc8a58b823e4e5b4925a9a904374c 100644 |
--- a/tools/findit/crash_utils.py |
+++ b/tools/findit/crash_utils.py |
@@ -10,7 +10,6 @@ import os |
import time |
import urllib2 |
-from common import utils |
from result import Result |
@@ -45,17 +44,17 @@ def ParseURLsFromConfig(file_name): |
return None |
# Iterate through the config file, check for sections. |
- parsed_config = {} |
+ config_dict = {} |
for section in config.sections(): |
# These two do not need another layer of dictionary, so add it and go |
# to next section. |
if ':' not in section: |
for option in config.options(section): |
- if section not in parsed_config: |
- parsed_config[section] = {} |
+ if section not in config_dict: |
+ config_dict[section] = {} |
url = config.get(section, option) |
- parsed_config[section][option] = url |
+ config_dict[section][option] = url |
continue |
@@ -65,9 +64,9 @@ def ParseURLsFromConfig(file_name): |
component_path = repository_type_and_component[1] |
# Add 'svn' as the key, if it is not already there. |
- if repository_type not in parsed_config: |
- parsed_config[repository_type] = {} |
- url_map_for_repository = parsed_config[repository_type] |
+ if repository_type not in config_dict: |
+ config_dict[repository_type] = {} |
+ url_map_for_repository = config_dict[repository_type] |
# Add the path to the 'svn', if it is not already there. |
if component_path not in url_map_for_repository: |
@@ -79,11 +78,11 @@ def ParseURLsFromConfig(file_name): |
url = config.get(section, option) |
type_to_url[option] = url |
- return parsed_config |
+ return config_dict |
-def NormalizePathLinux(path, parsed_deps): |
- """Normalizes linux path. |
+def NormalizePath(path, parsed_deps): |
+ """Normalizes the path. |
Args: |
path: A string representing a path. |
@@ -92,43 +91,53 @@ def NormalizePathLinux(path, parsed_deps): |
Returns: |
A tuple containing a component this path is in (e.g blink, skia, etc) |
- and a path in that component's repository. |
+ and a path in that component's repository. Returns None if the component |
stgao
2014/08/22 06:50:53
It seems googlecode is not checked.
jeun
2014/08/22 22:58:43
Even if we don't check it here, it will fail later
|
+ repository is not supported, i.e from googlecode. |
""" |
# First normalize the path by retreiving the absolute path. |
- normalized_path = os.path.abspath(path) |
+ normalized_path = os.path.normpath(path.replace('\\','/')) |
stgao
2014/08/22 06:50:53
Please also update the comment.
jeun
2014/08/22 22:58:43
Done.
|
# Iterate through all component paths in the parsed DEPS, in the decreasing |
# order of the length of the file path. |
for component_path in sorted(parsed_deps, |
key=(lambda path: -len(path))): |
- # New_path is the component path with 'src/' removed. |
- new_path = component_path |
- if new_path.startswith('src/') and new_path != 'src/': |
- new_path = new_path[len('src/'):] |
+ # new_component_path is the component path with 'src/' removed. |
+ new_component_path = component_path |
+ if new_component_path.startswith('src/') and new_component_path != 'src/': |
+ new_component_path = new_component_path[len('src/'):] |
+ |
+ # We need to consider when the lowercased component path is in the path, |
+ # because syzyasan build returns lowercased file path. |
+ lower_component_path = new_component_path.lower() |
# If this path is the part of file path, this file must be from this |
# component. |
- if new_path in normalized_path: |
+ if new_component_path in normalized_path or \ |
+ lower_component_path in normalized_path: |
- # Currently does not support googlecode. |
- if 'googlecode' in parsed_deps[component_path]['repository']: |
- return (None, '', '') |
+ # Case when the retrieved path is in lowercase. |
+ if lower_component_path in normalized_path: |
+ current_component_path = lower_component_path |
+ else: |
+ current_component_path = new_component_path |
# Normalize the path by stripping everything off the component's relative |
# path. |
- normalized_path = normalized_path.split(new_path,1)[1] |
+ normalized_path = normalized_path.split(current_component_path, 1)[1] |
+ lower_normalized_path = normalized_path.lower() |
# Add 'src/' or 'Source/' at the front of the normalized path, depending |
# on what prefix the component path uses. For example, blink uses |
# 'Source' but chromium uses 'src/', and blink component path is |
# 'src/third_party/WebKit/Source', so add 'Source/' in front of the |
# normalized path. |
- if not (normalized_path.startswith('src/') or |
- normalized_path.startswith('Source/')): |
+ if not (lower_normalized_path.startswith('src/') or |
+ lower_normalized_path.startswith('source/')): |
- if (new_path.lower().endswith('src/') or |
- new_path.lower().endswith('source/')): |
- normalized_path = new_path.split('/')[-2] + '/' + normalized_path |
+ if (lower_component_path.endswith('src/') or |
+ lower_component_path.endswith('source/')): |
+ normalized_path = (current_component_path.split('/')[-2] + '/' + |
+ normalized_path) |
else: |
normalized_path = 'src/' + normalized_path |
@@ -160,9 +169,16 @@ def SplitRange(regression): |
if len(revisions) != 2: |
return None |
- # Strip 'r' from both start and end range. |
- range_start = revisions[0].lstrip('r') |
- range_end = revisions[1].lstrip('r') |
+ range_start = revisions[0] |
+ range_end = revisions[1] |
+ |
+ # Strip 'r' off the range start/end. Not using lstrip to avoid the case when |
Martin Barbella
2014/08/22 02:24:14
Maybe it would be best for us not to include the '
jeun
2014/08/22 22:58:43
Done.
|
+ # the range is in git hash and it starts with 'r'. |
+ if range_start.startswith('r'): |
+ range_start = range_start[1:] |
+ |
+ if range_end.startswith('r'): |
+ range_end = range_end[1:] |
return [range_start, range_end] |
@@ -196,19 +212,23 @@ def GetDataFromURL(url, retries=10, sleep_time=0.1, timeout=5): |
Returns: |
None if the data retrieval fails, or the raw data. |
""" |
- count = 0 |
- while True: |
- count += 1 |
+ data = None |
+ for i in range(retries): |
# Retrieves data from URL. |
try: |
- _, data = utils.GetHttpClient().Get(url) |
- return data |
+ data = urllib2.urlopen(url, timeout=timeout) |
stgao
2014/08/22 06:50:53
Is there a specific reason to revert my change to
jeun
2014/08/22 22:58:43
Done.
|
+ |
+ # If retrieval is successful, return the data. |
+ if data: |
+ return data.read() |
+ |
+ # If retrieval fails, try after sleep_time second. |
+ except urllib2.URLError: |
+ time.sleep(sleep_time) |
+ continue |
except IOError: |
- if count < retries: |
- # If retrieval fails, try after sleep_time second. |
- time.sleep(sleep_time) |
- else: |
- break |
+ time.sleep(sleep_time) |
+ continue |
# Return None if it fails to read data from URL 'retries' times. |
return None |
@@ -232,7 +252,10 @@ def FindMinLineDistance(crashed_line_list, changed_line_numbers): |
""" |
min_distance = INFINITY |
- for line in crashed_line_list: |
+ crashed_line_numbers = [crashed_line |
+ for crashed_line_range in crashed_line_list |
+ for crashed_line in crashed_line_range] |
+ for line in crashed_line_numbers: |
for distance in changed_line_numbers: |
# Find the current distance and update the min if current distance is |
# less than current min. |
@@ -349,39 +372,45 @@ def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers, |
line_range: Number of lines to look backwards from crashed lines. |
Returns: |
- line_intersection: Intersection between crashed_line_list and |
+ line_number_intersection: Intersection between crashed_line_list and |
changed_line_numbers. |
stack_frame_index_intersection: Stack number for each of the intersections. |
""" |
- line_intersection = [] |
+ line_number_intersection = [] |
stack_frame_index_intersection = [] |
# Iterate through the crashed lines, and its occurence in stack. |
- for (line, stack_frame_index) in zip(crashed_line_list, stack_frame_index): |
- # Also check previous 'line_range' lines. |
- line_minus_n = range(line - line_range, line + 1) |
+ for (lines, stack_frame_index) in zip(crashed_line_list, stack_frame_index): |
+ # Also check previous 'line_range' lines. Create a set of all crashed lines |
+ # and the 'line_range' lines before each crashed line. |
+ line_minus_n = set() |
+ for line in lines: |
+ for line_in_range in range(line - line_range, line + 1): |
+ line_minus_n.add(line_in_range) |
for changed_line in changed_line_numbers: |
# If a CL does not change crahsed line, check next line. |
if changed_line not in line_minus_n: |
continue |
+ intersected_line = set() |
# If the changed line is exactly the crashed line, add that line. |
- if line in changed_line_numbers: |
- intersected_line = line |
+ for line in lines: |
+ if line in changed_line_numbers: |
+ intersected_line.add(line) |
- # If the changed line is in 3 lines of the crashed line, add the line. |
- else: |
- intersected_line = changed_line |
+ # If the changed line is in 3 lines of the crashed line, add the line. |
+ else: |
+ intersected_line.add(changed_line) |
# Avoid adding the same line twice. |
- if intersected_line not in line_intersection: |
- line_intersection.append(intersected_line) |
+ if intersected_line not in line_number_intersection: |
+ line_number_intersection.append(list(intersected_line)) |
stack_frame_index_intersection.append(stack_frame_index) |
break |
- return (line_intersection, stack_frame_index_intersection) |
+ return (line_number_intersection, stack_frame_index_intersection) |
def MatchListToResultList(matches): |
@@ -406,9 +435,10 @@ def MatchListToResultList(matches): |
reviewers = match.reviewers |
# For matches, line content do not exist. |
line_content = None |
+ message = match.message |
result = Result(suspected_cl, revision_url, component_name, author, reason, |
- review_url, reviewers, line_content) |
+ review_url, reviewers, line_content, message) |
result_list.append(result) |
return result_list |
@@ -431,15 +461,16 @@ def BlameListToResultList(blame_list): |
component_name = blame.component_name |
author = blame.author |
reason = ( |
- 'The CL changes line %s of file %s from stack %d.' % |
+ 'The CL changes line %s of file %s, which is stack frame index %d.' % |
Martin Barbella
2014/08/22 02:24:14
Nit: remove "index".
jeun
2014/08/22 22:58:43
Done.
|
(blame.line_number, blame.file, blame.stack_frame_index)) |
# Blame object does not have review url and reviewers. |
review_url = None |
reviewers = None |
line_content = blame.line_content |
+ message = blame.message |
result = Result(suspected_cl, revision_url, component_name, author, reason, |
- review_url, reviewers, line_content) |
+ review_url, reviewers, line_content, message) |
result_list.append(result) |
return result_list |