Chromium Code Reviews| Index: tools/findit/crash_utils.py |
| diff --git a/tools/findit/crash_utils.py b/tools/findit/crash_utils.py |
| index 42a17ce2cb341e7e4202a8a4aaef8261ee52e6ea..d5401f4b5f8fc8a58b823e4e5b4925a9a904374c 100644 |
| --- a/tools/findit/crash_utils.py |
| +++ b/tools/findit/crash_utils.py |
| @@ -10,7 +10,6 @@ import os |
| import time |
| import urllib2 |
| -from common import utils |
| from result import Result |
| @@ -45,17 +44,17 @@ def ParseURLsFromConfig(file_name): |
| return None |
| # Iterate through the config file, check for sections. |
| - parsed_config = {} |
| + config_dict = {} |
| for section in config.sections(): |
| # These two do not need another layer of dictionary, so add it and go |
| # to next section. |
| if ':' not in section: |
| for option in config.options(section): |
| - if section not in parsed_config: |
| - parsed_config[section] = {} |
| + if section not in config_dict: |
| + config_dict[section] = {} |
| url = config.get(section, option) |
| - parsed_config[section][option] = url |
| + config_dict[section][option] = url |
| continue |
| @@ -65,9 +64,9 @@ def ParseURLsFromConfig(file_name): |
| component_path = repository_type_and_component[1] |
| # Add 'svn' as the key, if it is not already there. |
| - if repository_type not in parsed_config: |
| - parsed_config[repository_type] = {} |
| - url_map_for_repository = parsed_config[repository_type] |
| + if repository_type not in config_dict: |
| + config_dict[repository_type] = {} |
| + url_map_for_repository = config_dict[repository_type] |
| # Add the path to the 'svn', if it is not already there. |
| if component_path not in url_map_for_repository: |
| @@ -79,11 +78,11 @@ def ParseURLsFromConfig(file_name): |
| url = config.get(section, option) |
| type_to_url[option] = url |
| - return parsed_config |
| + return config_dict |
| -def NormalizePathLinux(path, parsed_deps): |
| - """Normalizes linux path. |
| +def NormalizePath(path, parsed_deps): |
| + """Normalizes the path. |
| Args: |
| path: A string representing a path. |
| @@ -92,43 +91,53 @@ def NormalizePathLinux(path, parsed_deps): |
| Returns: |
| A tuple containing a component this path is in (e.g. blink, skia, etc.) |
| - and a path in that component's repository. |
| + and a path in that component's repository. Returns None if the component |
|
stgao
2014/08/22 06:50:53
It seems googlecode is not checked.
jeun
2014/08/22 22:58:43
Even if we don't check it here, it will fail later
|
| + repository is not supported, i.e from googlecode. |
| """ |
| # First normalize the path by retrieving the absolute path. |
| - normalized_path = os.path.abspath(path) |
| + normalized_path = os.path.normpath(path.replace('\\','/')) |
|
stgao
2014/08/22 06:50:53
Please also update the comment.
jeun
2014/08/22 22:58:43
Done.
|
| # Iterate through all component paths in the parsed DEPS, in the decreasing |
| # order of the length of the file path. |
| for component_path in sorted(parsed_deps, |
| key=(lambda path: -len(path))): |
| - # New_path is the component path with 'src/' removed. |
| - new_path = component_path |
| - if new_path.startswith('src/') and new_path != 'src/': |
| - new_path = new_path[len('src/'):] |
| + # new_component_path is the component path with 'src/' removed. |
| + new_component_path = component_path |
| + if new_component_path.startswith('src/') and new_component_path != 'src/': |
| + new_component_path = new_component_path[len('src/'):] |
| + |
| + # We need to consider when the lowercased component path is in the path, |
| + # because syzyasan build returns lowercased file path. |
| + lower_component_path = new_component_path.lower() |
| # If this path is the part of file path, this file must be from this |
| # component. |
| - if new_path in normalized_path: |
| + if new_component_path in normalized_path or \ |
| + lower_component_path in normalized_path: |
| - # Currently does not support googlecode. |
| - if 'googlecode' in parsed_deps[component_path]['repository']: |
| - return (None, '', '') |
| + # Case when the retrieved path is in lowercase. |
| + if lower_component_path in normalized_path: |
| + current_component_path = lower_component_path |
| + else: |
| + current_component_path = new_component_path |
| # Normalize the path by stripping everything off the component's relative |
| # path. |
| - normalized_path = normalized_path.split(new_path,1)[1] |
| + normalized_path = normalized_path.split(current_component_path, 1)[1] |
| + lower_normalized_path = normalized_path.lower() |
| # Add 'src/' or 'Source/' at the front of the normalized path, depending |
| # on what prefix the component path uses. For example, blink uses |
| # 'Source' but chromium uses 'src/', and blink component path is |
| # 'src/third_party/WebKit/Source', so add 'Source/' in front of the |
| # normalized path. |
| - if not (normalized_path.startswith('src/') or |
| - normalized_path.startswith('Source/')): |
| + if not (lower_normalized_path.startswith('src/') or |
| + lower_normalized_path.startswith('source/')): |
| - if (new_path.lower().endswith('src/') or |
| - new_path.lower().endswith('source/')): |
| - normalized_path = new_path.split('/')[-2] + '/' + normalized_path |
| + if (lower_component_path.endswith('src/') or |
| + lower_component_path.endswith('source/')): |
| + normalized_path = (current_component_path.split('/')[-2] + '/' + |
| + normalized_path) |
| else: |
| normalized_path = 'src/' + normalized_path |
| @@ -160,9 +169,16 @@ def SplitRange(regression): |
| if len(revisions) != 2: |
| return None |
| - # Strip 'r' from both start and end range. |
| - range_start = revisions[0].lstrip('r') |
| - range_end = revisions[1].lstrip('r') |
| + range_start = revisions[0] |
| + range_end = revisions[1] |
| + |
| + # Strip 'r' off the range start/end. Not using lstrip to avoid the case when |
|
Martin Barbella
2014/08/22 02:24:14
Maybe it would be best for us not to include the '
jeun
2014/08/22 22:58:43
Done.
|
| + # the range is in git hash and it starts with 'r'. |
| + if range_start.startswith('r'): |
| + range_start = range_start[1:] |
| + |
| + if range_end.startswith('r'): |
| + range_end = range_end[1:] |
| return [range_start, range_end] |
| @@ -196,19 +212,23 @@ def GetDataFromURL(url, retries=10, sleep_time=0.1, timeout=5): |
| Returns: |
| None if the data retrieval fails, or the raw data. |
| """ |
| - count = 0 |
| - while True: |
| - count += 1 |
| + data = None |
| + for i in range(retries): |
| # Retrieves data from URL. |
| try: |
| - _, data = utils.GetHttpClient().Get(url) |
| - return data |
| + data = urllib2.urlopen(url, timeout=timeout) |
|
stgao
2014/08/22 06:50:53
Is there a specific reason to revert my change to
jeun
2014/08/22 22:58:43
Done.
|
| + |
| + # If retrieval is successful, return the data. |
| + if data: |
| + return data.read() |
| + |
| + # If retrieval fails, try after sleep_time second. |
| + except urllib2.URLError: |
| + time.sleep(sleep_time) |
| + continue |
| except IOError: |
| - if count < retries: |
| - # If retrieval fails, try after sleep_time second. |
| - time.sleep(sleep_time) |
| - else: |
| - break |
| + time.sleep(sleep_time) |
| + continue |
| # Return None if it fails to read data from URL 'retries' times. |
| return None |
| @@ -232,7 +252,10 @@ def FindMinLineDistance(crashed_line_list, changed_line_numbers): |
| """ |
| min_distance = INFINITY |
| - for line in crashed_line_list: |
| + crashed_line_numbers = [crashed_line |
| + for crashed_line_range in crashed_line_list |
| + for crashed_line in crashed_line_range] |
| + for line in crashed_line_numbers: |
| for distance in changed_line_numbers: |
| # Find the current distance and update the min if current distance is |
| # less than current min. |
| @@ -349,39 +372,45 @@ def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers, |
| line_range: Number of lines to look backwards from crashed lines. |
| Returns: |
| - line_intersection: Intersection between crashed_line_list and |
| + line_number_intersection: Intersection between crashed_line_list and |
| changed_line_numbers. |
| stack_frame_index_intersection: Stack number for each of the intersections. |
| """ |
| - line_intersection = [] |
| + line_number_intersection = [] |
| stack_frame_index_intersection = [] |
| # Iterate through the crashed lines, and its occurence in stack. |
| - for (line, stack_frame_index) in zip(crashed_line_list, stack_frame_index): |
| - # Also check previous 'line_range' lines. |
| - line_minus_n = range(line - line_range, line + 1) |
| + for (lines, stack_frame_index) in zip(crashed_line_list, stack_frame_index): |
| + # Also check previous 'line_range' lines. Create a set of all changed lines |
| + # and lines within 3 lines range before the crashed line. |
| + line_minus_n = set() |
| + for line in lines: |
| + for line_in_range in range(line - line_range, line + 1): |
| + line_minus_n.add(line_in_range) |
| for changed_line in changed_line_numbers: |
| # If a CL does not change crashed line, check next line. |
| if changed_line not in line_minus_n: |
| continue |
| + intersected_line = set() |
| # If the changed line is exactly the crashed line, add that line. |
| - if line in changed_line_numbers: |
| - intersected_line = line |
| + for line in lines: |
| + if line in changed_line_numbers: |
| + intersected_line.add(line) |
| - # If the changed line is in 3 lines of the crashed line, add the line. |
| - else: |
| - intersected_line = changed_line |
| + # If the changed line is in 3 lines of the crashed line, add the line. |
| + else: |
| + intersected_line.add(changed_line) |
| # Avoid adding the same line twice. |
| - if intersected_line not in line_intersection: |
| - line_intersection.append(intersected_line) |
| + if intersected_line not in line_number_intersection: |
| + line_number_intersection.append(list(intersected_line)) |
| stack_frame_index_intersection.append(stack_frame_index) |
| break |
| - return (line_intersection, stack_frame_index_intersection) |
| + return (line_number_intersection, stack_frame_index_intersection) |
| def MatchListToResultList(matches): |
| @@ -406,9 +435,10 @@ def MatchListToResultList(matches): |
| reviewers = match.reviewers |
| # For matches, line content do not exist. |
| line_content = None |
| + message = match.message |
| result = Result(suspected_cl, revision_url, component_name, author, reason, |
| - review_url, reviewers, line_content) |
| + review_url, reviewers, line_content, message) |
| result_list.append(result) |
| return result_list |
| @@ -431,15 +461,16 @@ def BlameListToResultList(blame_list): |
| component_name = blame.component_name |
| author = blame.author |
| reason = ( |
| - 'The CL changes line %s of file %s from stack %d.' % |
| + 'The CL changes line %s of file %s, which is stack frame index %d.' % |
|
Martin Barbella
2014/08/22 02:24:14
Nit: remove "index".
jeun
2014/08/22 22:58:43
Done.
|
| (blame.line_number, blame.file, blame.stack_frame_index)) |
| # Blame object does not have review url and reviewers. |
| review_url = None |
| reviewers = None |
| line_content = blame.line_content |
| + message = blame.message |
| result = Result(suspected_cl, revision_url, component_name, author, reason, |
| - review_url, reviewers, line_content) |
| + review_url, reviewers, line_content, message) |
| result_list.append(result) |
| return result_list |