OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 import cgi |
| 6 import ConfigParser |
| 7 import json |
| 8 import logging |
| 9 import os |
| 10 import time |
| 11 import urllib2 |
| 12 |
| 13 from result import Result |
| 14 |
| 15 |
| 16 INFINITY = float('inf') |
| 17 |
| 18 |
def ParseURLsFromConfig(file_name):
  """Parses URLs from the config file.

  The file should be in python config format, where svn section is in the
  format "svn:component_path", except for git URLs and codereview URL.
  Each of the section for svn should contain changelog_url, revision_url,
  diff_url and blame_url.

  Args:
    file_name: The name of the file that contains URL information.

  Returns:
    A dictionary that maps repository type to list of URLs. For svn, it maps
    key 'svn' to another dictionary, which maps component path to the URLs
    as explained above. For git, it maps to the URLs as explained above.
    Codereview maps to codereview API url. Returns None if the config file
    cannot be read.
  """
  config = ConfigParser.ConfigParser()

  # Get the absolute path of the config file (relative to this module's
  # directory), and read the file.
  config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                  file_name)

  # ConfigParser.read() returns the list of files it successfully parsed.
  # The previous check ('if not config') could never fire because a
  # ConfigParser instance is always truthy, so a missing config file was
  # silently ignored and an empty map was returned instead of None.
  if not config.read(config_file_path):
    logging.error('Config file with URLs does not exist.')
    return None

  # Iterate through the config file, check for sections.
  repository_type_to_url_map = {}
  for section in config.sections():
    # 'git' and 'codereview' sections map option -> url directly, without
    # the per-component layer used for svn.
    if section == 'git' or section == 'codereview':
      url_map = repository_type_to_url_map.setdefault(section, {})
      for option in config.options(section):
        url_map[option] = config.get(section, option)
      continue

    # Get repository type and component path from the section name, which
    # is in the form 'repository_type:component_path'. Split only on the
    # first colon so component paths containing ':' stay intact.
    repository_type, component_path = section.split(':', 1)

    # Add the repository type (e.g. 'svn') as a key, if not already there.
    url_map_for_repository = repository_type_to_url_map.setdefault(
        repository_type, {})

    # Add the component path under the repository type, if not already there.
    type_to_url = url_map_for_repository.setdefault(component_path, {})

    # Add all URLs of this section to the map.
    for option in config.options(section):
      type_to_url[option] = config.get(section, option)

  return repository_type_to_url_map
| 83 |
| 84 |
def NormalizePathLinux(path, parsed_deps):
  """Normalizes linux path.

  Args:
    path: A string representing a path.
    parsed_deps: A map from component path to a dict holding at least the
      keys 'name' (component name) and 'repository' (repository URL).

  Returns:
    A tuple containing a component this path is in (e.g blink, skia, etc)
    and a path in that component's repository. Returns (None, '', '') for
    googlecode-hosted components, which are not supported.
  """
  # First normalize the path by retrieving the absolute path.
  normalized_path = os.path.abspath(path)

  # Iterate through all component paths in the parsed DEPS, in the decreasing
  # order of the length of the file path, so the most specific (longest)
  # component path wins.  NOTE: the lambda's 'path' shadows the outer
  # parameter of the same name; it refers to each component path here.
  for component_path in sorted(parsed_deps,
                               key=(lambda path: -len(path))):
    # New_path is the component path with 'src/' removed.
    new_path = component_path
    if new_path.startswith('src/') and new_path != 'src/':
      new_path = new_path[len('src/'):]

    # If this path is the part of file path, this file must be from this
    # component (substring match on the absolute path).
    if new_path in normalized_path:

      # Currently does not support googlecode.
      if 'googlecode' in parsed_deps[component_path]['repository']:
        return (None, '', '')

      # Normalize the path by stripping everything off the component's relative
      # path (keep only what follows the first occurrence of new_path).
      normalized_path = normalized_path.split(new_path)[1]

      # Add 'src/' or 'Source/' at the front of the normalized path, depending
      # on what prefix the component path uses.
      # NOTE(review): operator precedence makes this condition read as
      # (not startswith('src/')) or startswith('Source/'); the comment above
      # suggests the intent may have been not (A or B) -- confirm before
      # changing, as callers may rely on the current behavior.
      if not normalized_path.startswith('src/') or \
          normalized_path.startswith('Source/'):

        if (new_path.lower().endswith('src/') or
            new_path.lower().endswith('source/')):
          normalized_path = new_path.split('/')[-2] + '/' + normalized_path

        else:
          normalized_path = 'src/' + normalized_path

      component_name = parsed_deps[component_path]['name']

      return (component_path, component_name, normalized_path)

  # If the path does not match any component, default to chromium.
  return ('src/', 'chromium', normalized_path)
| 138 |
| 139 |
def SplitRange(regression):
  """Splits a regression range string from ClusterFuzz into its endpoints.

  Args:
    regression: A string in format 'r1234:r5678'.

  Returns:
    A two-element list of revision strings, e.g. ['1234', '5678'], or None
    when the input is empty or is not a two-part range.
  """
  if not regression:
    return None

  endpoints = regression.split(':')

  # A valid range has exactly a start and an end.
  if len(endpoints) != 2:
    return None

  # Drop any leading 'r' markers from both endpoints.
  return [endpoint.lstrip('r') for endpoint in endpoints]
| 164 |
| 165 |
def LoadJSON(json_string):
  """Loads json object from string, or None.

  Args:
    json_string: A string to get object from.

  Returns:
    JSON object if the string represents a JSON object, None otherwise.
  """
  try:
    return json.loads(json_string)
  except ValueError:
    # Malformed JSON is expected for some inputs; signal it with None.
    return None
| 181 |
| 182 |
def GetDataFromURL(url, retries=10, sleep_time=0.1, timeout=10):
  """Retrieves raw data from URL, tries 10 times.

  Retries with a fixed (not exponential) delay of sleep_time seconds
  between attempts.

  Args:
    url: URL to get data from.
    retries: Number of times to retry connection.
    sleep_time: Time in seconds to wait before retrying connection.
    timeout: Time in seconds to wait before time out.

  Returns:
    None if the data retrieval fails, or the raw data.
  """
  data = None
  for i in range(retries):
    # Retrieves data from URL.
    try:
      data = urllib2.urlopen(url, timeout=timeout)

      # If retrieval is successful, return the data.
      # NOTE(review): the response object is never explicitly closed; relies
      # on garbage collection -- confirm whether that matters for callers.
      if data:
        return data.read()

    # If retrieval fails, try after sleep_time second.
    # NOTE(review): in Python 2 urllib2.URLError subclasses IOError, so the
    # second handler alone would also catch URLError; both are kept so the
    # retry-on-any-IO-failure intent stays explicit.
    except urllib2.URLError:
      time.sleep(sleep_time)
      continue
    except IOError:
      time.sleep(sleep_time)
      continue

  # Return None if it fails to read data from URL 'retries' times.
  return None
| 215 |
| 216 |
def FindMinLineDistance(crashed_line_list, changed_line_numbers):
  """Calculates how far the changed line is from one of the crashes.

  Finds the minimum distance between the lines that the file crashed on
  and the lines that the file changed. For example, if the file crashed on
  line 200 and the CL changes line 203,204 and 205, the function returns 3.

  Args:
    crashed_line_list: A list of lines that the file crashed on.
    changed_line_numbers: A list of lines that the file changed.

  Returns:
    The minimum distance. If either of the input lists is empty,
    it returns inf.
  """
  min_distance = INFINITY

  # Compare every (crashed line, changed line) pair and keep the smallest
  # absolute gap seen so far.
  for crashed_line in crashed_line_list:
    for changed_line in changed_line_numbers:
      min_distance = min(min_distance, abs(crashed_line - changed_line))

  return min_distance
| 244 |
| 245 |
def GuessIfSameSubPath(path1, path2):
  """Guesses if two paths represent same path.

  Compares the folder names in the two paths (split on '/') and treats them
  as the same when they share at least 3 components, or every component of
  the shorter path, whichever is fewer.

  Args:
    path1: First path.
    path2: Second path to compare.

  Returns:
    True if it they are thought to be a same path, False otherwise.
  """
  components1 = path1.split('/')
  components2 = path2.split('/')

  # Threshold uses the component counts (with duplicates), not the set sizes.
  shared = set(components1) & set(components2)
  threshold = min(3, len(components1), len(components2))
  return len(shared) >= threshold
| 264 |
| 265 |
def FindMinStackFrameNumber(stack_frame_indices, priorities):
  """Finds the minimum stack number, from the list of stack numbers.

  Only files tied at the best (numerically lowest) priority are considered.

  Args:
    stack_frame_indices: A list of lists containing stack position.
    priorities: A list of of priority for each file.

  Returns:
    Inf if stack_frame_indices is empty, minimum stack number otherwise.
  """
  # The best priority is the lowest number.
  best_priority = min(priorities)

  # Gather stack frame numbers from every file that holds the best priority.
  candidate_frames = []
  for index, priority in enumerate(priorities):
    if priority == best_priority:
      candidate_frames.extend(stack_frame_indices[index])

  # If no stack frame information is available, return inf. Else, return min.
  if not candidate_frames:
    return INFINITY
  return min(candidate_frames)
| 294 |
| 295 |
def AddHyperlink(text, link):
  """Returns a string with HTML link tag.

  Args:
    text: A string to add link; converted via str() and HTML-escaped.
    link: A link to add to the string; escaped for use in an href attribute.

  Returns:
    A string with hyperlink added.
  """
  href = cgi.escape(link, quote=True)
  body = cgi.escape(str(text))
  return '<a href="{0}">{1}</a>'.format(href, body)
| 309 |
| 310 |
def PrettifyList(l):
  """Returns a string representation of a list.

  It adds comma in between the elements and removes the brackets.
  Args:
    l: A list to prettify.
  Returns:
    A string representation of the list.
  """
  # str() of a list is '[e1, e2, ...]'; trim one bracket from each end only
  # (strip('[]') would also eat brackets belonging to nested lists).
  text = str(l)
  return text[1:len(text) - 1]
| 321 |
| 322 |
def PrettifyFiles(file_list):
  """Returns a string representation of a list of file names.

  Args:
    file_list: A list of tuple, (file_name, file_url).
  Returns:
    A string representation of file names with their urls.
  """
  # Start with a newline, then one indented hyperlinked file name per line.
  pieces = ['\n']
  for name, url in file_list:
    pieces.append(' %s\n' % AddHyperlink(name, url))
  return ''.join(pieces)
| 335 |
| 336 |
def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,
                 line_range=3):
  """Finds the overlap between changed lines and crashed lines.

  Finds the intersection of the lines that caused the crash and
  lines that the file changes. The intersection looks within 'line_range'
  lines preceding the line that caused the crash.

  Args:
    crashed_line_list: A list of lines that the file crashed on.
    stack_frame_index: A list of positions in stack for each of the lines.
    changed_line_numbers: A list of lines that the file changed.
    line_range: Number of lines to look backwards from crashed lines.

  Returns:
    line_intersection: Intersection between crashed_line_list and
                       changed_line_numbers.
    stack_frame_index_intersection: Stack number for each of the intersections.
  """
  line_intersection = []
  stack_frame_index_intersection = []

  # Iterate through the crashed lines and their occurrence in stack.
  # The loop variable is deliberately named 'frame_index': the original code
  # rebound the parameter 'stack_frame_index' itself, shadowing it on the
  # first iteration, which was confusing and fragile.
  for line, frame_index in zip(crashed_line_list, stack_frame_index):
    # Consider the crashed line itself plus the preceding 'line_range' lines.
    line_minus_n = range(line - line_range, line + 1)

    for changed_line in changed_line_numbers:
      # If the CL does not touch this window, check the next changed line.
      if changed_line not in line_minus_n:
        continue

      # Prefer the exact crashed line when the CL changes it; otherwise
      # record the nearby changed line.
      if line in changed_line_numbers:
        intersected_line = line
      else:
        intersected_line = changed_line

      # Avoid adding the same line twice.
      if intersected_line not in line_intersection:
        line_intersection.append(intersected_line)
        stack_frame_index_intersection.append(frame_index)

      break

  return (line_intersection, stack_frame_index_intersection)
| 385 |
| 386 |
def MatchListToResultList(matches):
  """Convert list of matches to the list of result objects.

  Args:
    matches: A list of (stack_priority, cl, match) tuples, where 'match'
      carries revision/author/review metadata.
  Returns:
    A list of result object.
  """
  results = []

  for _, cl, match in matches:
    # Matches carry no line content, so that field is always None here.
    results.append(Result(cl, match.url, match.component_name, match.author,
                          match.reason, match.review_url, match.reviewers,
                          None))

  return results
| 415 |
| 416 |
def BlameListToResultList(blame_list):
  """Convert blame list to the list of result objects.

  Args:
    blame_list: A list of blame objects.

  Returns:
    A list of result objects.
  """
  results = []

  for blame in blame_list:
    reason = (
        'The CL changes line %s of file %s from stack %d.' %
        (blame.line_number, blame.file, blame.stack_frame_index))
    # Blame objects carry no review URL or reviewer list, so both are None.
    results.append(Result(blame.revision, blame.url, blame.component_name,
                          blame.author, reason, None, None, blame.content))

  return results
| 446 |
| 447 |
def PrettifyResultList(result_list):
  """Prints string format of the result list.

  Writes a human-readable summary of each Result to stdout, with each entry
  framed by dashed separator lines. Optional fields (review URL, reviewers,
  line content) are printed only when present.
  """
  for result in result_list:
    print '-----------------------'
    # The suspected CL is rendered as an HTML link to its revision URL.
    print 'Suspected CL: ', AddHyperlink(result.suspected_cl,
                                         result.suspected_cl_revision_url)
    print 'Component: ', result.component_name
    print 'Author: ', result.author
    print 'Reason:\n', result.reason
    # The remaining fields may be None/empty (e.g. blame-derived results
    # have no review URL or reviewers).
    if result.review_url:
      print 'Review URL: ', result.review_url
    if result.reviewers:
      print 'Reviewers: ', PrettifyList(result.reviewers)
    if result.line_content:
      print 'Line content: ', result.line_content
    print '-----------------------'
OLD | NEW |