tools/findit/crash_utils.py - Issue 478763003: [Findit] Bug fixing and implemented some feature requests.

Side by Side Diff: tools/findit/crash_utils.py

Issue 478763003: [Findit] Bug fixing and implemented some feature requests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Addressed codereview and removed all references to logging Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.	1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import cgi	5 import cgi

6 import ConfigParser	6 import ConfigParser

7 import json	7 import json

8 import logging

9 import os	8 import os

10 import time	9 import time

11 import urllib2

12	10

13 from common import utils	11 from common import utils

14 from result import Result	12 from result import Result

15	13

16	14

17 INFINITY = float('inf')	15 INFINITY = float('inf')

18	16

19	17

	18 def GetRepositoryType(revision_number):

	19 """Returns the repository type of this revision number.

	20

	21 Args:

	22 revision_number: A revision number or git hash.

	23

	24 Returns:

	25 'git' or 'svn', depending on the revision_number.

	26 """

	27 if utils.IsGitHash(revision_number):

	28 return 'git'

	29 else:

	30 return 'svn'

	31

	32

20 def ParseURLsFromConfig(file_name):	33 def ParseURLsFromConfig(file_name):

21 """Parses URLS from the config file.	34 """Parses URLS from the config file.

22	35

23 The file should be in python config format, where svn section is in the	36 The file should be in python config format, where svn section is in the

24 format "svn:component_path".	37 format "svn:component_path".

25 Each of the section for svn should contain changelog_url, revision_url,	38 Each of the section for svn should contain changelog_url, revision_url,

26 diff_url and blame_url.	39 diff_url and blame_url.

27	40

28 Args:	41 Args:

29 file_name: The name of the file that contains URL information.	42 file_name: The name of the file that contains URL information.

30	43

31 Returns:	44 Returns:

32 A dictionary that maps repository type to list of URLs. For svn, it maps	45 A dictionary that maps repository type to list of URLs. For svn, it maps

33 key 'svn' to another dictionary, which maps component path to the URLs	46 key 'svn' to another dictionary, which maps component path to the URLs

34 as explained above. For git, it maps to the URLs as explained above.	47 as explained above. For git, it maps to the URLs as explained above.

35 """	48 """

36 config = ConfigParser.ConfigParser()	49 config = ConfigParser.ConfigParser()

37	50

38 # Get the absolute path of the config file, and read the file. If it fails,	51 # Get the absolute path of the config file, and read the file. If it fails,

39 # return none.	52 # return none.

40 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),	53 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),

41 file_name)	54 file_name)

42 config.read(config_file_path)	55 config.read(config_file_path)

43 if not config:	56 if not config:

44 logging.error('Config file with URLs does not exist.')

45 return None	57 return None

46	58

47 # Iterate through the config file, check for sections.	59 # Iterate through the config file, check for sections.

48 parsed_config = {}	60 config_dict = {}

49 for section in config.sections():	61 for section in config.sections():

50 # These two do not need another layer of dictionary, so add it and go	62 # These two do not need another layer of dictionary, so add it and go

51 # to next section.	63 # to next section.

52 if ':' not in section:	64 if ':' not in section:

53 for option in config.options(section):	65 for option in config.options(section):

54 if section not in parsed_config:	66 if section not in config_dict:

55 parsed_config[section] = {}	67 config_dict[section] = {}

56	68

57 url = config.get(section, option)	69 url = config.get(section, option)

58 parsed_config[section][option] = url	70 config_dict[section][option] = url

59	71

60 continue	72 continue

61	73

62 # Get repository type and component name from the section name.	74 # Get repository type and component name from the section name.

63 repository_type_and_component = section.split(':')	75 repository_type_and_component = section.split(':')

64 repository_type = repository_type_and_component[0]	76 repository_type = repository_type_and_component[0]

65 component_path = repository_type_and_component[1]	77 component_path = repository_type_and_component[1]

66	78

67 # Add 'svn' as the key, if it is not already there.	79 # Add 'svn' as the key, if it is not already there.

68 if repository_type not in parsed_config:	80 if repository_type not in config_dict:

69 parsed_config[repository_type] = {}	81 config_dict[repository_type] = {}

70 url_map_for_repository = parsed_config[repository_type]	82 url_map_for_repository = config_dict[repository_type]

71	83

72 # Add the path to the 'svn', if it is not already there.	84 # Add the path to the 'svn', if it is not already there.

73 if component_path not in url_map_for_repository:	85 if component_path not in url_map_for_repository:

74 url_map_for_repository[component_path] = {}	86 url_map_for_repository[component_path] = {}

75 type_to_url = url_map_for_repository[component_path]	87 type_to_url = url_map_for_repository[component_path]

76	88

77 # Add all URLs to this map.	89 # Add all URLs to this map.

78 for option in config.options(section):	90 for option in config.options(section):

79 url = config.get(section, option)	91 url = config.get(section, option)

80 type_to_url[option] = url	92 type_to_url[option] = url

81	93

82 return parsed_config	94 return config_dict

83	95

84	96

85 def NormalizePathLinux(path, parsed_deps):	97 def NormalizePath(path, parsed_deps):

86 """Normalizes linux path.	98 """Normalizes the path.

87	99

88 Args:	100 Args:

89 path: A string representing a path.	101 path: A string representing a path.

90 parsed_deps: A map from component path to its component name, repository,	102 parsed_deps: A map from component path to its component name, repository,

91 etc.	103 etc.

92	104

93 Returns:	105 Returns:

94 A tuple containing a component this path is in (e.g blink, skia, etc)	106 A tuple containing a component this path is in (e.g blink, skia, etc)

95 and a path in that component's repository.	107 and a path in that component's repository. Returns None if the component

	108 repository is not supported, i.e from googlecode.

96 """	109 """

97 # First normalize the path by retrieving the absolute path.	110 # First normalize the path by retrieving the normalized path.

98 normalized_path = os.path.abspath(path)	111 normalized_path = os.path.normpath(path.replace('\\', '/'))

99	112

100 # Iterate through all component paths in the parsed DEPS, in the decreasing	113 # Iterate through all component paths in the parsed DEPS, in the decreasing

101 # order of the length of the file path.	114 # order of the length of the file path.

102 for component_path in sorted(parsed_deps,	115 for component_path in sorted(parsed_deps,

103 key=(lambda path: -len(path))):	116 key=(lambda path: -len(path))):

104 # New_path is the component path with 'src/' removed.	117 # new_component_path is the component path with 'src/' removed.

105 new_path = component_path	118 new_component_path = component_path

106 if new_path.startswith('src/') and new_path != 'src/':	119 if new_component_path.startswith('src/') and new_component_path != 'src/':

107 new_path = new_path[len('src/'):]	120 new_component_path = new_component_path[len('src/'):]

	121

	122 # We need to consider when the lowercased component path is in the path,

	123 # because syzyasan build returns lowercased file path.

	124 lower_component_path = new_component_path.lower()

108	125

109 # If this path is the part of file path, this file must be from this	126 # If this path is the part of file path, this file must be from this

110 # component.	127 # component.

111 if new_path in normalized_path:	128 if new_component_path in normalized_path or \

	129 lower_component_path in normalized_path:

112	130

113 # Currently does not support googlecode.	131 # Case when the retrieved path is in lowercase.

114 if 'googlecode' in parsed_deps[component_path]['repository']:	132 if lower_component_path in normalized_path:

115 return (None, '', '')	133 current_component_path = lower_component_path

	134 else:

	135 current_component_path = new_component_path

116	136

117 # Normalize the path by stripping everything off the component's relative	137 # Normalize the path by stripping everything off the component's relative

118 # path.	138 # path.

119 normalized_path = normalized_path.split(new_path,1)[1]	139 normalized_path = normalized_path.split(current_component_path, 1)[1]

	140 lower_normalized_path = normalized_path.lower()

120	141

121 # Add 'src/' or 'Source/' at the front of the normalized path, depending	142 # Add 'src/' or 'Source/' at the front of the normalized path, depending

122 # on what prefix the component path uses. For example, blink uses	143 # on what prefix the component path uses. For example, blink uses

123 # 'Source' but chromium uses 'src/', and blink component path is	144 # 'Source' but chromium uses 'src/', and blink component path is

124 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the	145 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the

125 # normalized path.	146 # normalized path.

126 if not (normalized_path.startswith('src/') or	147 if not (lower_normalized_path.startswith('src/') or

127 normalized_path.startswith('Source/')):	148 lower_normalized_path.startswith('source/')):

128	149

129 if (new_path.lower().endswith('src/') or	150 if (lower_component_path.endswith('src/') or

130 new_path.lower().endswith('source/')):	151 lower_component_path.endswith('source/')):

131 normalized_path = new_path.split('/')[-2] + '/' + normalized_path	152 normalized_path = (current_component_path.split('/')[-2] + '/' +

	153 normalized_path)

132	154

133 else:	155 else:

134 normalized_path = 'src/' + normalized_path	156 normalized_path = 'src/' + normalized_path

135	157

136 component_name = parsed_deps[component_path]['name']	158 component_name = parsed_deps[component_path]['name']

137	159

138 return (component_path, component_name, normalized_path)	160 return (component_path, component_name, normalized_path)

139	161

140 # If the path does not match any component, default to chromium.	162 # If the path does not match any component, default to chromium.

141 return ('src/', 'chromium', normalized_path)	163 return ('src/', 'chromium', normalized_path)

(...skipping 11 matching lines...) Expand all Loading...
153 """	175 """

154 if not regression:	176 if not regression:

155 return None	177 return None

156	178

157 revisions = regression.split(':')	179 revisions = regression.split(':')

158	180

159 # If regression information is not available, return none.	181 # If regression information is not available, return none.

160 if len(revisions) != 2:	182 if len(revisions) != 2:

161 return None	183 return None

162	184

163 # Strip 'r' from both start and end range.	185 range_start = revisions[0]

164 range_start = revisions[0].lstrip('r')	186 range_end = revisions[1]

165 range_end = revisions[1].lstrip('r')	187

	188 # Strip 'r' off the range start/end. Not using lstrip to avoid the case when

	189 # the range is in git hash and it starts with 'r'.

	190 if range_start.startswith('r'):

	191 range_start = range_start[1:]

	192

	193 if range_end.startswith('r'):

	194 range_end = range_end[1:]

166	195

167 return [range_start, range_end]	196 return [range_start, range_end]

168	197

169	198

170 def LoadJSON(json_string):	199 def LoadJSON(json_string):

171 """Loads json object from string, or None.	200 """Loads json object from string, or None.

172	201

173 Args:	202 Args:

174 json_string: A string to get object from.	203 json_string: A string to get object from.

175	204

(...skipping 18 matching lines...) Expand all Loading...
194 timeout: Time in seconds to wait before time out.	223 timeout: Time in seconds to wait before time out.

195	224

196 Returns:	225 Returns:

197 None if the data retrieval fails, or the raw data.	226 None if the data retrieval fails, or the raw data.

198 """	227 """

199 count = 0	228 count = 0

200 while True:	229 while True:

201 count += 1	230 count += 1

202 # Retrieves data from URL.	231 # Retrieves data from URL.

203 try:	232 try:

204 _, data = utils.GetHttpClient().Get(url)	233 _, data = utils.GetHttpClient().Get(url, timeout=timeout)

205 return data	234 return data

206 except IOError:	235 except IOError:

207 if count < retries:	236 if count < retries:

208 # If retrieval fails, try after sleep_time second.	237 # If retrieval fails, try after sleep_time second.

209 time.sleep(sleep_time)	238 time.sleep(sleep_time)

210 else:	239 else:

211 break	240 break

212	241

213 # Return None if it fails to read data from URL 'retries' times.	242 # Return None if it fails to read data from URL 'retries' times.

214 return None	243 return None

215	244

216	245

217 def FindMinLineDistance(crashed_line_list, changed_line_numbers):	246 def FindMinLineDistance(crashed_line_list, changed_line_numbers,

	247 line_range=3):

218 """Calculates how far the changed line is from one of the crashes.	248 """Calculates how far the changed line is from one of the crashes.

219	249

220 Finds the minimum distance between the lines that the file crashed on	250 Finds the minimum distance between the lines that the file crashed on

221 and the lines that the file changed. For example, if the file crashed on	251 and the lines that the file changed. For example, if the file crashed on

222 line 200 and the CL changes line 203,204 and 205, the function returns 3.	252 line 200 and the CL changes line 203,204 and 205, the function returns 3.

223	253

224 Args:	254 Args:

225 crashed_line_list: A list of lines that the file crashed on.	255 crashed_line_list: A list of lines that the file crashed on.

226 changed_line_numbers: A list of lines that the file changed.	256 changed_line_numbers: A list of lines that the file changed.

	257 line_range: Number of lines to look back for.

227	258

228 Returns:	259 Returns:

229 The minimum distance. If either of the input lists is empty,	260 The minimum distance. If either of the input lists is empty,

230 it returns inf.	261 it returns inf.

231	262

232 """	263 """

233 min_distance = INFINITY	264 min_distance = INFINITY

	265 crashed_line = -1

	266 changed_line = -1

234	267

235 for line in crashed_line_list:	268 crashed_line_numbers = set()

	269 for crashed_line_range in crashed_line_list:

	270 for crashed_line in crashed_line_range:

	271 for line in range(crashed_line - line_range, crashed_line + 1):

	272 crashed_line_numbers.add(line)

	273

	274 for line in crashed_line_numbers:

236 for distance in changed_line_numbers:	275 for distance in changed_line_numbers:

237 # Find the current distance and update the min if current distance is	276 # Find the current distance and update the min if current distance is

238 # less than current min.	277 # less than current min.

239 current_distance = abs(line - distance)	278 current_distance = abs(line - distance)

240 if current_distance < min_distance:	279 if current_distance < min_distance:

241 min_distance = current_distance	280 min_distance = current_distance

	281 crashed_line = line

	282 changed_line = distance

242	283

243 return min_distance	284 return (min_distance, crashed_line, changed_line)

244	285

245	286

246 def GuessIfSameSubPath(path1, path2):	287 def GuessIfSameSubPath(path1, path2):

247 """Guesses if two paths represent same path.	288 """Guesses if two paths represent same path.

248	289

249 Compares the name of the folders in the path (by split('/')), and checks	290 Compares the name of the folders in the path (by split('/')), and checks

250 if they match either more than 3 or min of path lengths.	291 if they match either more than 3 or min of path lengths.

251	292

252 Args:	293 Args:

253 path1: First path.	294 path1: First path.

(...skipping 74 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
328 Returns:	369 Returns:

329 A string representation of file names with their urls.	370 A string representation of file names with their urls.

330 """	371 """

331 ret = ['\n']	372 ret = ['\n']

332 for file_name, file_url in file_list:	373 for file_name, file_url in file_list:

333 ret.append(' %s\n' % AddHyperlink(file_name, file_url))	374 ret.append(' %s\n' % AddHyperlink(file_name, file_url))

334 return ''.join(ret)	375 return ''.join(ret)

335	376

336	377

337 def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,	378 def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,

338 line_range=3):	379 function, line_range=3):

339 """Finds the overlap between changed lines and crashed lines.	380 """Finds the overlap between changed lines and crashed lines.

340	381

341 Finds the intersection of the lines that caused the crash and	382 Finds the intersection of the lines that caused the crash and

342 lines that the file changes. The intersection looks within 3 lines	383 lines that the file changes. The intersection looks within 3 lines

343 of the line that caused the crash.	384 of the line that caused the crash.

344	385

345 Args:	386 Args:

346 crashed_line_list: A list of lines that the file crashed on.	387 crashed_line_list: A list of lines that the file crashed on.

347 stack_frame_index: A list of positions in stack for each of the lines.	388 stack_frame_index: A list of positions in stack for each of the lines.

348 changed_line_numbers: A list of lines that the file changed.	389 changed_line_numbers: A list of lines that the file changed.

	390 function: A list of functions that the file crashed on.
	stgao 2014/08/23 00:44:13 function -> functions
349 line_range: Number of lines to look backwards from crashed lines.	391 line_range: Number of lines to look backwards from crashed lines.

350	392

351 Returns:	393 Returns:

352 line_intersection: Intersection between crashed_line_list and	394 line_number_intersection: Intersection between crashed_line_list and

353 changed_line_numbers.	395 changed_line_numbers.

354 stack_frame_index_intersection: Stack number for each of the intersections.	396 stack_frame_index_intersection: Stack number for each of the intersections.

355 """	397 """

356 line_intersection = []	398 line_number_intersection = []

357 stack_frame_index_intersection = []	399 stack_frame_index_intersection = []

	400 function_intersection = []

358	401

359 # Iterate through the crashed lines, and its occurrence in stack.	402 # Iterate through the crashed lines, and its occurrence in stack.

360 for (line, stack_frame_index) in zip(crashed_line_list, stack_frame_index):	403 for (lines, stack_frame_index, function_name) in zip(

361 # Also check previous 'line_range' lines.	404 crashed_line_list, stack_frame_index, function):

362 line_minus_n = range(line - line_range, line + 1)	405 # Also check previous 'line_range' lines. Create a set of all changed lines

	406 # and lines within 3 lines range before the crashed line.

	407 line_minus_n = set()

	408 for line in lines:

	409 for line_in_range in range(line - line_range, line + 1):

	410 line_minus_n.add(line_in_range)

363	411

364 for changed_line in changed_line_numbers:	412 for changed_line in changed_line_numbers:

365 # If a CL does not change crashed line, check next line.	413 # If a CL does not change crashed line, check next line.

366 if changed_line not in line_minus_n:	414 if changed_line not in line_minus_n:

367 continue	415 continue

368	416

	417 intersected_line = set()

369 # If the changed line is exactly the crashed line, add that line.	418 # If the changed line is exactly the crashed line, add that line.

370 if line in changed_line_numbers:	419 for line in lines:

371 intersected_line = line	420 if line in changed_line_numbers:

	421 intersected_line.add(line)

372	422

373 # If the changed line is in 3 lines of the crashed line, add the line.	423 # If the changed line is in 3 lines of the crashed line, add the line.

374 else:	424 else:

375 intersected_line = changed_line	425 intersected_line.add(changed_line)

376	426

377 # Avoid adding the same line twice.	427 # Avoid adding the same line twice.

378 if intersected_line not in line_intersection:	428 if intersected_line not in line_number_intersection:

379 line_intersection.append(intersected_line)	429 line_number_intersection.append(list(intersected_line))

380 stack_frame_index_intersection.append(stack_frame_index)	430 stack_frame_index_intersection.append(stack_frame_index)

381	431 function_intersection.append(function_name)

382 break	432 break

383	433

384 return (line_intersection, stack_frame_index_intersection)	434 return (line_number_intersection, stack_frame_index_intersection,

	435 function_intersection)

385	436

386	437

387 def MatchListToResultList(matches):	438 def MatchListToResultList(matches):

388 """Convert list of matches to the list of result objects.	439 """Convert list of matches to the list of result objects.

389	440

390 Args:	441 Args:

391 matches: A list of match objects along with its stack priority and revision	442 matches: A list of match objects along with its stack priority and revision

392 number/git hash	443 number/git hash

393 Returns:	444 Returns:

394 A list of result object.	445 A list of result object.

395	446

396 """	447 """

397 result_list = []	448 result_list = []

398	449

399 for _, cl, match in matches:	450 for _, cl, match in matches:

400 suspected_cl = cl	451 suspected_cl = cl

401 revision_url = match.revision_url	452 revision_url = match.revision_url

402 component_name = match.component_name	453 component_name = match.component_name

403 author = match.author	454 author = match.author

404 reason = match.reason	455 reason = match.reason

405 review_url = match.review_url	456 review_url = match.review_url

406 reviewers = match.reviewers	457 reviewers = match.reviewers

407 # For matches, line content do not exist.	458 # For matches, line content do not exist.

408 line_content = None	459 line_content = None

	460 message = match.message

409	461

410 result = Result(suspected_cl, revision_url, component_name, author, reason,	462 result = Result(suspected_cl, revision_url, component_name, author, reason,

411 review_url, reviewers, line_content)	463 review_url, reviewers, line_content, message)

412 result_list.append(result)	464 result_list.append(result)

413	465

414 return result_list	466 return result_list

415	467

416	468

417 def BlameListToResultList(blame_list):	469 def BlameListToResultList(blame_list):

418 """Convert blame list to the list of result objects.	470 """Convert blame list to the list of result objects.

419	471

420 Args:	472 Args:

421 blame_list: A list of blame objects.	473 blame_list: A list of blame objects.

422	474

423 Returns:	475 Returns:

424 A list of result objects.	476 A list of result objects.

425 """	477 """

426 result_list = []	478 result_list = []

427	479

428 for blame in blame_list:	480 for blame in blame_list:

429 suspected_cl = blame.revision	481 suspected_cl = blame.revision

430 revision_url = blame.url	482 revision_url = blame.url

431 component_name = blame.component_name	483 component_name = blame.component_name

432 author = blame.author	484 author = blame.author

433 reason = (	485 reason = (

434 'The CL changes line %s of file %s from stack %d.' %	486 'The CL last changed line %s of file %s, which is stack frame %d.' %

435 (blame.line_number, blame.file, blame.stack_frame_index))	487 (blame.line_number, blame.file, blame.stack_frame_index))

436 # Blame object does not have review url and reviewers.	488 # Blame object does not have review url and reviewers.

437 review_url = None	489 review_url = None

438 reviewers = None	490 reviewers = None

439 line_content = blame.line_content	491 line_content = blame.line_content

	492 message = blame.message

440	493

441 result = Result(suspected_cl, revision_url, component_name, author, reason,	494 result = Result(suspected_cl, revision_url, component_name, author, reason,

442 review_url, reviewers, line_content)	495 review_url, reviewers, line_content, message)

443 result_list.append(result)	496 result_list.append(result)

444	497

445 return result_list	498 return result_list

OLD	NEW

« no previous file with comments | « tools/findit/config.ini ('k') | tools/findit/findit_for_clusterfuzz.py » ('j') | no next file with comments »