Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2016 The Chromium Authors. All rights reserved. | 2 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 import argparse | 6 import argparse |
| 7 import bisect | 7 import bisect |
| 8 import collections | 8 import collections |
| 9 import gzip | 9 import gzip |
| 10 import itertools | |
| 10 import json | 11 import json |
| 11 import os | 12 import os |
| 12 import re | 13 import re |
| 13 import subprocess | 14 import subprocess |
| 14 import sys | 15 import sys |
| 15 | 16 |
| 16 _SYMBOLS_PATH = os.path.abspath(os.path.join( | 17 _SYMBOLS_PATH = os.path.abspath(os.path.join( |
| 17 os.path.dirname(os.path.realpath(__file__)), | 18 os.path.dirname(os.path.realpath(__file__)), |
| 18 '..', | 19 '..', |
| 19 'third_party', | 20 'third_party', |
| 20 'symbols')) | 21 'symbols')) |
| 21 sys.path.append(_SYMBOLS_PATH) | 22 sys.path.append(_SYMBOLS_PATH) |
| 22 # pylint: disable=import-error | 23 # pylint: disable=import-error |
| 23 import symbols.elf_symbolizer as elf_symbolizer | 24 import symbols.elf_symbolizer as elf_symbolizer |
| 24 | 25 |
| 25 import symbolize_trace_atos_regex | 26 import symbolize_trace_atos_regex |
| 26 import symbolize_trace_macho_reader | 27 import symbolize_trace_macho_reader |
| 27 | 28 |
| 28 | 29 |
| 29 # Relevant trace event phases from Chromium's | 30 class MemoryMap(object): |
| 30 # src/base/trace_event/common/trace_event_common.h. | |
| 31 TRACE_EVENT_PHASE_METADATA = 'M' | |
| 32 TRACE_EVENT_PHASE_MEMORY_DUMP = 'v' | |
| 33 | |
| 34 | |
| 35 # Matches Android library paths, supports both K (/data/app-lib/<>/lib.so) | |
| 36 # as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available | |
| 37 # via 'name' group. | |
| 38 ANDROID_PATH_MATCHER = re.compile( | |
| 39 r'^/data/(?:' | |
| 40 r'app/[^/]+/lib/[^/]+/|' | |
| 41 r'app-lib/[^/]+/|' | |
| 42 r'data/[^/]+/incremental-install-files/lib/' | |
| 43 r')(?P<name>.*\.so)') | |
| 44 | |
| 45 # Subpath of output path where unstripped libraries are stored. | |
| 46 ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped' | |
| 47 | |
| 48 | |
| 49 def FindInSystemPath(binary_name): | |
| 50 paths = os.environ['PATH'].split(os.pathsep) | |
| 51 for path in paths: | |
| 52 binary_path = os.path.join(path, binary_name) | |
| 53 if os.path.isfile(binary_path): | |
| 54 return binary_path | |
| 55 return None | |
| 56 | |
| 57 | |
| 58 class Symbolizer(object): | |
| 59 # Encapsulates platform-specific symbolization logic. | |
| 60 def __init__(self): | |
| 61 self.is_mac = sys.platform == 'darwin' | |
| 62 self.is_win = sys.platform == 'win32' | |
| 63 if self.is_mac: | |
| 64 self.binary = 'atos' | |
| 65 self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher() | |
| 66 elif self.is_win: | |
| 67 self.binary = 'addr2line-pdb.exe' | |
| 68 else: | |
| 69 self.binary = 'addr2line' | |
| 70 self.symbolizer_path = FindInSystemPath(self.binary) | |
| 71 | |
| 72 def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name): | |
| 73 def _SymbolizerCallback(sym_info, frames): | |
| 74 # Unwind inline chain to the top. | |
| 75 while sym_info.inlined_by: | |
| 76 sym_info = sym_info.inlined_by | |
| 77 | |
| 78 symbolized_name = sym_info.name if sym_info.name else unsymbolized_name | |
| 79 for frame in frames: | |
| 80 frame.name = symbolized_name | |
| 81 | |
| 82 symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path, | |
| 83 self.symbolizer_path, | |
| 84 _SymbolizerCallback, | |
| 85 inlines=True) | |
| 86 | |
| 87 for address, frames in symfile.frames_by_address.iteritems(): | |
| 88 # SymbolizeAsync() asserts that the type of address is int. We operate | |
| 89 # on longs (since they are raw pointers possibly from 64-bit processes). | |
| 90 # It's OK to cast here because we're passing relative PC, which should | |
| 91 # always fit into int. | |
| 92 symbolizer.SymbolizeAsync(int(address), frames) | |
| 93 | |
| 94 symbolizer.Join() | |
| 95 | |
| 96 | |
| 97 def _SymbolizeMac(self, symfile): | |
| 98 chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True)) | |
| 99 | |
| 100 # 16 for the address, 2 for "0x", 1 for the space | |
| 101 chars_per_address = 19 | |
| 102 | |
| 103 load_address = (symbolize_trace_macho_reader. | |
| 104 ReadMachOTextLoadAddress(symfile.symbolizable_path)) | |
| 105 assert load_address is not None | |
| 106 | |
| 107 cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l', | |
| 108 '0x%x' % load_address, '-o', | |
| 109 symfile.symbolizable_path] | |
| 110 chars_for_other_arguments = len(' '.join(cmd_base)) + 1 | |
| 111 | |
| 112 # The maximum number of inputs that can be processed at once is limited by | |
| 113 # ARG_MAX. This currently evaluates to ~13000 on macOS. | |
| 114 max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address | |
| 115 | |
| 116 all_keys = symfile.frames_by_address.keys() | |
| 117 processed_keys_count = 0 | |
| 118 while len(all_keys): | |
| 119 input_count = min(len(all_keys), max_inputs) | |
| 120 keys_to_process = all_keys[0:input_count] | |
| 121 | |
| 122 cmd = list(cmd_base) | |
| 123 cmd.extend([hex(int(x) + load_address) | |
| 124 for x in keys_to_process]) | |
| 125 output_array = subprocess.check_output(cmd).split('\n') | |
| 126 for i in range(len(keys_to_process)): | |
| 127 for frame in (symfile.frames_by_address.values() | |
| 128 [i + processed_keys_count]): | |
| 129 frame.name = self._matcher.Match(output_array[i]) | |
| 130 processed_keys_count += len(keys_to_process) | |
| 131 all_keys = all_keys[input_count:] | |
| 132 | |
| 133 | |
| 134 def _SymbolizeWin(self, symfile): | |
| 135 """Invoke symbolizer binary on windows and write all input in one go. | |
| 136 | |
| 137 Unlike linux, on windows, symbolization talks through a shared system | |
| 138 service that handles communication with the NT symbol servers. This | |
| 139 creates an explicit serialization (and therefore lock contention) of | |
| 140 any process using the symbol API for files that do not have a local PDB. | |
| 141 | |
| 142 Thus, even though the windows symbolizer binary can be made command line | |
| 143 compatible with the POSIX addr2line interface, parallelizing the | |
| 144 symbolization does not yield the same performance effects. Running | |
| 145 just one symbolizer seems good enough for now. Can optimize later | |
| 146 if this becomes a bottleneck. | |
| 147 """ | |
| 148 cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe', | |
| 149 symfile.symbolizable_path] | |
| 150 | |
| 151 proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, | |
| 152 stderr=sys.stderr) | |
| 153 addrs = ["%x" % relative_pc for relative_pc in | |
| 154 symfile.frames_by_address.keys()] | |
| 155 (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs)) | |
| 156 stdout_data = stdout_data.split('\n') | |
| 157 | |
| 158 # This is known to be in the same order as stderr_data. | |
| 159 for i, addr in enumerate(addrs): | |
| 160 for frame in symfile.frames_by_address[int(addr, 16)]: | |
| 161 # Output of addr2line with --functions is always 2 outputs per | |
| 162 # symbol, function name followed by source line number. Only grab | |
| 163 # the function name as line info is not always available. | |
| 164 frame.name = stdout_data[i * 2] | |
| 165 | |
| 166 | |
| 167 def Symbolize(self, symfile, unsymbolized_name): | |
| 168 if self.is_mac: | |
| 169 self._SymbolizeMac(symfile) | |
| 170 if self.is_win: | |
| 171 self._SymbolizeWin(symfile) | |
| 172 else: | |
| 173 self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name) | |
| 174 | |
| 175 | |
| 176 def IsSymbolizableFile(self, file_path): | |
| 177 if self.is_win: | |
| 178 extension = os.path.splitext(file_path)[1].lower() | |
| 179 return extension in ['.dll', '.exe'] | |
| 180 else: | |
| 181 result = subprocess.check_output(['file', '-0', file_path]) | |
| 182 type_string = result[result.find('\0') + 1:] | |
| 183 return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*', | |
| 184 type_string, re.DOTALL)) | |
| 185 | |
| 186 | |
| 187 class ProcessMemoryMaps(object): | |
| 188 """Represents 'process_mmaps' trace file entry.""" | 31 """Represents 'process_mmaps' trace file entry.""" |
| 189 | 32 |
| 190 class Region(object): | 33 class Region(object): |
| 191 def __init__(self, start_address, size, file_path): | 34 def __init__(self, start_address, size, file_path): |
| 192 self._start_address = start_address | 35 self._start_address = start_address |
| 193 self._size = size | 36 self._size = size |
| 194 self._file_path = file_path | 37 self._file_path = file_path |
| 195 | 38 |
| 196 @property | 39 @property |
| 197 def start_address(self): | 40 def start_address(self): |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 214 return long(self._start_address).__cmp__(long(other._start_address)) | 57 return long(self._start_address).__cmp__(long(other._start_address)) |
| 215 elif isinstance(other, (long, int)): | 58 elif isinstance(other, (long, int)): |
| 216 return long(self._start_address).__cmp__(long(other)) | 59 return long(self._start_address).__cmp__(long(other)) |
| 217 else: | 60 else: |
| 218 raise Exception('Cannot compare with %s' % type(other)) | 61 raise Exception('Cannot compare with %s' % type(other)) |
| 219 | 62 |
| 220 def __repr__(self): | 63 def __repr__(self): |
| 221 return 'Region(0x{:X} - 0x{:X}, {})'.format( | 64 return 'Region(0x{:X} - 0x{:X}, {})'.format( |
| 222 self.start_address, self.end_address, self.file_path) | 65 self.start_address, self.end_address, self.file_path) |
| 223 | 66 |
| 224 def __init__(self, process_mmaps): | 67 def __init__(self, process_mmaps_json): |
| 225 """Parses 'process_mmaps' dictionary.""" | |
| 226 | |
| 227 regions = [] | 68 regions = [] |
| 228 for region_value in process_mmaps['vm_regions']: | 69 for region_json in process_mmaps_json['vm_regions']: |
| 229 regions.append(self.Region( | 70 regions.append(self.Region( |
| 230 long(region_value['sa'], 16), | 71 long(region_json['sa'], 16), |
| 231 long(region_value['sz'], 16), | 72 long(region_json['sz'], 16), |
| 232 region_value['mf'])) | 73 region_json['mf'])) |
| 233 regions.sort() | 74 regions.sort() |
| 234 | 75 |
| 235 # Copy regions without duplicates and check for overlaps. | 76 # Copy regions without duplicates and check for overlaps. |
| 236 self._regions = [] | 77 self._regions = [] |
| 237 previous_region = None | 78 previous_region = None |
| 238 for region in regions: | 79 for region in regions: |
| 239 if previous_region is not None: | 80 if previous_region is not None: |
| 240 if region == previous_region: | 81 if region == previous_region: |
| 241 continue | 82 continue |
| 242 assert region.start_address >= previous_region.end_address, \ | 83 assert region.start_address >= previous_region.end_address, \ |
| 243 'Regions {} and {} overlap.'.format(previous_region, region) | 84 'Regions {} and {} overlap.'.format(previous_region, region) |
| 244 previous_region = region | 85 previous_region = region |
| 245 self._regions.append(region) | 86 self._regions.append(region) |
| 246 | 87 |
| 247 @property | 88 @property |
| 248 def regions(self): | 89 def regions(self): |
| 249 return self._regions | 90 return self._regions |
| 250 | 91 |
| 251 def FindRegion(self, address): | 92 def FindRegion(self, address): |
| 252 """Finds region containing |address|. Returns None if none found.""" | 93 """Finds region containing |address|. Returns None if none found.""" |
| 253 | 94 |
| 254 region_index = bisect.bisect_right(self._regions, address) - 1 | 95 region_index = bisect.bisect_right(self._regions, address) - 1 |
| 255 if region_index >= 0: | 96 if region_index >= 0: |
| 256 region = self._regions[region_index] | 97 region = self._regions[region_index] |
| 257 if address >= region.start_address and address < region.end_address: | 98 if address >= region.start_address and address < region.end_address: |
| 258 return region | 99 return region |
| 259 return None | 100 return None |
| 260 | 101 |
| 261 | 102 |
| 262 class StackFrames(object): | 103 class UnsupportedHeapDumpVersionError(Exception): |
| 263 """Represents 'stackFrames' trace file entry.""" | 104 def __init__(self, version): |
| 264 | 105 message = 'Unsupported heap dump version: {}'.format(version) |
| 265 class PCFrame(object): | 106 super(UnsupportedHeapDumpVersionError, self).__init__(message) |
| 266 def __init__(self, pc, frame): | 107 |
| 108 | |
| 109 class StringMap(object): | |
|
awong
2017/04/20 19:37:39
These classes should have doc strings explaining t
| |
| 110 def __init__(self): | |
| 111 self._modified = False | |
| 112 self._string_jsons = [] | |
| 113 self._string_by_id = {} | |
| 114 self._id_by_string = {} | |
| 115 self._max_string_id = 0 | |
| 116 | |
| 117 @property | |
| 118 def modified(self): | |
| 119 return self._modified | |
| 120 | |
| 121 @property | |
| 122 def string_by_id(self): | |
| 123 return self._string_by_id | |
| 124 | |
| 125 def ParseMore(self, heap_dump_version, strings_json): | |
| 126 if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: | |
| 127 raise UnsupportedHeapDumpVersionError(heap_dump_version) | |
| 128 | |
| 129 self._string_jsons.append(strings_json) | |
| 130 for string_json in strings_json: | |
| 131 self._Insert(string_json['id'], string_json['string']) | |
| 132 | |
| 133 def Clear(self): | |
| 134 if self._string_by_id: | |
| 135 self._modified = True | |
|
awong
2017/04/20 22:12:14
Is Clear() not reset?
This looks *almost* like __
| |
| 136 self._string_by_id = {} | |
| 137 self._id_by_string = {} | |
| 138 self._Insert(0, '[null]') | |
| 139 self._max_string_id = 0 | |
| 140 | |
| 141 def AddString(self, string): | |
| 142 string_id = self._id_by_string.get(string) | |
| 143 if string_id is None: | |
| 144 string_id = self._max_string_id + 1 | |
| 145 self._Insert(string_id, string) | |
| 146 self._modified = True | |
| 147 return string_id | |
| 148 | |
| 149 def ApplyModifications(self): | |
|
awong
2017/04/20 22:12:14
What are such modifications? Can we use a less gen
| |
| 150 if not self.modified: | |
| 151 return | |
| 152 | |
| 153 assert self._string_jsons, 'no JSON nodes' | |
| 154 | |
| 155 # Serialize into first JSON node, and clear all others. | |
|
awong
2017/04/20 22:12:14
Can we get a "why" in this comment?
As a reader,
| |
| 156 | |
| 157 for string_json in self._string_jsons: | |
| 158 string_json[:] = [] | |
|
awong
2017/04/20 22:12:14
string_json.clear()?
| |
| 159 string_json = self._string_jsons[0] | |
| 160 for string_id, string in self._string_by_id.iteritems(): | |
| 161 string_json.append({'id': string_id, 'string': string}) | |
| 162 | |
| 163 self._modified = False | |
|
awong
2017/04/20 22:12:14
This is confusing. Shouldn't it be true?
| |
| 164 | |
| 165 def _Insert(self, string_id, string): | |
| 166 self._id_by_string[string] = string_id | |
| 167 self._string_by_id[string_id] = string | |
| 168 self._max_string_id = max(self._max_string_id, string_id) | |
| 169 | |
| 170 | |
| 171 class TypeNameMap(object): | |
| 172 def __init__(self): | |
| 173 self._modified = False | |
| 174 self._type_name_jsons = [] | |
| 175 self._name_by_id = {} | |
| 176 self._id_by_name = {} | |
| 177 self._max_type_id = 0 | |
| 178 | |
| 179 @property | |
| 180 def modified(self): | |
| 181 return self._modified | |
| 182 | |
| 183 @property | |
| 184 def name_by_id(self): | |
| 185 return self._name_by_id | |
| 186 | |
| 187 def ParseMore(self, heap_dump_version, type_name_json, string_map): | |
| 188 if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: | |
| 189 raise UnsupportedHeapDumpVersionError(heap_dump_version) | |
| 190 | |
| 191 self._type_name_jsons.append(type_name_json) | |
| 192 for type_json in type_name_json: | |
| 193 self._Insert(type_json['id'], | |
| 194 string_map.string_by_id[type_json['name_sid']]) | |
| 195 | |
| 196 def AddType(self, type_name): | |
| 197 type_id = self._id_by_name.get(type_name) | |
| 198 if type_id is None: | |
| 199 type_id = self._max_type_id + 1 | |
| 200 self._Insert(type_id, type_name) | |
| 201 self._modified = True | |
| 202 return type_id | |
| 203 | |
| 204 def ApplyModifications(self, string_map, force=False): | |
| 205 if not self.modified and not force: | |
| 206 return | |
| 207 | |
| 208 assert self._type_name_jsons, 'no JSON nodes' | |
| 209 | |
| 210 # Serialize into first JSON node, and clear all others. | |
| 211 | |
| 212 for types_json in self._type_name_jsons: | |
| 213 types_json[:] = [] | |
|
awong
2017/04/20 22:12:14
types_json.clear()?
| |
| 214 types_json = self._type_name_jsons[0] | |
| 215 for type_id, type_name in self._name_by_id.iteritems(): | |
| 216 types_json.append({ | |
| 217 'id': type_id, | |
| 218 'name_sid': string_map.AddString(type_name)}) | |
| 219 | |
| 220 self._modified = False | |
|
awong
2017/04/20 22:12:14
Should this be true?
| |
| 221 | |
| 222 def _Insert(self, type_id, type_name): | |
| 223 self._id_by_name[type_name] = type_id | |
| 224 self._name_by_id[type_id] = type_name | |
| 225 self._max_type_id = max(self._max_type_id, type_id) | |
| 226 | |
| 227 | |
| 228 class StackFrameMap(object): | |
| 229 class Frame(object): | |
| 230 def __init__(self, frame_id, name, parent_frame_id): | |
| 267 self._modified = False | 231 self._modified = False |
| 268 self._pc = pc | 232 self._id = frame_id |
| 269 self._frame = frame | 233 self._name = name |
| 234 self._pc = self._ParsePC(name) | |
| 235 self._parent_id = parent_frame_id | |
| 236 self._ext = None | |
| 270 | 237 |
| 271 @property | 238 @property |
| 272 def modified(self): | 239 def modified(self): |
| 273 return self._modified | 240 return self._modified |
| 274 | 241 |
| 275 @property | 242 @property |
| 243 def id(self): | |
| 244 return self._id | |
| 245 | |
| 246 @property | |
| 276 def pc(self): | 247 def pc(self): |
| 277 return self._pc | 248 return self._pc |
| 278 | 249 |
| 279 @property | 250 @property |
| 280 def name(self): | 251 def name(self): |
| 281 return self._frame['name'] | 252 return self._name |
| 282 | 253 |
| 283 @name.setter | 254 @name.setter |
| 284 def name(self, value): | 255 def name(self, value): |
| 285 self._modified = True | 256 self._modified = True |
| 286 self._frame['name'] = value | 257 self._name = value |
| 287 | 258 |
| 288 def __init__(self, stack_frames): | 259 @property |
| 289 """Constructs object using 'stackFrames' dictionary.""" | 260 def parent_id(self): |
| 290 self._pc_frames = [] | 261 return self._parent_id |
| 291 for frame in stack_frames.itervalues(): | 262 |
| 292 pc_frame = self._ParsePCFrame(frame) | 263 _PC_TAG = 'pc:' |
| 293 if pc_frame: | 264 |
| 294 self._pc_frames.append(pc_frame) | 265 def _ParsePC(self, name): |
| 295 | 266 if not name.startswith(self._PC_TAG): |
|
awong
2017/04/20 22:12:14
How about invert the logic to remove the not?
| |
| 296 @property | 267 return None |
| 297 def pc_frames(self): | 268 return long(name[len(self._PC_TAG):], 16) |
| 298 return self._pc_frames | 269 |
| 270 def _ClearModified(self): | |
| 271 self._modified = False | |
| 272 | |
| 273 def __init__(self): | |
|
awong
2017/04/20 22:12:14
Group the __init__?
| |
| 274 self._modified = False | |
| 275 self._heap_dump_version = None | |
| 276 self._stack_frames_jsons = [] | |
| 277 self._frame_by_id = {} | |
| 299 | 278 |
| 300 @property | 279 @property |
| 301 def modified(self): | 280 def modified(self): |
| 302 return any(f.modified for f in self._pc_frames) | 281 return (self._modified or |
| 303 | 282 any(f.modified for f in self._frame_by_id.itervalues())) |
| 304 _PC_TAG = 'pc:' | 283 |
| 305 | 284 @property |
| 306 @classmethod | 285 def frame_by_id(self): |
| 307 def _ParsePCFrame(self, frame): | 286 return self._frame_by_id |
| 308 name = frame['name'] | 287 |
| 309 if not name.startswith(self._PC_TAG): | 288 def ParseMore(self, heap_dump_version, stack_frames_json, string_map): |
| 310 return None | 289 frame_by_id = {} |
| 311 pc = long(name[len(self._PC_TAG):], 16) | 290 if heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| 312 return self.PCFrame(pc, frame) | 291 if self._stack_frames_jsons: |
| 313 | 292 raise Exception('Legacy stack frames are expected only once.') |
| 314 | 293 for frame_id, frame_json in stack_frames_json.iteritems(): |
| 315 class Process(object): | 294 frame = self.Frame(frame_id, |
| 316 """Holds various bits of information about a process in a trace file.""" | 295 frame_json['name'], |
| 317 | 296 frame_json.get('parent')) |
| 318 def __init__(self, pid): | 297 frame_by_id[frame.id] = frame |
| 319 self.pid = pid | 298 else: |
| 320 self.name = None | 299 if heap_dump_version != Trace.HEAP_DUMP_VERSION_1: |
| 321 self.mmaps = None | 300 raise UnsupportedHeapDumpVersionError(heap_dump_version) |
| 322 self.stack_frames = None | 301 for frame_json in stack_frames_json: |
| 323 | 302 frame = self.Frame(frame_json['id'], |
| 324 | 303 string_map.string_by_id[frame_json['name_sid']], |
| 325 def CollectProcesses(trace): | 304 frame_json.get('parent')) |
| 326 """Parses trace dictionary and returns pid->Process map of all processes | 305 frame_by_id[frame.id] = frame |
| 327 suitable for symbolization (which have both mmaps and stack_frames). | 306 |
| 328 """ | 307 self._heap_dump_version = heap_dump_version |
| 329 | 308 self._stack_frames_jsons.append(stack_frames_json) |
| 330 process_map = {} | 309 |
| 331 | 310 self._frame_by_id = frame_by_id |
| 332 # Android traces produced via 'chrome://inspect/?tracing#devices' are | 311 |
| 333 # just list of events. | 312 def ApplyModifications(self, string_map, force=False): |
| 334 events = trace if isinstance(trace, list) else trace['traceEvents'] | 313 if not self.modified and not force: |
| 335 for event in events: | 314 return |
| 336 name = event.get('name') | 315 |
| 337 if not name: | 316 assert self._stack_frames_jsons, 'no JSON nodes' |
| 338 continue | 317 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| 339 | 318 assert string_map is None, \ |
| 340 pid = event['pid'] | 319 'string_map should not be used with the legacy format' |
| 341 process = process_map.get(pid) | 320 |
| 342 if process is None: | 321 # Serialize frames into first JSON node, and clear all others. |
| 343 process = Process(pid) | 322 |
| 344 process_map[pid] = process | 323 for frames_json in self._stack_frames_jsons: |
| 345 | 324 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| 346 phase = event['ph'] | 325 frames_json.clear() |
| 347 if phase == TRACE_EVENT_PHASE_METADATA: | 326 else: |
| 348 if name == 'process_name': | 327 frames_json[:] = [] |
| 349 process.name = event['args']['name'] | 328 |
| 350 elif name == 'stackFrames': | 329 frames_json = self._stack_frames_jsons[0] |
| 351 process.stack_frames = StackFrames(event['args']['stackFrames']) | 330 for frame in self._frame_by_id.itervalues(): |
| 352 elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP: | 331 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: |
| 353 process_mmaps = event['args']['dumps'].get('process_mmaps') | 332 frame_json = {'name': frame.name} |
| 354 if process_mmaps: | 333 frames_json[frame.id] = frame_json |
| 355 # TODO(dskiba): this parses all process_mmaps, but retains only the | 334 else: |
| 356 # last one. We need to parse only once (lazy parsing?). | 335 frame_json = { |
| 357 process.mmaps = ProcessMemoryMaps(process_mmaps) | 336 'id': frame.id, |
| 358 | 337 'name_sid': string_map.AddString(frame.name) |
| 359 return [p for p in process_map.itervalues() if p.mmaps and p.stack_frames] | 338 } |
| 339 frames_json.append(frame_json) | |
| 340 if frame.parent_id is not None: | |
| 341 frame_json['parent'] = frame.parent_id | |
| 342 frame._ClearModified() | |
| 343 | |
| 344 self._modified = False | |
| 345 | |
| 346 | |
| 347 class HeapProfile(object): | |
| 348 EntryKey = collections.namedtuple( | |
| 349 'EntryKey', | |
| 350 ['stack_frame_id', 'type_name_id']) | |
| 351 | |
| 352 class Entry(object): | |
| 353 def __init__(self, key, mapped_value_by_name, numeric_value_by_name): | |
| 354 self._key = key | |
| 355 self._mapped_value_by_name = mapped_value_by_name | |
| 356 self._numeric_value_by_name = numeric_value_by_name | |
| 357 | |
| 358 @property | |
| 359 def key(self): | |
| 360 return self._key | |
| 361 | |
| 362 @property | |
| 363 def stack_frame_id(self): | |
| 364 return self._key.stack_frame_id | |
| 365 | |
| 366 @property | |
| 367 def type_name_id(self): | |
| 368 return self._key.type_name_id | |
| 369 | |
| 370 def _AddValuesFrom(self, entry): | |
| 371 self._mapped_value_by_name.clear() | |
| 372 for name, value in entry._numeric_value_by_name.iteritems(): | |
| 373 value += self._numeric_value_by_name.get(name, 0) | |
| 374 self._numeric_value_by_name[name] = value | |
| 375 | |
| 376 def __init__(self, allocator_name, entries_json, mapped_entry_names): | |
| 377 self._modified = False | |
| 378 self._allocator_name = allocator_name | |
| 379 self._entries_json = entries_json | |
| 380 self._entries = [] | |
| 381 for values in zip(*entries_json.itervalues()): | |
| 382 stack_frame_id = None | |
| 383 type_name_id = None | |
| 384 mapped_value_by_name = {} | |
| 385 numeric_value_by_name = {} | |
| 386 for index, name in enumerate(entries_json.iterkeys()): | |
| 387 value = values[index] | |
| 388 if name == 'nodes': | |
| 389 stack_frame_id = value | |
| 390 elif name == 'types': | |
| 391 type_name_id = value | |
| 392 elif name in mapped_entry_names: | |
| 393 mapped_value_by_name[name] = value | |
| 394 else: | |
| 395 numeric_value_by_name[name] = value | |
| 396 entry = self.Entry(self.EntryKey(stack_frame_id, type_name_id), | |
| 397 mapped_value_by_name, numeric_value_by_name) | |
| 398 self._entries.append(entry) | |
| 399 | |
| 400 @property | |
| 401 def modified(self): | |
| 402 return self._modified | |
| 403 | |
| 404 @property | |
| 405 def allocator_name(self): | |
| 406 return self._allocator_name | |
| 407 | |
| 408 @property | |
| 409 def entries(self): | |
| 410 return self._entries | |
| 411 | |
| 412 def ApplyModifications(self): | |
| 413 if not self.modified: | |
| 414 return | |
| 415 | |
| 416 mapped_value_names = set() | |
| 417 numeric_value_names = set() | |
| 418 for entry in self._entries: | |
| 419 mapped_value_names.update(entry._mapped_value_by_name.iterkeys()) | |
| 420 numeric_value_names.update(entry._numeric_value_by_name.iterkeys()) | |
| 421 | |
| 422 def _AddJSONValue(name, value): | |
| 423 values = self._entries_json.get(name) | |
| 424 if values is None: | |
| 425 values = [] | |
| 426 self._entries_json[name] = values | |
| 427 values.append(value) | |
| 428 | |
| 429 self._entries_json.clear() | |
| 430 for entry in self._entries: | |
| 431 _AddJSONValue('nodes', entry.stack_frame_id) | |
| 432 _AddJSONValue('types', entry.type_name_id) | |
| 433 for name in mapped_value_names: | |
| 434 value = entry._mapped_value_by_name[name] | |
| 435 _AddJSONValue(name, value) | |
| 436 for name in numeric_value_names: | |
| 437 value = entry._numeric_value_by_name[name] | |
| 438 _AddJSONValue(name, value) | |
| 439 | |
| 440 self._modified = False | |
| 441 | |
| 442 | |
| 443 class MemoryDump(object): | |
| 444 def __init__(self, allocators_json, mapped_entry_names): | |
| 445 self._profiles = [] | |
| 446 for allocator_name, entries_json in allocators_json.iteritems(): | |
| 447 profile = HeapProfile(allocator_name, entries_json, mapped_entry_names) | |
| 448 self._profiles.append(profile) | |
| 449 | |
| 450 @property | |
| 451 def modified(self): | |
| 452 return any(p.modified for p in self.profiles) | |
| 453 | |
| 454 @property | |
| 455 def profiles(self): | |
| 456 return self._profiles | |
| 457 | |
| 458 def ApplyModifications(self): | |
| 459 for profile in self._profiles: | |
| 460 profile.ApplyModifications() | |
| 461 | |
| 462 | |
| 463 class Trace(object): | |
| 464 | |
| 465 HEAP_DUMP_VERSION_LEGACY = 'Legacy' | |
| 466 HEAP_DUMP_VERSION_1 = 1 | |
| 467 | |
| 468 class Process(object): | |
| 469 def __init__(self, pid): | |
| 470 self._pid = pid | |
| 471 self._name = None | |
| 472 self._memory_map = None | |
| 473 self._memory_dumps = [] | |
| 474 self._stack_frame_map = StackFrameMap() | |
| 475 self._type_name_map = TypeNameMap() | |
| 476 self._string_map = StringMap() | |
| 477 self._heap_dump_version = None | |
| 478 | |
| 479 @property | |
| 480 def modified(self): | |
| 481 return (self._stack_frame_map.modified or | |
| 482 self._type_name_map.modified or | |
| 483 any(d.modified for d in self._memory_dumps)) | |
| 484 | |
| 485 @property | |
| 486 def pid(self): | |
| 487 return self._pid | |
| 488 | |
| 489 @property | |
| 490 def name(self): | |
|
awong
2017/04/20 22:28:04
For these properties, having a docstring that expl
| |
| 491 return self._name | |
| 492 | |
| 493 @property | |
| 494 def unique_name(self): | |
| 495 name = self._name if self._name else 'UnnamedProcess' | |
| 496 return '{}({})'.format(name, self._pid) | |
| 497 | |
| 498 @property | |
| 499 def memory_map(self): | |
| 500 return self._memory_map | |
| 501 | |
| 502 @property | |
| 503 def memory_dumps(self): | |
|
awong
2017/04/20 22:28:04
Why is this one plural?
| |
| 504 return self._memory_dumps | |
| 505 | |
| 506 @property | |
| 507 def stack_frame_map(self): | |
| 508 return self._stack_frame_map | |
| 509 | |
| 510 @property | |
| 511 def type_name_map(self): | |
| 512 return self._type_name_map | |
| 513 | |
| 514 def ApplyModifications(self): | |
| 515 if self._heap_dump_version == Trace.HEAP_DUMP_VERSION_LEGACY: | |
| 516 self._stack_frame_map.ApplyModifications(None) | |
| 517 else: | |
| 518 if self._stack_frame_map.modified or self._type_name_map.modified: | |
| 519 self._string_map.Clear() | |
| 520 self._stack_frame_map.ApplyModifications(self._string_map, force=True) | |
| 521 self._type_name_map.ApplyModifications(self._string_map, force=True) | |
| 522 self._string_map.ApplyModifications() | |
| 523 for dump in self._memory_dumps: | |
| 524 dump.ApplyModifications() | |
| 525 | |
| 526 def __init__(self, trace_json): | |
| 527 self._trace_json = trace_json | |
| 528 self._processes = [] | |
| 529 self._heap_dump_version = None | |
| 530 | |
| 531 # Misc per-process information needed only during parsing. | |
| 532 class ProcessExt(object): | |
| 533 def __init__(self, pid): | |
| 534 self.process = Trace.Process(pid) | |
| 535 self.mapped_entry_names = set() | |
| 536 self.process_mmaps_json = None | |
| 537 self.seen_strings_json = False | |
| 538 | |
| 539 process_ext_by_pid = {} | |
| 540 | |
| 541 # Android traces produced via 'chrome://inspect/?tracing#devices' are | |
| 542 # just list of events. | |
| 543 events = trace_json if isinstance(trace_json, list) \ | |
| 544 else trace_json['traceEvents'] | |
| 545 for event in events: | |
| 546 name = event.get('name') | |
| 547 if not name: | |
| 548 continue | |
| 549 | |
| 550 pid = event['pid'] | |
| 551 process_ext = process_ext_by_pid.get(pid) | |
| 552 if process_ext is None: | |
| 553 process_ext = ProcessExt(pid) | |
| 554 process_ext_by_pid[pid] = process_ext | |
| 555 process = process_ext.process | |
| 556 | |
| 557 phase = event['ph'] | |
| 558 if phase == self._EVENT_PHASE_METADATA: | |
| 559 if name == 'process_name': | |
| 560 process._name = event['args']['name'] | |
| 561 elif name == 'stackFrames': | |
| 562 process._stack_frame_map.ParseMore( | |
| 563 self._UseHeapDumpVersion(self.HEAP_DUMP_VERSION_LEGACY), | |
| 564 event['args']['stackFrames'], | |
| 565 process._string_map) | |
| 566 elif phase == self._EVENT_PHASE_MEMORY_DUMP: | |
| 567 dumps = event['args']['dumps'] | |
| 568 process_mmaps = dumps.get('process_mmaps') | |
| 569 if process_mmaps: | |
| 570 # We want the most recent memory map, so parsing happens later | |
| 571 # once we have finished reading all events. | |
| 572 process_ext.process_mmaps_json = process_mmaps | |
| 573 heaps = dumps.get('heaps_v2') | |
| 574 if heaps: | |
| 575 version = self._UseHeapDumpVersion(heaps['version']) | |
| 576 maps = heaps.get('maps') | |
| 577 if maps: | |
| 578 process_ext.mapped_entry_names.update(maps.iterkeys()) | |
| 579 types = maps.get('types') | |
| 580 stack_frames = maps.get('nodes') | |
| 581 strings = maps.get('strings') | |
| 582 if (strings is None and (types or stack_frames) | |
| 583 and not process_ext.seen_strings_json): | |
| 584 # ApplyModifications() for TypeNameMap and StackFrameMap puts | |
| 585 # everything into the first node and depends on StringMap. So | |
| 586 # we need to make sure that the 'strings' node is there if either | |
| 587 # of the other two nodes is present. | |
| 588 strings = [] | |
| 589 maps['strings'] = strings | |
| 590 if strings is not None: | |
| 591 process_ext.seen_strings_json = True | |
| 592 process._string_map.ParseMore(version, strings) | |
| 593 if types: | |
| 594 process._type_name_map.ParseMore( | |
| 595 version, types, process._string_map) | |
| 596 if stack_frames: | |
| 597 process._stack_frame_map.ParseMore( | |
| 598 version, stack_frames, process._string_map) | |
| 599 allocators = heaps.get('allocators') | |
| 600 if allocators: | |
| 601 dump = MemoryDump(allocators, process_ext.mapped_entry_names) | |
| 602 process._memory_dumps.append(dump) | |
| 603 | |
| 604 self._processes = [] | |
| 605 for pe in process_ext_by_pid.itervalues(): | |
| 606 pe.process._heap_dump_version = self._heap_dump_version | |
| 607 if pe.process_mmaps_json: | |
| 608 # Now parse the most recent memory map. | |
| 609 pe.process._memory_map = MemoryMap(pe.process_mmaps_json) | |
| 610 self._processes.append(pe.process) | |
| 611 | |
| 612 @property | |
| 613 def modified(self): | |
| 614 return any(p.modified for p in self._processes) | |
| 615 | |
| 616 @property | |
| 617 def processes(self): | |
| 618 return self._processes | |
| 619 | |
| 620 @property | |
| 621 def heap_dump_version(self): | |
| 622 return self._heap_dump_version | |
| 623 | |
| 624 def ApplyModifications(self): | |
| 625 for process in self._processes: | |
| 626 process.ApplyModifications() | |
| 627 assert not self.modified, 'still modified' | |
| 628 | |
| 629 def Serialize(self): | |
| 630 return self._trace_json | |
| 631 | |
| 632 # Relevant trace event phases from Chromium's | |
| 633 # src/base/trace_event/common/trace_event_common.h. | |
| 634 _EVENT_PHASE_METADATA = 'M' | |
| 635 _EVENT_PHASE_MEMORY_DUMP = 'v' | |
| 636 | |
| 637 def _UseHeapDumpVersion(self, version): | |
| 638 if self._heap_dump_version is None: | |
| 639 self._heap_dump_version = version | |
| 640 return version | |
| 641 elif self._heap_dump_version != version: | |
| 642 raise Exception( | |
| 643 ("Inconsistent trace file: first saw '{}' heap dump version, " | |
| 644 "then '{}'.").format(self._heap_dump_version, version)) | |
| 645 else: | |
| 646 return version | |
| 360 | 647 |
| 361 | 648 |
| 362 class SymbolizableFile(object): | 649 class SymbolizableFile(object): |
| 363 """Holds file path, addresses to symbolize and stack frames to update. | 650 """Holds file path, addresses to symbolize and stack frames to update. |
| 364 | 651 |
| 365 This class is a link between ELFSymbolizer and a trace file: it specifies | 652 This class is a link between ELFSymbolizer and a trace file: it specifies |
| 366 what to symbolize (addresses) and what to update with the symbolization | 653 what to symbolize (addresses) and what to update with the symbolization |
| 367 result (frames). | 654 result (frames). |
| 368 """ | 655 """ |
| 369 def __init__(self, file_path): | 656 def __init__(self, file_path): |
| 370 self.path = file_path | 657 self.path = file_path |
| 371 self.symbolizable_path = file_path # path to use for symbolization | 658 self.symbolizable_path = file_path # path to use for symbolization |
| 372 self.frames_by_address = collections.defaultdict(list) | 659 self.frames_by_address = collections.defaultdict(list) |
| 373 | 660 |
| 374 | 661 |
| 375 def ResolveSymbolizableFiles(processes): | 662 def ResolveSymbolizableFiles(processes): |
| 376 """Resolves and groups PCs into list of SymbolizableFiles. | 663 """Resolves and groups PCs into list of SymbolizableFiles. |
| 377 | 664 |
| 378 As part of the grouping process, this function resolves PC from each stack | 665 As part of the grouping process, this function resolves PC from each stack |
| 379 frame to the corresponding mmap region. Stack frames that failed to resolve | 666 frame to the corresponding mmap region. Stack frames that failed to resolve |
| 380 are symbolized with '<unresolved>'. | 667 are symbolized with '<unresolved>'. |
| 381 """ | 668 """ |
| 382 symfile_by_path = {} | 669 symfile_by_path = {} |
| 383 for process in processes: | 670 for process in processes: |
| 384 for frame in process.stack_frames.pc_frames: | 671 if not process.memory_map: |
|
awong
2017/04/20 22:28:04
Comment explaining when this can occur?
| |
| 385 region = process.mmaps.FindRegion(frame.pc) | 672 continue |
| 673 for frame in process.stack_frame_map.frame_by_id.itervalues(): | |
| 674 if frame.pc is None: | |
| 675 continue | |
| 676 region = process.memory_map.FindRegion(frame.pc) | |
| 386 if region is None: | 677 if region is None: |
| 387 frame.name = '<unresolved>' | 678 frame.name = '<unresolved>' |
| 388 continue | 679 continue |
| 389 | 680 |
| 390 symfile = symfile_by_path.get(region.file_path) | 681 symfile = symfile_by_path.get(region.file_path) |
| 391 if symfile is None: | 682 if symfile is None: |
| 392 symfile = SymbolizableFile(region.file_path) | 683 symfile = SymbolizableFile(region.file_path) |
| 393 symfile_by_path[symfile.path] = symfile | 684 symfile_by_path[symfile.path] = symfile |
| 394 | 685 |
| 395 relative_pc = frame.pc - region.start_address | 686 relative_pc = frame.pc - region.start_address |
| 396 symfile.frames_by_address[relative_pc].append(frame) | 687 symfile.frames_by_address[relative_pc].append(frame) |
| 397 return symfile_by_path.values() | 688 return symfile_by_path.values() |
| 398 | 689 |
| 399 | 690 |
| 691 def FindInSystemPath(binary_name): | |
| 692 paths = os.environ['PATH'].split(os.pathsep) | |
| 693 for path in paths: | |
| 694 binary_path = os.path.join(path, binary_name) | |
| 695 if os.path.isfile(binary_path): | |
| 696 return binary_path | |
| 697 return None | |
| 698 | |
| 699 | |
| 700 class Symbolizer(object): | |
| 701 # Encapsulates platform-specific symbolization logic. | |
|
awong
2017/04/20 22:28:04
Turn into docstring.
| |
| 702 def __init__(self): | |
| 703 self.is_mac = sys.platform == 'darwin' | |
| 704 self.is_win = sys.platform == 'win32' | |
| 705 if self.is_mac: | |
| 706 self.binary = 'atos' | |
| 707 self._matcher = symbolize_trace_atos_regex.AtosRegexMatcher() | |
| 708 elif self.is_win: | |
| 709 self.binary = 'addr2line-pdb.exe' | |
| 710 else: | |
| 711 self.binary = 'addr2line' | |
| 712 self.symbolizer_path = FindInSystemPath(self.binary) | |
| 713 | |
| 714 def _SymbolizeLinuxAndAndroid(self, symfile, unsymbolized_name): | |
| 715 def _SymbolizerCallback(sym_info, frames): | |
| 716 # Unwind inline chain to the top. | |
| 717 while sym_info.inlined_by: | |
| 718 sym_info = sym_info.inlined_by | |
| 719 | |
| 720 symbolized_name = sym_info.name if sym_info.name else unsymbolized_name | |
| 721 for frame in frames: | |
| 722 frame.name = symbolized_name | |
| 723 frame.ext.source_path = sym_info.source_path | |
| 724 | |
| 725 symbolizer = elf_symbolizer.ELFSymbolizer(symfile.symbolizable_path, | |
| 726 self.symbolizer_path, | |
| 727 _SymbolizerCallback, | |
| 728 inlines=True) | |
| 729 | |
| 730 for address, frames in symfile.frames_by_address.iteritems(): | |
| 731 # SymbolizeAsync() asserts that the type of address is int. We operate | |
| 732 # on longs (since they are raw pointers possibly from 64-bit processes). | |
| 733 # It's OK to cast here because we're passing relative PC, which should | |
| 734 # always fit into int. | |
| 735 symbolizer.SymbolizeAsync(int(address), frames) | |
| 736 | |
| 737 symbolizer.Join() | |
| 738 | |
| 739 | |
| 740 def _SymbolizeMac(self, symfile): | |
| 741 chars_max = int(subprocess.check_output("getconf ARG_MAX", shell=True)) | |
| 742 | |
| 743 # 16 for the address, 2 for "0x", 1 for the space | |
| 744 chars_per_address = 19 | |
| 745 | |
| 746 load_address = (symbolize_trace_macho_reader. | |
| 747 ReadMachOTextLoadAddress(symfile.symbolizable_path)) | |
| 748 assert load_address is not None | |
| 749 | |
| 750 cmd_base = [self.symbolizer_path, '-arch', 'x86_64', '-l', | |
| 751 '0x%x' % load_address, '-o', | |
| 752 symfile.symbolizable_path] | |
| 753 chars_for_other_arguments = len(' '.join(cmd_base)) + 1 | |
| 754 | |
| 755 # The maximum number of inputs that can be processed at once is limited by | |
| 756 # ARG_MAX. This currently evaluates to ~13000 on macOS. | |
| 757 max_inputs = (chars_max - chars_for_other_arguments) / chars_per_address | |
| 758 | |
| 759 all_keys = symfile.frames_by_address.keys() | |
| 760 processed_keys_count = 0 | |
| 761 while len(all_keys): | |
| 762 input_count = min(len(all_keys), max_inputs) | |
| 763 keys_to_process = all_keys[0:input_count] | |
| 764 cmd = list(cmd_base) | |
| 765 cmd.extend([hex(int(x) + load_address) | |
| 766 for x in keys_to_process]) | |
| 767 output_array = subprocess.check_output(cmd).split('\n') | |
| 768 for i in range(len(keys_to_process)): | |
| 769 for frame in (symfile.frames_by_address.values() | |
| 770 [i + processed_keys_count]): | |
| 771 frame.name = self._matcher.Match(output_array[i]) | |
| 772 processed_keys_count += len(keys_to_process) | |
| 773 all_keys = all_keys[input_count:] | |
| 774 | |
| 775 def _SymbolizeWin(self, symfile): | |
| 776 """Invoke symbolizer binary on windows and write all input in one go. | |
| 777 | |
| 778 Unlike linux, on windows, symbolization talks through a shared system | |
| 779 service that handles communication with the NT symbol servers. This | |
| 780 creates an explicit serialization (and therefore lock contention) of | |
| 781 any process using the symbol API for files that do not have a local PDB. | |
| 782 | |
| 783 Thus, even though the windows symbolizer binary can be made command line | |
| 784 compatible with the POSIX addr2line interface, parallelizing the | |
| 785 symbolization does not yield the same performance effects. Running | |
| 786 just one symbolizer seems good enough for now. Can optimize later | |
| 787 if this becomes a bottleneck. | |
| 788 """ | |
| 789 cmd = [self.symbolizer_path, '--functions', '--demangle', '--exe', | |
| 790 symfile.symbolizable_path] | |
| 791 | |
| 792 proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, | |
| 793 stderr=sys.stderr) | |
| 794 addrs = ["%x" % relative_pc for relative_pc in | |
| 795 symfile.frames_by_address.keys()] | |
| 796 (stdout_data, stderr_data) = proc.communicate('\n'.join(addrs)) | |
| 797 stdout_data = stdout_data.split('\n') | |
| 798 | |
| 799 # This is known to be in the same order as stderr_data. | |
| 800 for i, addr in enumerate(addrs): | |
| 801 for frame in symfile.frames_by_address[int(addr, 16)]: | |
| 802 # Output of addr2line with --functions is always 2 outputs per | |
| 803 # symbol, function name followed by source line number. Only grab | |
| 804 # the function name as line info is not always available. | |
| 805 frame.name = stdout_data[i * 2] | |
| 806 | |
| 807 def Symbolize(self, symfile, unsymbolized_name): | |
| 808 if self.is_mac: | |
| 809 self._SymbolizeMac(symfile) | |
| 810 elif self.is_win: | |
| 811 self._SymbolizeWin(symfile) | |
| 812 else: | |
| 813 self._SymbolizeLinuxAndAndroid(symfile, unsymbolized_name) | |
| 814 | |
| 815 def IsSymbolizableFile(self, file_path): | |
| 816 if self.is_win: | |
| 817 extension = os.path.splitext(file_path)[1].lower() | |
| 818 return extension in ['.dll', '.exe'] | |
| 819 else: | |
| 820 result = subprocess.check_output(['file', '-0', file_path]) | |
| 821 type_string = result[result.find('\0') + 1:] | |
| 822 return bool(re.match(r'.*(ELF|Mach-O) (32|64)-bit\b.*', | |
| 823 type_string, re.DOTALL)) | |
| 824 | |
| 825 | |
| 400 def SymbolizeFiles(symfiles, symbolizer): | 826 def SymbolizeFiles(symfiles, symbolizer): |
| 401 """Symbolizes each file in the given list of SymbolizableFiles | 827 """Symbolizes each file in the given list of SymbolizableFiles |
| 402 and updates stack frames with symbolization results.""" | 828 and updates stack frames with symbolization results.""" |
| 829 | |
| 830 if not symfiles: | |
| 831 print 'Nothing to symbolize.' | |
| 832 return | |
| 833 | |
| 403 print 'Symbolizing...' | 834 print 'Symbolizing...' |
| 404 | 835 |
| 405 def _SubPrintf(message, *args): | 836 def _SubPrintf(message, *args): |
| 406 print (' ' + message).format(*args) | 837 print (' ' + message).format(*args) |
| 407 | 838 |
| 408 symbolized = False | |
| 409 for symfile in symfiles: | 839 for symfile in symfiles: |
| 410 unsymbolized_name = '<{}>'.format( | 840 unsymbolized_name = '<{}>'.format( |
| 411 symfile.path if symfile.path else 'unnamed') | 841 symfile.path if symfile.path else 'unnamed') |
| 412 | 842 |
| 413 problem = None | 843 problem = None |
| 414 if not os.path.isabs(symfile.symbolizable_path): | 844 if not os.path.isabs(symfile.symbolizable_path): |
| 415 problem = 'not a file' | 845 problem = 'not a file' |
| 416 elif not os.path.isfile(symfile.symbolizable_path): | 846 elif not os.path.isfile(symfile.symbolizable_path): |
| 417 problem = "file doesn't exist" | 847 problem = "file doesn't exist" |
| 418 elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path): | 848 elif not symbolizer.IsSymbolizableFile(symfile.symbolizable_path): |
| 419 problem = 'file is not symbolizable' | 849 problem = 'file is not symbolizable' |
| 420 if problem: | 850 if problem: |
| 421 _SubPrintf("Won't symbolize {} PCs for '{}': {}.", | 851 _SubPrintf("Won't symbolize {} PCs for '{}': {}.", |
| 422 len(symfile.frames_by_address), | 852 len(symfile.frames_by_address), |
| 423 symfile.symbolizable_path, | 853 symfile.symbolizable_path, |
| 424 problem) | 854 problem) |
| 425 for frames in symfile.frames_by_address.itervalues(): | 855 for frames in symfile.frames_by_address.itervalues(): |
| 426 for frame in frames: | 856 for frame in frames: |
| 427 frame.name = unsymbolized_name | 857 frame.name = unsymbolized_name |
| 428 continue | 858 continue |
| 429 | 859 |
| 430 _SubPrintf('Symbolizing {} PCs from {}...', | 860 _SubPrintf('Symbolizing {} PCs from {}...', |
| 431 len(symfile.frames_by_address), | 861 len(symfile.frames_by_address), |
| 432 symfile.path) | 862 symfile.path) |
| 433 | 863 |
| 434 symbolizer.Symbolize(symfile, unsymbolized_name) | 864 symbolizer.Symbolize(symfile, unsymbolized_name) |
| 435 symbolized = True | |
| 436 | 865 |
| 437 return symbolized | 866 |
| 867 # Matches Android library paths, supports both K (/data/app-lib/<>/lib.so) | |
| 868 # as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available | |
| 869 # via 'name' group. | |
| 870 ANDROID_PATH_MATCHER = re.compile( | |
|
awong
2017/04/20 22:28:04
This is hardish to read and matching paths with re
| |
| 871 r'^/data/(?:' | |
| 872 r'app/[^/]+/lib/[^/]+/|' | |
| 873 r'app-lib/[^/]+/|' | |
| 874 r'data/[^/]+/incremental-install-files/lib/' | |
| 875 r')(?P<name>.*\.so)') | |
| 876 | |
| 877 # Subpath of output path where unstripped libraries are stored. | |
| 878 ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped' | |
| 438 | 879 |
| 439 | 880 |
| 440 def HaveFilesFromAndroid(symfiles): | 881 def HaveFilesFromAndroid(symfiles): |
| 441 return any(ANDROID_PATH_MATCHER.match(f.path) for f in symfiles) | 882 return any(ANDROID_PATH_MATCHER.match(f.path) for f in symfiles) |
| 442 | 883 |
| 443 | 884 |
| 444 def RemapAndroidFiles(symfiles, output_path): | 885 def RemapAndroidFiles(symfiles, output_path): |
| 445 for symfile in symfiles: | 886 for symfile in symfiles: |
| 446 match = ANDROID_PATH_MATCHER.match(symfile.path) | 887 match = ANDROID_PATH_MATCHER.match(symfile.path) |
| 447 if match: | 888 if match: |
| 448 name = match.group('name') | 889 name = match.group('name') |
| 449 symfile.symbolizable_path = os.path.join( | 890 symfile.symbolizable_path = os.path.join( |
| 450 output_path, ANDROID_UNSTRIPPED_SUBPATH, name) | 891 output_path, ANDROID_UNSTRIPPED_SUBPATH, name) |
| 451 else: | 892 else: |
| 452 # Clobber file path to trigger "not a file" problem in SymbolizeFiles(). | 893 # Clobber file path to trigger "not a file" problem in SymbolizeFiles(). |
| 453 # Without this, files won't be symbolized with "file not found" problem, | 894 # Without this, files won't be symbolized with "file not found" problem, |
| 454 # which is not accurate. | 895 # which is not accurate. |
| 455 symfile.symbolizable_path = 'android://{}'.format(symfile.path) | 896 symfile.symbolizable_path = 'android://{}'.format(symfile.path) |
| 456 | 897 |
| 457 | 898 |
| 899 def Symbolize(options, trace, symbolizer): | |
| 900 symfiles = ResolveSymbolizableFiles(trace.processes) | |
| 901 | |
| 902 # Android trace files don't have any indication they are from Android. | |
| 903 # So we're checking for Android-specific paths. | |
| 904 if HaveFilesFromAndroid(symfiles): | |
| 905 if not options.output_directory: | |
| 906 sys.exit('The trace file appears to be from Android. Please ' | |
| 907 'specify output directory to properly symbolize it.') | |
| 908 RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory)) | |
| 909 | |
| 910 SymbolizeFiles(symfiles, symbolizer) | |
| 911 | |
| 912 | |
| 913 def OpenTraceFile(file_path, mode): | |
| 914 if file_path.endswith('.gz'): | |
| 915 return gzip.open(file_path, mode + 'b') | |
| 916 else: | |
| 917 return open(file_path, mode + 't') | |
| 918 | |
| 919 | |
| 458 # Suffix used for backup files. | 920 # Suffix used for backup files. |
| 459 BACKUP_FILE_TAG = '.BACKUP' | 921 BACKUP_FILE_TAG = '.BACKUP' |
| 460 | 922 |
| 461 def main(): | 923 def main(): |
| 462 parser = argparse.ArgumentParser() | 924 class MultilineHelpFormatter(argparse.HelpFormatter): |
| 463 parser.add_argument('file', | 925 def _split_lines(self, text, width): |
| 464 help='Trace file to symbolize (.json or .json.gz)') | 926 extra_lines = [] |
| 465 parser.add_argument('--no-backup', | 927 if '\n' in text: |
| 466 dest='backup', default='true', action='store_false', | 928 lines = text.splitlines() |
| 467 help="Don't create {} files".format(BACKUP_FILE_TAG)) | 929 text = lines[0] |
| 468 parser.add_argument('--output-directory', | 930 extra_lines = lines[1:] |
| 469 help='The path to the build output directory, such ' + | 931 return super(MultilineHelpFormatter, self)._split_lines(text, width) + \ |
| 470 'as out/Debug. Only needed for Android.') | 932 extra_lines |
| 471 options = parser.parse_args() | |
| 472 | 933 |
| 473 trace_file_path = options.file | 934 parser = argparse.ArgumentParser(formatter_class=MultilineHelpFormatter) |
| 474 def _OpenTraceFile(mode): | 935 parser.add_argument( |
| 475 if trace_file_path.endswith('.gz'): | 936 'file', |
| 476 return gzip.open(trace_file_path, mode + 'b') | 937 help='Trace file to symbolize (.json or .json.gz)') |
| 477 else: | 938 |
| 478 return open(trace_file_path, mode + 't') | 939 parser.add_argument( |
| 940 '--no-backup', dest='backup', default='true', action='store_false', | |
| 941 help="Don't create {} files".format(BACKUP_FILE_TAG)) | |
| 942 | |
| 943 parser.add_argument( | |
| 944 '--output-directory', | |
| 945 help='The path to the build output directory, such as out/Debug.') | |
| 479 | 946 |
| 480 symbolizer = Symbolizer() | 947 symbolizer = Symbolizer() |
| 481 if symbolizer.symbolizer_path is None: | 948 if symbolizer.symbolizer_path is None: |
| 482 sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) | 949 sys.exit("Can't symbolize - no %s in PATH." % symbolizer.binary) |
| 483 | 950 |
| 951 options = parser.parse_args() | |
| 952 | |
| 953 trace_file_path = options.file | |
| 954 | |
| 484 print 'Reading trace file...' | 955 print 'Reading trace file...' |
| 485 with _OpenTraceFile('r') as trace_file: | 956 with OpenTraceFile(trace_file_path, 'r') as trace_file: |
| 486 trace = json.load(trace_file) | 957 trace = Trace(json.load(trace_file)) |
| 487 | 958 |
| 488 processes = CollectProcesses(trace) | 959 Symbolize(options, trace, symbolizer) |
| 489 symfiles = ResolveSymbolizableFiles(processes) | |
| 490 | 960 |
| 491 # Android trace files don't have any indication they are from Android. | 961 if trace.modified: |
| 492 # So we're checking for Android-specific paths. | 962 trace.ApplyModifications() |
| 493 if HaveFilesFromAndroid(symfiles): | |
| 494 if not options.output_directory: | |
| 495 parser.error('The trace file appears to be from Android. Please ' | |
| 496 "specify output directory (e.g. 'out/Debug') to properly " | |
| 497 'symbolize it.') | |
| 498 RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory)) | |
| 499 | 963 |
| 500 if SymbolizeFiles(symfiles, symbolizer): | |
| 501 if options.backup: | 964 if options.backup: |
| 502 backup_file_path = trace_file_path + BACKUP_FILE_TAG | 965 backup_file_path = trace_file_path + BACKUP_FILE_TAG |
| 503 print 'Backing up trace file to {}...'.format(backup_file_path) | 966 if os.path.exists(backup_file_path): |
| 967 for i in itertools.count(1): | |
| 968 unique_file_path = '{}{}'.format(backup_file_path, i) | |
| 969 if not os.path.exists(unique_file_path): | |
| 970 backup_file_path = unique_file_path | |
| 971 break | |
| 972 print 'Backing up trace file to {}'.format(backup_file_path) | |
| 504 os.rename(trace_file_path, backup_file_path) | 973 os.rename(trace_file_path, backup_file_path) |
| 505 | 974 |
| 506 print 'Updating trace file...' | 975 print 'Updating the trace file...' |
| 507 with _OpenTraceFile('w') as trace_file: | 976 with OpenTraceFile(trace_file_path, 'w') as trace_file: |
| 508 json.dump(trace, trace_file) | 977 json.dump(trace.Serialize(), trace_file) |
| 509 else: | 978 else: |
| 510 print 'No PCs symbolized - not updating trace file.' | 979 print 'No modifications were made - not updating the trace file.' |
| 511 | 980 |
| 512 | 981 |
| 513 if __name__ == '__main__': | 982 if __name__ == '__main__': |
| 514 main() | 983 main() |
| OLD | NEW |