tools/metrics/common/pretty_print_xml.py - Issue 149503005: Change actions.txt to actions.xml

Side by Side Diff: tools/metrics/common/pretty_print_xml.py

Issue 149503005: Change actions.txt to actions.xml (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Reupload(Resolve old chunk mismatch) Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 #!/usr/bin/env python

2 # Copyright 2013 The Chromium Authors. All rights reserved.	1 # Copyright 2013 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	3 # found in the LICENSE file.

5	4

6 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text	5 """Utility file for pretty print xml file."""
	Alexei Svitkine (slow) 2014/02/03 19:37:34: This description should be more clear, otherwise it's just repeating the file name. For example, mention that this function is used for formatting both histograms and actions XML files. yao 2014/02/04 19:08:12: On 2014/02/03 19:37:34, Alexei Svitkine wrote: > This description should be more clear, otherwise it's just repeating the file > name. For example, mention that this function is used for formatting both > histograms and actions XML files. Done.
7 at 80 chars, enforcing standard attribute ordering, and standardizing

8 indentation.

9	6

10 This is quite a bit more complicated than just calling tree.toprettyxml();

11 we need additional customization, like special attribute ordering in tags

12 and wrapping text nodes, so we implement our own full custom XML pretty-printer.

13 """

14

15 from __future__ import with_statement

16

17 import diffutil

18 import json

19 import logging	7 import logging

20 import os

21 import shutil

22 import sys

23 import textwrap	8 import textwrap

24 import xml.dom.minidom	9 import xml.dom.minidom

25	10

26 sys.path.insert(1, os.path.join(sys.path[0], '..', '..', 'python'))

27 from google import path_utils

28

29 WRAP_COLUMN = 80	11 WRAP_COLUMN = 80

30	12

31 # Desired order for tag attributes; attributes listed here will appear first,

32 # and in the same order as in these lists.

33 # { tag_name: [attribute_name, ...] }

34 ATTRIBUTE_ORDER = {

35 'enum': ['name', 'type'],

36 'histogram': ['name', 'enum', 'units'],

37 'int': ['value', 'label'],

38 'fieldtrial': ['name', 'separator', 'ordering'],

39 'group': ['name', 'label'],

40 'affected-histogram': ['name'],

41 'with-group': ['name'],

42 }

43

44 # Tag names for top-level nodes whose children we don't want to indent.

45 TAGS_THAT_DONT_INDENT = [

46 'histogram-configuration',

47 'histograms',

48 'fieldtrials',

49 'enums'

50 ]

51

52 # Extra vertical spacing rules for special tag names.

53 # {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}

54 TAGS_THAT_HAVE_EXTRA_NEWLINE = {

55 'histogram-configuration': (2, 1, 1),

56 'histograms': (2, 1, 1),

57 'fieldtrials': (2, 1, 1),

58 'enums': (2, 1, 1),

59 'histogram': (1, 1, 1),

60 'enum': (1, 1, 1),

61 'fieldtrial': (1, 1, 1),

62 }

63

64 # Tags that we allow to be squished into a single line for brevity.

65 TAGS_THAT_ALLOW_SINGLE_LINE = [

66 'summary',

67 'int',

68 ]

69

70 # Tags whose children we want to alphabetize. The key is the parent tag name,

71 # and the value is a pair of the tag name of the children we want to sort,

72 # and a key function that maps each child node to the desired sort key.

73 ALPHABETIZATION_RULES = {

74 'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()),

75 'enums': ('enum', lambda n: n.attributes['name'].value.lower()),

76 'enum': ('int', lambda n: int(n.attributes['value'].value)),

77 'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()),

78 'fieldtrial': ('affected-histogram',

79 lambda n: n.attributes['name'].value.lower()),

80 }

81

82	13

83 class Error(Exception):	14 class Error(Exception):

84 pass	15 pass

85	16

86	17

87 def LastLineLength(s):	18 def LastLineLength(s):

88 """Returns the length of the last line in s.	19 """Returns the length of the last line in s.

89	20

90 Args:	21 Args:

91 s: A multi-line string, including newlines.	22 s: A multi-line string, including newlines.

92	23

93 Returns:	24 Returns:

94 The length of the last line in s, in characters.	25 The length of the last line in s, in characters.

95 """	26 """

96 if s.rfind('\n') == -1: return len(s)	27 if s.rfind('\n') == -1: return len(s)

97 return len(s) - s.rfind('\n') - len('\n')	28 return len(s) - s.rfind('\n') - len('\n')

98	29

99	30

100 def XmlEscape(s):	31 def XmlEscape(s):

101 """XML-escapes the given string, replacing magic characters (&<>") with their	32 """XML-escapes the given string, replacing magic characters (&<>") with their

102 escaped equivalents."""	33 escaped equivalents."""

103 s = s.replace("&", "&amp;").replace("<", "&lt;")	34 s = s.replace("&", "&amp;").replace("<", "&lt;")

104 s = s.replace("\"", "&quot;").replace(">", "&gt;")	35 s = s.replace("\"", "&quot;").replace(">", "&gt;")

105 return s	36 return s

106	37

107	38

108 def PrettyPrintNode(node, indent=0):	39 class XmlStyle(object):

	40 """A class that stores all style specification for an output xml file."""

	41

	42 def __init__(self, attribute_order, tags_that_have_extra_newline,

	43 tags_that_dont_indent, tags_that_allow_single_line):

	44 # List of tag names for top-level nodes whose children are not indented.

	45 self.attribute_order = attribute_order

	46 self.tags_that_have_extra_newline = tags_that_have_extra_newline

	47 self.tags_that_dont_indent = tags_that_dont_indent

	48 self.tags_that_allow_single_line = tags_that_allow_single_line

	49

	50

	51 def PrettyPrintNode(node, xml_style, indent=0):
	Alexei Svitkine (slow) 2014/02/03 19:37:34: Now that you have the XmlStyle object, can you make this a method of that class, rather than a free-standing function? yao 2014/02/04 19:08:12: On 2014/02/03 19:37:34, Alexei Svitkine wrote: > Now that you have the XmlStyle object, can you make this a method of that class, > rather than a free-standing function? Done.
109 """Pretty-prints the given XML node at the given indent level.	52 """Pretty-prints the given XML node at the given indent level.

110	53

111 Args:	54 Args:

112 node: The minidom node to pretty-print.	55 node: The minidom node to pretty-print.

	56 xml_style: An XmlStyle object that represents the style requirement of the

	57 output xml file.

113 indent: The current indent level.	58 indent: The current indent level.

114	59

115 Returns:	60 Returns:

116 The pretty-printed string (including embedded newlines).	61 The pretty-printed string (including embedded newlines).

117	62

118 Raises:	63 Raises:

119 Error if the XML has unknown tags or attributes.	64 Error if the XML has unknown tags or attributes.

120 """	65 """

121 # Handle the top-level document node.	66 # Handle the top-level document node.

122 if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE:	67 if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE:

123 return '\n'.join([PrettyPrintNode(n) for n in node.childNodes])	68 return '\n'.join([PrettyPrintNode(n, xml_style) for n in node.childNodes])

124	69

125 # Handle text nodes.	70 # Handle text nodes.

126 if node.nodeType == xml.dom.minidom.Node.TEXT_NODE:	71 if node.nodeType == xml.dom.minidom.Node.TEXT_NODE:

127 # Wrap each paragraph in the text to fit in the 80 column limit.	72 # Wrap each paragraph in the text to fit in the 80 column limit.

128 wrapper = textwrap.TextWrapper()	73 wrapper = textwrap.TextWrapper()

129 wrapper.initial_indent = ' ' * indent	74 wrapper.initial_indent = ' ' * indent

130 wrapper.subsequent_indent = ' ' * indent	75 wrapper.subsequent_indent = ' ' * indent

131 wrapper.break_on_hyphens = False	76 wrapper.break_on_hyphens = False

132 wrapper.break_long_words = False	77 wrapper.break_long_words = False

133 wrapper.width = WRAP_COLUMN	78 wrapper.width = WRAP_COLUMN

(...skipping 10 matching lines...) Expand all Loading...
144 paragraphs[-1].append(l)	89 paragraphs[-1].append(l)

145 # Remove trailing empty paragraph if present.	90 # Remove trailing empty paragraph if present.

146 if len(paragraphs) > 0 and len(paragraphs[-1]) == 0:	91 if len(paragraphs) > 0 and len(paragraphs[-1]) == 0:

147 paragraphs = paragraphs[:-1]	92 paragraphs = paragraphs[:-1]

148 # Wrap each paragraph and separate with two newlines.	93 # Wrap each paragraph and separate with two newlines.

149 return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])	94 return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])

150	95

151 # Handle element nodes.	96 # Handle element nodes.

152 if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:	97 if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:

153 newlines_after_open, newlines_before_close, newlines_after_close = (	98 newlines_after_open, newlines_before_close, newlines_after_close = (

154 TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0)))	99 xml_style.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0)))

155 # Open the tag.	100 # Open the tag.

156 s = ' ' * indent + '<' + node.tagName	101 s = ' ' * indent + '<' + node.tagName

157	102

158 # Calculate how much space to allow for the '>' or '/>'.	103 # Calculate how much space to allow for the '>' or '/>'.

159 closing_chars = 1	104 closing_chars = 1

160 if not node.childNodes:	105 if not node.childNodes:

161 closing_chars = 2	106 closing_chars = 2

162	107

163 # Pretty-print the attributes.	108 # Pretty-print the attributes.

164 attributes = node.attributes.keys()	109 attributes = node.attributes.keys()

165 if attributes:	110 if attributes:

166 # Reorder the attributes.	111 # Reorder the attributes.

167 if not node.tagName in ATTRIBUTE_ORDER:	112 if node.tagName not in xml_style.attribute_order:

168 unrecognized_attributes = attributes;	113 unrecognized_attributes = attributes

169 else:	114 else:

170 unrecognized_attributes = (	115 unrecognized_attributes = (

171 [a for a in attributes if not a in ATTRIBUTE_ORDER[node.tagName]])	116 [a for a in attributes

172 attributes = (	117 if a not in xml_style.attribute_order[node.tagName]])

173 [a for a in ATTRIBUTE_ORDER[node.tagName] if a in attributes])	118 attributes = [a for a in xml_style.attribute_order[node.tagName]

	119 if a in attributes]

174	120

175 for a in unrecognized_attributes:	121 for a in unrecognized_attributes:

176 logging.error(	122 logging.error(

177 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName))	123 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName))

178 if unrecognized_attributes:	124 if unrecognized_attributes:

179 raise Error()	125 raise Error()

180	126

181 for a in attributes:	127 for a in attributes:

182 value = XmlEscape(node.attributes[a].value)	128 value = XmlEscape(node.attributes[a].value)

183 # Replace sequences of whitespace with single spaces.	129 # Replace sequences of whitespace with single spaces.

(...skipping 18 matching lines...) Expand all Loading...
202 column += len(word) + 1	148 column += len(word) + 1

203 s = s.rstrip() # remove any trailing whitespace	149 s = s.rstrip() # remove any trailing whitespace

204 s += '"'	150 s += '"'

205 s = s.rstrip() # remove any trailing whitespace	151 s = s.rstrip() # remove any trailing whitespace

206	152

207 # Pretty-print the child nodes.	153 # Pretty-print the child nodes.

208 if node.childNodes:	154 if node.childNodes:

209 s += '>'	155 s += '>'

210 # Calculate the new indent level for child nodes.	156 # Calculate the new indent level for child nodes.

211 new_indent = indent	157 new_indent = indent

212 if node.tagName not in TAGS_THAT_DONT_INDENT:	158 if node.tagName not in xml_style.tags_that_dont_indent:

213 new_indent += 2	159 new_indent += 2

214 child_nodes = node.childNodes	160 child_nodes = node.childNodes

215	161

216 # Recursively pretty-print the child nodes.	162 # Recursively pretty-print the child nodes.

217 child_nodes = [PrettyPrintNode(n, indent=new_indent) for n in child_nodes]	163 child_nodes = [PrettyPrintNode(n, xml_style, indent=new_indent)

	164 for n in child_nodes]

218 child_nodes = [c for c in child_nodes if len(c.strip()) > 0]	165 child_nodes = [c for c in child_nodes if len(c.strip()) > 0]

219	166

220 # Determine whether we can fit the entire node on a single line.	167 # Determine whether we can fit the entire node on a single line.

221 close_tag = '</%s>' % node.tagName	168 close_tag = '</%s>' % node.tagName

222 space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)	169 space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)

223 if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and	170 if (node.tagName in xml_style.tags_that_allow_single_line and

224 len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left):	171 len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left):

225 s += child_nodes[0].strip()	172 s += child_nodes[0].strip()

226 else:	173 else:

227 s += '\n' * newlines_after_open + '\n'.join(child_nodes)	174 s += '\n' * newlines_after_open + '\n'.join(child_nodes)

228 s += '\n' * newlines_before_close + ' ' * indent	175 s += '\n' * newlines_before_close + ' ' * indent

229 s += close_tag	176 s += close_tag

230 else:	177 else:

231 s += '/>'	178 s += '/>'

232 s += '\n' * newlines_after_close	179 s += '\n' * newlines_after_close

233 return s	180 return s

234	181

235 # Handle comment nodes.	182 # Handle comment nodes.

236 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE:	183 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE:

237 return '<!--%s-->\n' % node.data	184 return '<!--%s-->\n' % node.data

238	185

239 # Ignore other node types. This could be a processing instruction (<? ... ?>)	186 # Ignore other node types. This could be a processing instruction (<? ... ?>)

240 # or cdata section (<![CDATA[...]]!>), neither of which are legal in the	187 # or cdata section (<![CDATA[...]]!>), neither of which are legal in the

241 # histograms XML at present.	188 # histograms XML at present.

242 logging.error('Ignoring unrecognized node data: %s' % node.toxml())	189 logging.error('Ignoring unrecognized node data: %s' % node.toxml())

243 raise Error()	190 raise Error()

244

245

246 def unsafeAppendChild(parent, child):

247 """Append child to parent's list of children, ignoring the possibility that it

248 is already in another node's childNodes list. Requires that the previous

249 parent of child is discarded (to avoid non-tree DOM graphs).

250 This can provide a significant speedup as O(n^2) operations are removed (in

251 particular, each child insertion avoids the need to traverse the old parent's

252 entire list of children)."""

253 child.parentNode = None

254 parent.appendChild(child)

255 child.parentNode = parent

256

257

258 def TransformByAlphabetizing(node):

259 """Transform the given XML by alphabetizing specific node types according to

260 the rules in ALPHABETIZATION_RULES.

261

262 Args:

263 node: The minidom node to transform.

264

265 Returns:

266 The minidom node, with children appropriately alphabetized. Note that the

267 transformation is done in-place, i.e. the original minidom tree is modified

268 directly.

269 """

270 if node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:

271 for c in node.childNodes: TransformByAlphabetizing(c)

272 return node

273

274 # Element node with a tag name that we alphabetize the children of?

275 if node.tagName in ALPHABETIZATION_RULES:

276 # Put subnodes in a list of node,key pairs to allow for custom sorting.

277 subtag, key_function = ALPHABETIZATION_RULES[node.tagName]

278 subnodes = []

279 last_key = -1

280 for c in node.childNodes:

281 if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and

282 c.tagName == subtag):

283 last_key = key_function(c)

284 # Subnodes that we don't want to rearrange use the last node's key,

285 # so they stay in the same relative position.

286 subnodes.append( (c, last_key) )

287

288 # Sort the subnode list.

289 subnodes.sort(key=lambda pair: pair[1])

290

291 # Re-add the subnodes, transforming each recursively.

292 while node.firstChild:

293 node.removeChild(node.firstChild)

294 for (c, _) in subnodes:

295 unsafeAppendChild(node, TransformByAlphabetizing(c))

296 return node

297

298 # Recursively handle other element nodes and other node types.

299 for c in node.childNodes: TransformByAlphabetizing(c)

300 return node

301

302

303 def PrettyPrint(raw_xml):

304 """Pretty-print the given XML.

305

306 Args:

307 xml: The contents of the histograms XML file, as a string.

308

309 Returns:

310 The pretty-printed version.

311 """

312 tree = xml.dom.minidom.parseString(raw_xml)

313 tree = TransformByAlphabetizing(tree)

314 return PrettyPrintNode(tree)

315

316

317 def main():

318 logging.basicConfig(level=logging.INFO)

319

320 presubmit = ('--presubmit' in sys.argv)

321

322 histograms_filename = 'histograms.xml'

323 histograms_backup_filename = 'histograms.before.pretty-print.xml'

324

325 script_dir = path_utils.ScriptDir()

326

327 histograms_pathname = os.path.join(script_dir, histograms_filename)

328 histograms_backup_pathname = os.path.join(script_dir,

329 histograms_backup_filename)

330

331 logging.info('Loading %s...' % histograms_filename)

332 with open(histograms_pathname, 'rb') as f:

333 xml = f.read()

334

335 # Check there are no CR ('\r') characters in the file.

336 if '\r' in xml:

337 logging.info('DOS-style line endings (CR characters) detected - these are '

338 'not allowed. Please run dos2unix %s' % histograms_filename)

339 sys.exit(1)

340

341 logging.info('Pretty-printing...')

342 try:

343 pretty = PrettyPrint(xml)

344 except Error:

345 logging.error('Aborting parsing due to fatal errors.')

346 sys.exit(1)

347

348 if xml == pretty:

349 logging.info('%s is correctly pretty-printed.' % histograms_filename)

350 sys.exit(0)

351 if presubmit:

352 logging.info('%s is not formatted correctly; run pretty_print.py to fix.' %

353 histograms_filename)

354 sys.exit(1)

355 if not diffutil.PromptUserToAcceptDiff(

356 xml, pretty,

357 'Is the prettified version acceptable?'):

358 logging.error('Aborting')

359 return

360

361 logging.info('Creating backup file %s' % histograms_backup_filename)

362 shutil.move(histograms_pathname, histograms_backup_pathname)

363

364 logging.info('Writing new %s file' % histograms_filename)

365 with open(histograms_pathname, 'wb') as f:

366 f.write(pretty)

367

368

369 if __name__ == '__main__':

370 main()

OLD	NEW

« tools/metrics/actions/extract_actions.py ('K') | « tools/metrics/common/diffutil.py ('k') | tools/metrics/histograms/diffutil.py » ('j') | no next file with comments »