Chromium Code Reviews| Index: tools/metrics/common/pretty_print_xml.py |
| diff --git a/tools/metrics/histograms/pretty_print.py b/tools/metrics/common/pretty_print_xml.py |
| old mode 100755 |
| new mode 100644 |
| similarity index 43% |
| copy from tools/metrics/histograms/pretty_print.py |
| copy to tools/metrics/common/pretty_print_xml.py |
| index 60e8c7833e59e5f0020d3188881526e0fe462385..396b098ef06487f530f2fb30307522919bea9db9 |
| --- a/tools/metrics/histograms/pretty_print.py |
| +++ b/tools/metrics/common/pretty_print_xml.py |
| @@ -1,84 +1,15 @@ |
| -#!/usr/bin/env python |
| # Copyright 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| -"""Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text |
| -at 80 chars, enforcing standard attribute ordering, and standardizing |
| -indentation. |
| +"""Utility functions for pretty-printing XML files according to a given style.""" |
|
Alexei Svitkine (slow)
2014/02/03 19:37:34
This description should be more clear, otherwise i
yao
2014/02/04 19:08:12
Done.
|
| -This is quite a bit more complicated than just calling tree.toprettyxml(); |
| -we need additional customization, like special attribute ordering in tags |
| -and wrapping text nodes, so we implement our own full custom XML pretty-printer. |
| -""" |
| - |
| -from __future__ import with_statement |
| - |
| -import diffutil |
| -import json |
| import logging |
| -import os |
| -import shutil |
| -import sys |
| import textwrap |
| import xml.dom.minidom |
| -sys.path.insert(1, os.path.join(sys.path[0], '..', '..', 'python')) |
| -from google import path_utils |
| - |
| WRAP_COLUMN = 80 |
| -# Desired order for tag attributes; attributes listed here will appear first, |
| -# and in the same order as in these lists. |
| -# { tag_name: [attribute_name, ...] } |
| -ATTRIBUTE_ORDER = { |
| - 'enum': ['name', 'type'], |
| - 'histogram': ['name', 'enum', 'units'], |
| - 'int': ['value', 'label'], |
| - 'fieldtrial': ['name', 'separator', 'ordering'], |
| - 'group': ['name', 'label'], |
| - 'affected-histogram': ['name'], |
| - 'with-group': ['name'], |
| -} |
| - |
| -# Tag names for top-level nodes whose children we don't want to indent. |
| -TAGS_THAT_DONT_INDENT = [ |
| - 'histogram-configuration', |
| - 'histograms', |
| - 'fieldtrials', |
| - 'enums' |
| -] |
| - |
| -# Extra vertical spacing rules for special tag names. |
| -# {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)} |
| -TAGS_THAT_HAVE_EXTRA_NEWLINE = { |
| - 'histogram-configuration': (2, 1, 1), |
| - 'histograms': (2, 1, 1), |
| - 'fieldtrials': (2, 1, 1), |
| - 'enums': (2, 1, 1), |
| - 'histogram': (1, 1, 1), |
| - 'enum': (1, 1, 1), |
| - 'fieldtrial': (1, 1, 1), |
| -} |
| - |
| -# Tags that we allow to be squished into a single line for brevity. |
| -TAGS_THAT_ALLOW_SINGLE_LINE = [ |
| - 'summary', |
| - 'int', |
| -] |
| - |
| -# Tags whose children we want to alphabetize. The key is the parent tag name, |
| -# and the value is a pair of the tag name of the children we want to sort, |
| -# and a key function that maps each child node to the desired sort key. |
| -ALPHABETIZATION_RULES = { |
| - 'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()), |
| - 'enums': ('enum', lambda n: n.attributes['name'].value.lower()), |
| - 'enum': ('int', lambda n: int(n.attributes['value'].value)), |
| - 'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()), |
| - 'fieldtrial': ('affected-histogram', |
| - lambda n: n.attributes['name'].value.lower()), |
| -} |
| - |
| class Error(Exception): |
| pass |
| @@ -105,11 +36,25 @@ def XmlEscape(s): |
| return s |
| -def PrettyPrintNode(node, indent=0): |
| +class XmlStyle(object): |
| + """A class that stores all style specifications for an output XML file.""" |
| + |
| + def __init__(self, attribute_order, tags_that_have_extra_newline, |
| + tags_that_dont_indent, tags_that_allow_single_line): |
| + # Style rules that PrettyPrintNode looks up by tag name while formatting. |
| + self.attribute_order = attribute_order |
| + self.tags_that_have_extra_newline = tags_that_have_extra_newline |
| + self.tags_that_dont_indent = tags_that_dont_indent |
| + self.tags_that_allow_single_line = tags_that_allow_single_line |
| + |
| + |
| +def PrettyPrintNode(node, xml_style, indent=0): |
|
Alexei Svitkine (slow)
2014/02/03 19:37:34
Now that you have the XmlStyle object, can you mak
yao
2014/02/04 19:08:12
Done.
|
| """Pretty-prints the given XML node at the given indent level. |
| Args: |
| node: The minidom node to pretty-print. |
| + xml_style: An XmlStyle object that represents the style requirements of the |
| + output XML file. |
| indent: The current indent level. |
| Returns: |
| @@ -120,7 +65,7 @@ def PrettyPrintNode(node, indent=0): |
| """ |
| # Handle the top-level document node. |
| if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE: |
| - return '\n'.join([PrettyPrintNode(n) for n in node.childNodes]) |
| + return '\n'.join([PrettyPrintNode(n, xml_style) for n in node.childNodes]) |
| # Handle text nodes. |
| if node.nodeType == xml.dom.minidom.Node.TEXT_NODE: |
| @@ -151,7 +96,7 @@ def PrettyPrintNode(node, indent=0): |
| # Handle element nodes. |
| if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE: |
| newlines_after_open, newlines_before_close, newlines_after_close = ( |
| - TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0))) |
| + xml_style.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0))) |
| # Open the tag. |
| s = ' ' * indent + '<' + node.tagName |
| @@ -164,13 +109,14 @@ def PrettyPrintNode(node, indent=0): |
| attributes = node.attributes.keys() |
| if attributes: |
| # Reorder the attributes. |
| - if not node.tagName in ATTRIBUTE_ORDER: |
| - unrecognized_attributes = attributes; |
| + if node.tagName not in xml_style.attribute_order: |
| + unrecognized_attributes = attributes |
| else: |
| unrecognized_attributes = ( |
| - [a for a in attributes if not a in ATTRIBUTE_ORDER[node.tagName]]) |
| - attributes = ( |
| - [a for a in ATTRIBUTE_ORDER[node.tagName] if a in attributes]) |
| + [a for a in attributes |
| + if a not in xml_style.attribute_order[node.tagName]]) |
| + attributes = [a for a in xml_style.attribute_order[node.tagName] |
| + if a in attributes] |
| for a in unrecognized_attributes: |
| logging.error( |
| @@ -209,18 +155,19 @@ def PrettyPrintNode(node, indent=0): |
| s += '>' |
| # Calculate the new indent level for child nodes. |
| new_indent = indent |
| - if node.tagName not in TAGS_THAT_DONT_INDENT: |
| + if node.tagName not in xml_style.tags_that_dont_indent: |
| new_indent += 2 |
| child_nodes = node.childNodes |
| # Recursively pretty-print the child nodes. |
| - child_nodes = [PrettyPrintNode(n, indent=new_indent) for n in child_nodes] |
| + child_nodes = [PrettyPrintNode(n, xml_style, indent=new_indent) |
| + for n in child_nodes] |
| child_nodes = [c for c in child_nodes if len(c.strip()) > 0] |
| # Determine whether we can fit the entire node on a single line. |
| close_tag = '</%s>' % node.tagName |
| space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag) |
| - if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and |
| + if (node.tagName in xml_style.tags_that_allow_single_line and |
| len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left): |
| s += child_nodes[0].strip() |
| else: |
| @@ -241,130 +188,3 @@ def PrettyPrintNode(node, indent=0): |
| # histograms XML at present. |
| logging.error('Ignoring unrecognized node data: %s' % node.toxml()) |
| raise Error() |
| - |
| - |
| -def unsafeAppendChild(parent, child): |
| - """Append child to parent's list of children, ignoring the possibility that it |
| - is already in another node's childNodes list. Requires that the previous |
| - parent of child is discarded (to avoid non-tree DOM graphs). |
| - This can provide a significant speedup as O(n^2) operations are removed (in |
| - particular, each child insertion avoids the need to traverse the old parent's |
| - entire list of children).""" |
| - child.parentNode = None |
| - parent.appendChild(child) |
| - child.parentNode = parent |
| - |
| - |
| -def TransformByAlphabetizing(node): |
| - """Transform the given XML by alphabetizing specific node types according to |
| - the rules in ALPHABETIZATION_RULES. |
| - |
| - Args: |
| - node: The minidom node to transform. |
| - |
| - Returns: |
| - The minidom node, with children appropriately alphabetized. Note that the |
| - transformation is done in-place, i.e. the original minidom tree is modified |
| - directly. |
| - """ |
| - if node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE: |
| - for c in node.childNodes: TransformByAlphabetizing(c) |
| - return node |
| - |
| - # Element node with a tag name that we alphabetize the children of? |
| - if node.tagName in ALPHABETIZATION_RULES: |
| - # Put subnodes in a list of node,key pairs to allow for custom sorting. |
| - subtag, key_function = ALPHABETIZATION_RULES[node.tagName] |
| - subnodes = [] |
| - last_key = -1 |
| - for c in node.childNodes: |
| - if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and |
| - c.tagName == subtag): |
| - last_key = key_function(c) |
| - # Subnodes that we don't want to rearrange use the last node's key, |
| - # so they stay in the same relative position. |
| - subnodes.append( (c, last_key) ) |
| - |
| - # Sort the subnode list. |
| - subnodes.sort(key=lambda pair: pair[1]) |
| - |
| - # Re-add the subnodes, transforming each recursively. |
| - while node.firstChild: |
| - node.removeChild(node.firstChild) |
| - for (c, _) in subnodes: |
| - unsafeAppendChild(node, TransformByAlphabetizing(c)) |
| - return node |
| - |
| - # Recursively handle other element nodes and other node types. |
| - for c in node.childNodes: TransformByAlphabetizing(c) |
| - return node |
| - |
| - |
| -def PrettyPrint(raw_xml): |
| - """Pretty-print the given XML. |
| - |
| - Args: |
| - xml: The contents of the histograms XML file, as a string. |
| - |
| - Returns: |
| - The pretty-printed version. |
| - """ |
| - tree = xml.dom.minidom.parseString(raw_xml) |
| - tree = TransformByAlphabetizing(tree) |
| - return PrettyPrintNode(tree) |
| - |
| - |
| -def main(): |
| - logging.basicConfig(level=logging.INFO) |
| - |
| - presubmit = ('--presubmit' in sys.argv) |
| - |
| - histograms_filename = 'histograms.xml' |
| - histograms_backup_filename = 'histograms.before.pretty-print.xml' |
| - |
| - script_dir = path_utils.ScriptDir() |
| - |
| - histograms_pathname = os.path.join(script_dir, histograms_filename) |
| - histograms_backup_pathname = os.path.join(script_dir, |
| - histograms_backup_filename) |
| - |
| - logging.info('Loading %s...' % histograms_filename) |
| - with open(histograms_pathname, 'rb') as f: |
| - xml = f.read() |
| - |
| - # Check there are no CR ('\r') characters in the file. |
| - if '\r' in xml: |
| - logging.info('DOS-style line endings (CR characters) detected - these are ' |
| - 'not allowed. Please run dos2unix %s' % histograms_filename) |
| - sys.exit(1) |
| - |
| - logging.info('Pretty-printing...') |
| - try: |
| - pretty = PrettyPrint(xml) |
| - except Error: |
| - logging.error('Aborting parsing due to fatal errors.') |
| - sys.exit(1) |
| - |
| - if xml == pretty: |
| - logging.info('%s is correctly pretty-printed.' % histograms_filename) |
| - sys.exit(0) |
| - if presubmit: |
| - logging.info('%s is not formatted correctly; run pretty_print.py to fix.' % |
| - histograms_filename) |
| - sys.exit(1) |
| - if not diffutil.PromptUserToAcceptDiff( |
| - xml, pretty, |
| - 'Is the prettified version acceptable?'): |
| - logging.error('Aborting') |
| - return |
| - |
| - logging.info('Creating backup file %s' % histograms_backup_filename) |
| - shutil.move(histograms_pathname, histograms_backup_pathname) |
| - |
| - logging.info('Writing new %s file' % histograms_filename) |
| - with open(histograms_pathname, 'wb') as f: |
| - f.write(pretty) |
| - |
| - |
| -if __name__ == '__main__': |
| - main() |