Index: trunk/src/tools/metrics/common/pretty_print_xml.py |
=================================================================== |
--- trunk/src/tools/metrics/common/pretty_print_xml.py (revision 255402) |
+++ trunk/src/tools/metrics/common/pretty_print_xml.py (working copy) |
@@ -1,193 +0,0 @@ |
-# Copyright 2014 The Chromium Authors. All rights reserved. |
-# Use of this source code is governed by a BSD-style license that can be |
-# found in the LICENSE file. |
- |
-"""Utility file for pretty print xml file. |
- |
-The function PrettyPrintNode will be used for formatting both histograms.xml |
-and actions.xml. |
-""" |
- |
-import logging |
-import textwrap |
-import xml.dom.minidom |
- |
-WRAP_COLUMN = 80 |
- |
- |
-class Error(Exception): |
- pass |
- |
- |
-def LastLineLength(s): |
- """Returns the length of the last line in s. |
- |
- Args: |
- s: A multi-line string, including newlines. |
- |
- Returns: |
- The length of the last line in s, in characters. |
- """ |
- if s.rfind('\n') == -1: return len(s) |
- return len(s) - s.rfind('\n') - len('\n') |
- |
- |
-def XmlEscape(s): |
- """XML-escapes the given string, replacing magic characters (&<>") with their |
- escaped equivalents.""" |
- s = s.replace("&", "&").replace("<", "<") |
- s = s.replace("\"", """).replace(">", ">") |
- return s |
- |
- |
-class XmlStyle(object): |
- """A class that stores all style specification for an output xml file.""" |
- |
- def __init__(self, attribute_order, tags_that_have_extra_newline, |
- tags_that_dont_indent, tags_that_allow_single_line): |
- # List of tag names for top-level nodes whose children are not indented. |
- self.attribute_order = attribute_order |
- self.tags_that_have_extra_newline = tags_that_have_extra_newline |
- self.tags_that_dont_indent = tags_that_dont_indent |
- self.tags_that_allow_single_line = tags_that_allow_single_line |
- |
- def PrettyPrintNode(self, node, indent=0): |
- """Pretty-prints the given XML node at the given indent level. |
- |
- Args: |
- node: The minidom node to pretty-print. |
- indent: The current indent level. |
- |
- Returns: |
- The pretty-printed string (including embedded newlines). |
- |
- Raises: |
- Error if the XML has unknown tags or attributes. |
- """ |
- # Handle the top-level document node. |
- if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE: |
- return '\n'.join([self.PrettyPrintNode(n) for n in node.childNodes]) |
- |
- # Handle text nodes. |
- if node.nodeType == xml.dom.minidom.Node.TEXT_NODE: |
- # Wrap each paragraph in the text to fit in the 80 column limit. |
- wrapper = textwrap.TextWrapper() |
- wrapper.initial_indent = ' ' * indent |
- wrapper.subsequent_indent = ' ' * indent |
- wrapper.break_on_hyphens = False |
- wrapper.break_long_words = False |
- wrapper.width = WRAP_COLUMN |
- text = XmlEscape(node.data) |
- # Remove any common indent. |
- text = textwrap.dedent(text.strip('\n')) |
- lines = text.split('\n') |
- # Split the text into paragraphs at blank line boundaries. |
- paragraphs = [[]] |
- for l in lines: |
- if len(l.strip()) == 0 and len(paragraphs[-1]) > 0: |
- paragraphs.append([]) |
- else: |
- paragraphs[-1].append(l) |
- # Remove trailing empty paragraph if present. |
- if len(paragraphs) > 0 and len(paragraphs[-1]) == 0: |
- paragraphs = paragraphs[:-1] |
- # Wrap each paragraph and separate with two newlines. |
- return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs]) |
- |
- # Handle element nodes. |
- if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE: |
- newlines_after_open, newlines_before_close, newlines_after_close = ( |
- self.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0))) |
- # Open the tag. |
- s = ' ' * indent + '<' + node.tagName |
- |
- # Calculate how much space to allow for the '>' or '/>'. |
- closing_chars = 1 |
- if not node.childNodes: |
- closing_chars = 2 |
- |
- # Pretty-print the attributes. |
- attributes = node.attributes.keys() |
- if attributes: |
- # Reorder the attributes. |
- if node.tagName not in self.attribute_order: |
- unrecognized_attributes = attributes |
- else: |
- unrecognized_attributes = ( |
- [a for a in attributes |
- if a not in self.attribute_order[node.tagName]]) |
- attributes = [a for a in self.attribute_order[node.tagName] |
- if a in attributes] |
- |
- for a in unrecognized_attributes: |
- logging.error( |
- 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName)) |
- if unrecognized_attributes: |
- raise Error() |
- |
- for a in attributes: |
- value = XmlEscape(node.attributes[a].value) |
- # Replace sequences of whitespace with single spaces. |
- words = value.split() |
- a_str = ' %s="%s"' % (a, ' '.join(words)) |
- # Start a new line if the attribute will make this line too long. |
- if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN: |
- s += '\n' + ' ' * (indent + 3) |
- # Output everything up to the first quote. |
- s += ' %s="' % (a) |
- value_indent_level = LastLineLength(s) |
- # Output one word at a time, splitting to the next line where |
- # necessary. |
- column = value_indent_level |
- for i, word in enumerate(words): |
- # This is slightly too conservative since not every word will be |
- # followed by the closing characters... |
- if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN): |
- s = s.rstrip() # remove any trailing whitespace |
- s += '\n' + ' ' * value_indent_level |
- column = value_indent_level |
- s += word + ' ' |
- column += len(word) + 1 |
- s = s.rstrip() # remove any trailing whitespace |
- s += '"' |
- s = s.rstrip() # remove any trailing whitespace |
- |
- # Pretty-print the child nodes. |
- if node.childNodes: |
- s += '>' |
- # Calculate the new indent level for child nodes. |
- new_indent = indent |
- if node.tagName not in self.tags_that_dont_indent: |
- new_indent += 2 |
- child_nodes = node.childNodes |
- |
- # Recursively pretty-print the child nodes. |
- child_nodes = [self.PrettyPrintNode(n, indent=new_indent) |
- for n in child_nodes] |
- child_nodes = [c for c in child_nodes if len(c.strip()) > 0] |
- |
- # Determine whether we can fit the entire node on a single line. |
- close_tag = '</%s>' % node.tagName |
- space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag) |
- if (node.tagName in self.tags_that_allow_single_line and |
- len(child_nodes) == 1 and |
- len(child_nodes[0].strip()) <= space_left): |
- s += child_nodes[0].strip() |
- else: |
- s += '\n' * newlines_after_open + '\n'.join(child_nodes) |
- s += '\n' * newlines_before_close + ' ' * indent |
- s += close_tag |
- else: |
- s += '/>' |
- s += '\n' * newlines_after_close |
- return s |
- |
- # Handle comment nodes. |
- if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE: |
- return '<!--%s-->\n' % node.data |
- |
- # Ignore other node types. This could be a processing instruction |
- # (<? ... ?>) or cdata section (<![CDATA[...]]!>), neither of which are |
- # legal in the histograms XML at present. |
- logging.error('Ignoring unrecognized node data: %s' % node.toxml()) |
- raise Error() |