Index: trunk/src/tools/metrics/histograms/pretty_print.py |
=================================================================== |
--- trunk/src/tools/metrics/histograms/pretty_print.py (revision 255402) |
+++ trunk/src/tools/metrics/histograms/pretty_print.py (working copy) |
@@ -14,21 +14,59 @@ |
from __future__ import with_statement |
+import diffutil |
+import json |
import logging |
import os |
import shutil |
import sys |
+import textwrap |
import xml.dom.minidom |
-import print_style |
- |
sys.path.insert(1, os.path.join(sys.path[0], '..', '..', 'python')) |
from google import path_utils |
-# Import the metrics/common module. |
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common')) |
-import diff_util |
+WRAP_COLUMN = 80 |
+# Desired order for tag attributes; attributes listed here will appear first, |
+# and in the same order as in these lists. |
+# { tag_name: [attribute_name, ...] } |
+ATTRIBUTE_ORDER = { |
+ 'enum': ['name', 'type'], |
+ 'histogram': ['name', 'enum', 'units'], |
+ 'int': ['value', 'label'], |
+ 'fieldtrial': ['name', 'separator', 'ordering'], |
+ 'group': ['name', 'label'], |
+ 'affected-histogram': ['name'], |
+ 'with-group': ['name'], |
+} |
+ |
+# Tag names for top-level nodes whose children we don't want to indent. |
+TAGS_THAT_DONT_INDENT = [ |
+ 'histogram-configuration', |
+ 'histograms', |
+ 'fieldtrials', |
+ 'enums' |
+] |
+ |
+# Extra vertical spacing rules for special tag names. |
+# {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)} |
+TAGS_THAT_HAVE_EXTRA_NEWLINE = { |
+ 'histogram-configuration': (2, 1, 1), |
+ 'histograms': (2, 1, 1), |
+ 'fieldtrials': (2, 1, 1), |
+ 'enums': (2, 1, 1), |
+ 'histogram': (1, 1, 1), |
+ 'enum': (1, 1, 1), |
+ 'fieldtrial': (1, 1, 1), |
+} |
+ |
+# Tags that we allow to be squished into a single line for brevity. |
+TAGS_THAT_ALLOW_SINGLE_LINE = [ |
+ 'summary', |
+ 'int', |
+] |
+ |
# Tags whose children we want to alphabetize. The key is the parent tag name, |
# and the value is a pair of the tag name of the children we want to sort, |
# and a key function that maps each child node to the desired sort key. |
@@ -46,6 +84,165 @@ |
pass |
+def LastLineLength(s): |
+ """Returns the length of the last line in s. |
+ |
+ Args: |
+ s: A multi-line string, including newlines. |
+ |
+ Returns: |
+ The length of the last line in s, in characters. |
+ """ |
+ if s.rfind('\n') == -1: return len(s) |
+ return len(s) - s.rfind('\n') - len('\n') |
+ |
+ |
+def XmlEscape(s): |
+ """XML-escapes the given string, replacing magic characters (&<>") with their |
+ escaped equivalents.""" |
+ s = s.replace("&", "&amp;").replace("<", "&lt;") |
+ s = s.replace("\"", "&quot;").replace(">", "&gt;") |
+ return s |
+ |
+ |
+def PrettyPrintNode(node, indent=0): |
+ """Pretty-prints the given XML node at the given indent level. |
+ |
+ Args: |
+ node: The minidom node to pretty-print. |
+ indent: The current indent level. |
+ |
+ Returns: |
+ The pretty-printed string (including embedded newlines). |
+ |
+ Raises: |
+ Error if the XML has unknown tags or attributes. |
+ """ |
+ # Handle the top-level document node. |
+ if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE: |
+ return '\n'.join([PrettyPrintNode(n) for n in node.childNodes]) |
+ |
+ # Handle text nodes. |
+ if node.nodeType == xml.dom.minidom.Node.TEXT_NODE: |
+ # Wrap each paragraph in the text to fit in the 80 column limit. |
+ wrapper = textwrap.TextWrapper() |
+ wrapper.initial_indent = ' ' * indent |
+ wrapper.subsequent_indent = ' ' * indent |
+ wrapper.break_on_hyphens = False |
+ wrapper.break_long_words = False |
+ wrapper.width = WRAP_COLUMN |
+ text = XmlEscape(node.data) |
+ # Remove any common indent. |
+ text = textwrap.dedent(text.strip('\n')) |
+ lines = text.split('\n') |
+ # Split the text into paragraphs at blank line boundaries. |
+ paragraphs = [[]] |
+ for l in lines: |
+ if len(l.strip()) == 0 and len(paragraphs[-1]) > 0: |
+ paragraphs.append([]) |
+ else: |
+ paragraphs[-1].append(l) |
+ # Remove trailing empty paragraph if present. |
+ if len(paragraphs) > 0 and len(paragraphs[-1]) == 0: |
+ paragraphs = paragraphs[:-1] |
+ # Wrap each paragraph and separate with two newlines. |
+ return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs]) |
+ |
+ # Handle element nodes. |
+ if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE: |
+ newlines_after_open, newlines_before_close, newlines_after_close = ( |
+ TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0))) |
+ # Open the tag. |
+ s = ' ' * indent + '<' + node.tagName |
+ |
+ # Calculate how much space to allow for the '>' or '/>'. |
+ closing_chars = 1 |
+ if not node.childNodes: |
+ closing_chars = 2 |
+ |
+ # Pretty-print the attributes. |
+ attributes = node.attributes.keys() |
+ if attributes: |
+ # Reorder the attributes. |
+ if not node.tagName in ATTRIBUTE_ORDER: |
+ unrecognized_attributes = attributes; |
+ else: |
+ unrecognized_attributes = ( |
+ [a for a in attributes if not a in ATTRIBUTE_ORDER[node.tagName]]) |
+ attributes = ( |
+ [a for a in ATTRIBUTE_ORDER[node.tagName] if a in attributes]) |
+ |
+ for a in unrecognized_attributes: |
+ logging.error( |
+ 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName)) |
+ if unrecognized_attributes: |
+ raise Error() |
+ |
+ for a in attributes: |
+ value = XmlEscape(node.attributes[a].value) |
+ # Replace sequences of whitespace with single spaces. |
+ words = value.split() |
+ a_str = ' %s="%s"' % (a, ' '.join(words)) |
+ # Start a new line if the attribute will make this line too long. |
+ if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN: |
+ s += '\n' + ' ' * (indent + 3) |
+ # Output everything up to the first quote. |
+ s += ' %s="' % (a) |
+ value_indent_level = LastLineLength(s) |
+ # Output one word at a time, splitting to the next line where necessary. |
+ column = value_indent_level |
+ for i, word in enumerate(words): |
+ # This is slightly too conservative since not every word will be |
+ # followed by the closing characters... |
+ if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN): |
+ s = s.rstrip() # remove any trailing whitespace |
+ s += '\n' + ' ' * value_indent_level |
+ column = value_indent_level |
+ s += word + ' ' |
+ column += len(word) + 1 |
+ s = s.rstrip() # remove any trailing whitespace |
+ s += '"' |
+ s = s.rstrip() # remove any trailing whitespace |
+ |
+ # Pretty-print the child nodes. |
+ if node.childNodes: |
+ s += '>' |
+ # Calculate the new indent level for child nodes. |
+ new_indent = indent |
+ if node.tagName not in TAGS_THAT_DONT_INDENT: |
+ new_indent += 2 |
+ child_nodes = node.childNodes |
+ |
+ # Recursively pretty-print the child nodes. |
+ child_nodes = [PrettyPrintNode(n, indent=new_indent) for n in child_nodes] |
+ child_nodes = [c for c in child_nodes if len(c.strip()) > 0] |
+ |
+ # Determine whether we can fit the entire node on a single line. |
+ close_tag = '</%s>' % node.tagName |
+ space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag) |
+ if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and |
+ len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left): |
+ s += child_nodes[0].strip() |
+ else: |
+ s += '\n' * newlines_after_open + '\n'.join(child_nodes) |
+ s += '\n' * newlines_before_close + ' ' * indent |
+ s += close_tag |
+ else: |
+ s += '/>' |
+ s += '\n' * newlines_after_close |
+ return s |
+ |
+ # Handle comment nodes. |
+ if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE: |
+ return '<!--%s-->\n' % node.data |
+ |
+ # Ignore other node types. This could be a processing instruction (<? ... ?>) |
+ # or CDATA section (<![CDATA[...]]>), neither of which is legal in the |
+ # histograms XML at present. |
+ logging.error('Ignoring unrecognized node data: %s' % node.toxml()) |
+ raise Error() |
+ |
+ |
def unsafeAppendChild(parent, child): |
"""Append child to parent's list of children, ignoring the possibility that it |
is already in another node's childNodes list. Requires that the previous |
@@ -107,14 +304,14 @@ |
"""Pretty-print the given XML. |
Args: |
- raw_xml: The contents of the histograms XML file, as a string. |
+ xml: The contents of the histograms XML file, as a string. |
Returns: |
The pretty-printed version. |
""" |
tree = xml.dom.minidom.parseString(raw_xml) |
tree = TransformByAlphabetizing(tree) |
- return print_style.GetPrintStyle().PrettyPrintNode(tree) |
+ return PrettyPrintNode(tree) |
def main(): |
@@ -159,7 +356,7 @@ |
logging.info('%s is not formatted correctly; run pretty_print.py to fix.' % |
histograms_filename) |
sys.exit(1) |
- if not diff_util.PromptUserToAcceptDiff( |
+ if not diffutil.PromptUserToAcceptDiff( |
xml, pretty, |
'Is the prettified version acceptable?'): |
logging.error('Aborting') |