Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(138)

Unified Diff: tools/jsmin.py

Issue 215052: * Remove non-Open Source code from Douglas Crockford.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 11 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/js2c.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/jsmin.py
===================================================================
--- tools/jsmin.py (revision 2941)
+++ tools/jsmin.py (working copy)
@@ -1,218 +1,241 @@
-#!/usr/bin/python
-
-# This code is original from jsmin by Douglas Crockford, it was translated to
-# Python by Baruch Even. The original code had the following copyright and
-# license.
-#
-# /* jsmin.c
-# 2007-05-22
-#
-# Copyright (c) 2002 Douglas Crockford (www.crockford.com)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of
-# this software and associated documentation files (the "Software"), to deal in
-# the Software without restriction, including without limitation the rights to
-# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-# of the Software, and to permit persons to whom the Software is furnished to do
-# so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# The Software shall be used for Good, not Evil.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# */
-
-from StringIO import StringIO
-
-def jsmin(js):
- ins = StringIO(js)
- outs = StringIO()
- JavascriptMinify().minify(ins, outs)
- str = outs.getvalue()
- if len(str) > 0 and str[0] == '\n':
- str = str[1:]
- return str
-
-def isAlphanum(c):
- """return true if the character is a letter, digit, underscore,
- dollar sign, or non-ASCII character.
- """
- return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or
- (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126));
-
-class UnterminatedComment(Exception):
- pass
-
-class UnterminatedStringLiteral(Exception):
- pass
-
-class UnterminatedRegularExpression(Exception):
- pass
-
-class JavascriptMinify(object):
-
- def _outA(self):
- self.outstream.write(self.theA)
- def _outB(self):
- self.outstream.write(self.theB)
-
- def _get(self):
- """return the next character from stdin. Watch out for lookahead. If
- the character is a control character, translate it to a space or
- linefeed.
- """
- c = self.theLookahead
- self.theLookahead = None
- if c == None:
- c = self.instream.read(1)
- if c >= ' ' or c == '\n':
- return c
- if c == '': # EOF
- return '\000'
- if c == '\r':
- return '\n'
- return ' '
-
- def _peek(self):
- self.theLookahead = self._get()
- return self.theLookahead
-
- def _next(self):
- """get the next character, excluding comments. peek() is used to see
- if an unescaped '/' is followed by a '/' or '*'.
- """
- c = self._get()
- if c == '/' and self.theA != '\\':
- p = self._peek()
- if p == '/':
- c = self._get()
- while c > '\n':
- c = self._get()
- return c
- if p == '*':
- c = self._get()
- while 1:
- c = self._get()
- if c == '*':
- if self._peek() == '/':
- self._get()
- return ' '
- if c == '\000':
- raise UnterminatedComment()
-
- return c
-
- def _action(self, action):
- """do something! What you do is determined by the argument:
- 1 Output A. Copy B to A. Get the next B.
- 2 Copy B to A. Get the next B. (Delete A).
- 3 Get the next B. (Delete B).
- action treats a string as a single character. Wow!
- action recognizes a regular expression if it is preceded by ( or , or =.
- """
- if action <= 1:
- self._outA()
-
- if action <= 2:
- self.theA = self.theB
- if self.theA == "'" or self.theA == '"':
- while 1:
- self._outA()
- self.theA = self._get()
- if self.theA == self.theB:
- break
- if self.theA <= '\n':
- raise UnterminatedStringLiteral()
- if self.theA == '\\':
- self._outA()
- self.theA = self._get()
-
-
- if action <= 3:
- self.theB = self._next()
- if self.theB == '/' and (self.theA == '(' or self.theA == ',' or
- self.theA == '=' or self.theA == ':' or
- self.theA == '[' or self.theA == '?' or
- self.theA == '!' or self.theA == '&' or
- self.theA == '|' or self.theA == ';' or
- self.theA == '{' or self.theA == '}' or
- self.theA == '\n'):
- self._outA()
- self._outB()
- while 1:
- self.theA = self._get()
- if self.theA == '/':
- break
- elif self.theA == '\\':
- self._outA()
- self.theA = self._get()
- elif self.theA <= '\n':
- raise UnterminatedRegularExpression()
- self._outA()
- self.theB = self._next()
-
-
- def _jsmin(self):
- """Copy the input to the output, deleting the characters which are
- insignificant to JavaScript. Comments will be removed. Tabs will be
- replaced with spaces. Carriage returns will be replaced with linefeeds.
- Most spaces and linefeeds will be removed.
- """
- self.theA = '\n'
- self._action(3)
-
- while self.theA != '\000':
- if self.theA == ' ':
- if isAlphanum(self.theB):
- self._action(1)
- else:
- self._action(2)
- elif self.theA == '\n':
- if self.theB in ['{', '[', '(', '+', '-']:
- self._action(1)
- elif self.theB == ' ':
- self._action(3)
- else:
- if isAlphanum(self.theB):
- self._action(1)
- else:
- self._action(2)
- else:
- if self.theB == ' ':
- if isAlphanum(self.theA):
- self._action(1)
- else:
- self._action(3)
- elif self.theB == '\n':
- if self.theA in ['}', ']', ')', '+', '-', '"', '\'']:
- self._action(1)
- else:
- if isAlphanum(self.theA):
- self._action(1)
- else:
- self._action(3)
- else:
- self._action(1)
-
- def minify(self, instream, outstream):
- self.instream = instream
- self.outstream = outstream
- self.theA = '\n'
- self.theB = None
- self.theLookahead = None
-
- self._jsmin()
- self.instream.close()
-
-if __name__ == '__main__':
- import sys
- jsm = JavascriptMinify()
- jsm.minify(sys.stdin, sys.stdout)
+#!/usr/bin/python2.4
+
+# Copyright 2009 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Suppress copyright warning: pylint: disable-msg=C6304
Christian Plesner Hansen 2009/09/23 12:19:35 I would suggest removing this and just letting pyl
+
+"""A JavaScript minifier.
+
+It is far from being a complete JS parser, so there are many valid
+JavaScript programs that will be ruined by it. Another strangeness is that
+it accepts $ and % as parts of identifiers. It doesn't merge lines or strip
+out blank lines in order to ease debugging. Variables at the top scope are
+properties of the global object so we can't rename them. It is assumed that
+you introduce variables with var as if JavaScript followed C++ scope rules
+around curly braces, so the declaration must be above the first use.
+
+Use as:
+import jsmin
+minifier = jsmin.JavaScriptMinifier()
+program1 = minifier.JSMinify(program1)
+program2 = minifier.JSMinify(program2)
+"""
+
+import re
+
+
+class JavaScriptMinifier(object):
+ """An object that you can feed code snippets to in order to get them minified."""
+
+ def __init__(self):
+ # We prepopulate the list of identifiers that shouldn't be used. These
+ # short language keywords could otherwise be used by the script as variable
+ # names.
+ self.seen_identifiers = {"do": True, "in": True}
+ self.identifier_counter = 0
+ self.in_comment = False
+ self.map = {}
+ self.nesting = 0
+
+ def LookAtIdentifier(self, m):
+ """Records identifiers or keywords that we see in use."""
+ # (So we can avoid renaming variables to these strings.)
+ identifier = m.group(1)
+ self.seen_identifiers[identifier] = True
+
+ def Push(self):
+ """Called when we encounter a '{'."""
+ self.nesting += 1
+
+ def Pop(self):
+ """Called when we encounter a '}'."""
+ self.nesting -= 1
+ # We treat each top-level opening brace as a single scope that can span
+ # several sets of nested braces.
+ if self.nesting == 0:
+ self.map = {}
+ self.identifier_counter = 0
+
+ def Declaration(self, m):
+ """Rewrites bits of the program selected by a regexp."""
+ # These can be curly braces, literal strings, function declarations and var
+ # declarations. (These last two must be on one line including the opening
+ # curly brace of the function for their variables to be renamed).
+ matched_text = m.group(0)
+ if matched_text == "{":
+ self.Push()
+ return matched_text
+ if matched_text == "}":
+ self.Pop()
+ return matched_text
+ if re.match("[\"'/]", matched_text):
+ return matched_text
+ m = re.match(r"var ", matched_text)
+ if m:
+ var_names = matched_text[m.end():]
+ var_names = re.split(r",", var_names)
+ return "var " + ",".join(map(self.FindNewName, var_names))
+ m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)
+ if m:
+ up_to_args = m.group(1)
+ args = m.group(2)
+ args = re.split(r",", args)
+ self.Push()
+ return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){"
+
+ if matched_text in self.map:
+ return self.map[matched_text]
+
+ return matched_text
+
+ def CharFromNumber(self, number):
+ """A single-digit base-52 encoding using a-zA-Z."""
+ if number < 26:
+ return chr(number + 97)
+ number -= 26
+ return chr(number + 65)
+
+ def FindNewName(self, var_name):
+ """Finds a new 1-character or 2-character name for a variable."""
+ # Enters it into the mapping table for this scope.
Christian Plesner Hansen 2009/09/23 12:19:35 Why is this not part of the docstring?
+ new_identifier = ""
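+ # Variables declared at the top level (nesting 0) are member variables of
+ # the global object, so they can be visible from code in a different scope.
+ # We leave them alone. Names already mapped in this scope reuse their mapping.
+ # NOTE(review): an earlier wording referred to names ending in _, but no
+ # trailing-underscore check is made here -- confirm the intended rule.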
+ if var_name in self.map:
+ return self.map[var_name]
+ if self.nesting == 0:
+ return var_name
+ while True:
+ identifier_first_char = self.identifier_counter % 52
+ identifier_second_char = self.identifier_counter / 52
+ new_identifier = self.CharFromNumber(identifier_first_char)
+ if identifier_second_char != 0:
+ new_identifier = (
+ self.CharFromNumber(identifier_second_char - 1) + new_identifier)
+ self.identifier_counter += 1
+ if not new_identifier in self.seen_identifiers:
+ break
+
+ self.map[var_name] = new_identifier
+ return new_identifier
+
+ def RemoveSpaces(self, m):
+ """Returns literal strings unchanged, replaces other inputs with group 1."""
+ # Other inputs are replaced with the contents of capture 1. This is either
+ # a single space or an empty string.
+ entire_match = m.group(0)
+ replacement = m.group(1)
+ if re.match(r"'.*'$", entire_match):
+ return entire_match
+ if re.match(r'".*"$', entire_match):
+ return entire_match
+ if re.match(r"/.+/$", entire_match):
+ return entire_match
+ return replacement
+
+ def JSMinify(self, text):
+ """The main entry point. Takes a text and returns a compressed version."""
+ # The compressed version hopefully does the same thing. Line breaks are
+ # preserved.
+ new_lines = []
+ for line in re.split(r"\n", text):
+ line = line.replace("\t", " ")
+ if self.in_comment:
+ m = re.search(r"\*/", line)
+ if m:
+ line = line[m.end():]
+ self.in_comment = False
+ else:
+ new_lines.append("")
+ continue
+
+ if not self.in_comment:
+ line = re.sub(r"/\*.*?\*/", " ", line)
+ line = re.sub(r"//.*", "", line)
+ m = re.search(r"/\*", line)
+ if m:
+ line = line[:m.start()]
+ self.in_comment = True
+
+ # Strip leading and trailing spaces.
+ line = re.sub(r"^ +", "", line)
+ line = re.sub(r" +$", "", line)
+ # A regexp that matches a literal string surrounded by "double quotes".
+ # This regexp can handle embedded backslash-escaped characters including
+ # embedded backslash-escaped double quotes.
+ double_quoted_string = r'"(?:[^"\\]|\\.)*"'
+ # A regexp that matches a literal string surrounded by 'single quotes'.
+ single_quoted_string = r"'(?:[^'\\]|\\.)*'"
+ # A regexp that matches a regexp literal surrounded by /slashes/.
+ slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/"
+ # Replace multiple spaces with a single space.
+ line = re.sub("|".join([double_quoted_string,
+ single_quoted_string,
+ slash_quoted_regexp,
+ "( )+"]),
+ self.RemoveSpaces,
+ line)
+ # Strip single spaces unless they have an identifier character both before
+ # and after the space. % and $ are counted as identifier characters.
+ line = re.sub("|".join([double_quoted_string,
+ single_quoted_string,
+ slash_quoted_regexp,
+ r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]),
+ self.RemoveSpaces,
+ line)
+ # Collect keywords and identifiers that are already in use.
+ if self.nesting == 0:
+ re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line)
+ function_declaration_regexp = (
+ r"\bfunction" # Function definition keyword...
+ r"( [\w$%]+)?" # ...optional function name...
+ r"\([\w$%,]+\)\{") # ...argument declarations.
+ # Unfortunately the keyword-value syntax { key:value } makes the key look
+ # like a variable where in fact it is a literal string. We use the
+ # presence or absence of a question mark to try to distinguish between
+ # this case and the ternary operator: "condition ? iftrue : iffalse".
+ if re.search(r"\?", line):
+ block_trailing_colon = r""
+ else:
+ block_trailing_colon = r"(?![:\w$%])"
+ # Variable use. Cannot follow a period or precede a colon.
+ variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon
+ line = re.sub("|".join([double_quoted_string,
+ single_quoted_string,
+ slash_quoted_regexp,
+ r"\{", # Curly braces.
+ r"\}",
+ r"\bvar [\w$%,]+", # var declarations.
+ function_declaration_regexp,
+ variable_use_regexp]),
+ self.Declaration,
+ line)
+ new_lines.append(line)
+
+ return "\n".join(new_lines) + "\n"
« no previous file with comments | « tools/js2c.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698