Index: tools/jsmin.py |
=================================================================== |
--- tools/jsmin.py (revision 2941) |
+++ tools/jsmin.py (working copy) |
@@ -1,218 +1,241 @@ |
-#!/usr/bin/python |
- |
-# This code is original from jsmin by Douglas Crockford, it was translated to |
-# Python by Baruch Even. The original code had the following copyright and |
-# license. |
-# |
-# /* jsmin.c |
-# 2007-05-22 |
-# |
-# Copyright (c) 2002 Douglas Crockford (www.crockford.com) |
-# |
-# Permission is hereby granted, free of charge, to any person obtaining a copy of |
-# this software and associated documentation files (the "Software"), to deal in |
-# the Software without restriction, including without limitation the rights to |
-# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
-# of the Software, and to permit persons to whom the Software is furnished to do |
-# so, subject to the following conditions: |
-# |
-# The above copyright notice and this permission notice shall be included in all |
-# copies or substantial portions of the Software. |
-# |
-# The Software shall be used for Good, not Evil. |
-# |
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
-# SOFTWARE. |
-# */ |
- |
-from StringIO import StringIO |
- |
-def jsmin(js): |
- ins = StringIO(js) |
- outs = StringIO() |
- JavascriptMinify().minify(ins, outs) |
- str = outs.getvalue() |
- if len(str) > 0 and str[0] == '\n': |
- str = str[1:] |
- return str |
- |
-def isAlphanum(c): |
- """return true if the character is a letter, digit, underscore, |
- dollar sign, or non-ASCII character. |
- """ |
- return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or |
- (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126)); |
- |
-class UnterminatedComment(Exception): |
- pass |
- |
-class UnterminatedStringLiteral(Exception): |
- pass |
- |
-class UnterminatedRegularExpression(Exception): |
- pass |
- |
-class JavascriptMinify(object): |
- |
- def _outA(self): |
- self.outstream.write(self.theA) |
- def _outB(self): |
- self.outstream.write(self.theB) |
- |
- def _get(self): |
- """return the next character from stdin. Watch out for lookahead. If |
- the character is a control character, translate it to a space or |
- linefeed. |
- """ |
- c = self.theLookahead |
- self.theLookahead = None |
- if c == None: |
- c = self.instream.read(1) |
- if c >= ' ' or c == '\n': |
- return c |
- if c == '': # EOF |
- return '\000' |
- if c == '\r': |
- return '\n' |
- return ' ' |
- |
- def _peek(self): |
- self.theLookahead = self._get() |
- return self.theLookahead |
- |
- def _next(self): |
- """get the next character, excluding comments. peek() is used to see |
- if an unescaped '/' is followed by a '/' or '*'. |
- """ |
- c = self._get() |
- if c == '/' and self.theA != '\\': |
- p = self._peek() |
- if p == '/': |
- c = self._get() |
- while c > '\n': |
- c = self._get() |
- return c |
- if p == '*': |
- c = self._get() |
- while 1: |
- c = self._get() |
- if c == '*': |
- if self._peek() == '/': |
- self._get() |
- return ' ' |
- if c == '\000': |
- raise UnterminatedComment() |
- |
- return c |
- |
- def _action(self, action): |
- """do something! What you do is determined by the argument: |
- 1 Output A. Copy B to A. Get the next B. |
- 2 Copy B to A. Get the next B. (Delete A). |
- 3 Get the next B. (Delete B). |
- action treats a string as a single character. Wow! |
- action recognizes a regular expression if it is preceded by ( or , or =. |
- """ |
- if action <= 1: |
- self._outA() |
- |
- if action <= 2: |
- self.theA = self.theB |
- if self.theA == "'" or self.theA == '"': |
- while 1: |
- self._outA() |
- self.theA = self._get() |
- if self.theA == self.theB: |
- break |
- if self.theA <= '\n': |
- raise UnterminatedStringLiteral() |
- if self.theA == '\\': |
- self._outA() |
- self.theA = self._get() |
- |
- |
- if action <= 3: |
- self.theB = self._next() |
- if self.theB == '/' and (self.theA == '(' or self.theA == ',' or |
- self.theA == '=' or self.theA == ':' or |
- self.theA == '[' or self.theA == '?' or |
- self.theA == '!' or self.theA == '&' or |
- self.theA == '|' or self.theA == ';' or |
- self.theA == '{' or self.theA == '}' or |
- self.theA == '\n'): |
- self._outA() |
- self._outB() |
- while 1: |
- self.theA = self._get() |
- if self.theA == '/': |
- break |
- elif self.theA == '\\': |
- self._outA() |
- self.theA = self._get() |
- elif self.theA <= '\n': |
- raise UnterminatedRegularExpression() |
- self._outA() |
- self.theB = self._next() |
- |
- |
- def _jsmin(self): |
- """Copy the input to the output, deleting the characters which are |
- insignificant to JavaScript. Comments will be removed. Tabs will be |
- replaced with spaces. Carriage returns will be replaced with linefeeds. |
- Most spaces and linefeeds will be removed. |
- """ |
- self.theA = '\n' |
- self._action(3) |
- |
- while self.theA != '\000': |
- if self.theA == ' ': |
- if isAlphanum(self.theB): |
- self._action(1) |
- else: |
- self._action(2) |
- elif self.theA == '\n': |
- if self.theB in ['{', '[', '(', '+', '-']: |
- self._action(1) |
- elif self.theB == ' ': |
- self._action(3) |
- else: |
- if isAlphanum(self.theB): |
- self._action(1) |
- else: |
- self._action(2) |
- else: |
- if self.theB == ' ': |
- if isAlphanum(self.theA): |
- self._action(1) |
- else: |
- self._action(3) |
- elif self.theB == '\n': |
- if self.theA in ['}', ']', ')', '+', '-', '"', '\'']: |
- self._action(1) |
- else: |
- if isAlphanum(self.theA): |
- self._action(1) |
- else: |
- self._action(3) |
- else: |
- self._action(1) |
- |
- def minify(self, instream, outstream): |
- self.instream = instream |
- self.outstream = outstream |
- self.theA = '\n' |
- self.theB = None |
- self.theLookahead = None |
- |
- self._jsmin() |
- self.instream.close() |
- |
-if __name__ == '__main__': |
- import sys |
- jsm = JavascriptMinify() |
- jsm.minify(sys.stdin, sys.stdout) |
+#!/usr/bin/python2.4 |
+ |
+# Copyright 2009 the V8 project authors. All rights reserved. |
+# Redistribution and use in source and binary forms, with or without |
+# modification, are permitted provided that the following conditions are |
+# met: |
+# |
+# * Redistributions of source code must retain the above copyright |
+# notice, this list of conditions and the following disclaimer. |
+# * Redistributions in binary form must reproduce the above |
+# copyright notice, this list of conditions and the following |
+# disclaimer in the documentation and/or other materials provided |
+# with the distribution. |
+# * Neither the name of Google Inc. nor the names of its |
+# contributors may be used to endorse or promote products derived |
+# from this software without specific prior written permission. |
+# |
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
+ |
+# Suppress copyright warning: pylint: disable-msg=C6304 |
Christian Plesner Hansen
2009/09/23 12:19:35
I would suggest removing this and just letting pyl
|
+ |
+"""A JavaScript minifier. |
+ |
+It is far from being a complete JS parser, so there are many valid |
+JavaScript programs that will be ruined by it. Another strangeness is that |
+it accepts $ and % as parts of identifiers. It doesn't merge lines or strip |
+out blank lines in order to ease debugging. Variables at the top scope are |
+properties of the global object so we can't rename them. It is assumed that |
+you introduce variables with var as if JavaScript followed C++ scope rules |
+around curly braces, so the declaration must be above the first use. |
+ |
+Use as: |
+import jsmin |
+minifier = jsmin.JavaScriptMinifier() |
+program1 = minifier.JSMinify(program1) |
+program2 = minifier.JSMinify(program2) |
+""" |
+ |
+import re |
+ |
+ |
+class JavaScriptMinifier(object): |
+ """An object that you can feed code snippets to in order to get them minified.""" |
+ |
+ def __init__(self): |
+ # We prepopulate the list of identifiers that shouldn't be used. These |
+ # short language keywords could otherwise be used by the script as variable |
+ # names. |
+ self.seen_identifiers = {"do": True, "in": True} |
+ self.identifier_counter = 0 |
+ self.in_comment = False |
+ self.map = {} |
+ self.nesting = 0 |
+ |
+ def LookAtIdentifier(self, m): |
+ """Records identifiers or keywords that we see in use.""" |
+ # (So we can avoid renaming variables to these strings.) |
+ identifier = m.group(1) |
+ self.seen_identifiers[identifier] = True |
+ |
+ def Push(self): |
+ """Called when we encounter a '{'.""" |
+ self.nesting += 1 |
+ |
+ def Pop(self): |
+ """Called when we encounter a '}'.""" |
+ self.nesting -= 1 |
+ # We treat each top-level opening brace as a single scope that can span |
+ # several sets of nested braces. |
+ if self.nesting == 0: |
+ self.map = {} |
+ self.identifier_counter = 0 |
+ |
+ def Declaration(self, m): |
+ """Rewrites bits of the program selected by a regexp.""" |
+ # These can be curly braces, literal strings, function declarations and var |
+ # declarations. (These last two must be on one line including the opening |
+ # curly brace of the function for their variables to be renamed). |
+ matched_text = m.group(0) |
+ if matched_text == "{": |
+ self.Push() |
+ return matched_text |
+ if matched_text == "}": |
+ self.Pop() |
+ return matched_text |
+ if re.match("[\"'/]", matched_text): |
+ return matched_text |
+ m = re.match(r"var ", matched_text) |
+ if m: |
+ var_names = matched_text[m.end():] |
+ var_names = re.split(r",", var_names) |
+ return "var " + ",".join(map(self.FindNewName, var_names)) |
+ m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text) |
+ if m: |
+ up_to_args = m.group(1) |
+ args = m.group(2) |
+ args = re.split(r",", args) |
+ self.Push() |
+ return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){" |
+ |
+ if matched_text in self.map: |
+ return self.map[matched_text] |
+ |
+ return matched_text |
+ |
+ def CharFromNumber(self, number): |
+ """A single-digit base-52 encoding using a-zA-Z.""" |
+ if number < 26: |
+ return chr(number + 97) |
+ number -= 26 |
+ return chr(number + 65) |
+ |
+ def FindNewName(self, var_name): |
+ """Finds a new 1-character or 2-character name for a variable.""" |
+ # Enters it into the mapping table for this scope. |
Christian Plesner Hansen
2009/09/23 12:19:35
Why is this not part of the docstring?
|
+ new_identifier = "" |
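+ # Variable names that end in _ are member variables of the global object, |
+ # so they can be visible from code in a different scope. We leave them |
+ # alone. |
+ # NOTE(review): no trailing-underscore check is visible below; the code only |
+ # reuses an existing mapping and leaves names unchanged when nesting is 0. |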
+ if var_name in self.map: |
+ return self.map[var_name] |
+ if self.nesting == 0: |
+ return var_name |
+ while True: |
+ identifier_first_char = self.identifier_counter % 52 |
+ identifier_second_char = self.identifier_counter / 52 |
+ new_identifier = self.CharFromNumber(identifier_first_char) |
+ if identifier_second_char != 0: |
+ new_identifier = ( |
+ self.CharFromNumber(identifier_second_char - 1) + new_identifier) |
+ self.identifier_counter += 1 |
+ if not new_identifier in self.seen_identifiers: |
+ break |
+ |
+ self.map[var_name] = new_identifier |
+ return new_identifier |
+ |
+ def RemoveSpaces(self, m): |
+ """Returns literal strings unchanged, replaces other inputs with group 1.""" |
+ # Other inputs are replaced with the contents of capture 1. This is either |
+ # a single space or an empty string. |
+ entire_match = m.group(0) |
+ replacement = m.group(1) |
+ if re.match(r"'.*'$", entire_match): |
+ return entire_match |
+ if re.match(r'".*"$', entire_match): |
+ return entire_match |
+ if re.match(r"/.+/$", entire_match): |
+ return entire_match |
+ return replacement |
+ |
+ def JSMinify(self, text): |
+ """The main entry point. Takes a text and returns a compressed version.""" |
+ # The compressed version hopefully does the same thing. Line breaks are |
+ # preserved. |
+ new_lines = [] |
+ for line in re.split(r"\n", text): |
+ line = line.replace("\t", " ") |
+ if self.in_comment: |
+ m = re.search(r"\*/", line) |
+ if m: |
+ line = line[m.end():] |
+ self.in_comment = False |
+ else: |
+ new_lines.append("") |
+ continue |
+ |
+ if not self.in_comment: |
+ line = re.sub(r"/\*.*?\*/", " ", line) |
+ line = re.sub(r"//.*", "", line) |
+ m = re.search(r"/\*", line) |
+ if m: |
+ line = line[:m.start()] |
+ self.in_comment = True |
+ |
+ # Strip leading and trailing spaces. |
+ line = re.sub(r"^ +", "", line) |
+ line = re.sub(r" +$", "", line) |
+ # A regexp that matches a literal string surrounded by "double quotes". |
+ # This regexp can handle embedded backslash-escaped characters including |
+ # embedded backslash-escaped double quotes. |
+ double_quoted_string = r'"(?:[^"\\]|\\.)*"' |
+ # A regexp that matches a literal string surrounded by 'single quotes'. |
+ single_quoted_string = r"'(?:[^'\\]|\\.)*'" |
+ # A regexp that matches a regexp literal surrounded by /slashes/. |
+ slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/" |
+ # Replace multiple spaces with a single space. |
+ line = re.sub("|".join([double_quoted_string, |
+ single_quoted_string, |
+ slash_quoted_regexp, |
+ "( )+"]), |
+ self.RemoveSpaces, |
+ line) |
+ # Strip single spaces unless they have an identifier character both before |
+ # and after the space. % and $ are counted as identifier characters. |
+ line = re.sub("|".join([double_quoted_string, |
+ single_quoted_string, |
+ slash_quoted_regexp, |
+ r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]), |
+ self.RemoveSpaces, |
+ line) |
+ # Collect keywords and identifiers that are already in use. |
+ if self.nesting == 0: |
+ re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line) |
+ function_declaration_regexp = ( |
+ r"\bfunction" # Function definition keyword... |
+ r"( [\w$%]+)?" # ...optional function name... |
+ r"\([\w$%,]+\)\{") # ...argument declarations. |
+ # Unfortunately the keyword-value syntax { key:value } makes the key look |
+ # like a variable where in fact it is a literal string. We use the |
+ # presence or absence of a question mark to try to distinguish between |
+ # this case and the ternary operator: "condition ? iftrue : iffalse". |
+ if re.search(r"\?", line): |
+ block_trailing_colon = r"" |
+ else: |
+ block_trailing_colon = r"(?![:\w$%])" |
+ # Variable use. Cannot follow a period or precede a colon. |
+ variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon |
+ line = re.sub("|".join([double_quoted_string, |
+ single_quoted_string, |
+ slash_quoted_regexp, |
+ r"\{", # Curly braces. |
+ r"\}", |
+ r"\bvar [\w$%,]+", # var declarations. |
+ function_declaration_regexp, |
+ variable_use_regexp]), |
+ self.Declaration, |
+ line) |
+ new_lines.append(line) |
+ |
+ return "\n".join(new_lines) + "\n" |