tools/jsmin.py - Issue 215052: * Remove non-Open Source code from Douglas Crockford....

Side by Side Diff: tools/jsmin.py

Issue 215052: * Remove non-Open Source code from Douglas Crockford.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 11 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/python	1 #!/usr/bin/python2.4

2	2

3 # This code is original from jsmin by Douglas Crockford, it was translated to	3 # Copyright 2009 the V8 project authors. All rights reserved.

4 # Python by Baruch Even. The original code had the following copyright and	4 # Redistribution and use in source and binary forms, with or without

5 # license.	5 # modification, are permitted provided that the following conditions are

	6 # met:

6 #	7 #

7 # /* jsmin.c	8 # * Redistributions of source code must retain the above copyright

8 # 2007-05-22	9 # notice, this list of conditions and the following disclaimer.

	10 # * Redistributions in binary form must reproduce the above

	11 # copyright notice, this list of conditions and the following

	12 # disclaimer in the documentation and/or other materials provided

	13 # with the distribution.

	14 # * Neither the name of Google Inc. nor the names of its

	15 # contributors may be used to endorse or promote products derived

	16 # from this software without specific prior written permission.

9 #	17 #

10 # Copyright (c) 2002 Douglas Crockford (www.crockford.com)	18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

11 #	19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

12 # Permission is hereby granted, free of charge, to any person obtaining a copy of	20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

13 # this software and associated documentation files (the "Software"), to deal in	21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

14 # the Software without restriction, including without limitation the rights to	22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

15 # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies	23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

16 # of the Software, and to permit persons to whom the Software is furnished to do	24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

17 # so, subject to the following conditions:	25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

18 #	26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

19 # The above copyright notice and this permission notice shall be included in all	27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

20 # copies or substantial portions of the Software.	28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

21 #	29

22 # The Software shall be used for Good, not Evil.	30 # Suppress copyright warning: pylint: disable-msg=C6304
	Christian Plesner Hansen 2009/09/23 12:19:35 I would suggest removing this and just letting pylint complain.
23 #	31

24 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR	32 """A JavaScript minifier.

25 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,	33

26 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE	34 It is far from being a complete JS parser, so there are many valid

27 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER	35 JavaScript programs that will be ruined by it. Another strangeness is that

28 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,	36 it accepts $ and % as parts of identifiers. It doesn't merge lines or strip

29 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE	37 out blank lines in order to ease debugging. Variables at the top scope are

30 # SOFTWARE.	38 properties of the global object so we can't rename them. It is assumed that

31 # */	39 you introduce variables with var as if JavaScript followed C++ scope rules

32	40 around curly braces, so the declaration must be above the first use.

33 from StringIO import StringIO	41

34	42 Use as:

35 def jsmin(js):	43 import jsmin

36 ins = StringIO(js)	44 minifier = JavaScriptMinifier()

37 outs = StringIO()	45 program1 = minifier.JSMinify(program1)

38 JavascriptMinify().minify(ins, outs)	46 program2 = minifier.JSMinify(program2)

39 str = outs.getvalue()	47 """

40 if len(str) > 0 and str[0] == '\n':	48

41 str = str[1:]	49 import re

42 return str	50

43	51

44 def isAlphanum(c):	52 class JavaScriptMinifier(object):

45 """return true if the character is a letter, digit, underscore,	53 """An object that you can feed code snippets to to get them minified."""

46 dollar sign, or non-ASCII character.	54

47 """	55 def __init__(self):

48 return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or	56 # We prepopulate the list of identifiers that shouldn't be used. These

49 (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126));	57 # short language keywords could otherwise be used by the script as variable

50	58 # names.

51 class UnterminatedComment(Exception):	59 self.seen_identifiers = {"do": True, "in": True}

52 pass	60 self.identifier_counter = 0

53	61 self.in_comment = False

54 class UnterminatedStringLiteral(Exception):	62 self.map = {}

55 pass	63 self.nesting = 0

56	64

57 class UnterminatedRegularExpression(Exception):	65 def LookAtIdentifier(self, m):

58 pass	66 """Records identifiers or keywords that we see in use."""

59	67 # (So we can avoid renaming variables to these strings.)

60 class JavascriptMinify(object):	68 identifier = m.group(1)

61	69 self.seen_identifiers[identifier] = True

62 def _outA(self):	70

63 self.outstream.write(self.theA)	71 def Push(self):

64 def _outB(self):	72 """Called when we encounter a '{'."""

65 self.outstream.write(self.theB)	73 self.nesting += 1

66	74

67 def _get(self):	75 def Pop(self):

68 """return the next character from stdin. Watch out for lookahead. If	76 """Called when we encounter a '}'."""

69 the character is a control character, translate it to a space or	77 self.nesting -= 1

70 linefeed.	78 # We treat each top-level opening brace as a single scope that can span

71 """	79 # several sets of nested braces.

72 c = self.theLookahead	80 if self.nesting == 0:

73 self.theLookahead = None	81 self.map = {}

74 if c == None:	82 self.identifier_counter = 0

75 c = self.instream.read(1)	83

76 if c >= ' ' or c == '\n':	84 def Declaration(self, m):

77 return c	85 """Rewrites bits of the program selected by a regexp."""

78 if c == '': # EOF	86 # These can be curly braces, literal strings, function declarations and var

79 return '\000'	87 # declarations. (These last two must be on one line including the opening

80 if c == '\r':	88 # curly brace of the function for their variables to be renamed).

81 return '\n'	89 matched_text = m.group(0)

82 return ' '	90 if matched_text == "{":

83	91 self.Push()

84 def _peek(self):	92 return matched_text

85 self.theLookahead = self._get()	93 if matched_text == "}":

86 return self.theLookahead	94 self.Pop()

87	95 return matched_text

88 def _next(self):	96 if re.match("[\"'/]", matched_text):

89 """get the next character, excluding comments. peek() is used to see	97 return matched_text

90 if an unescaped '/' is followed by a '/' or '*'.	98 m = re.match(r"var ", matched_text)

91 """	99 if m:

92 c = self._get()	100 var_names = matched_text[m.end():]

93 if c == '/' and self.theA != '\\':	101 var_names = re.split(r",", var_names)

94 p = self._peek()	102 return "var " + ",".join(map(self.FindNewName, var_names))

95 if p == '/':	103 m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)

96 c = self._get()	104 if m:

97 while c > '\n':	105 up_to_args = m.group(1)

98 c = self._get()	106 args = m.group(2)

99 return c	107 args = re.split(r",", args)

100 if p == '*':	108 self.Push()

101 c = self._get()	109 return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){"

102 while 1:	110

103 c = self._get()	111 if matched_text in self.map:

104 if c == '*':	112 return self.map[matched_text]

105 if self._peek() == '/':	113

106 self._get()	114 return matched_text

107 return ' '	115

108 if c == '\000':	116 def CharFromNumber(self, number):

109 raise UnterminatedComment()	117 """A single-digit base-52 encoding using a-zA-Z."""

110	118 if number < 26:

111 return c	119 return chr(number + 97)

112	120 number -= 26

113 def _action(self, action):	121 return chr(number + 65)

114 """do something! What you do is determined by the argument:	122

115 1 Output A. Copy B to A. Get the next B.	123 def FindNewName(self, var_name):

116 2 Copy B to A. Get the next B. (Delete A).	124 """Finds a new 1-character or 2-character name for a variable."""

117 3 Get the next B. (Delete B).	125 # Enters it into the mapping table for this scope.
	Christian Plesner Hansen 2009/09/23 12:19:35 Why is this not part of the docstring?
118 action treats a string as a single character. Wow!	126 new_identifier = ""

119 action recognizes a regular expression if it is preceded by ( or , or =.	127 # Variable names that end in _ are member variables of the global object,

120 """	128 # so they can be visible from code in a different scope. We leave them

121 if action <= 1:	129 # alone.

122 self._outA()	130 if var_name in self.map:

123	131 return self.map[var_name]

124 if action <= 2:	132 if self.nesting == 0:

125 self.theA = self.theB	133 return var_name

126 if self.theA == "'" or self.theA == '"':	134 while True:

127 while 1:	135 identifier_first_char = self.identifier_counter % 52

128 self._outA()	136 identifier_second_char = self.identifier_counter / 52

129 self.theA = self._get()	137 new_identifier = self.CharFromNumber(identifier_first_char)

130 if self.theA == self.theB:	138 if identifier_second_char != 0:

131 break	139 new_identifier = (

132 if self.theA <= '\n':	140 self.CharFromNumber(identifier_second_char - 1) + new_identifier)

133 raise UnterminatedStringLiteral()	141 self.identifier_counter += 1

134 if self.theA == '\\':	142 if not new_identifier in self.seen_identifiers:

135 self._outA()	143 break

136 self.theA = self._get()	144

137	145 self.map[var_name] = new_identifier

138	146 return new_identifier

139 if action <= 3:	147

140 self.theB = self._next()	148 def RemoveSpaces(self, m):

141 if self.theB == '/' and (self.theA == '(' or self.theA == ',' or	149 """Returns literal strings unchanged, replaces other inputs with group 2."""

142 self.theA == '=' or self.theA == ':' or	150 # Other inputs are replaced with the contents of capture 1. This is either

143 self.theA == '[' or self.theA == '?' or	151 # a single space or an empty string.

144 self.theA == '!' or self.theA == '&' or	152 entire_match = m.group(0)

145 self.theA == '|' or self.theA == ';' or	153 replacement = m.group(1)

146 self.theA == '{' or self.theA == '}' or	154 if re.match(r"'.*'$", entire_match):

147 self.theA == '\n'):	155 return entire_match

148 self._outA()	156 if re.match(r'".*"$', entire_match):

149 self._outB()	157 return entire_match

150 while 1:	158 if re.match(r"/.+/$", entire_match):

151 self.theA = self._get()	159 return entire_match

152 if self.theA == '/':	160 return replacement

153 break	161

154 elif self.theA == '\\':	162 def JSMinify(self, text):

155 self._outA()	163 """The main entry point. Takes a text and returns a compressed version."""

156 self.theA = self._get()	164 # The compressed version hopefully does the same thing. Line breaks are

157 elif self.theA <= '\n':	165 # preserved.

158 raise UnterminatedRegularExpression()	166 new_lines = []

159 self._outA()	167 for line in re.split(r"\n", text):

160 self.theB = self._next()	168 line = line.replace("\t", " ")

161	169 if self.in_comment:

162	170 m = re.search(r"\*/", line)

163 def _jsmin(self):	171 if m:

164 """Copy the input to the output, deleting the characters which are	172 line = line[m.end():]

165 insignificant to JavaScript. Comments will be removed. Tabs will be	173 self.in_comment = False

166 replaced with spaces. Carriage returns will be replaced with linefeeds.	174 else:

167 Most spaces and linefeeds will be removed.	175 new_lines.append("")

168 """	176 continue

169 self.theA = '\n'	177

170 self._action(3)	178 if not self.in_comment:

171	179 line = re.sub(r"/\*.*?\*/", " ", line)

172 while self.theA != '\000':	180 line = re.sub(r"//.*", "", line)

173 if self.theA == ' ':	181 m = re.search(r"/\*", line)

174 if isAlphanum(self.theB):	182 if m:

175 self._action(1)	183 line = line[:m.start()]

176 else:	184 self.in_comment = True

177 self._action(2)	185

178 elif self.theA == '\n':	186 # Strip leading and trailing spaces.

179 if self.theB in ['{', '[', '(', '+', '-']:	187 line = re.sub(r"^ +", "", line)

180 self._action(1)	188 line = re.sub(r" +$", "", line)

181 elif self.theB == ' ':	189 # A regexp that matches a literal string surrounded by "double quotes".

182 self._action(3)	190 # This regexp can handle embedded backslash-escaped characters including

183 else:	191 # embedded backslash-escaped double quotes.

184 if isAlphanum(self.theB):	192 double_quoted_string = r'"(?:[^"\\]|\\.)*"'

185 self._action(1)	193 # A regexp that matches a literal string surrounded by 'double quotes'.

186 else:	194 single_quoted_string = r"'(?:[^'\\]|\\.)*'"

187 self._action(2)	195 # A regexp that matches a regexp literal surrounded by /slashes/.

188 else:	196 slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/"

189 if self.theB == ' ':	197 # Replace multiple spaces with a single space.

190 if isAlphanum(self.theA):	198 line = re.sub("|".join([double_quoted_string,

191 self._action(1)	199 single_quoted_string,

192 else:	200 slash_quoted_regexp,

193 self._action(3)	201 "( )+"]),

194 elif self.theB == '\n':	202 self.RemoveSpaces,

195 if self.theA in ['}', ']', ')', '+', '-', '"', '\'']:	203 line)

196 self._action(1)	204 # Strip single spaces unless they have an identifier character both before

197 else:	205 # and after the space. % and $ are counted as identifier characters.

198 if isAlphanum(self.theA):	206 line = re.sub("|".join([double_quoted_string,

199 self._action(1)	207 single_quoted_string,

200 else:	208 slash_quoted_regexp,

201 self._action(3)	209 r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]),

202 else:	210 self.RemoveSpaces,

203 self._action(1)	211 line)

204	212 # Collect keywords and identifiers that are already in use.

205 def minify(self, instream, outstream):	213 if self.nesting == 0:

206 self.instream = instream	214 re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line)

207 self.outstream = outstream	215 function_declaration_regexp = (

208 self.theA = '\n'	216 r"\bfunction" # Function definition keyword...

209 self.theB = None	217 r"( [\w$%]+)?" # ...optional function name...

210 self.theLookahead = None	218 r"\([\w$%,]+\)\{") # ...argument declarations.

211	219 # Unfortunately the keyword-value syntax { key:value } makes the key look

212 self._jsmin()	220 # like a variable where in fact it is a literal string. We use the

213 self.instream.close()	221 # presence or absence of a question mark to try to distinguish between

214	222 # this case and the ternary operator: "condition ? iftrue : iffalse".

215 if __name__ == '__main__':	223 if re.search(r"\?", line):

216 import sys	224 block_trailing_colon = r""

217 jsm = JavascriptMinify()	225 else:

218 jsm.minify(sys.stdin, sys.stdout)	226 block_trailing_colon = r"(?![:\w$%])"

	227 # Variable use. Cannot follow a period precede a colon.

	228 variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon

	229 line = re.sub("|".join([double_quoted_string,

	230 single_quoted_string,

	231 slash_quoted_regexp,

	232 r"\{", # Curly braces.

	233 r"\}",

	234 r"\bvar [\w$%,]+", # var declarations.

	235 function_declaration_regexp,

	236 variable_use_regexp]),

	237 self.Declaration,

	238 line)

	239 new_lines.append(line)

	240

	241 return "\n".join(new_lines) + "\n"

OLD	NEW

« no previous file with comments | « tools/js2c.py ('k') | no next file » | no next file with comments »