OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python2.4 | |
John Grabowski
2009/05/29 00:34:43
Ditto comments from other files (py2.4, copyright)
| |
2 # | |
3 # Copyright 2009, Google Inc. | |
4 # All rights reserved. | |
5 # | |
6 # Redistribution and use in source and binary forms, with or without | |
7 # modification, are permitted provided that the following conditions are | |
8 # met: | |
9 # | |
10 # * Redistributions of source code must retain the above copyright | |
11 # notice, this list of conditions and the following disclaimer. | |
12 # * Redistributions in binary form must reproduce the above | |
13 # copyright notice, this list of conditions and the following disclaimer | |
14 # in the documentation and/or other materials provided with the | |
15 # distribution. | |
16 # * Neither the name of Google Inc. nor the names of its | |
17 # contributors may be used to endorse or promote products derived from | |
18 # this software without specific prior written permission. | |
19 # | |
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
21 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
22 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
23 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
24 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
25 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
26 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
27 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
28 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
29 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
30 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
31 | |
32 """Crocodile source scanners.""" | |
John Grabowski
2009/05/29 00:34:43
Elaborate a little. Why is a scanner useful? Wha
| |
33 | |
34 | |
35 import re | |
36 | |
37 | |
class Scanner(object):
    """Generic source scanner.

    Finds the lines of a source file which may hold executable code, by
    blanking out comments and discarding the lines which are then empty.
    The base class only understands '#' comments running to the end of the
    line; subclasses adapt it to a language by replacing the attributes
    below.

    Attributes:
        re_token: Compiled regular expression matching every token the
            scanner cares about (comment delimiters and string quotes).  If
            the pattern has capture groups, group 1 is taken as the token;
            otherwise the whole match is.
        comment_to_eol: List of tokens which start a comment running to the
            end of the line.
        comment_start: Token which opens a multi-line comment, or None if
            the language has none.
        comment_end: Token which closes a multi-line comment, or None.
    """

    def __init__(self):
        """Constructor."""
        self.re_token = re.compile('#')
        self.comment_to_eol = ['#']
        self.comment_start = None
        self.comment_end = None

    def ScanLines(self, lines):
        """Scans the lines for executable statements.

        A line is reported if anything other than whitespace remains once
        its comments are removed, or if it starts inside a string which was
        opened on an earlier line.

        Args:
            lines: Iterator returning source lines.

        Returns:
            An array of line numbers which are executable.  Line numbers
            start at 1.
        """
        exe_lines = []
        lineno = 0

        # Scanner state carried from one line to the next.
        in_string = None      # Token which will close the open string
        in_comment = None     # Token which will close the open comment
        comment_index = None  # Column where the open comment starts

        for line in lines:
            lineno += 1
            in_string_at_start = in_string

            # finditer() walks the line as it was when the loop started, so
            # the edits made to 'line' below do not disturb the iteration.
            # Those edits never move any text which has yet to be scanned.
            for t in self.re_token.finditer(line):
                # Patterns usually wrap the token in capture group 1, but a
                # pattern without groups (such as this class's own '#') is
                # valid too; the token is then the entire match.
                if t.re.groups and t.group(1) is not None:
                    tokenstr = t.group(1)
                else:
                    tokenstr = t.group(0)

                # An anchored alternative such as '^\s*#' drags the leading
                # whitespace into the match.  Drop it, so the token compares
                # equal to the bare entry in comment_to_eol instead of being
                # mistaken for the start of a string.
                tokenstr = tokenstr.strip()

                if in_comment:
                    # Inside a multi-line comment, so look for end token
                    if tokenstr == in_comment:
                        in_comment = None
                        # Replace comment with spaces, keeping the length of
                        # the line (and so later token offsets) unchanged
                        line = (line[:comment_index]
                                + ' ' * (t.end(0) - comment_index)
                                + line[t.end(0):])

                elif in_string:
                    # Inside a string, so look for end token
                    if tokenstr == in_string:
                        in_string = None

                elif tokenstr in self.comment_to_eol:
                    # Single-line comment, so truncate line at start of
                    # token; nothing after it can matter
                    line = line[:t.start(0)]
                    break

                elif tokenstr == self.comment_start:
                    # Multi-line comment start - end token is comment_end
                    in_comment = self.comment_end
                    comment_index = t.start(0)

                else:
                    # Starting a string - end token is same as start
                    in_string = tokenstr

            # If still in comment at end of line, remove comment
            if in_comment:
                line = line[:comment_index]
                # Next line, delete from the beginning
                comment_index = 0

            # If line-sans-comments is not empty, claim it may be executable.
            # A line which begins inside a string always counts.
            if line.strip() or in_string_at_start:
                exe_lines.append(lineno)

        # Return executable lines
        return exe_lines

    def Scan(self, filename):
        """Reads the file and scans its lines.

        Args:
            filename: Path to file to scan.

        Returns:
            An array of line numbers which are executable.

        Raises:
            IOError: If the file cannot be opened.
        """
        # TODO: All manner of error checking
        f = open(filename, 'rt')
        try:
            return self.ScanLines(f)
        finally:
            # Close the file whether or not the scan succeeded.
            f.close()
131 | |
132 | |
class PythonScanner(Scanner):
    """Python source scanner.

    Configures the generic scanner with Python's comment marker and string
    delimiters.
    """

    def __init__(self):
        """Constructor."""
        super(PythonScanner, self).__init__()

        # Python comments start with '#' and run to the end of the line;
        # there is no multi-line comment syntax.
        self.comment_to_eol = ['#']
        self.comment_start = None
        self.comment_end = None

        # Token pattern.  The alternatives are tried in this order:
        #   #            comment marker
        #   ''' or """   triple-quote string delimiter; listed ahead of the
        #                single quotes so that all three characters are
        #                taken as one token
        #   " or '       single-character string delimiter.  The nested
        #                lookbehind rejects a quote which follows a
        #                backslash, unless that backslash itself follows
        #                another backslash.
        #
        # TODO: This breaks for strings ending in more than 2 backslashes.
        # Need a pattern which counts only an odd number of backslashes, so
        # the last one thus escapes the quote.
        self.re_token = re.compile(r'(#|\'\'\'|"""|(?<!(?<!\\)\\)["\'])')
147 | |
148 | |
class CppScanner(Scanner):
    """C / C++ / ObjC / ObjC++ source scanner.

    Objective-C and Objective-C++ use the same comment syntax as C / C++,
    so this one scanner serves all four languages.
    """

    def __init__(self):
        """Constructor."""
        super(CppScanner, self).__init__()

        # Multi-line comments are delimited by '/*' and '*/'.
        self.comment_start = '/*'
        self.comment_end = '*/'

        # '//' starts a comment which runs to the end of the line.  Note that
        # we treat # at beginning of line as a comment too, so that we ignore
        # preprocessor definitions.
        self.comment_to_eol = ['//', '#']

        # Token pattern.  The alternatives are tried in this order:
        #   ^\s*#    '#' with nothing but whitespace before it on the line
        #   //       comment to end of line
        #   /*  */   multi-line comment start and end
        #   " or '   string or character delimiter.  The nested lookbehind
        #            rejects a quote which follows a backslash, unless that
        #            backslash itself follows another backslash.
        #
        # NOTE(review): the '^\s*#' alternative makes any leading whitespace
        # part of the matched token, whereas comment_to_eol lists a bare
        # '#'.  Confirm that the token comparison in Scanner.ScanLines copes
        # with indented preprocessor lines.
        #
        # TODO: This breaks for strings ending in more than 2 backslashes.
        # Need a pattern which counts only an odd number of backslashes, so
        # the last one thus escapes the quote.
        #
        # TODO: Treat '\' at EOL as a token, and handle it as continuing the
        # previous line.  That is, if in a comment-to-eol, this line is a
        # comment too.
        self.re_token = re.compile(
            r'(^\s*#|//|/\*|\*/|(?<!(?<!\\)\\)["\'])')
172 | |
173 | |
def ScanFile(filename, language):
    """Scans a file for executable lines.

    Picks the scanner which matches the language and runs it over the file.

    Args:
        filename: Path to file to scan.
        language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++')

    Returns:
        A list of executable lines, or an empty list if the file was not a
        handled language.
    """
    if language == 'python':
        scanner = PythonScanner()
    elif language in ['C', 'C++', 'ObjC', 'ObjC++']:
        scanner = CppScanner()
    else:
        # Something we don't handle
        return []

    return scanner.Scan(filename)
OLD | NEW |