| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright (C) 2013 Google Inc. All rights reserved. | |
| 3 # | |
| 4 # Redistribution and use in source and binary forms, with or without | |
| 5 # modification, are permitted provided that the following conditions are | |
| 6 # met: | |
| 7 # | |
| 8 # * Redistributions of source code must retain the above copyright | |
| 9 # notice, this list of conditions and the following disclaimer. | |
| 10 # * Redistributions in binary form must reproduce the above | |
| 11 # copyright notice, this list of conditions and the following disclaimer | |
| 12 # in the documentation and/or other materials provided with the | |
| 13 # distribution. | |
| 14 # * Neither the name of Google Inc. nor the names of its | |
| 15 # contributors may be used to endorse or promote products derived from | |
| 16 # this software without specific prior written permission. | |
| 17 # | |
| 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 | |
| 30 import io | |
| 31 import itertools | |
| 32 import re | |
| 33 import sys | |
| 34 | |
| 35 | |
| 36 class BadInput(Exception): | |
| 37 """Unsupported input has been found.""" | |
| 38 | |
| 39 | |
| 40 class SwitchCase(object): | |
| 41 """Represents a CASE block.""" | |
| 42 def __init__(self, identifier, block): | |
| 43 self.identifier = identifier | |
| 44 self.block = block | |
| 45 | |
| 46 | |
| 47 class Optimizer(object): | |
| 48 """Generates optimized identifier matching code.""" | |
| 49 def __init__(self, output_file, array_variable, length_variable): | |
| 50 self.output_file = output_file | |
| 51 self.array_variable = array_variable | |
| 52 self.length_variable = length_variable | |
| 53 | |
| 54 def inspect(self, cases): | |
| 55 lengths = list(set([len(c.identifier) for c in cases])) | |
| 56 lengths.sort() | |
| 57 | |
| 58 def response(length): | |
| 59 self.inspect_array([c for c in cases if len(c.identifier) == length]
, range(length)) | |
| 60 self.write_selection(self.length_variable, lengths, str, response) | |
| 61 | |
| 62 def score(self, alternatives): | |
| 63 return -sum([len(list(count)) ** 2 for _, count in itertools.groupby(sor
ted(alternatives))]) | |
| 64 | |
| 65 def choose_selection_pos(self, cases, pending): | |
| 66 candidates = [pos for pos in pending if all(alternative.isalpha() for al
ternative in [c.identifier[pos] for c in cases])] | |
| 67 if not candidates: | |
| 68 raise BadInput('Case-insensitive switching on non-alphabetic charact
ers not yet implemented') | |
| 69 return sorted(candidates, key=lambda pos: self.score([c.identifier[pos]
for c in cases]))[0] | |
| 70 | |
| 71 def inspect_array(self, cases, pending): | |
| 72 assert len(cases) >= 1 | |
| 73 if pending: | |
| 74 common = [pos for pos in pending | |
| 75 if len(set([c.identifier[pos] for c in cases])) == 1] | |
| 76 if common: | |
| 77 identifier = cases[0].identifier | |
| 78 for index in xrange(len(common)): | |
| 79 if index == 0: | |
| 80 self.output_file.write(u'if (LIKELY(') | |
| 81 else: | |
| 82 self.output_file.write(u' && ') | |
| 83 pos = common[index] | |
| 84 if identifier[pos].isalpha(): | |
| 85 self.output_file.write("(%s[%d] | 0x20) == '%s'" % | |
| 86 (self.array_variable, pos, identi
fier[pos])) | |
| 87 else: | |
| 88 self.output_file.write("%s[%d] == '%s'" % | |
| 89 (self.array_variable, pos, identi
fier[pos])) | |
| 90 self.output_file.write(u')) {\n') | |
| 91 next_pending = list(set(pending) - set(common)) | |
| 92 next_pending.sort() | |
| 93 self.inspect_array(cases, next_pending) | |
| 94 self.output_file.write(u'}\n') | |
| 95 else: | |
| 96 pos = self.choose_selection_pos(cases, pending) | |
| 97 next_pending = filter(lambda p: p != pos, pending) | |
| 98 | |
| 99 alternatives = list(set([c.identifier[pos] for c in cases])) | |
| 100 alternatives.sort() | |
| 101 | |
| 102 def literal(alternative): | |
| 103 if isinstance(alternative, int): | |
| 104 return str(alternative) | |
| 105 else: | |
| 106 return "'%s'" % alternative | |
| 107 | |
| 108 def response(alternative): | |
| 109 self.inspect_array([c for c in cases if c.identifier[pos] ==
alternative], | |
| 110 next_pending) | |
| 111 | |
| 112 expression = '(%s[%d] | 0x20)' % (self.array_variable, pos) | |
| 113 self.write_selection(expression, alternatives, literal, response
) | |
| 114 else: | |
| 115 assert len(cases) == 1 | |
| 116 for block_line in cases[0].block: | |
| 117 self.output_file.write(block_line) | |
| 118 | |
| 119 def write_selection(self, expression, alternatives, literal, response): | |
| 120 if len(alternatives) == 1: | |
| 121 self.output_file.write(u'if (LIKELY(%s == %s)) {\n' % (expression, l
iteral(alternatives[0]))) | |
| 122 response(alternatives[0]) | |
| 123 self.output_file.write(u'}\n') | |
| 124 elif len(alternatives) == 2: | |
| 125 self.output_file.write(u'if (%s == %s) {\n' % (expression, literal(a
lternatives[0]))) | |
| 126 response(alternatives[0]) | |
| 127 self.output_file.write(u'} else if (LIKELY(%s == %s)) {\n' % (expres
sion, literal(alternatives[1]))) | |
| 128 response(alternatives[1]) | |
| 129 self.output_file.write(u'}\n') | |
| 130 else: | |
| 131 self.output_file.write('switch (%s) {\n' % expression) | |
| 132 for alternative in alternatives: | |
| 133 self.output_file.write(u'case %s: {\n' % literal(alternative)) | |
| 134 response(alternative) | |
| 135 self.output_file.write(u'} break;\n') | |
| 136 self.output_file.write(u'}\n') | |
| 137 | |
| 138 | |
| 139 class LineProcessor(object): | |
| 140 def process_line(self, line): | |
| 141 pass | |
| 142 | |
| 143 | |
| 144 class MainLineProcessor(LineProcessor): | |
| 145 """Processes the contents of an input file.""" | |
| 146 SWITCH_PATTERN = re.compile(r'\s*SWITCH\s*\((\w*),\s*(\w*)\) \{$') | |
| 147 | |
| 148 def __init__(self, output_file): | |
| 149 self.output_file = output_file | |
| 150 | |
| 151 def process_line(self, line): | |
| 152 match_switch = MainLineProcessor.SWITCH_PATTERN.match(line) | |
| 153 if match_switch: | |
| 154 array_variable = match_switch.group(1) | |
| 155 length_variable = match_switch.group(2) | |
| 156 return SwitchLineProcessor(self, self.output_file, array_variable, l
ength_variable) | |
| 157 else: | |
| 158 self.output_file.write(line) | |
| 159 return self | |
| 160 | |
| 161 | |
| 162 class SwitchLineProcessor(LineProcessor): | |
| 163 """Processes the contents of a SWITCH block.""" | |
| 164 CASE_PATTERN = re.compile(r'\s*CASE\s*\(\"([a-z0-9_\-\(]*)\"\) \{$') | |
| 165 CLOSE_BRACE_PATTERN = re.compile(r'\s*\}$') | |
| 166 EMPTY_PATTERN = re.compile(r'\s*$') | |
| 167 | |
| 168 def __init__(self, parent, output_file, array_variable, length_variable): | |
| 169 self.parent = parent | |
| 170 self.output_file = output_file | |
| 171 self.array_variable = array_variable | |
| 172 self.length_variable = length_variable | |
| 173 self.cases = [] | |
| 174 | |
| 175 def process_line(self, line): | |
| 176 match_case = SwitchLineProcessor.CASE_PATTERN.match(line) | |
| 177 match_close_brace = SwitchLineProcessor.CLOSE_BRACE_PATTERN.match(line) | |
| 178 match_empty = SwitchLineProcessor.EMPTY_PATTERN.match(line) | |
| 179 if match_case: | |
| 180 identifier = match_case.group(1) | |
| 181 return CaseLineProcessor(self, self.output_file, identifier) | |
| 182 elif match_close_brace: | |
| 183 Optimizer(self.output_file, self.array_variable, self.length_variabl
e).inspect(self.cases) | |
| 184 return self.parent | |
| 185 elif match_empty: | |
| 186 return self | |
| 187 else: | |
| 188 raise BadInput('Invalid line within SWITCH: %s' % line) | |
| 189 | |
| 190 def add_case(self, latest_case): | |
| 191 if latest_case.identifier in [c.identifier for c in self.cases]: | |
| 192 raise BadInput('Repeated case: %s' % latest_case.identifier) | |
| 193 self.cases.append(latest_case) | |
| 194 | |
| 195 | |
| 196 class CaseLineProcessor(LineProcessor): | |
| 197 """Processes the contents of a CASE block.""" | |
| 198 CLOSE_BRACE_PATTERN = re.compile(r'\s*\}$') | |
| 199 BREAK_PATTERN = re.compile(r'break;') | |
| 200 | |
| 201 def __init__(self, parent, output_file, identifier): | |
| 202 self.parent = parent | |
| 203 self.output_file = output_file | |
| 204 self.identifier = identifier | |
| 205 self.block = [] | |
| 206 | |
| 207 def process_line(self, line): | |
| 208 match_close_brace = CaseLineProcessor.CLOSE_BRACE_PATTERN.match(line) | |
| 209 match_break = CaseLineProcessor.BREAK_PATTERN.search(line) | |
| 210 if match_close_brace: | |
| 211 self.parent.add_case(SwitchCase(self.identifier, self.block)) | |
| 212 return self.parent | |
| 213 elif match_break: | |
| 214 raise BadInput('break within CASE not supported: %s' % line) | |
| 215 else: | |
| 216 self.block.append(line) | |
| 217 return self | |
| 218 | |
| 219 | |
| 220 def process_file(input_name, output_name): | |
| 221 """Transforms input file into legal C++ source code.""" | |
| 222 with io.open(input_name, 'r', -1, 'utf-8') as input_file: | |
| 223 with io.open(output_name, 'w', -1, 'utf-8') as output_file: | |
| 224 processor = MainLineProcessor(output_file) | |
| 225 input_lines = input_file.readlines() | |
| 226 for line in input_lines: | |
| 227 processor = processor.process_line(line) | |
| 228 | |
| 229 | |
| 230 if __name__ == '__main__': | |
| 231 process_file(sys.argv[1], sys.argv[2]) | |
| OLD | NEW |