Index: third_party/cython/src/Cython/Plex/Scanners.py |
diff --git a/third_party/cython/src/Cython/Plex/Scanners.py b/third_party/cython/src/Cython/Plex/Scanners.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..315742f309de1ad94f6cebbf6e485cd004d62f55 |
--- /dev/null |
+++ b/third_party/cython/src/Cython/Plex/Scanners.py |
@@ -0,0 +1,332 @@ |
+#======================================================================= |
+# |
+# Python Lexical Analyser |
+# |
+# |
+# Scanning an input stream |
+# |
+#======================================================================= |
+ |
+import cython |
+# Declare the module-level marker names as plain Python objects for Cython. |
+cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object) |
+ |
+import Errors |
+from Regexps import BOL, EOL, EOF |
+ |
+# Unique sentinel used as the default of state.get() in |
+# Scanner.run_machine_inlined(), so that a missing transition can be told |
+# apart from any value actually stored in the state table. |
+NOT_FOUND = object() |
+ |
+class Scanner(object): |
+ """ |
+ A Scanner is used to read tokens from a stream of characters |
+ using the token set specified by a Plex.Lexicon. |
+ |
+ Constructor: |
+ |
+      Scanner(lexicon, stream, name = '', initial_pos = None) |
+ |
+ See the docstring of the __init__ method for details. |
+ |
+ Methods: |
+ |
+ See the docstrings of the individual methods for more |
+ information. |
+ |
+ read() --> (value, text) |
+ Reads the next lexical token from the stream. |
+ |
+ position() --> (name, line, col) |
+ Returns the position of the last token read using the |
+ read() method. |
+ |
+ begin(state_name) |
+ Causes scanner to change state. |
+ |
+ produce(value [, text]) |
+ Causes return of a token value to the caller of the |
+ Scanner. |
+ |
+ """ |
+ |
+# lexicon = None # Lexicon |
+# stream = None # file-like object |
+# name = '' |
+# buffer = '' |
+# buf_start_pos = 0 # position in input of start of buffer |
+# next_pos = 0 # position in input of next char to read |
+# cur_pos = 0 # position in input of current char |
+# cur_line = 1 # line number of current char |
+# cur_line_start = 0 # position in input of start of current line |
+# start_pos = 0 # position in input of start of token |
+# start_line = 0 # line number of start of token |
+# start_col = 0 # position in line of start of token |
+# text = None # text of last token read |
+# initial_state = None # Node |
+# state_name = '' # Name of initial state |
+# queue = None # list of tokens to be returned |
+# trace = 0 |
+ |
+    def __init__(self, lexicon, stream, name = '', initial_pos = None): |
+        """ |
+        Scanner(lexicon, stream, name = '', initial_pos = None) |
+ |
+        |lexicon| is the Plex.Lexicon instance that defines the tokens |
+        to be recognised. |
+ |
+        |stream| is a file object, or any object offering a compatible |
+        read() method, from which the input characters are taken. |
+ |
+        |name| is optional; it may be the name of the file being scanned |
+        or any other identifying string. |
+ |
+        |initial_pos| is optional; when given, item 1 becomes the current |
+        line number and the negation of item 2 becomes the start-of-line |
+        offset, so columns computed on the first line are shifted by |
+        item 2. |
+        """ |
+        # What the caller handed us. |
+        self.lexicon = lexicon |
+        self.stream = stream |
+        self.name = name |
+ |
+        # Empty buffer, everything positioned at the start of the input. |
+        self.trace = 0 |
+        self.buffer = u'' |
+        self.buf_start_pos = self.next_pos = self.cur_pos = 0 |
+        self.cur_line = 1 |
+        self.start_pos = self.start_line = self.start_col = 0 |
+        self.text = None |
+        self.state_name = None |
+        self.queue = [] |
+        self.initial_state = None |
+ |
+        # Enter the default ('') scanner state. |
+        self.begin('') |
+ |
+        # Positions are assigned again after begin(), as in the original |
+        # initialisation sequence. |
+        self.next_pos = self.cur_pos = self.cur_line_start = 0 |
+        self.cur_char = BOL |
+        self.input_state = 1 |
+        if initial_pos is not None: |
+            first_line, line_start = initial_pos[1], -initial_pos[2] |
+            self.cur_line = first_line |
+            self.cur_line_start = line_start |
+ |
+    def read(self): |
+        """ |
+        Return the next lexical token as a tuple (value, text). |
+ |
+        |value| is whatever the Lexicon's action for the token yields and |
+        |text| is the string that was read from the stream. Tokens are |
+        scanned until at least one has been queued; actions that yield |
+        None queue nothing, so scanning simply continues. At end of file |
+        (None, '') is queued and the eof() hook is called. |
+        """ |
+        pending = self.queue |
+        while not pending: |
+            self.text, action = self.scan_a_token() |
+            if action is not None: |
+                value = action.perform(self, self.text) |
+                if value is not None: |
+                    self.produce(value) |
+                continue |
+            # No action means the input is exhausted. |
+            self.produce(None) |
+            self.eof() |
+        # Hand out the oldest queued token. |
+        return pending.pop(0) |
+ |
+    def scan_a_token(self): |
+        """ |
+        Run the machine once from the current position and return |
+        (text, action) for the input sequence it recognised. |
+ |
+        Returns ('', None) on end of file. Raises |
+        Errors.UnrecognizedInput when nothing was recognised and the |
+        scanner is not at end of file. |
+        """ |
+        # Remember where this token starts; position() reports these. |
+        token_start = self.cur_pos |
+        self.start_pos = token_start |
+        self.start_line = self.cur_line |
+        self.start_col = token_start - self.cur_line_start |
+ |
+        action = self.run_machine_inlined() |
+        if action is None: |
+            if self.cur_pos == self.start_pos: |
+                # Step past a pending EOL marker before testing for EOF. |
+                if self.cur_char is EOL: |
+                    self.next_char() |
+                if self.cur_char is None or self.cur_char is EOF: |
+                    return (u'', None) |
+            raise Errors.UnrecognizedInput(self, self.state_name) |
+ |
+        if self.trace: |
+            print("Scanner: read: Performing %s %d:%d" % ( |
+                action, self.start_pos, self.cur_pos)) |
+        # Positions are absolute; the buffer starts at buf_start_pos. |
+        first = self.start_pos - self.buf_start_pos |
+        last = self.cur_pos - self.buf_start_pos |
+        return (self.buffer[first:last], action) |
+ |
+    def run_machine_inlined(self): |
+        """ |
+        Inlined version of run_machine for speed. |
+ |
+        Starting from self.initial_state, follows transitions for |
+        successive input characters until no transition applies, then |
+        falls back to the most recent state that carried an action. |
+        States are accessed as mappings: an 'action' entry, a 'number' |
+        entry (only read when tracing), one entry per input character or |
+        marker, and an optional 'else' entry used when the character has |
+        no entry of its own. |
+ |
+        The scanner position (cur_pos, cur_line, cur_line_start, |
+        cur_char, input_state, next_pos) is copied into locals, advanced |
+        there, and written back to self before returning. If no state |
+        with an action was reached, the advanced position is what gets |
+        written back. |
+ |
+        input_state selects what the next step delivers: |
+          1 -- a character read from the buffer (EOL is delivered in |
+               place of a newline or of end of data) |
+          2 -- the newline character itself, following EOL |
+          3 -- BOL for the following line; the line count is bumped |
+          4 -- EOF, following the EOL delivered at end of data |
+          5 -- u'' from then on |
+ |
+        Returns the action of the last state reached that had one, or |
+        None if there was no such state. |
+        """ |
+        state = self.initial_state |
+        cur_pos = self.cur_pos |
+        cur_line = self.cur_line |
+        cur_line_start = self.cur_line_start |
+        cur_char = self.cur_char |
+        input_state = self.input_state |
+        next_pos = self.next_pos |
+        buffer = self.buffer |
+        buf_start_pos = self.buf_start_pos |
+        buf_len = len(buffer) |
+        # b_* hold the snapshot taken at the last state that had an action. |
+        b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \ |
+            None, 0, 0, 0, u'', 0, 0 |
+        trace = self.trace |
+        while 1: |
+            if trace: #TRACE# |
+                print("State %d, %d/%d:%s -->" % ( #TRACE# |
+                    state['number'], input_state, cur_pos, repr(cur_char))) #TRACE# |
+            # Begin inlined self.save_for_backup() |
+            #action = state.action #@slow |
+            action = state['action'] #@fast |
+            if action is not None: |
+                b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \ |
+                    action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos |
+            # End inlined self.save_for_backup() |
+            c = cur_char |
+            #new_state = state.new_state(c) #@slow |
+            new_state = state.get(c, NOT_FOUND) #@fast |
+            if new_state is NOT_FOUND: #@fast |
+                # A falsy cur_char (such as the u'' delivered in input |
+                # state 5) skips the 'else' lookup and so blocks the machine. |
+                new_state = c and state.get('else') #@fast |
+            if new_state: |
+                if trace: #TRACE# |
+                    print("State %d" % new_state['number']) #TRACE# |
+                state = new_state |
+                # Begin inlined: self.next_char() |
+                if input_state == 1: |
+                    cur_pos = next_pos |
+                    # Begin inlined: c = self.read_char() |
+                    buf_index = next_pos - buf_start_pos |
+                    if buf_index < buf_len: |
+                        c = buffer[buf_index] |
+                        next_pos = next_pos + 1 |
+                    else: |
+                        # Buffer exhausted: drop everything before the start |
+                        # of the current token (self.start_pos) and append up |
+                        # to 0x1000 more characters from the stream. |
+                        discard = self.start_pos - buf_start_pos |
+                        data = self.stream.read(0x1000) |
+                        buffer = self.buffer[discard:] + data |
+                        self.buffer = buffer |
+                        buf_start_pos = buf_start_pos + discard |
+                        self.buf_start_pos = buf_start_pos |
+                        buf_len = len(buffer) |
+                        buf_index = buf_index - discard |
+                        if data: |
+                            c = buffer[buf_index] |
+                            next_pos = next_pos + 1 |
+                        else: |
+                            # Nothing more to read: end of input. |
+                            c = u'' |
+                    # End inlined: c = self.read_char() |
+                    if c == u'\n': |
+                        cur_char = EOL |
+                        input_state = 2 |
+                    elif not c: |
+                        cur_char = EOL |
+                        input_state = 4 |
+                    else: |
+                        cur_char = c |
+                elif input_state == 2: |
+                    cur_char = u'\n' |
+                    input_state = 3 |
+                elif input_state == 3: |
+                    cur_line = cur_line + 1 |
+                    cur_line_start = cur_pos = next_pos |
+                    cur_char = BOL |
+                    input_state = 1 |
+                elif input_state == 4: |
+                    cur_char = EOF |
+                    input_state = 5 |
+                else: # input_state = 5 |
+                    cur_char = u'' |
+                # End inlined self.next_char() |
+            else: # not new_state |
+                if trace: #TRACE# |
+                    print("blocked") #TRACE# |
+                # Begin inlined: action = self.back_up() |
+                if b_action is not None: |
+                    # Rewind to the snapshot taken at the last state with an action. |
+                    (action, cur_pos, cur_line, cur_line_start, |
+                        cur_char, input_state, next_pos) = \ |
+                        (b_action, b_cur_pos, b_cur_line, b_cur_line_start, |
+                         b_cur_char, b_input_state, b_next_pos) |
+                else: |
+                    action = None |
+                break # while 1 |
+                # End inlined: action = self.back_up() |
+        # Publish the final position back onto the scanner. |
+        self.cur_pos = cur_pos |
+        self.cur_line = cur_line |
+        self.cur_line_start = cur_line_start |
+        self.cur_char = cur_char |
+        self.input_state = input_state |
+        self.next_pos = next_pos |
+        if trace: #TRACE# |
+            if action is not None: #TRACE# |
+                print("Doing %s" % action) #TRACE# |
+        return action |
+ |
+    def read_char(self): |
+        """ |
+        Get the next input character, refilling the buffer from the |
+        stream if necessary. Returns u'' at end of input. |
+ |
+        This is the out-of-line counterpart of the code inlined in |
+        run_machine_inlined(); next_char() calls it in input state 1. |
+        """ |
+        next_pos = self.next_pos |
+        buf_start_pos = self.buf_start_pos |
+        buf_index = next_pos - buf_start_pos |
+        if buf_index >= len(self.buffer): |
+            # Buffer exhausted: drop everything before the start of the |
+            # current token and append up to 0x1000 more characters. |
+            discard = self.start_pos - buf_start_pos |
+            data = self.stream.read(0x1000) |
+            self.buffer = self.buffer[discard:] + data |
+            self.buf_start_pos = buf_start_pos + discard |
+            buf_index = buf_index - discard |
+            if not data: |
+                return u'' |
+        c = self.buffer[buf_index] |
+        self.next_pos = next_pos + 1 |
+        return c |
+ |
+    def next_char(self): |
+        """ |
+        Advance the scanner by one step and store what it delivers in |
+        self.cur_char. |
+ |
+        What is delivered depends on self.input_state: |
+          1 -- the next character from the input; EOL is delivered in |
+               place of a newline (moving to state 2) or of end of |
+               input (moving to state 4) |
+          2 -- the newline character itself, then state 3 |
+          3 -- BOL for the following line; the line number is bumped |
+               and the start-of-line position reset, then state 1 |
+          4 -- EOF, then state 5 |
+          5 -- u'' from then on |
+        """ |
+        input_state = self.input_state |
+        if self.trace: |
+            print("Scanner: next: %s [%d] %d" % (" "*20, input_state, self.cur_pos)) |
+        if input_state == 1: |
+            self.cur_pos = self.next_pos |
+            c = self.read_char() |
+            if c == u'\n': |
+                self.cur_char = EOL |
+                self.input_state = 2 |
+            elif not c: |
+                self.cur_char = EOL |
+                self.input_state = 4 |
+            else: |
+                self.cur_char = c |
+        elif input_state == 2: |
+            self.cur_char = u'\n' |
+            self.input_state = 3 |
+        elif input_state == 3: |
+            self.cur_line = self.cur_line + 1 |
+            self.cur_line_start = self.cur_pos = self.next_pos |
+            self.cur_char = BOL |
+            self.input_state = 1 |
+        elif input_state == 4: |
+            self.cur_char = EOF |
+            self.input_state = 5 |
+        else: # input_state = 5 |
+            self.cur_char = u'' |
+        if self.trace: |
+            # Note: the state printed here is the one on entry. |
+            print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char))) |
+ |
+    def position(self): |
+        """ |
+        Report where the last token returned by read() started, as a |
+        tuple (name, line, col). |
+ |
+        |name| is the name given to the Scanner constructor, |line| is |
+        the 1-based line number in the stream, and |col| is the 0-based |
+        offset within that line of the token's first character. |
+        """ |
+        name = self.name |
+        line = self.start_line |
+        col = self.start_col |
+        return (name, line, col) |
+ |
+    def get_position(self): |
+        """ |
+        Python-callable wrapper that forwards to position(); meant for |
+        error reporting only. |
+        """ |
+        location = self.position() |
+        return location |
+ |
+    def begin(self, state_name): |
+        """ |
+        Switch the scanner to the state called |state_name|: the |
+        lexicon's initial machine state for that name is looked up and |
+        both it and the name are recorded on the scanner. |
+        """ |
+        lexicon = self.lexicon |
+        self.initial_state = lexicon.get_initial_state(state_name) |
+        self.state_name = state_name |
+ |
+    def produce(self, value, text = None): |
+        """ |
+        Queue |value| as a token to be returned from read(); intended |
+        to be called from an action procedure. When |text| is omitted |
+        (None), the text of the token just scanned is used in its place. |
+ |
+        An action may call produce() several times; the tokens are then |
+        handed out one per read() call, in order, and scanning resumes |
+        only once the queue has been emptied. |
+        """ |
+        token_text = self.text if text is None else text |
+        self.queue.append((value, token_text)) |
+ |
+    def eof(self): |
+        """ |
+        Hook called by read() when scanning yields no action, i.e. at |
+        end of file, right after the final None token has been queued. |
+        The base implementation does nothing; override this method if |
+        you want something to be done at end of file. |
+        """ |