| OLD | NEW |
| 1 #include "lexer/experimental-scanner.h" | 1 #include "lexer/experimental-scanner.h" |
| 2 | 2 |
| 3 {%- macro do_key(key) -%} | 3 {%- macro do_key(key) -%} |
| 4 {%- for r in key -%} | 4 {%- for r in key -%} |
| 5 {%- if not loop.first %} || {% endif -%} | 5 {%- if not loop.first %} || {% endif -%} |
| 6 {%- if r[0] == 'PRIMARY_RANGE' -%} | 6 {%- if r[0] == 'PRIMARY_RANGE' -%} |
| 7 {%- if r[1][0] == r[1][1] -%} | 7 {%- if r[1][0] == r[1][1] -%} |
| 8 yych == {{r[1][0]}} | 8 primary_char == {{r[1][0]}} |
| 9 {%- elif r[1][0] == 0 -%} | 9 {%- elif r[1][0] == 0 -%} |
| 10 yych <= {{r[1][1]}} | 10 primary_char <= {{r[1][1]}} |
| 11 {%- elif r[1][1] == upper_bound and not encoding == 'utf16'-%} | 11 {%- elif r[1][1] == upper_bound and not encoding == 'utf16'-%} |
| 12 yych >= {{r[1][0]}} | 12 primary_char >= {{r[1][0]}} |
| 13 {%- else -%} | 13 {%- else -%} |
| 14 ({{r[1][0]}} <= yych && yych <= {{r[1][1]}}) | 14 ({{r[1][0]}} <= primary_char && primary_char <= {{r[1][1]}}) |
| 15 {%- endif -%} | 15 {%- endif -%} |
| 16 {%- elif r[0] == 'CLASS' -%} | 16 {%- elif r[0] == 'CLASS' -%} |
| 17 {%- if r[1] == 'eos' -%} | 17 {%- if r[1] == 'eos' -%} |
| 18 (yych == 0 && cursor_ >= buffer_end_) | 18 (primary_char == 0 && cursor_ >= buffer_end_) |
| 19 {%- elif r[1] == 'zero' -%} | 19 {%- elif r[1] == 'zero' -%} |
| 20 (yych == 0 && cursor_ < buffer_end_) | 20 (primary_char == 0 && cursor_ < buffer_end_) |
| 21 {%- elif encoding == 'latin1' -%} | 21 {%- else %} |
| 22 false /* {{r[1]}} */ | 22 uncompilable code for {{encoding}} {{r[0]}} {{r[1]}} |
| 23 {%- elif encoding == 'utf16' -%} | 23 {%- endif -%} |
| 24 {%- if r[1] == 'byte_order_mark' -%} | 24 {# These classes require long_char and to be outside the primary range #} |
| 25 (yych == 0xfffe || yych == 0xfeff) | 25 {%- elif r[0] == 'LONG_CHAR_CLASS' and encoding in ['utf16', 'utf8'] -%} |
| 26 {%- elif r[1] == 'non_latin_1_whitespace' -%} | 26 {%- if r[1] == 'byte_order_mark' -%} |
| 27 (yych > {{upper_bound}} && unicode_cache_->IsWhiteSpace(yych)) | 27 (long_char == 0xfffe || long_char == 0xfeff) |
| 28 {%- elif r[1] == 'non_latin_1_letter' -%} | 28 {%- elif r[1] == 'call' -%} |
| 29 (yych > {{upper_bound}} && unicode_cache_->IsLetter(yych)) | 29 unicode_cache_->{{r[2]}}(long_char) |
| 30 {%- elif r[1] == 'non_latin_1_identifier_part_not_letter' -%} | 30 {%- elif r[1] == 'invert' -%} |
| 31 (yych > {{upper_bound}} && | 31 !({{do_key(r[2])}}) |
| 32 unicode_cache_->IsIdentifierPartNotLetter(yych)) | 32 {%- elif r[1] == 'catch_all' -%} |
| 33 {%- elif r[1] == 'non_latin_1_line_terminator' -%} | 33 (true || long_char == 0) /* {{r[1]}} */ |
| 34 (yych > {{upper_bound}} && unicode_cache_->IsLineTerminator(yych)) | |
| 35 {%- elif r[1] == 'non_latin_1_everything_else' -%} | |
| 36 {# FIXME: Optimize this away #} | |
| 37 (yych > {{upper_bound}} && | |
| 38 !unicode_cache_->IsWhiteSpace(yych) && | |
| 39 !unicode_cache_->IsLetter(yych) && | |
| 40 !unicode_cache_->IsIdentifierPartNotLetter(yych) && | |
| 41 !unicode_cache_->IsLineTerminator(yych)) | |
| 42 {%- else %} | |
| 43 uncompilable code for {{encoding}} {{r[0]}} {{r[1]}} | |
| 44 {%- endif -%} | |
| 45 {%- else -%} | 34 {%- else -%} |
| 46 uncompilable code for {{encoding}} {{r[0]}} {{r[1]}} | 35 uncompilable code for {{encoding}} {{r[0]}} {{r[1]}} |
| 47 {%- endif -%} | 36 {%- endif -%} |
| 48 {%- else -%} | 37 {%- else -%} |
| 49 uncompilable code for {{encoding}} {{r[0]}} {{r[1]}} | 38 uncompilable code for {{encoding}} {{r[0]}} {{r[1]}} |
| 50 {%- endif -%} | 39 {%- endif -%} |
| 51 {%- endfor -%} | 40 {%- endfor -%} |
| 52 {%- endmacro -%} | 41 {%- endmacro -%} |
| 53 | 42 |
| 54 | 43 |
| 55 {% macro dispatch_action(type, value) -%} | 44 {% macro dispatch_action(type, value) -%} |
| 56 {%- if type == 'code' %} | 45 {%- if type == 'code' %} |
| 57 {{value}} | 46 {{value}} |
| 58 {% elif type == 'terminate' %} | 47 {% elif type == 'terminate' %} |
| 59 PUSH_EOS(); | 48 PUSH_EOS(); |
| 60 {% elif type == 'terminate_illegal' %} | 49 {% elif type == 'terminate_illegal' %} |
| 61 start_ = marker_; BACKWARD(); PUSH_TOKEN(Token::ILLEGAL); | 50 start_ = marker_; BACKWARD(1); PUSH_TOKEN(Token::ILLEGAL); |
| 62 {% elif type == 'skip' %} | 51 {% elif type == 'skip' %} |
| 63 SKIP(); | 52 SKIP(); |
| 64 {% elif type == 'skip_and_terminate' %} | 53 {% elif type == 'skip_and_terminate' %} |
| 65 SKIP(); | 54 SKIP(); |
| 66 --start_; | 55 --start_; |
| 67 {{dispatch_action('terminate', None)}} | 56 {{dispatch_action('terminate', None)}} |
| 68 {% elif type == 'push_line_terminator' %} | 57 {% elif type == 'push_line_terminator' %} |
| 69 PUSH_LINE_TERMINATOR(); | 58 PUSH_LINE_TERMINATOR(); |
| 70 {% elif type == 'push_token' %} | 59 {% elif type == 'push_token' %} |
| 71 PUSH_TOKEN(Token::{{value}}) | 60 PUSH_TOKEN(Token::{{value}}) |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 105 {% if debug_print %} | 94 {% if debug_print %} |
| 106 fprintf(stderr, "state {{state.node_number}}\n"); | 95 fprintf(stderr, "state {{state.node_number}}\n"); |
| 107 {% endif -%} | 96 {% endif -%} |
| 108 | 97 |
| 109 {%- set entry_action = state.entry_action -%} | 98 {%- set entry_action = state.entry_action -%} |
| 110 {%- if entry_action %} | 99 {%- if entry_action %} |
| 111 {{ dispatch_action(entry_action[0], entry_action[1]) }} | 100 {{ dispatch_action(entry_action[0], entry_action[1]) }} |
| 112 {%- endif %} | 101 {%- endif %} |
| 113 | 102 |
| 114 {%- if debug_print %} | 103 {%- if debug_print %} |
| 115 fprintf(stderr, "char at hand is %c (%d)\n", yych, yych); | 104 fprintf(stderr, "char at hand is %c (%d)\n", primary_char, primary_char); |
| 116 {% endif -%} | 105 {% endif -%} |
| 117 | 106 |
| 118 {%- macro do_transition(transition_state_id) -%} | 107 {%- macro do_transition(transition_state_id) -%} |
| 119 {%- set inline_transition = dfa_states[transition_state_id]['inline'] %} | 108 {%- set inline_transition = dfa_states[transition_state_id]['inline'] %} |
| 120 FORWARD(); | 109 FORWARD(); |
| 121 {%- if inline_transition %} | 110 {%- if inline_transition %} |
| 122 {{ do_dfa_state(transition_state_id, True) }} | 111 {{ do_dfa_state(transition_state_id, True) }} |
| 123 {% else %} | 112 {% else %} |
| 124 goto code_{{transition_state_id}}; | 113 goto code_{{transition_state_id}}; |
| 125 {% endif %} | 114 {% endif %} |
| 126 {%- endmacro -%} | 115 {%- endmacro -%} |
| 127 | 116 |
| 128 {%- if state['switch_transitions'] -%} | 117 {%- if state['switch_transitions'] -%} |
| 129 switch(yych) { | 118 switch(primary_char) { |
| 130 {%- for ranges, transition_state_id in state['switch_transitions'] %} | 119 {%- for ranges, transition_state_id in state['switch_transitions'] %} |
| 131 {%- for r in ranges -%} | 120 {%- for r in ranges -%} |
| 132 {%- for key in range(r[0], r[1] + 1) -%} | 121 {%- for key in range(r[0], r[1] + 1) -%} |
| 133 case {{key}}: | 122 case {{key}}: |
| 134 {% endfor %} | 123 {% endfor %} |
| 135 {%- endfor -%} | 124 {%- endfor -%} |
| 136 {{ do_transition(transition_state_id) }} | 125 {{ do_transition(transition_state_id) }} |
| 137 {% endfor -%} | 126 {% endfor -%} |
| 138 } | 127 } |
| 139 {%- endif -%} | 128 {%- endif -%} |
| 140 | 129 |
| 141 {%- for key, transition_state_id in state.transitions %} | 130 {%- for key, transition_state_id in state.transitions %} |
| 142 if ({{do_key(key)}}) { | 131 if ({{do_key(key)}}) { // normal if transition |
| 143 {{ do_transition(transition_state_id) }} | 132 {{ do_transition(transition_state_id) }} |
| 144 } | 133 } |
| 145 {% endfor -%} | 134 {% endfor -%} |
| 146 | 135 |
| 147 {%- for key, transition_state_id in state['deferred_transitions'] %} | 136 {%- for key, transition_state_id in state['deferred_transitions'] %} |
| 148 if ({{do_key(key)}}) { // deferred transition | 137 if ({{do_key(key)}}) { // deferred transition |
| 149 {{ do_transition(transition_state_id) }} | 138 {{ do_transition(transition_state_id) }} |
| 150 } | 139 } |
| 151 {% endfor -%} | 140 {% endfor -%} |
| 152 | 141 |
| 142 {%- if state['long_char_transitions'] -%} |
| 143 {# TODO macro this up for utf8 #} |
| 144 if (primary_char > {{upper_bound}}) { |
| 145 uint32_t long_char = primary_char; |
| 146 {%- for key, transition_state_id in state['long_char_transitions'] %} |
| 147 if ({{do_key(key)}}) { // long_char transition |
| 148 {{ do_transition(transition_state_id) }} |
| 149 } |
| 150 {% endfor -%} |
| 151 } |
| 152 {%- endif-%} |
| 153 |
| 153 {%- set match_action = state.match_action -%} | 154 {%- set match_action = state.match_action -%} |
| 154 | 155 |
| 155 {%- if match_action %} | 156 {%- if match_action %} |
| 156 {{ dispatch_action(match_action[0], match_action[1]) }} | 157 {{ dispatch_action(match_action[0], match_action[1]) }} |
| 157 goto code_start; | 158 goto code_start; |
| 158 {% else %} | 159 {% else %} |
| 159 goto default_action; | 160 goto default_action; |
| 160 {%- endif %} | 161 {%- endif %} |
| 161 | 162 |
| 162 {%- endmacro %} | 163 {%- endmacro %} |
| (...skipping 19 matching lines...) Expand all Loading... |
| 182 #define PUSH_EOS() { \ | 183 #define PUSH_EOS() { \ |
| 183 cursor_ -= 1; \ | 184 cursor_ -= 1; \ |
| 184 PUSH_TOKEN(Token::EOS); \ | 185 PUSH_TOKEN(Token::EOS); \ |
| 185 } | 186 } |
| 186 | 187 |
| 187 #define PUSH_LINE_TERMINATOR(s) { \ | 188 #define PUSH_LINE_TERMINATOR(s) { \ |
| 188 start_ = cursor_; \ | 189 start_ = cursor_; \ |
| 189 just_seen_line_terminator_ = true; \ | 190 just_seen_line_terminator_ = true; \ |
| 190 } | 191 } |
| 191 | 192 |
| 192 #define FORWARD() { \ | 193 #define FORWARD() { \ |
| 193 if (++cursor_ >= buffer_end_) yych = 0; \ | 194 if (++cursor_ >= buffer_end_) primary_char = 0; \ |
| 194 else yych = *(cursor_); \ | 195 else primary_char = *(cursor_); \ |
| 195 } | 196 } |
| 196 | 197 |
| 197 #define BACKWARD() { \ | 198 #define BACKWARD(n) { \ |
| 198 if (--cursor_ >= buffer_end_) yych = 0; \ | 199 cursor_ -= n; \ |
| 199 else yych = *(cursor_); \ | 200 if (cursor_ >= buffer_end_) primary_char = 0; \ |
| 201 else primary_char = *(cursor_); \ |
| 200 } | 202 } |
| 201 | 203 |
| 202 #define SKIP() { \ | 204 #define SKIP() { \ |
| 203 start_ = cursor_; \ | 205 start_ = cursor_; \ |
| 204 } | 206 } |
| 205 | 207 |
| 206 namespace v8 { | 208 namespace v8 { |
| 207 namespace internal { | 209 namespace internal { |
| 208 template<> | 210 template<> |
| 209 Token::Value ExperimentalScanner<{{char_type}}>::Next(int* beg_pos_to_return, | 211 Token::Value ExperimentalScanner<{{char_type}}>::Next(int* beg_pos_to_return, |
| 210 int* end_pos_to_return) { | 212 int* end_pos_to_return) { |
| 211 // Setup environment. | 213 // Setup environment. |
| 212 {{char_type}} yych; | 214 {{char_type}} primary_char; |
| 213 if (cursor_ >= buffer_end_) yych = 0; | 215 if (cursor_ >= buffer_end_) primary_char = 0; |
| 214 else yych = *(cursor_); | 216 else primary_char = *(cursor_); |
| 215 | 217 |
| 216 {# first node is start node #} | 218 {# first node is start node #} |
| 217 {% for dfa_state in dfa_states -%} | 219 {% for dfa_state in dfa_states -%} |
| 218 {%- set inline = dfa_state['inline'] -%} | 220 {%- set inline = dfa_state['inline'] -%} |
| 219 {%- if not inline %} | 221 {%- if not inline %} |
| 220 {{ do_dfa_state(dfa_state['node_number'], False) }} | 222 {{ do_dfa_state(dfa_state['node_number'], False) }} |
| 221 {%- endif -%} | 223 {%- endif -%} |
| 222 {%- endfor %} | 224 {%- endfor %} |
| 223 | 225 |
| 224 // Should never fall off the edge. | 226 // Should never fall off the edge. |
| 225 goto fell_through; | 227 goto fell_through; |
| 226 // Execute the default action. | 228 // Execute the default action. |
| 227 default_action: | 229 default_action: |
| 228 {%- if debug_print %} | 230 {%- if debug_print %} |
| 229 fprintf(stderr, "default action\n"); | 231 fprintf(stderr, "default action\n"); |
| 230 {% endif -%} | 232 {% endif -%} |
| 231 {{dispatch_action(default_action[0], default_action[1])}} | 233 {{dispatch_action(default_action[0], default_action[1])}} |
| 232 FORWARD(); | 234 FORWARD(); |
| 233 goto code_start; | 235 goto code_start; |
| 234 | 236 |
| 235 fell_through: | 237 fell_through: |
| 236 CHECK(false); | 238 CHECK(false); |
| 237 } | 239 } |
| 238 } } | 240 } } |
| 239 | 241 |
| OLD | NEW |