Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Unified Diff: tools/lexer_generator/code_generator.jinja

Issue 83583002: Experimental parser: utf8 added to build (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/lexer/lexer-shell.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/code_generator.jinja
diff --git a/tools/lexer_generator/code_generator.jinja b/tools/lexer_generator/code_generator.jinja
index 493b90bbdaeedde0674d55a3695d720c717413e5..f118166ce03c5557d95cd24c394421e2397fb344 100644
--- a/tools/lexer_generator/code_generator.jinja
+++ b/tools/lexer_generator/code_generator.jinja
@@ -74,6 +74,34 @@
{%- endmacro -%}
+{%- macro long_char_check() -%}
+ {#- Emits the C++ boolean expression used as the condition of the
+     `if (...)` that guards the long_char transitions of a DFA state.
+     The expression depends on the template variable `encoding`:
+       utf16: primary_char is compared against `upper_bound`.
+       utf8:  primary_char is tested for being negative.
+              NOTE(review): presumably relies on primary_char being a signed
+              8-bit value so that lead/continuation bytes (>= 0x80) read as
+              negative; confirm against the declaration of primary_char.
+       other: emits plain text that is not valid C++, presumably so that an
+              unsupported encoding fails at compile time of the generated code.
+ -#}
+ {%- if encoding == 'utf16'-%}
+ primary_char > {{upper_bound}}
+ {%- elif encoding == 'utf8'-%}
+ primary_char < 0
+ {%- else -%}
+ uncompilable code for {{encoding}}
+ {%- endif -%}
+{%- endmacro -%}
+
+
+{%- macro long_char_create() -%}
+ {#- Emits the C++ statements that declare `long_char`, the value the
+     long_char transition keys are matched against. The statements depend on
+     the template variable `encoding`:
+       utf16: long_char is a copy of primary_char.
+       utf8:  long_char is decoded with unibrow::Utf8::CalculateValue from
+              cursor_, passing `buffer_end_ - cursor_` as the available length;
+              cursor_ is then advanced by the number of bytes read. If the
+              result is unibrow::Utf8::kBadChar the generated code jumps to
+              the `default_action` label.
+              NOTE(review): cursor_ is advanced before the kBadChar check, so
+              default_action runs with the cursor already moved; confirm this
+              is what default_action expects.
+       other: emits plain text that is not valid C++, presumably so that an
+              unsupported encoding fails at compile time of the generated code.
+ -#}
+ {%- if encoding == 'utf16'-%}
+ const uint32_t long_char = primary_char;
+ {%- elif encoding == 'utf8'-%}
+ unsigned bytes_read = 0;
+ const uint32_t long_char = unibrow::Utf8::CalculateValue(
+ reinterpret_cast<uint8_t*>(cursor_),
+ buffer_end_ - cursor_,
+ &bytes_read);
+ cursor_ += bytes_read;
+ if (long_char == unibrow::Utf8::kBadChar) goto default_action;
+ {%- else -%}
+ uncompilable code for {{encoding}}
+ {%- endif -%}
+{%- endmacro -%}
+
+
{%- macro do_dfa_state(node_number, inline) -%}
{%- set state = dfa_states[node_number] -%}
@@ -140,9 +168,8 @@
{% endfor -%}
{%- if state['long_char_transitions'] -%}
- {# TODO macro this up for utf8 #}
- if (primary_char > {{upper_bound}}) {
- uint32_t long_char = primary_char;
+ if ({{long_char_check()}}) {
+ {{long_char_create()}}
{%- for key, transition_state_id in state['long_char_transitions'] %}
if ({{do_key(key)}}) { // long_char transition
{{ do_transition(transition_state_id) }}
« no previous file with comments | « src/lexer/lexer-shell.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698