Index: third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl |
diff --git a/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl b/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl |
index c3083ee36863c29a98a61f4af3c7ea9932ff1c18..a2e9b1da29316fcd28b753627d6250bb761d50e7 100644 |
--- a/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl |
+++ b/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl |
@@ -1,77 +1,5 @@ |
-# |
-# Parameter $zName must be a path to the file UnicodeData.txt. This command |
-# reads the file and returns a list of mappings required to remove all |
-# diacritical marks from a unicode string. Each mapping is itself a list |
-# consisting of two elements - the unicode codepoint and the single ASCII |
-# character that it should be replaced with, or an empty string if the |
-# codepoint should simply be removed from the input. Examples: |
-# |
-# { 224 a } (replace codepoint 224 to "a") |
-# { 769 "" } (remove codepoint 769 from input) |
-# |
-# Mappings are only returned for non-upper case codepoints. It is assumed |
-# that the input has already been folded to lower case. |
-# |
-proc rd_load_unicodedata_text {zName} { |
- global tl_lookup_table |
- |
- set fd [open $zName] |
- set lField { |
- code |
- character_name |
- general_category |
- canonical_combining_classes |
- bidirectional_category |
- character_decomposition_mapping |
- decimal_digit_value |
- digit_value |
- numeric_value |
- mirrored |
- unicode_1_name |
- iso10646_comment_field |
- uppercase_mapping |
- lowercase_mapping |
- titlecase_mapping |
- } |
- set lRet [list] |
- |
- while { ![eof $fd] } { |
- set line [gets $fd] |
- if {$line == ""} continue |
- |
- set fields [split $line ";"] |
- if {[llength $fields] != [llength $lField]} { error "parse error: $line" } |
- foreach $lField $fields {} |
- if { [llength $character_decomposition_mapping]!=2 |
- || [string is xdigit [lindex $character_decomposition_mapping 0]]==0 |
- } { |
- continue |
- } |
- |
- set iCode [expr "0x$code"] |
- set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"] |
- set iDia [expr "0x[lindex $character_decomposition_mapping 1]"] |
- |
- if {[info exists tl_lookup_table($iCode)]} continue |
- |
- if { ($iAscii >= 97 && $iAscii <= 122) |
- || ($iAscii >= 65 && $iAscii <= 90) |
- } { |
- lappend lRet [list $iCode [string tolower [format %c $iAscii]]] |
- set dia($iDia) 1 |
- } |
- } |
- |
- foreach d [array names dia] { |
- lappend lRet [list $d ""] |
- } |
- set lRet [lsort -integer -index 0 $lRet] |
- |
- close $fd |
- set lRet |
-} |
- |
+source [file join [file dirname [info script]] parseunicode.tcl] |
proc print_rd {map} { |
global tl_lookup_table |
@@ -117,7 +45,7 @@ proc print_rd {map} { |
puts "** E\"). The resuls of passing a codepoint that corresponds to an" |
puts "** uppercase letter are undefined." |
puts "*/" |
- puts "static int remove_diacritic(int c)\{" |
+ puts "static int ${::remove_diacritic}(int c)\{" |
puts " unsigned short aDia\[\] = \{" |
puts -nonewline " 0, " |
set i 1 |
@@ -204,53 +132,6 @@ proc print_isdiacritic {zFunc map} { |
#------------------------------------------------------------------------- |
-# Parameter $zName must be a path to the file UnicodeData.txt. This command |
-# reads the file and returns a list of codepoints (integers). The list |
-# contains all codepoints in the UnicodeData.txt assigned to any "General |
-# Category" that is not a "Letter" or "Number". |
-# |
-proc an_load_unicodedata_text {zName} { |
- set fd [open $zName] |
- set lField { |
- code |
- character_name |
- general_category |
- canonical_combining_classes |
- bidirectional_category |
- character_decomposition_mapping |
- decimal_digit_value |
- digit_value |
- numeric_value |
- mirrored |
- unicode_1_name |
- iso10646_comment_field |
- uppercase_mapping |
- lowercase_mapping |
- titlecase_mapping |
- } |
- set lRet [list] |
- |
- while { ![eof $fd] } { |
- set line [gets $fd] |
- if {$line == ""} continue |
- |
- set fields [split $line ";"] |
- if {[llength $fields] != [llength $lField]} { error "parse error: $line" } |
- foreach $lField $fields {} |
- |
- set iCode [expr "0x$code"] |
- set bAlnum [expr { |
- [lsearch {L N} [string range $general_category 0 0]] >= 0 |
- || $general_category=="Co" |
- }] |
- |
- if { !$bAlnum } { lappend lRet $iCode } |
- } |
- |
- close $fd |
- set lRet |
-} |
- |
proc an_load_separator_ranges {} { |
global unicodedata.txt |
set lSep [an_load_unicodedata_text ${unicodedata.txt}] |
@@ -440,29 +321,6 @@ proc print_test_isalnum {zFunc lRange} { |
#------------------------------------------------------------------------- |
-proc tl_load_casefolding_txt {zName} { |
- global tl_lookup_table |
- |
- set fd [open $zName] |
- while { ![eof $fd] } { |
- set line [gets $fd] |
- if {[string range $line 0 0] == "#"} continue |
- if {$line == ""} continue |
- |
- foreach x {a b c d} {unset -nocomplain $x} |
- foreach {a b c d} [split $line ";"] {} |
- |
- set a2 [list] |
- set c2 [list] |
- foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] } |
- foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] } |
- set b [string trim $b] |
- set d [string trim $d] |
- |
- if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 } |
- } |
-} |
- |
proc tl_create_records {} { |
global tl_lookup_table |
@@ -626,19 +484,20 @@ proc print_fold {zFunc} { |
tl_print_table_footer toggle |
tl_print_ioff_table $liOff |
- puts { |
+ puts [subst -nocommands { |
int ret = c; |
- assert( c>=0 ); |
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); |
if( c<128 ){ |
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); |
}else if( c<65536 ){ |
+ const struct TableEntry *p; |
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; |
int iLo = 0; |
int iRes = -1; |
+ assert( c>aEntry[0].iCode ); |
while( iHi>=iLo ){ |
int iTest = (iHi + iLo) / 2; |
int cmp = (c - aEntry[iTest].iCode); |
@@ -649,19 +508,17 @@ proc print_fold {zFunc} { |
iHi = iTest-1; |
} |
} |
- assert( iRes<0 || c>=aEntry[iRes].iCode ); |
- if( iRes>=0 ){ |
- const struct TableEntry *p = &aEntry[iRes]; |
- if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ |
- ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; |
- assert( ret>0 ); |
- } |
+ assert( iRes>=0 && c>=aEntry[iRes].iCode ); |
+ p = &aEntry[iRes]; |
+ if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ |
+ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; |
+ assert( ret>0 ); |
} |
- if( bRemoveDiacritic ) ret = remove_diacritic(ret); |
- } |
+ if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret); |
} |
+ }] |
foreach entry $lHigh { |
tl_print_if_entry $entry |
@@ -732,8 +589,12 @@ proc print_fileheader {} { |
*/ |
}] |
puts "" |
- puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE" |
- puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" |
+ if {$::generate_fts5_code} { |
+ # no-op |
+ } else { |
+ puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE" |
+ puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" |
+ } |
puts "" |
puts "#include <assert.h>" |
puts "" |
@@ -760,22 +621,40 @@ proc print_test_main {} { |
# our liking. |
# |
proc usage {} { |
- puts -nonewline stderr "Usage: $::argv0 ?-test? " |
+ puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? " |
puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>" |
exit 1 |
} |
-if {[llength $argv]!=2 && [llength $argv]!=3} usage |
-if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage |
+if {[llength $argv]<2} usage |
set unicodedata.txt [lindex $argv end] |
set casefolding.txt [lindex $argv end-1] |
-set generate_test_code [expr {[llength $argv]==3}] |
+ |
+set remove_diacritic remove_diacritic |
+set generate_test_code 0 |
+set generate_fts5_code 0 |
+set function_prefix "sqlite3Fts" |
+for {set i 0} {$i < [llength $argv]-2} {incr i} { |
+ switch -- [lindex $argv $i] { |
+ -test { |
+ set generate_test_code 1 |
+ } |
+ -fts5 { |
+ set function_prefix sqlite3Fts5 |
+ set generate_fts5_code 1 |
+ set remove_diacritic fts5_remove_diacritic |
+ } |
+ default { |
+ usage |
+ } |
+ } |
+} |
print_fileheader |
# Print the isalnum() function to stdout. |
# |
set lRange [an_load_separator_ranges] |
-print_isalnum sqlite3FtsUnicodeIsalnum $lRange |
+print_isalnum ${function_prefix}UnicodeIsalnum $lRange |
# Leave a gap between the two generated C functions. |
# |
@@ -790,22 +669,26 @@ set mappings [rd_load_unicodedata_text ${unicodedata.txt}] |
print_rd $mappings |
puts "" |
puts "" |
-print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings |
+print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings |
puts "" |
puts "" |
# Print the fold() function to stdout. |
# |
-print_fold sqlite3FtsUnicodeFold |
+print_fold ${function_prefix}UnicodeFold |
# Print the test routines and main() function to stdout, if -test |
# was specified. |
# |
if {$::generate_test_code} { |
- print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange |
- print_fold_test sqlite3FtsUnicodeFold $mappings |
+ print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange |
+ print_fold_test ${function_prefix}UnicodeFold $mappings |
print_test_main |
} |
-puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" |
-puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */" |
+if {$generate_fts5_code} { |
+ # no-op |
+} else { |
+ puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" |
+ puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */" |
+} |