| Index: third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/mkunicode.tcl
|
| diff --git a/third_party/sqlite/sqlite-src-3080704/ext/fts3/unicode/mkunicode.tcl b/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/mkunicode.tcl
|
| similarity index 77%
|
| copy from third_party/sqlite/sqlite-src-3080704/ext/fts3/unicode/mkunicode.tcl
|
| copy to third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/mkunicode.tcl
|
| index c3083ee36863c29a98a61f4af3c7ea9932ff1c18..a2e9b1da29316fcd28b753627d6250bb761d50e7 100644
|
| --- a/third_party/sqlite/sqlite-src-3080704/ext/fts3/unicode/mkunicode.tcl
|
| +++ b/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/mkunicode.tcl
|
| @@ -1,77 +1,5 @@
|
|
|
| -#
|
| -# Parameter $zName must be a path to the file UnicodeData.txt. This command
|
| -# reads the file and returns a list of mappings required to remove all
|
| -# diacritical marks from a unicode string. Each mapping is itself a list
|
| -# consisting of two elements - the unicode codepoint and the single ASCII
|
| -# character that it should be replaced with, or an empty string if the
|
| -# codepoint should simply be removed from the input. Examples:
|
| -#
|
| -# { 224 a } (replace codepoint 224 to "a")
|
| -# { 769 "" } (remove codepoint 769 from input)
|
| -#
|
| -# Mappings are only returned for non-upper case codepoints. It is assumed
|
| -# that the input has already been folded to lower case.
|
| -#
|
| -proc rd_load_unicodedata_text {zName} {
|
| - global tl_lookup_table
|
| -
|
| - set fd [open $zName]
|
| - set lField {
|
| - code
|
| - character_name
|
| - general_category
|
| - canonical_combining_classes
|
| - bidirectional_category
|
| - character_decomposition_mapping
|
| - decimal_digit_value
|
| - digit_value
|
| - numeric_value
|
| - mirrored
|
| - unicode_1_name
|
| - iso10646_comment_field
|
| - uppercase_mapping
|
| - lowercase_mapping
|
| - titlecase_mapping
|
| - }
|
| - set lRet [list]
|
| -
|
| - while { ![eof $fd] } {
|
| - set line [gets $fd]
|
| - if {$line == ""} continue
|
| -
|
| - set fields [split $line ";"]
|
| - if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
|
| - foreach $lField $fields {}
|
| - if { [llength $character_decomposition_mapping]!=2
|
| - || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
|
| - } {
|
| - continue
|
| - }
|
| -
|
| - set iCode [expr "0x$code"]
|
| - set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
|
| - set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]
|
| -
|
| - if {[info exists tl_lookup_table($iCode)]} continue
|
| -
|
| - if { ($iAscii >= 97 && $iAscii <= 122)
|
| - || ($iAscii >= 65 && $iAscii <= 90)
|
| - } {
|
| - lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
|
| - set dia($iDia) 1
|
| - }
|
| - }
|
| -
|
| - foreach d [array names dia] {
|
| - lappend lRet [list $d ""]
|
| - }
|
| - set lRet [lsort -integer -index 0 $lRet]
|
| -
|
| - close $fd
|
| - set lRet
|
| -}
|
| -
|
| +source [file join [file dirname [info script]] parseunicode.tcl]
|
|
|
| proc print_rd {map} {
|
| global tl_lookup_table
|
| @@ -117,7 +45,7 @@ proc print_rd {map} {
|
| puts "** E\"). The resuls of passing a codepoint that corresponds to an"
|
| puts "** uppercase letter are undefined."
|
| puts "*/"
|
| - puts "static int remove_diacritic(int c)\{"
|
| + puts "static int ${::remove_diacritic}(int c)\{"
|
| puts " unsigned short aDia\[\] = \{"
|
| puts -nonewline " 0, "
|
| set i 1
|
| @@ -204,53 +132,6 @@ proc print_isdiacritic {zFunc map} {
|
|
|
| #-------------------------------------------------------------------------
|
|
|
| -# Parameter $zName must be a path to the file UnicodeData.txt. This command
|
| -# reads the file and returns a list of codepoints (integers). The list
|
| -# contains all codepoints in the UnicodeData.txt assigned to any "General
|
| -# Category" that is not a "Letter" or "Number".
|
| -#
|
| -proc an_load_unicodedata_text {zName} {
|
| - set fd [open $zName]
|
| - set lField {
|
| - code
|
| - character_name
|
| - general_category
|
| - canonical_combining_classes
|
| - bidirectional_category
|
| - character_decomposition_mapping
|
| - decimal_digit_value
|
| - digit_value
|
| - numeric_value
|
| - mirrored
|
| - unicode_1_name
|
| - iso10646_comment_field
|
| - uppercase_mapping
|
| - lowercase_mapping
|
| - titlecase_mapping
|
| - }
|
| - set lRet [list]
|
| -
|
| - while { ![eof $fd] } {
|
| - set line [gets $fd]
|
| - if {$line == ""} continue
|
| -
|
| - set fields [split $line ";"]
|
| - if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
|
| - foreach $lField $fields {}
|
| -
|
| - set iCode [expr "0x$code"]
|
| - set bAlnum [expr {
|
| - [lsearch {L N} [string range $general_category 0 0]] >= 0
|
| - || $general_category=="Co"
|
| - }]
|
| -
|
| - if { !$bAlnum } { lappend lRet $iCode }
|
| - }
|
| -
|
| - close $fd
|
| - set lRet
|
| -}
|
| -
|
| proc an_load_separator_ranges {} {
|
| global unicodedata.txt
|
| set lSep [an_load_unicodedata_text ${unicodedata.txt}]
|
| @@ -440,29 +321,6 @@ proc print_test_isalnum {zFunc lRange} {
|
|
|
| #-------------------------------------------------------------------------
|
|
|
| -proc tl_load_casefolding_txt {zName} {
|
| - global tl_lookup_table
|
| -
|
| - set fd [open $zName]
|
| - while { ![eof $fd] } {
|
| - set line [gets $fd]
|
| - if {[string range $line 0 0] == "#"} continue
|
| - if {$line == ""} continue
|
| -
|
| - foreach x {a b c d} {unset -nocomplain $x}
|
| - foreach {a b c d} [split $line ";"] {}
|
| -
|
| - set a2 [list]
|
| - set c2 [list]
|
| - foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
|
| - foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
|
| - set b [string trim $b]
|
| - set d [string trim $d]
|
| -
|
| - if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
|
| - }
|
| -}
|
| -
|
| proc tl_create_records {} {
|
| global tl_lookup_table
|
|
|
| @@ -626,19 +484,20 @@ proc print_fold {zFunc} {
|
| tl_print_table_footer toggle
|
| tl_print_ioff_table $liOff
|
|
|
| - puts {
|
| + puts [subst -nocommands {
|
| int ret = c;
|
|
|
| - assert( c>=0 );
|
| assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
|
|
|
| if( c<128 ){
|
| if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
|
| }else if( c<65536 ){
|
| + const struct TableEntry *p;
|
| int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
|
| int iLo = 0;
|
| int iRes = -1;
|
|
|
| + assert( c>aEntry[0].iCode );
|
| while( iHi>=iLo ){
|
| int iTest = (iHi + iLo) / 2;
|
| int cmp = (c - aEntry[iTest].iCode);
|
| @@ -649,19 +508,17 @@ proc print_fold {zFunc} {
|
| iHi = iTest-1;
|
| }
|
| }
|
| - assert( iRes<0 || c>=aEntry[iRes].iCode );
|
|
|
| - if( iRes>=0 ){
|
| - const struct TableEntry *p = &aEntry[iRes];
|
| - if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
|
| - ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
|
| - assert( ret>0 );
|
| - }
|
| + assert( iRes>=0 && c>=aEntry[iRes].iCode );
|
| + p = &aEntry[iRes];
|
| + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
|
| + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
|
| + assert( ret>0 );
|
| }
|
|
|
| - if( bRemoveDiacritic ) ret = remove_diacritic(ret);
|
| - }
|
| + if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);
|
| }
|
| + }]
|
|
|
| foreach entry $lHigh {
|
| tl_print_if_entry $entry
|
| @@ -732,8 +589,12 @@ proc print_fileheader {} {
|
| */
|
| }]
|
| puts ""
|
| - puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
|
| - puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
|
| + if {$::generate_fts5_code} {
|
| + # no-op
|
| + } else {
|
| + puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
|
| + puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
|
| + }
|
| puts ""
|
| puts "#include <assert.h>"
|
| puts ""
|
| @@ -760,22 +621,40 @@ proc print_test_main {} {
|
| # our liking.
|
| #
|
| proc usage {} {
|
| - puts -nonewline stderr "Usage: $::argv0 ?-test? "
|
| + puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? "
|
| puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
|
| exit 1
|
| }
|
| -if {[llength $argv]!=2 && [llength $argv]!=3} usage
|
| -if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage
|
| +if {[llength $argv]<2} usage
|
| set unicodedata.txt [lindex $argv end]
|
| set casefolding.txt [lindex $argv end-1]
|
| -set generate_test_code [expr {[llength $argv]==3}]
|
| +
|
| +set remove_diacritic remove_diacritic
|
| +set generate_test_code 0
|
| +set generate_fts5_code 0
|
| +set function_prefix "sqlite3Fts"
|
| +for {set i 0} {$i < [llength $argv]-2} {incr i} {
|
| + switch -- [lindex $argv $i] {
|
| + -test {
|
| + set generate_test_code 1
|
| + }
|
| + -fts5 {
|
| + set function_prefix sqlite3Fts5
|
| + set generate_fts5_code 1
|
| + set remove_diacritic fts5_remove_diacritic
|
| + }
|
| + default {
|
| + usage
|
| + }
|
| + }
|
| +}
|
|
|
| print_fileheader
|
|
|
| # Print the isalnum() function to stdout.
|
| #
|
| set lRange [an_load_separator_ranges]
|
| -print_isalnum sqlite3FtsUnicodeIsalnum $lRange
|
| +print_isalnum ${function_prefix}UnicodeIsalnum $lRange
|
|
|
| # Leave a gap between the two generated C functions.
|
| #
|
| @@ -790,22 +669,26 @@ set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
|
| print_rd $mappings
|
| puts ""
|
| puts ""
|
| -print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings
|
| +print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings
|
| puts ""
|
| puts ""
|
|
|
| # Print the fold() function to stdout.
|
| #
|
| -print_fold sqlite3FtsUnicodeFold
|
| +print_fold ${function_prefix}UnicodeFold
|
|
|
| # Print the test routines and main() function to stdout, if -test
|
| # was specified.
|
| #
|
| if {$::generate_test_code} {
|
| - print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
|
| - print_fold_test sqlite3FtsUnicodeFold $mappings
|
| + print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange
|
| + print_fold_test ${function_prefix}UnicodeFold $mappings
|
| print_test_main
|
| }
|
|
|
| -puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
|
| -puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"
|
| +if {$generate_fts5_code} {
|
| + # no-op
|
| +} else {
|
| + puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
|
| + puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"
|
| +}
|
|
|