| Index: third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl
|
| diff --git a/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl b/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl
|
| deleted file mode 100644
|
| index 0cb2c83a18f76deaf8b75ebe9c5000544b7708e1..0000000000000000000000000000000000000000
|
| --- a/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl
|
| +++ /dev/null
|
| @@ -1,146 +0,0 @@
|
| -
|
| -#--------------------------------------------------------------------------
|
| -# Parameter $zName must be a path to the file UnicodeData.txt. This command
|
| -# reads the file and returns a list of mappings required to remove all
|
| -# diacritical marks from a unicode string. Each mapping is itself a list
|
| -# consisting of two elements - the unicode codepoint and the single ASCII
|
| -# character that it should be replaced with, or an empty string if the
|
| -# codepoint should simply be removed from the input. Examples:
|
| -#
|
| -# { 224 a } (replace codepoint 224 to "a")
|
| -# { 769 "" } (remove codepoint 769 from input)
|
| -#
|
| -# Mappings are only returned for non-upper case codepoints. It is assumed
|
| -# that the input has already been folded to lower case.
|
| -#
|
| -proc rd_load_unicodedata_text {zName} {
|
| - global tl_lookup_table
|
| -
|
| - set fd [open $zName]
|
| - set lField {
|
| - code
|
| - character_name
|
| - general_category
|
| - canonical_combining_classes
|
| - bidirectional_category
|
| - character_decomposition_mapping
|
| - decimal_digit_value
|
| - digit_value
|
| - numeric_value
|
| - mirrored
|
| - unicode_1_name
|
| - iso10646_comment_field
|
| - uppercase_mapping
|
| - lowercase_mapping
|
| - titlecase_mapping
|
| - }
|
| - set lRet [list]
|
| -
|
| - while { ![eof $fd] } {
|
| - set line [gets $fd]
|
| - if {$line == ""} continue
|
| -
|
| - set fields [split $line ";"]
|
| - if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
|
| - foreach $lField $fields {}
|
| - if { [llength $character_decomposition_mapping]!=2
|
| - || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
|
| - } {
|
| - continue
|
| - }
|
| -
|
| - set iCode [expr "0x$code"]
|
| - set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
|
| - set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]
|
| -
|
| - if {[info exists tl_lookup_table($iCode)]} continue
|
| -
|
| - if { ($iAscii >= 97 && $iAscii <= 122)
|
| - || ($iAscii >= 65 && $iAscii <= 90)
|
| - } {
|
| - lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
|
| - set dia($iDia) 1
|
| - }
|
| - }
|
| -
|
| - foreach d [array names dia] {
|
| - lappend lRet [list $d ""]
|
| - }
|
| - set lRet [lsort -integer -index 0 $lRet]
|
| -
|
| - close $fd
|
| - set lRet
|
| -}
|
| -
|
| -#-------------------------------------------------------------------------
|
| -# Parameter $zName must be a path to the file UnicodeData.txt. This command
|
| -# reads the file and returns a list of codepoints (integers). The list
|
| -# contains all codepoints in the UnicodeData.txt assigned to any "General
|
| -# Category" that is not a "Letter" or "Number".
|
| -#
|
| -proc an_load_unicodedata_text {zName} {
|
| - set fd [open $zName]
|
| - set lField {
|
| - code
|
| - character_name
|
| - general_category
|
| - canonical_combining_classes
|
| - bidirectional_category
|
| - character_decomposition_mapping
|
| - decimal_digit_value
|
| - digit_value
|
| - numeric_value
|
| - mirrored
|
| - unicode_1_name
|
| - iso10646_comment_field
|
| - uppercase_mapping
|
| - lowercase_mapping
|
| - titlecase_mapping
|
| - }
|
| - set lRet [list]
|
| -
|
| - while { ![eof $fd] } {
|
| - set line [gets $fd]
|
| - if {$line == ""} continue
|
| -
|
| - set fields [split $line ";"]
|
| - if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
|
| - foreach $lField $fields {}
|
| -
|
| - set iCode [expr "0x$code"]
|
| - set bAlnum [expr {
|
| - [lsearch {L N} [string range $general_category 0 0]] >= 0
|
| - || $general_category=="Co"
|
| - }]
|
| -
|
| - if { !$bAlnum } { lappend lRet $iCode }
|
| - }
|
| -
|
| - close $fd
|
| - set lRet
|
| -}
|
| -
|
| -proc tl_load_casefolding_txt {zName} {
|
| - global tl_lookup_table
|
| -
|
| - set fd [open $zName]
|
| - while { ![eof $fd] } {
|
| - set line [gets $fd]
|
| - if {[string range $line 0 0] == "#"} continue
|
| - if {$line == ""} continue
|
| -
|
| - foreach x {a b c d} {unset -nocomplain $x}
|
| - foreach {a b c d} [split $line ";"] {}
|
| -
|
| - set a2 [list]
|
| - set c2 [list]
|
| - foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
|
| - foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
|
| - set b [string trim $b]
|
| - set d [string trim $d]
|
| -
|
| - if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
|
| - }
|
| -}
|
| -
|
| -
|
|
|