Index: third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl |
diff --git a/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl b/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl |
deleted file mode 100644 |
index 0cb2c83a18f76deaf8b75ebe9c5000544b7708e1..0000000000000000000000000000000000000000 |
--- a/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl |
+++ /dev/null |
@@ -1,146 +0,0 @@ |
- |
-#-------------------------------------------------------------------------- |
-# Parameter $zName must be a path to the file UnicodeData.txt. This command |
-# reads the file and returns a list of mappings required to remove all |
-# diacritical marks from a unicode string. Each mapping is itself a list |
-# consisting of two elements - the unicode codepoint and the single ASCII |
-# character that it should be replaced with, or an empty string if the |
-# codepoint should simply be removed from the input. Examples: |
-# |
-# { 224 a } (replace codepoint 224 to "a") |
-# { 769 "" } (remove codepoint 769 from input) |
-# |
-# Mappings are only returned for non-upper case codepoints. It is assumed |
-# that the input has already been folded to lower case. |
-# |
-proc rd_load_unicodedata_text {zName} { |
- global tl_lookup_table |
- |
- set fd [open $zName] |
- set lField { |
- code |
- character_name |
- general_category |
- canonical_combining_classes |
- bidirectional_category |
- character_decomposition_mapping |
- decimal_digit_value |
- digit_value |
- numeric_value |
- mirrored |
- unicode_1_name |
- iso10646_comment_field |
- uppercase_mapping |
- lowercase_mapping |
- titlecase_mapping |
- } |
- set lRet [list] |
- |
- while { ![eof $fd] } { |
- set line [gets $fd] |
- if {$line == ""} continue |
- |
- set fields [split $line ";"] |
- if {[llength $fields] != [llength $lField]} { error "parse error: $line" } |
- foreach $lField $fields {} |
- if { [llength $character_decomposition_mapping]!=2 |
- || [string is xdigit [lindex $character_decomposition_mapping 0]]==0 |
- } { |
- continue |
- } |
- |
- set iCode [expr "0x$code"] |
- set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"] |
- set iDia [expr "0x[lindex $character_decomposition_mapping 1]"] |
- |
- if {[info exists tl_lookup_table($iCode)]} continue |
- |
- if { ($iAscii >= 97 && $iAscii <= 122) |
- || ($iAscii >= 65 && $iAscii <= 90) |
- } { |
- lappend lRet [list $iCode [string tolower [format %c $iAscii]]] |
- set dia($iDia) 1 |
- } |
- } |
- |
- foreach d [array names dia] { |
- lappend lRet [list $d ""] |
- } |
- set lRet [lsort -integer -index 0 $lRet] |
- |
- close $fd |
- set lRet |
-} |
- |
-#------------------------------------------------------------------------- |
-# Parameter $zName must be a path to the file UnicodeData.txt. This command |
-# reads the file and returns a list of codepoints (integers). The list |
-# contains all codepoints in the UnicodeData.txt assigned to any "General |
-# Category" that is not a "Letter" or "Number". |
-# |
-proc an_load_unicodedata_text {zName} { |
- set fd [open $zName] |
- set lField { |
- code |
- character_name |
- general_category |
- canonical_combining_classes |
- bidirectional_category |
- character_decomposition_mapping |
- decimal_digit_value |
- digit_value |
- numeric_value |
- mirrored |
- unicode_1_name |
- iso10646_comment_field |
- uppercase_mapping |
- lowercase_mapping |
- titlecase_mapping |
- } |
- set lRet [list] |
- |
- while { ![eof $fd] } { |
- set line [gets $fd] |
- if {$line == ""} continue |
- |
- set fields [split $line ";"] |
- if {[llength $fields] != [llength $lField]} { error "parse error: $line" } |
- foreach $lField $fields {} |
- |
- set iCode [expr "0x$code"] |
- set bAlnum [expr { |
- [lsearch {L N} [string range $general_category 0 0]] >= 0 |
- || $general_category=="Co" |
- }] |
- |
- if { !$bAlnum } { lappend lRet $iCode } |
- } |
- |
- close $fd |
- set lRet |
-} |
- |
-proc tl_load_casefolding_txt {zName} { |
- global tl_lookup_table |
- |
- set fd [open $zName] |
- while { ![eof $fd] } { |
- set line [gets $fd] |
- if {[string range $line 0 0] == "#"} continue |
- if {$line == ""} continue |
- |
- foreach x {a b c d} {unset -nocomplain $x} |
- foreach {a b c d} [split $line ";"] {} |
- |
- set a2 [list] |
- set c2 [list] |
- foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] } |
- foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] } |
- set b [string trim $b] |
- set d [string trim $d] |
- |
- if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 } |
- } |
-} |
- |
- |