third_party/sqlite/sqlite-src-3170000/ext/fts3/unicode/parseunicode.tcl - Issue 2747283002: [sql] Import reference version of SQLite 3.17..

Side by Side Diff: third_party/sqlite/sqlite-src-3170000/ext/fts3/unicode/parseunicode.tcl

Issue 2747283002: [sql] Import reference version of SQLite 3.17.. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/sqlite/sqlite-src-3170000/ext/fts3/unicode/mkunicode.tcl ('k') | third_party/sqlite/sqlite-src-3170000/ext/fts5/extract_api_docs.tcl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1

	2 #--------------------------------------------------------------------------

	3 # Parameter $zName must be a path to the file UnicodeData.txt. This command

	4 # reads the file and returns a list of mappings required to remove all

	5 # diacritical marks from a unicode string. Each mapping is itself a list

	6 # consisting of two elements - the unicode codepoint and the single ASCII

	7 # character that it should be replaced with, or an empty string if the

	8 # codepoint should simply be removed from the input. Examples:

	9 #

	10 # { 224 a } (replace codepoint 224 to "a")

	11 # { 769 "" } (remove codepoint 769 from input)

	12 #

	13 # Mappings are only returned for non-upper case codepoints. It is assumed

	14 # that the input has already been folded to lower case.

	15 #

	16 proc rd_load_unicodedata_text {zName} {

	17 global tl_lookup_table

	18

	19 set fd [open $zName]

	20 set lField {

	21 code

	22 character_name

	23 general_category

	24 canonical_combining_classes

	25 bidirectional_category

	26 character_decomposition_mapping

	27 decimal_digit_value

	28 digit_value

	29 numeric_value

	30 mirrored

	31 unicode_1_name

	32 iso10646_comment_field

	33 uppercase_mapping

	34 lowercase_mapping

	35 titlecase_mapping

	36 }

	37 set lRet [list]

	38

	39 while { ![eof $fd] } {

	40 set line [gets $fd]

	41 if {$line == ""} continue

	42

	43 set fields [split $line ";"]

	44 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

	45 foreach $lField $fields {}

	46 if { [llength $character_decomposition_mapping]!=2

	47 \|\| [string is xdigit [lindex $character_decomposition_mapping 0]]==0

	48 } {

	49 continue

	50 }

	51

	52 set iCode [expr "0x$code"]

	53 set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]

	54 set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]

	55

	56 if {[info exists tl_lookup_table($iCode)]} continue

	57

	58 if { ($iAscii >= 97 && $iAscii <= 122)

	59 \|\| ($iAscii >= 65 && $iAscii <= 90)

	60 } {

	61 lappend lRet [list $iCode [string tolower [format %c $iAscii]]]

	62 set dia($iDia) 1

	63 }

	64 }

	65

	66 foreach d [array names dia] {

	67 lappend lRet [list $d ""]

	68 }

	69 set lRet [lsort -integer -index 0 $lRet]

	70

	71 close $fd

	72 set lRet

	73 }

	74

	75 #-------------------------------------------------------------------------

	76 # Parameter $zName must be a path to the file UnicodeData.txt. This command

	77 # reads the file and returns a list of codepoints (integers). The list

	78 # contains all codepoints in the UnicodeData.txt assigned to any "General

	79 # Category" that is not a "Letter" or "Number".

	80 #

	81 proc an_load_unicodedata_text {zName} {

	82 set fd [open $zName]

	83 set lField {

	84 code

	85 character_name

	86 general_category

	87 canonical_combining_classes

	88 bidirectional_category

	89 character_decomposition_mapping

	90 decimal_digit_value

	91 digit_value

	92 numeric_value

	93 mirrored

	94 unicode_1_name

	95 iso10646_comment_field

	96 uppercase_mapping

	97 lowercase_mapping

	98 titlecase_mapping

	99 }

	100 set lRet [list]

	101

	102 while { ![eof $fd] } {

	103 set line [gets $fd]

	104 if {$line == ""} continue

	105

	106 set fields [split $line ";"]

	107 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

	108 foreach $lField $fields {}

	109

	110 set iCode [expr "0x$code"]

	111 set bAlnum [expr {

	112 [lsearch {L N} [string range $general_category 0 0]] >= 0

	113 \|\| $general_category=="Co"

	114 }]

	115

	116 if { !$bAlnum } { lappend lRet $iCode }

	117 }

	118

	119 close $fd

	120 set lRet

	121 }

	122

	123 proc tl_load_casefolding_txt {zName} {

	124 global tl_lookup_table

	125

	126 set fd [open $zName]

	127 while { ![eof $fd] } {

	128 set line [gets $fd]

	129 if {[string range $line 0 0] == "#"} continue

	130 if {$line == ""} continue

	131

	132 foreach x {a b c d} {unset -nocomplain $x}

	133 foreach {a b c d} [split $line ";"] {}

	134

	135 set a2 [list]

	136 set c2 [list]

	137 foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }

	138 foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }

	139 set b [string trim $b]

	140 set d [string trim $d]

	141

	142 if {$b=="C" \|\| $b=="S"} { set tl_lookup_table($a2) $c2 }

	143 }

	144 }

	145

	146

OLD	NEW