Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(606)

Side by Side Diff: third_party/sqlite/sqlite-src-3170000/ext/fts3/unicode/parseunicode.tcl

Issue 2747283002: [sql] Import reference version of SQLite 3.17.. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1
2 #--------------------------------------------------------------------------
3 # Parameter $zName must be a path to the file UnicodeData.txt. This command
4 # reads the file and returns a list of mappings required to remove all
5 # diacritical marks from a unicode string. Each mapping is itself a list
6 # consisting of two elements - the unicode codepoint and the single ASCII
7 # character that it should be replaced with, or an empty string if the
8 # codepoint should simply be removed from the input. Examples:
9 #
10 # { 224 a } (replace codepoint 224 to "a")
11 # { 769 "" } (remove codepoint 769 from input)
12 #
13 # Mappings are only returned for non-upper case codepoints. It is assumed
14 # that the input has already been folded to lower case.
15 #
16 proc rd_load_unicodedata_text {zName} {
17 global tl_lookup_table
18
19 set fd [open $zName]
20 set lField {
21 code
22 character_name
23 general_category
24 canonical_combining_classes
25 bidirectional_category
26 character_decomposition_mapping
27 decimal_digit_value
28 digit_value
29 numeric_value
30 mirrored
31 unicode_1_name
32 iso10646_comment_field
33 uppercase_mapping
34 lowercase_mapping
35 titlecase_mapping
36 }
37 set lRet [list]
38
39 while { ![eof $fd] } {
40 set line [gets $fd]
41 if {$line == ""} continue
42
43 set fields [split $line ";"]
44 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
45 foreach $lField $fields {}
46 if { [llength $character_decomposition_mapping]!=2
47 || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
48 } {
49 continue
50 }
51
52 set iCode [expr "0x$code"]
53 set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
54 set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]
55
56 if {[info exists tl_lookup_table($iCode)]} continue
57
58 if { ($iAscii >= 97 && $iAscii <= 122)
59 || ($iAscii >= 65 && $iAscii <= 90)
60 } {
61 lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
62 set dia($iDia) 1
63 }
64 }
65
66 foreach d [array names dia] {
67 lappend lRet [list $d ""]
68 }
69 set lRet [lsort -integer -index 0 $lRet]
70
71 close $fd
72 set lRet
73 }
74
75 #-------------------------------------------------------------------------
76 # Parameter $zName must be a path to the file UnicodeData.txt. This command
77 # reads the file and returns a list of codepoints (integers). The list
78 # contains all codepoints in the UnicodeData.txt assigned to any "General
79 # Category" that is not a "Letter" or "Number".
80 #
81 proc an_load_unicodedata_text {zName} {
82 set fd [open $zName]
83 set lField {
84 code
85 character_name
86 general_category
87 canonical_combining_classes
88 bidirectional_category
89 character_decomposition_mapping
90 decimal_digit_value
91 digit_value
92 numeric_value
93 mirrored
94 unicode_1_name
95 iso10646_comment_field
96 uppercase_mapping
97 lowercase_mapping
98 titlecase_mapping
99 }
100 set lRet [list]
101
102 while { ![eof $fd] } {
103 set line [gets $fd]
104 if {$line == ""} continue
105
106 set fields [split $line ";"]
107 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
108 foreach $lField $fields {}
109
110 set iCode [expr "0x$code"]
111 set bAlnum [expr {
112 [lsearch {L N} [string range $general_category 0 0]] >= 0
113 || $general_category=="Co"
114 }]
115
116 if { !$bAlnum } { lappend lRet $iCode }
117 }
118
119 close $fd
120 set lRet
121 }
122
123 proc tl_load_casefolding_txt {zName} {
124 global tl_lookup_table
125
126 set fd [open $zName]
127 while { ![eof $fd] } {
128 set line [gets $fd]
129 if {[string range $line 0 0] == "#"} continue
130 if {$line == ""} continue
131
132 foreach x {a b c d} {unset -nocomplain $x}
133 foreach {a b c d} [split $line ";"] {}
134
135 set a2 [list]
136 set c2 [list]
137 foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
138 foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
139 set b [string trim $b]
140 set d [string trim $d]
141
142 if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
143 }
144 }
145
146
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698