third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl - Issue 1610543003: [sql] Import reference version of SQLite 3.10.2.

Unified Diff: third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl

Issue 1610543003: [sql] Import reference version of SQLite 3.10.2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/mkunicode.tcl ('k') | third_party/sqlite/sqlite-src-3100200/ext/fts5/extract_api_docs.tcl » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl

diff --git a/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl b/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl

new file mode 100644

index 0000000000000000000000000000000000000000..0cb2c83a18f76deaf8b75ebe9c5000544b7708e1

--- /dev/null

+++ b/third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl

@@ -0,0 +1,146 @@

+#--------------------------------------------------------------------------

+# Parameter $zName must be a path to the file UnicodeData.txt. This command

+# reads the file and returns a list of mappings required to remove all

+# diacritical marks from a unicode string. Each mapping is itself a list

+# consisting of two elements - the unicode codepoint and the single ASCII

+# character that it should be replaced with, or an empty string if the

+# codepoint should simply be removed from the input. Examples:

+# { 224 a } (replace codepoint 224 to "a")

+# { 769 "" } (remove codepoint 769 from input)

+# Mappings are only returned for non-upper case codepoints. It is assumed

+# that the input has already been folded to lower case.

+proc rd_load_unicodedata_text {zName} {

+ global tl_lookup_table

+ set fd [open $zName]

+ set lField {

+ code

+ character_name

+ general_category

+ canonical_combining_classes

+ bidirectional_category

+ character_decomposition_mapping

+ decimal_digit_value

+ digit_value

+ numeric_value

+ mirrored

+ unicode_1_name

+ iso10646_comment_field

+ uppercase_mapping

+ lowercase_mapping

+ titlecase_mapping

+ }

+ set lRet [list]

+ while { ![eof $fd] } {

+ set line [gets $fd]

+ if {$line == ""} continue

+ set fields [split $line ";"]

+ if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

+ foreach $lField $fields {}

+ if { [llength $character_decomposition_mapping]!=2

+ || [string is xdigit [lindex $character_decomposition_mapping 0]]==0

+ } {

+ continue

+ }

+ set iCode [expr "0x$code"]

+ set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]

+ set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]

+ if {[info exists tl_lookup_table($iCode)]} continue

+ if { ($iAscii >= 97 && $iAscii <= 122)

+ || ($iAscii >= 65 && $iAscii <= 90)

+ } {

+ lappend lRet [list $iCode [string tolower [format %c $iAscii]]]

+ set dia($iDia) 1

+ }

+ foreach d [array names dia] {

+ lappend lRet [list $d ""]

+ }

+ set lRet [lsort -integer -index 0 $lRet]

+ close $fd

+ set lRet

+#-------------------------------------------------------------------------

+# Parameter $zName must be a path to the file UnicodeData.txt. This command

+# reads the file and returns a list of codepoints (integers). The list

+# contains all codepoints in the UnicodeData.txt assigned to any "General

+# Category" that is not a "Letter" or "Number".

+proc an_load_unicodedata_text {zName} {

+ set fd [open $zName]

+ set lField {

+ code

+ character_name

+ general_category

+ canonical_combining_classes

+ bidirectional_category

+ character_decomposition_mapping

+ decimal_digit_value

+ digit_value

+ numeric_value

+ mirrored

+ unicode_1_name

+ iso10646_comment_field

+ uppercase_mapping

+ lowercase_mapping

+ titlecase_mapping

+ }

+ set lRet [list]

+ while { ![eof $fd] } {

+ set line [gets $fd]

+ if {$line == ""} continue

+ set fields [split $line ";"]

+ if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

+ foreach $lField $fields {}

+ set iCode [expr "0x$code"]

+ set bAlnum [expr {

+ [lsearch {L N} [string range $general_category 0 0]] >= 0

+ || $general_category=="Co"

+ }]

+ if { !$bAlnum } { lappend lRet $iCode }

+ }

+ close $fd

+ set lRet

+proc tl_load_casefolding_txt {zName} {

+ global tl_lookup_table

+ set fd [open $zName]

+ while { ![eof $fd] } {

+ set line [gets $fd]

+ if {[string range $line 0 0] == "#"} continue

+ if {$line == ""} continue

+ foreach x {a b c d} {unset -nocomplain $x}

+ foreach {a b c d} [split $line ";"] {}

+ set a2 [list]

+ set c2 [list]

+ foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }

+ foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }

+ set b [string trim $b]

+ set d [string trim $d]

+ if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }

+ }