third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl - Issue 1610963002: Import SQLite 3.10.2.

Unified Diff: third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl

Issue 1610963002: Import SQLite 3.10.2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl

diff --git a/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl b/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl

index c3083ee36863c29a98a61f4af3c7ea9932ff1c18..a2e9b1da29316fcd28b753627d6250bb761d50e7 100644

--- a/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl

+++ b/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl

@@ -1,77 +1,5 @@

-# Parameter $zName must be a path to the file UnicodeData.txt. This command

-# reads the file and returns a list of mappings required to remove all

-# diacritical marks from a unicode string. Each mapping is itself a list

-# consisting of two elements - the unicode codepoint and the single ASCII

-# character that it should be replaced with, or an empty string if the

-# codepoint should simply be removed from the input. Examples:

-# { 224 a } (replace codepoint 224 to "a")

-# { 769 "" } (remove codepoint 769 from input)

-# Mappings are only returned for non-upper case codepoints. It is assumed

-# that the input has already been folded to lower case.

-proc rd_load_unicodedata_text {zName} {

- global tl_lookup_table

- set fd [open $zName]

- set lField {

- code

- character_name

- general_category

- canonical_combining_classes

- bidirectional_category

- character_decomposition_mapping

- decimal_digit_value

- digit_value

- numeric_value

- mirrored

- unicode_1_name

- iso10646_comment_field

- uppercase_mapping

- lowercase_mapping

- titlecase_mapping

- }

- set lRet [list]

- while { ![eof $fd] } {

- set line [gets $fd]

- if {$line == ""} continue

- set fields [split $line ";"]

- if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

- foreach $lField $fields {}

- if { [llength $character_decomposition_mapping]!=2

- || [string is xdigit [lindex $character_decomposition_mapping 0]]==0

- } {

- continue

- }

- set iCode [expr "0x$code"]

- set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]

- set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]

- if {[info exists tl_lookup_table($iCode)]} continue

- if { ($iAscii >= 97 && $iAscii <= 122)

- || ($iAscii >= 65 && $iAscii <= 90)

- } {

- lappend lRet [list $iCode [string tolower [format %c $iAscii]]]

- set dia($iDia) 1

- }

- foreach d [array names dia] {

- lappend lRet [list $d ""]

- }

- set lRet [lsort -integer -index 0 $lRet]

- close $fd

- set lRet

+source [file join [file dirname [info script]] parseunicode.tcl]

proc print_rd {map} {

global tl_lookup_table

@@ -117,7 +45,7 @@ proc print_rd {map} {

puts "** E\"). The resuls of passing a codepoint that corresponds to an"

puts "** uppercase letter are undefined."

puts "*/"

- puts "static int remove_diacritic(int c)\{"

+ puts "static int ${::remove_diacritic}(int c)\{"

puts " unsigned short aDia\[\] = \{"

puts -nonewline " 0, "

set i 1

@@ -204,53 +132,6 @@ proc print_isdiacritic {zFunc map} {

#-------------------------------------------------------------------------

-# Parameter $zName must be a path to the file UnicodeData.txt. This command

-# reads the file and returns a list of codepoints (integers). The list

-# contains all codepoints in the UnicodeData.txt assigned to any "General

-# Category" that is not a "Letter" or "Number".

-proc an_load_unicodedata_text {zName} {

- set fd [open $zName]

- set lField {

- code

- character_name

- general_category

- canonical_combining_classes

- bidirectional_category

- character_decomposition_mapping

- decimal_digit_value

- digit_value

- numeric_value

- mirrored

- unicode_1_name

- iso10646_comment_field

- uppercase_mapping

- lowercase_mapping

- titlecase_mapping

- }

- set lRet [list]

- while { ![eof $fd] } {

- set line [gets $fd]

- if {$line == ""} continue

- set fields [split $line ";"]

- if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

- foreach $lField $fields {}

- set iCode [expr "0x$code"]

- set bAlnum [expr {

- [lsearch {L N} [string range $general_category 0 0]] >= 0

- || $general_category=="Co"

- }]

- if { !$bAlnum } { lappend lRet $iCode }

- }

- close $fd

- set lRet

proc an_load_separator_ranges {} {

global unicodedata.txt

set lSep [an_load_unicodedata_text ${unicodedata.txt}]

@@ -440,29 +321,6 @@ proc print_test_isalnum {zFunc lRange} {

#-------------------------------------------------------------------------

-proc tl_load_casefolding_txt {zName} {

- global tl_lookup_table

- set fd [open $zName]

- while { ![eof $fd] } {

- set line [gets $fd]

- if {[string range $line 0 0] == "#"} continue

- if {$line == ""} continue

- foreach x {a b c d} {unset -nocomplain $x}

- foreach {a b c d} [split $line ";"] {}

- set a2 [list]

- set c2 [list]

- foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }

- foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }

- set b [string trim $b]

- set d [string trim $d]

- if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }

- }

proc tl_create_records {} {

global tl_lookup_table

@@ -626,19 +484,20 @@ proc print_fold {zFunc} {

tl_print_table_footer toggle

tl_print_ioff_table $liOff

- puts {

+ puts [subst -nocommands {

int ret = c;

- assert( c>=0 );

assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

if( c<128 ){

if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');

}else if( c<65536 ){

+ const struct TableEntry *p;

int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;

int iLo = 0;

int iRes = -1;

+ assert( c>aEntry[0].iCode );

while( iHi>=iLo ){

int iTest = (iHi + iLo) / 2;

int cmp = (c - aEntry[iTest].iCode);

@@ -649,19 +508,17 @@ proc print_fold {zFunc} {

iHi = iTest-1;

}

- assert( iRes<0 || c>=aEntry[iRes].iCode );

- if( iRes>=0 ){

- const struct TableEntry *p = &aEntry[iRes];

- if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){

- ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;

- assert( ret>0 );

- }

+ assert( iRes>=0 && c>=aEntry[iRes].iCode );

+ p = &aEntry[iRes];

+ if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){

+ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;

+ assert( ret>0 );

}

- if( bRemoveDiacritic ) ret = remove_diacritic(ret);

- }

+ if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);

}

+ }]

foreach entry $lHigh {

tl_print_if_entry $entry

@@ -732,8 +589,12 @@ proc print_fileheader {} {

}]

puts ""

- puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"

- puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"

+ if {$::generate_fts5_code} {

+ # no-op

+ } else {

+ puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"

+ puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"

+ }

puts ""

puts "#include <assert.h>"

puts ""

@@ -760,22 +621,40 @@ proc print_test_main {} {

# our liking.

proc usage {} {

- puts -nonewline stderr "Usage: $::argv0 ?-test? "

+ puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? "

puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"

exit 1

}

-if {[llength $argv]!=2 && [llength $argv]!=3} usage

-if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage

+if {[llength $argv]<2} usage

set unicodedata.txt [lindex $argv end]

set casefolding.txt [lindex $argv end-1]

-set generate_test_code [expr {[llength $argv]==3}]

+set remove_diacritic remove_diacritic

+set generate_test_code 0

+set generate_fts5_code 0

+set function_prefix "sqlite3Fts"

+for {set i 0} {$i < [llength $argv]-2} {incr i} {

+ switch -- [lindex $argv $i] {

+ -test {

+ set generate_test_code 1

+ }

+ -fts5 {

+ set function_prefix sqlite3Fts5

+ set generate_fts5_code 1

+ set remove_diacritic fts5_remove_diacritic

+ }

+ default {

+ usage

+ }

print_fileheader

# Print the isalnum() function to stdout.

set lRange [an_load_separator_ranges]

-print_isalnum sqlite3FtsUnicodeIsalnum $lRange

+print_isalnum ${function_prefix}UnicodeIsalnum $lRange

# Leave a gap between the two generated C functions.

@@ -790,22 +669,26 @@ set mappings [rd_load_unicodedata_text ${unicodedata.txt}]

print_rd $mappings

puts ""

-print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings

+print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings

puts ""

# Print the fold() function to stdout.

-print_fold sqlite3FtsUnicodeFold

+print_fold ${function_prefix}UnicodeFold

# Print the test routines and main() function to stdout, if -test

# was specified.

if {$::generate_test_code} {

- print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange

- print_fold_test sqlite3FtsUnicodeFold $mappings

+ print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange

+ print_fold_test ${function_prefix}UnicodeFold $mappings

print_test_main

}

-puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"

-puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"

+if {$generate_fts5_code} {

+ # no-op

+} else {

+ puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"

+ puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"

« no previous file with comments | « third_party/sqlite/src/ext/fts3/tool/fts3view.c ('k') | third_party/sqlite/src/ext/fts3/unicode/parseunicode.tcl » ('j') | no next file with comments »