Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(116)

Unified Diff: third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl

Issue 1610963002: Import SQLite 3.10.2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl
diff --git a/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl b/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl
index c3083ee36863c29a98a61f4af3c7ea9932ff1c18..a2e9b1da29316fcd28b753627d6250bb761d50e7 100644
--- a/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl
+++ b/third_party/sqlite/src/ext/fts3/unicode/mkunicode.tcl
@@ -1,77 +1,5 @@
-#
-# Parameter $zName must be a path to the file UnicodeData.txt. This command
-# reads the file and returns a list of mappings required to remove all
-# diacritical marks from a unicode string. Each mapping is itself a list
-# consisting of two elements - the unicode codepoint and the single ASCII
-# character that it should be replaced with, or an empty string if the
-# codepoint should simply be removed from the input. Examples:
-#
-# { 224 a } (replace codepoint 224 to "a")
-# { 769 "" } (remove codepoint 769 from input)
-#
-# Mappings are only returned for non-upper case codepoints. It is assumed
-# that the input has already been folded to lower case.
-#
-proc rd_load_unicodedata_text {zName} {
- global tl_lookup_table
-
- set fd [open $zName]
- set lField {
- code
- character_name
- general_category
- canonical_combining_classes
- bidirectional_category
- character_decomposition_mapping
- decimal_digit_value
- digit_value
- numeric_value
- mirrored
- unicode_1_name
- iso10646_comment_field
- uppercase_mapping
- lowercase_mapping
- titlecase_mapping
- }
- set lRet [list]
-
- while { ![eof $fd] } {
- set line [gets $fd]
- if {$line == ""} continue
-
- set fields [split $line ";"]
- if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
- foreach $lField $fields {}
- if { [llength $character_decomposition_mapping]!=2
- || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
- } {
- continue
- }
-
- set iCode [expr "0x$code"]
- set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
- set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]
-
- if {[info exists tl_lookup_table($iCode)]} continue
-
- if { ($iAscii >= 97 && $iAscii <= 122)
- || ($iAscii >= 65 && $iAscii <= 90)
- } {
- lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
- set dia($iDia) 1
- }
- }
-
- foreach d [array names dia] {
- lappend lRet [list $d ""]
- }
- set lRet [lsort -integer -index 0 $lRet]
-
- close $fd
- set lRet
-}
-
+source [file join [file dirname [info script]] parseunicode.tcl]
proc print_rd {map} {
global tl_lookup_table
@@ -117,7 +45,7 @@ proc print_rd {map} {
puts "** E\"). The resuls of passing a codepoint that corresponds to an"
puts "** uppercase letter are undefined."
puts "*/"
- puts "static int remove_diacritic(int c)\{"
+ puts "static int ${::remove_diacritic}(int c)\{"
puts " unsigned short aDia\[\] = \{"
puts -nonewline " 0, "
set i 1
@@ -204,53 +132,6 @@ proc print_isdiacritic {zFunc map} {
#-------------------------------------------------------------------------
-# Parameter $zName must be a path to the file UnicodeData.txt. This command
-# reads the file and returns a list of codepoints (integers). The list
-# contains all codepoints in the UnicodeData.txt assigned to any "General
-# Category" that is not a "Letter" or "Number".
-#
-proc an_load_unicodedata_text {zName} {
- set fd [open $zName]
- set lField {
- code
- character_name
- general_category
- canonical_combining_classes
- bidirectional_category
- character_decomposition_mapping
- decimal_digit_value
- digit_value
- numeric_value
- mirrored
- unicode_1_name
- iso10646_comment_field
- uppercase_mapping
- lowercase_mapping
- titlecase_mapping
- }
- set lRet [list]
-
- while { ![eof $fd] } {
- set line [gets $fd]
- if {$line == ""} continue
-
- set fields [split $line ";"]
- if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
- foreach $lField $fields {}
-
- set iCode [expr "0x$code"]
- set bAlnum [expr {
- [lsearch {L N} [string range $general_category 0 0]] >= 0
- || $general_category=="Co"
- }]
-
- if { !$bAlnum } { lappend lRet $iCode }
- }
-
- close $fd
- set lRet
-}
-
proc an_load_separator_ranges {} {
global unicodedata.txt
set lSep [an_load_unicodedata_text ${unicodedata.txt}]
@@ -440,29 +321,6 @@ proc print_test_isalnum {zFunc lRange} {
#-------------------------------------------------------------------------
-proc tl_load_casefolding_txt {zName} {
- global tl_lookup_table
-
- set fd [open $zName]
- while { ![eof $fd] } {
- set line [gets $fd]
- if {[string range $line 0 0] == "#"} continue
- if {$line == ""} continue
-
- foreach x {a b c d} {unset -nocomplain $x}
- foreach {a b c d} [split $line ";"] {}
-
- set a2 [list]
- set c2 [list]
- foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
- foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
- set b [string trim $b]
- set d [string trim $d]
-
- if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
- }
-}
-
proc tl_create_records {} {
global tl_lookup_table
@@ -626,19 +484,20 @@ proc print_fold {zFunc} {
tl_print_table_footer toggle
tl_print_ioff_table $liOff
- puts {
+ puts [subst -nocommands {
int ret = c;
- assert( c>=0 );
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<128 ){
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
+ const struct TableEntry *p;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;
+ assert( c>aEntry[0].iCode );
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
int cmp = (c - aEntry[iTest].iCode);
@@ -649,19 +508,17 @@ proc print_fold {zFunc} {
iHi = iTest-1;
}
}
- assert( iRes<0 || c>=aEntry[iRes].iCode );
- if( iRes>=0 ){
- const struct TableEntry *p = &aEntry[iRes];
- if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
- ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
- assert( ret>0 );
- }
+ assert( iRes>=0 && c>=aEntry[iRes].iCode );
+ p = &aEntry[iRes];
+ if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
+ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
+ assert( ret>0 );
}
- if( bRemoveDiacritic ) ret = remove_diacritic(ret);
- }
+ if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);
}
+ }]
foreach entry $lHigh {
tl_print_if_entry $entry
@@ -732,8 +589,12 @@ proc print_fileheader {} {
*/
}]
puts ""
- puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
- puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
+ if {$::generate_fts5_code} {
+ # no-op
+ } else {
+ puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
+ puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
+ }
puts ""
puts "#include <assert.h>"
puts ""
@@ -760,22 +621,40 @@ proc print_test_main {} {
# our liking.
#
proc usage {} {
- puts -nonewline stderr "Usage: $::argv0 ?-test? "
+ puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? "
puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
exit 1
}
-if {[llength $argv]!=2 && [llength $argv]!=3} usage
-if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage
+if {[llength $argv]<2} usage
set unicodedata.txt [lindex $argv end]
set casefolding.txt [lindex $argv end-1]
-set generate_test_code [expr {[llength $argv]==3}]
+
+set remove_diacritic remove_diacritic
+set generate_test_code 0
+set generate_fts5_code 0
+set function_prefix "sqlite3Fts"
+for {set i 0} {$i < [llength $argv]-2} {incr i} {
+ switch -- [lindex $argv $i] {
+ -test {
+ set generate_test_code 1
+ }
+ -fts5 {
+ set function_prefix sqlite3Fts5
+ set generate_fts5_code 1
+ set remove_diacritic fts5_remove_diacritic
+ }
+ default {
+ usage
+ }
+ }
+}
print_fileheader
# Print the isalnum() function to stdout.
#
set lRange [an_load_separator_ranges]
-print_isalnum sqlite3FtsUnicodeIsalnum $lRange
+print_isalnum ${function_prefix}UnicodeIsalnum $lRange
# Leave a gap between the two generated C functions.
#
@@ -790,22 +669,26 @@ set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
print_rd $mappings
puts ""
puts ""
-print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings
+print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings
puts ""
puts ""
# Print the fold() function to stdout.
#
-print_fold sqlite3FtsUnicodeFold
+print_fold ${function_prefix}UnicodeFold
# Print the test routines and main() function to stdout, if -test
# was specified.
#
if {$::generate_test_code} {
- print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
- print_fold_test sqlite3FtsUnicodeFold $mappings
+ print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange
+ print_fold_test ${function_prefix}UnicodeFold $mappings
print_test_main
}
-puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
-puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"
+if {$generate_fts5_code} {
+ # no-op
+} else {
+ puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
+ puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"
+}
« no previous file with comments | « third_party/sqlite/src/ext/fts3/tool/fts3view.c ('k') | third_party/sqlite/src/ext/fts3/unicode/parseunicode.tcl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698