third_party/sqlite/sqlite-src-3080704/ext/fts3/unicode/mkunicode.tcl - Issue 883353008: [sql] Import reference version of SQLite 3.8.7.4.

Side by Side Diff: third_party/sqlite/sqlite-src-3080704/ext/fts3/unicode/mkunicode.tcl

Issue 883353008: [sql] Import reference version of SQLite 3.8.7.4. (Closed) Base URL: http://chromium.googlesource.com/chromium/src.git@master

Patch Set: Hold back encoding change which is messing up patch. Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « third_party/sqlite/sqlite-src-3080704/ext/fts3/unicode/CaseFolding.txt ('k') | third_party/sqlite/sqlite-src-3080704/ext/icu/README.txt » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1

	2 #

	3 # Parameter $zName must be a path to the file UnicodeData.txt. This command

	4 # reads the file and returns a list of mappings required to remove all

	5 # diacritical marks from a unicode string. Each mapping is itself a list

	6 # consisting of two elements - the unicode codepoint and the single ASCII

	7 # character that it should be replaced with, or an empty string if the

	8 # codepoint should simply be removed from the input. Examples:

	9 #

	10 # { 224 a } (replace codepoint 224 to "a")

	11 # { 769 "" } (remove codepoint 769 from input)

	12 #

	13 # Mappings are only returned for non-upper case codepoints. It is assumed

	14 # that the input has already been folded to lower case.

	15 #

	16 proc rd_load_unicodedata_text {zName} {

	17 global tl_lookup_table

	18

	19 set fd [open $zName]

	20 set lField {

	21 code

	22 character_name

	23 general_category

	24 canonical_combining_classes

	25 bidirectional_category

	26 character_decomposition_mapping

	27 decimal_digit_value

	28 digit_value

	29 numeric_value

	30 mirrored

	31 unicode_1_name

	32 iso10646_comment_field

	33 uppercase_mapping

	34 lowercase_mapping

	35 titlecase_mapping

	36 }

	37 set lRet [list]

	38

	39 while { ![eof $fd] } {

	40 set line [gets $fd]

	41 if {$line == ""} continue

	42

	43 set fields [split $line ";"]

	44 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

	45 foreach $lField $fields {}

	46 if { [llength $character_decomposition_mapping]!=2

	47 \|\| [string is xdigit [lindex $character_decomposition_mapping 0]]==0

	48 } {

	49 continue

	50 }

	51

	52 set iCode [expr "0x$code"]

	53 set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]

	54 set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]

	55

	56 if {[info exists tl_lookup_table($iCode)]} continue

	57

	58 if { ($iAscii >= 97 && $iAscii <= 122)

	59 \|\| ($iAscii >= 65 && $iAscii <= 90)

	60 } {

	61 lappend lRet [list $iCode [string tolower [format %c $iAscii]]]

	62 set dia($iDia) 1

	63 }

	64 }

	65

	66 foreach d [array names dia] {

	67 lappend lRet [list $d ""]

	68 }

	69 set lRet [lsort -integer -index 0 $lRet]

	70

	71 close $fd

	72 set lRet

	73 }

	74

	75

	76 proc print_rd {map} {

	77 global tl_lookup_table

	78 set aChar [list]

	79 set lRange [list]

	80

	81 set nRange 1

	82 set iFirst [lindex $map 0 0]

	83 set cPrev [lindex $map 0 1]

	84

	85 foreach m [lrange $map 1 end] {

	86 foreach {i c} $m {}

	87

	88 if {$cPrev == $c} {

	89 for {set j [expr $iFirst+$nRange]} {$j<$i} {incr j} {

	90 if {[info exists tl_lookup_table($j)]==0} break

	91 }

	92

	93 if {$j==$i} {

	94 set nNew [expr {(1 + $i - $iFirst)}]

	95 if {$nNew<=8} {

	96 set nRange $nNew

	97 continue

	98 }

	99 }

	100 }

	101

	102 lappend lRange [list $iFirst $nRange]

	103 lappend aChar $cPrev

	104

	105 set iFirst $i

	106 set cPrev $c

	107 set nRange 1

	108 }

	109 lappend lRange [list $iFirst $nRange]

	110 lappend aChar $cPrev

	111

	112 puts "/*"

	113 puts "** If the argument is a codepoint corresponding to a lowercase letter"

	114 puts "** in the ASCII range with a diacritic added, return the codepoint"

	115 puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN"

	116 puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER"

	117 puts "** E\"). The resuls of passing a codepoint that corresponds to an"

	118 puts "** uppercase letter are undefined."

	119 puts "*/"

	120 puts "static int remove_diacritic(int c)\{"

	121 puts " unsigned short aDia\[\] = \{"

	122 puts -nonewline " 0, "

	123 set i 1

	124 foreach r $lRange {

	125 foreach {iCode nRange} $r {}

	126 if {($i % 8)==0} {puts "" ; puts -nonewline " " }

	127 incr i

	128

	129 puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]

	130 puts -nonewline ", "

	131 }

	132 puts ""

	133 puts " \};"

	134 puts " char aChar\[\] = \{"

	135 puts -nonewline " '\\0', "

	136 set i 1

	137 foreach c $aChar {

	138 set str "'$c', "

	139 if {$c == ""} { set str "'\\0', " }

	140

	141 if {($i % 12)==0} {puts "" ; puts -nonewline " " }

	142 incr i

	143 puts -nonewline "$str"

	144 }

	145 puts ""

	146 puts " \};"

	147 puts {

	148 unsigned int key = (((unsigned int)c)<<3) \| 0x00000007;

	149 int iRes = 0;

	150 int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;

	151 int iLo = 0;

	152 while( iHi>=iLo ){

	153 int iTest = (iHi + iLo) / 2;

	154 if( key >= aDia[iTest] ){

	155 iRes = iTest;

	156 iLo = iTest+1;

	157 }else{

	158 iHi = iTest-1;

	159 }

	160 }

	161 assert( key>=aDia[iRes] );

	162 return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);}

	163 puts "\}"

	164 }

	165

	166 proc print_isdiacritic {zFunc map} {

	167

	168 set lCode [list]

	169 foreach m $map {

	170 foreach {code char} $m {}

	171 if {$code && $char == ""} { lappend lCode $code }

	172 }

	173 set lCode [lsort -integer $lCode]

	174 set iFirst [lindex $lCode 0]

	175 set iLast [lindex $lCode end]

	176

	177 set i1 0

	178 set i2 0

	179

	180 foreach c $lCode {

	181 set i [expr $c - $iFirst]

	182 if {$i < 32} {

	183 set i1 [expr {$i1 \| (1<<$i)}]

	184 } else {

	185 set i2 [expr {$i2 \| (1<<($i-32))}]

	186 }

	187 }

	188

	189 puts "/*"

	190 puts "** Return true if the argument interpreted as a unicode codepoint"

	191 puts "** is a diacritical modifier character."

	192 puts "*/"

	193 puts "int ${zFunc}\(int c)\{"

	194 puts " unsigned int mask0 = [format "0x%08X" $i1];"

	195 puts " unsigned int mask1 = [format "0x%08X" $i2];"

	196

	197 puts " if( c<$iFirst \|\| c>$iLast ) return 0;"

	198 puts " return (c < $iFirst+32) ?"

	199 puts " (mask0 & (1 << (c-$iFirst))) :"

	200 puts " (mask1 & (1 << (c-$iFirst-32)));"

	201 puts "\}"

	202 }

	203

	204

	205 #-------------------------------------------------------------------------

	206

	207 # Parameter $zName must be a path to the file UnicodeData.txt. This command

	208 # reads the file and returns a list of codepoints (integers). The list

	209 # contains all codepoints in the UnicodeData.txt assigned to any "General

	210 # Category" that is not a "Letter" or "Number".

	211 #

	212 proc an_load_unicodedata_text {zName} {

	213 set fd [open $zName]

	214 set lField {

	215 code

	216 character_name

	217 general_category

	218 canonical_combining_classes

	219 bidirectional_category

	220 character_decomposition_mapping

	221 decimal_digit_value

	222 digit_value

	223 numeric_value

	224 mirrored

	225 unicode_1_name

	226 iso10646_comment_field

	227 uppercase_mapping

	228 lowercase_mapping

	229 titlecase_mapping

	230 }

	231 set lRet [list]

	232

	233 while { ![eof $fd] } {

	234 set line [gets $fd]

	235 if {$line == ""} continue

	236

	237 set fields [split $line ";"]

	238 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

	239 foreach $lField $fields {}

	240

	241 set iCode [expr "0x$code"]

	242 set bAlnum [expr {

	243 [lsearch {L N} [string range $general_category 0 0]] >= 0

	244 \|\| $general_category=="Co"

	245 }]

	246

	247 if { !$bAlnum } { lappend lRet $iCode }

	248 }

	249

	250 close $fd

	251 set lRet

	252 }

	253

	254 proc an_load_separator_ranges {} {

	255 global unicodedata.txt

	256 set lSep [an_load_unicodedata_text ${unicodedata.txt}]

	257 unset -nocomplain iFirst

	258 unset -nocomplain nRange

	259 set lRange [list]

	260 foreach sep $lSep {

	261 if {0==[info exists iFirst]} {

	262 set iFirst $sep

	263 set nRange 1

	264 } elseif { $sep == ($iFirst+$nRange) } {

	265 incr nRange

	266 } else {

	267 lappend lRange [list $iFirst $nRange]

	268 set iFirst $sep

	269 set nRange 1

	270 }

	271 }

	272 lappend lRange [list $iFirst $nRange]

	273 set lRange

	274 }

	275

	276 proc an_print_range_array {lRange} {

	277 set iFirstMax 0

	278 set nRangeMax 0

	279 foreach range $lRange {

	280 foreach {iFirst nRange} $range {}

	281 if {$iFirst > $iFirstMax} {set iFirstMax $iFirst}

	282 if {$nRange > $nRangeMax} {set nRangeMax $nRange}

	283 }

	284 if {$iFirstMax >= (1<<22)} {error "first-max is too large for format"}

	285 if {$nRangeMax >= (1<<10)} {error "range-max is too large for format"}

	286

	287 puts -nonewline " "

	288 puts [string trim {

	289 /* Each unsigned integer in the following array corresponds to a contiguous

	290 ** range of unicode codepoints that are not either letters or numbers (i.e.

	291 ** codepoints for which this function should return 0).

	292 **

	293 ** The most significant 22 bits in each 32-bit value contain the first

	294 ** codepoint in the range. The least significant 10 bits are used to store

	295 ** the size of the range (always at least 1). In other words, the value

	296 ** ((C<<22) + N) represents a range of N codepoints starting with codepoint

	297 ** C. It is not possible to represent a range larger than 1023 codepoints

	298 ** using this format.

	299 */

	300 }]

	301 puts -nonewline " static const unsigned int aEntry\[\] = \{"

	302 set i 0

	303 foreach range $lRange {

	304 foreach {iFirst nRange} $range {}

	305 set u32 [format "0x%08X" [expr ($iFirst<<10) + $nRange]]

	306

	307 if {($i % 5)==0} {puts "" ; puts -nonewline " "}

	308 puts -nonewline " $u32,"

	309 incr i

	310 }

	311 puts ""

	312 puts " \};"

	313 }

	314

	315 proc an_print_ascii_bitmap {lRange} {

	316 foreach range $lRange {

	317 foreach {iFirst nRange} $range {}

	318 for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} {

	319 if {$i<=127} { set a($i) 1 }

	320 }

	321 }

	322

	323 set aAscii [list 0 0 0 0]

	324 foreach key [array names a] {

	325 set idx [expr $key >> 5]

	326 lset aAscii $idx [expr [lindex $aAscii $idx] \| (1 << ($key&0x001F))]

	327 }

	328

	329 puts " static const unsigned int aAscii\[4\] = \{"

	330 puts -nonewline " "

	331 foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] }

	332 puts ""

	333 puts " \};"

	334 }

	335

	336 proc print_isalnum {zFunc lRange} {

	337 puts "/*"

	338 puts "** Return true if the argument corresponds to a unicode codepoint"

	339 puts "** classified as either a letter or a number. Otherwise false."

	340 puts "**"

	341 puts "** The results are undefined if the value passed to this function"

	342 puts "** is less than zero."

	343 puts "*/"

	344 puts "int ${zFunc}\(int c)\{"

	345 an_print_range_array $lRange

	346 an_print_ascii_bitmap $lRange

	347 puts {

	348 if( c<128 ){

	349 return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );

	350 }else if( c<(1<<22) ){

	351 unsigned int key = (((unsigned int)c)<<10) \| 0x000003FF;

	352 int iRes = 0;

	353 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;

	354 int iLo = 0;

	355 while( iHi>=iLo ){

	356 int iTest = (iHi + iLo) / 2;

	357 if( key >= aEntry[iTest] ){

	358 iRes = iTest;

	359 iLo = iTest+1;

	360 }else{

	361 iHi = iTest-1;

	362 }

	363 }

	364 assert( aEntry[0]<key );

	365 assert( key>=aEntry[iRes] );

	366 return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));

	367 }

	368 return 1;}

	369 puts "\}"

	370 }

	371

	372 proc print_test_isalnum {zFunc lRange} {

	373 foreach range $lRange {

	374 foreach {iFirst nRange} $range {}

	375 for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { set a($i) 1 }

	376 }

	377

	378 puts "static int isalnum_test(int *piCode)\{"

	379 puts -nonewline " unsigned char aAlnum\[\] = \{"

	380 for {set i 0} {$i < 70000} {incr i} {

	381 if {($i % 32)==0} { puts "" ; puts -nonewline " " }

	382 set bFlag [expr ![info exists a($i)]]

	383 puts -nonewline "${bFlag},"

	384 }

	385 puts ""

	386 puts " \};"

	387

	388 puts -nonewline " int aLargeSep\[\] = \{"

	389 set i 0

	390 foreach iSep [lsort -integer [array names a]] {

	391 if {$iSep<70000} continue

	392 if {($i % 8)==0} { puts "" ; puts -nonewline " " }

	393 puts -nonewline " $iSep,"

	394 incr i

	395 }

	396 puts ""

	397 puts " \};"

	398 puts -nonewline " int aLargeOther\[\] = \{"

	399 set i 0

	400 foreach iSep [lsort -integer [array names a]] {

	401 if {$iSep<70000} continue

	402 if {[info exists a([expr $iSep-1])]==0} {

	403 if {($i % 8)==0} { puts "" ; puts -nonewline " " }

	404 puts -nonewline " [expr $iSep-1],"

	405 incr i

	406 }

	407 if {[info exists a([expr $iSep+1])]==0} {

	408 if {($i % 8)==0} { puts "" ; puts -nonewline " " }

	409 puts -nonewline " [expr $iSep+1],"

	410 incr i

	411 }

	412 }

	413 puts ""

	414 puts " \};"

	415

	416 puts [subst -nocommands {

	417 int i;

	418 for(i=0; i<sizeof(aAlnum)/sizeof(aAlnum[0]); i++){

	419 if( ${zFunc}(i)!=aAlnum[i] ){

	420 *piCode = i;

	421 return 1;

	422 }

	423 }

	424 for(i=0; i<sizeof(aLargeSep)/sizeof(aLargeSep[0]); i++){

	425 if( ${zFunc}(aLargeSep[i])!=0 ){

	426 *piCode = aLargeSep[i];

	427 return 1;

	428 }

	429 }

	430 for(i=0; i<sizeof(aLargeOther)/sizeof(aLargeOther[0]); i++){

	431 if( ${zFunc}(aLargeOther[i])!=1 ){

	432 *piCode = aLargeOther[i];

	433 return 1;

	434 }

	435 }

	436 }]

	437 puts " return 0;"

	438 puts "\}"

	439 }

	440

	441 #-------------------------------------------------------------------------

	442

	443 proc tl_load_casefolding_txt {zName} {

	444 global tl_lookup_table

	445

	446 set fd [open $zName]

	447 while { ![eof $fd] } {

	448 set line [gets $fd]

	449 if {[string range $line 0 0] == "#"} continue

	450 if {$line == ""} continue

	451

	452 foreach x {a b c d} {unset -nocomplain $x}

	453 foreach {a b c d} [split $line ";"] {}

	454

	455 set a2 [list]

	456 set c2 [list]

	457 foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }

	458 foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }

	459 set b [string trim $b]

	460 set d [string trim $d]

	461

	462 if {$b=="C" \|\| $b=="S"} { set tl_lookup_table($a2) $c2 }

	463 }

	464 }

	465

	466 proc tl_create_records {} {

	467 global tl_lookup_table

	468

	469 set iFirst ""

	470 set nOff 0

	471 set nRange 0

	472 set nIncr 0

	473

	474 set lRecord [list]

	475 foreach code [lsort -integer [array names tl_lookup_table]] {

	476 set mapping $tl_lookup_table($code)

	477 if {$iFirst == ""} {

	478 set iFirst $code

	479 set nOff [expr $mapping - $code]

	480 set nRange 1

	481 set nIncr 1

	482 } else {

	483 set diff [expr $code - ($iFirst + ($nIncr * ($nRange - 1)))]

	484 if { $nRange==1 && ($diff==1 \|\| $diff==2) } {

	485 set nIncr $diff

	486 }

	487

	488 if {$diff != $nIncr \|\| ($mapping - $code)!=$nOff} {

	489 if { $nRange==1 } {set nIncr 1}

	490 lappend lRecord [list $iFirst $nIncr $nRange $nOff]

	491 set iFirst $code

	492 set nOff [expr $mapping - $code]

	493 set nRange 1

	494 set nIncr 1

	495 } else {

	496 incr nRange

	497 }

	498 }

	499 }

	500

	501 lappend lRecord [list $iFirst $nIncr $nRange $nOff]

	502

	503 set lRecord

	504 }

	505

	506 proc tl_print_table_header {} {

	507 puts -nonewline " "

	508 puts [string trim {

	509 /* Each entry in the following array defines a rule for folding a range

	510 ** of codepoints to lower case. The rule applies to a range of nRange

	511 ** codepoints starting at codepoint iCode.

	512 **

	513 ** If the least significant bit in flags is clear, then the rule applies

	514 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and

	515 ** need to be folded). Or, if it is set, then the rule only applies to

	516 ** every second codepoint in the range, starting with codepoint C.

	517 **

	518 ** The 7 most significant bits in flags are an index into the aiOff[]

	519 ** array. If a specific codepoint C does require folding, then its lower

	520 ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).

	521 **

	522 ** The contents of this array are generated by parsing the CaseFolding.txt

	523 ** file distributed as part of the "Unicode Character Database". See

	524 ** http://www.unicode.org for details.

	525 */

	526 }]

	527 puts " static const struct TableEntry \{"

	528 puts " unsigned short iCode;"

	529 puts " unsigned char flags;"

	530 puts " unsigned char nRange;"

	531 puts " \} aEntry\[\] = \{"

	532 }

	533

	534 proc tl_print_table_entry {togglevar entry liOff} {

	535 upvar $togglevar t

	536 foreach {iFirst nIncr nRange nOff} $entry {}

	537

	538 if {$iFirst > (1<<16)} { return 1 }

	539

	540 if {[info exists t]==0} {set t 0}

	541 if {$t==0} { puts -nonewline " " }

	542

	543 set flags 0

	544 if {$nIncr==2} { set flags 1 ; set nRange [expr $nRange * 2]}

	545 if {$nOff<0} { incr nOff [expr (1<<16)] }

	546

	547 set idx [lsearch $liOff $nOff]

	548 if {$idx<0} {error "malfunction generating aiOff"}

	549 set flags [expr $flags + $idx*2]

	550

	551 set txt "{$iFirst, $flags, $nRange},"

	552 if {$t==2} {

	553 puts $txt

	554 } else {

	555 puts -nonewline [format "% -23s" $txt]

	556 }

	557 set t [expr ($t+1)%3]

	558

	559 return 0

	560 }

	561

	562 proc tl_print_table_footer {togglevar} {

	563 upvar $togglevar t

	564 if {$t!=0} {puts ""}

	565 puts " \};"

	566 }

	567

	568 proc tl_print_if_entry {entry} {

	569 foreach {iFirst nIncr nRange nOff} $entry {}

	570 if {$nIncr==2} {error "tl_print_if_entry needs improvement!"}

	571

	572 puts " else if( c>=$iFirst && c<[expr $iFirst+$nRange] )\{"

	573 puts " ret = c + $nOff;"

	574 puts " \}"

	575 }

	576

	577 proc tl_generate_ioff_table {lRecord} {

	578 foreach entry $lRecord {

	579 foreach {iFirst nIncr nRange iOff} $entry {}

	580 if {$iOff<0} { incr iOff [expr (1<<16)] }

	581 if {[info exists a($iOff)]} continue

	582 set a($iOff) 1

	583 }

	584

	585 set liOff [lsort -integer [array names a]]

	586 if {[llength $liOff]>128} { error "Too many distinct ioffs" }

	587 return $liOff

	588 }

	589

	590 proc tl_print_ioff_table {liOff} {

	591 puts -nonewline " static const unsigned short aiOff\[\] = \{"

	592 set i 0

	593 foreach off $liOff {

	594 if {($i % 8)==0} {puts "" ; puts -nonewline " "}

	595 puts -nonewline [format "% -7s" "$off,"]

	596 incr i

	597 }

	598 puts ""

	599 puts " \};"

	600

	601 }

	602

	603 proc print_fold {zFunc} {

	604

	605 set lRecord [tl_create_records]

	606

	607 set lHigh [list]

	608 puts "/*"

	609 puts "** Interpret the argument as a unicode codepoint. If the codepoint"

	610 puts "** is an upper case character that has a lower case equivalent,"

	611 puts "** return the codepoint corresponding to the lower case version."

	612 puts "** Otherwise, return a copy of the argument."

	613 puts "**"

	614 puts "** The results are undefined if the value passed to this function"

	615 puts "** is less than zero."

	616 puts "*/"

	617 puts "int ${zFunc}\(int c, int bRemoveDiacritic)\{"

	618

	619 set liOff [tl_generate_ioff_table $lRecord]

	620 tl_print_table_header

	621 foreach entry $lRecord {

	622 if {[tl_print_table_entry toggle $entry $liOff]} {

	623 lappend lHigh $entry

	624 }

	625 }

	626 tl_print_table_footer toggle

	627 tl_print_ioff_table $liOff

	628

	629 puts {

	630 int ret = c;

	631

	632 assert( c>=0 );

	633 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

	634

	635 if( c<128 ){

	636 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');

	637 }else if( c<65536 ){

	638 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;

	639 int iLo = 0;

	640 int iRes = -1;

	641

	642 while( iHi>=iLo ){

	643 int iTest = (iHi + iLo) / 2;

	644 int cmp = (c - aEntry[iTest].iCode);

	645 if( cmp>=0 ){

	646 iRes = iTest;

	647 iLo = iTest+1;

	648 }else{

	649 iHi = iTest-1;

	650 }

	651 }

	652 assert( iRes<0 \|\| c>=aEntry[iRes].iCode );

	653

	654 if( iRes>=0 ){

	655 const struct TableEntry *p = &aEntry[iRes];

	656 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){

	657 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;

	658 assert( ret>0 );

	659 }

	660 }

	661

	662 if( bRemoveDiacritic ) ret = remove_diacritic(ret);

	663 }

	664 }

	665

	666 foreach entry $lHigh {

	667 tl_print_if_entry $entry

	668 }

	669

	670 puts ""

	671 puts " return ret;"

	672 puts "\}"

	673 }

	674

	675 proc print_fold_test {zFunc mappings} {

	676 global tl_lookup_table

	677

	678 foreach m $mappings {

	679 set c [lindex $m 1]

	680 if {$c == ""} {

	681 set extra([lindex $m 0]) 0

	682 } else {

	683 scan $c %c i

	684 set extra([lindex $m 0]) $i

	685 }

	686 }

	687

	688 puts "static int fold_test(int *piCode)\{"

	689 puts -nonewline " static int aLookup\[\] = \{"

	690 for {set i 0} {$i < 70000} {incr i} {

	691

	692 set expected $i

	693 catch { set expected $tl_lookup_table($i) }

	694 set expected2 $expected

	695 catch { set expected2 $extra($expected2) }

	696

	697 if {($i % 4)==0} { puts "" ; puts -nonewline " " }

	698 puts -nonewline "$expected, $expected2, "

	699 }

	700 puts " \};"

	701 puts " int i;"

	702 puts " for(i=0; i<sizeof(aLookup)/sizeof(aLookup\[0\]); i++)\{"

	703 puts " int iCode = (i/2);"

	704 puts " int bFlag = i & 0x0001;"

	705 puts " if( ${zFunc}\(iCode, bFlag)!=aLookup\[i\] )\{"

	706 puts " *piCode = iCode;"

	707 puts " return 1;"

	708 puts " \}"

	709 puts " \}"

	710 puts " return 0;"

	711 puts "\}"

	712 }

	713

	714

	715 proc print_fileheader {} {

	716 puts [string trim {

	717 /*

	718 ** 2012 May 25

	719 **

	720 ** The author disclaims copyright to this source code. In place of

	721 ** a legal notice, here is a blessing:

	722 **

	723 ** May you do good and not evil.

	724 ** May you find forgiveness for yourself and forgive others.

	725 ** May you share freely, never taking more than you give.

	726 **

	727 ******************************************************************************

	728 */

	729

	730 /*

	731 ** DO NOT EDIT THIS MACHINE GENERATED FILE.

	732 */

	733 }]

	734 puts ""

	735 puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"

	736 puts "#if defined(SQLITE_ENABLE_FTS3) \|\| defined(SQLITE_ENABLE_FTS4)"

	737 puts ""

	738 puts "#include <assert.h>"

	739 puts ""

	740 }

	741

	742 proc print_test_main {} {

	743 puts ""

	744 puts "#include <stdio.h>"

	745 puts ""

	746 puts "int main(int argc, char **argv)\{"

	747 puts " int r1, r2;"

	748 puts " int code;"

	749 puts " r1 = isalnum_test(&code);"

	750 puts " if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);"

	751 puts " else printf(\"isalnum(): test passed\\n\");"

	752 puts " r2 = fold_test(&code);"

	753 puts " if( r2 ) printf(\"fold(): Problem with code %d\\n\",code);"

	754 puts " else printf(\"fold(): test passed\\n\");"

	755 puts " return (r1 \|\| r2);"

	756 puts "\}"

	757 }

	758

	759 # Proces the command line arguments. Exit early if they are not to

	760 # our liking.

	761 #

	762 proc usage {} {

	763 puts -nonewline stderr "Usage: $::argv0 ?-test? "

	764 puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"

	765 exit 1

	766 }

	767 if {[llength $argv]!=2 && [llength $argv]!=3} usage

	768 if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage

	769 set unicodedata.txt [lindex $argv end]

	770 set casefolding.txt [lindex $argv end-1]

	771 set generate_test_code [expr {[llength $argv]==3}]

	772

	773 print_fileheader

	774

	775 # Print the isalnum() function to stdout.

	776 #

	777 set lRange [an_load_separator_ranges]

	778 print_isalnum sqlite3FtsUnicodeIsalnum $lRange

	779

	780 # Leave a gap between the two generated C functions.

	781 #

	782 puts ""

	783 puts ""

	784

	785 # Load the fold data. This is used by the [rd_XXX] commands

	786 # as well as [print_fold].

	787 tl_load_casefolding_txt ${casefolding.txt}

	788

	789 set mappings [rd_load_unicodedata_text ${unicodedata.txt}]

	790 print_rd $mappings

	791 puts ""

	792 puts ""

	793 print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings

	794 puts ""

	795 puts ""

	796

	797 # Print the fold() function to stdout.

	798 #

	799 print_fold sqlite3FtsUnicodeFold

	800

	801 # Print the test routines and main() function to stdout, if -test

	802 # was specified.

	803 #

	804 if {$::generate_test_code} {

	805 print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange

	806 print_fold_test sqlite3FtsUnicodeFold $mappings

	807 print_test_main

	808 }

	809

	810 puts "#endif /* defined(SQLITE_ENABLE_FTS3) \|\| defined(SQLITE_ENABLE_FTS4) */"

	811 puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"

OLD	NEW