OLD | NEW |
| (Empty) |
1 | |
2 source [file join [file dirname [info script]] parseunicode.tcl] | |
3 | |
4 proc print_rd {map} { | |
5 global tl_lookup_table | |
6 set aChar [list] | |
7 set lRange [list] | |
8 | |
9 set nRange 1 | |
10 set iFirst [lindex $map 0 0] | |
11 set cPrev [lindex $map 0 1] | |
12 | |
13 foreach m [lrange $map 1 end] { | |
14 foreach {i c} $m {} | |
15 | |
16 if {$cPrev == $c} { | |
17 for {set j [expr $iFirst+$nRange]} {$j<$i} {incr j} { | |
18 if {[info exists tl_lookup_table($j)]==0} break | |
19 } | |
20 | |
21 if {$j==$i} { | |
22 set nNew [expr {(1 + $i - $iFirst)}] | |
23 if {$nNew<=8} { | |
24 set nRange $nNew | |
25 continue | |
26 } | |
27 } | |
28 } | |
29 | |
30 lappend lRange [list $iFirst $nRange] | |
31 lappend aChar $cPrev | |
32 | |
33 set iFirst $i | |
34 set cPrev $c | |
35 set nRange 1 | |
36 } | |
37 lappend lRange [list $iFirst $nRange] | |
38 lappend aChar $cPrev | |
39 | |
40 puts "/*" | |
41 puts "** If the argument is a codepoint corresponding to a lowercase letter" | |
42 puts "** in the ASCII range with a diacritic added, return the codepoint" | |
43 puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN" | |
44 puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER" | |
45 puts "** E\"). The resuls of passing a codepoint that corresponds to an" | |
46 puts "** uppercase letter are undefined." | |
47 puts "*/" | |
48 puts "static int ${::remove_diacritic}(int c)\{" | |
49 puts " unsigned short aDia\[\] = \{" | |
50 puts -nonewline " 0, " | |
51 set i 1 | |
52 foreach r $lRange { | |
53 foreach {iCode nRange} $r {} | |
54 if {($i % 8)==0} {puts "" ; puts -nonewline " " } | |
55 incr i | |
56 | |
57 puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]] | |
58 puts -nonewline ", " | |
59 } | |
60 puts "" | |
61 puts " \};" | |
62 puts " char aChar\[\] = \{" | |
63 puts -nonewline " '\\0', " | |
64 set i 1 | |
65 foreach c $aChar { | |
66 set str "'$c', " | |
67 if {$c == ""} { set str "'\\0', " } | |
68 | |
69 if {($i % 12)==0} {puts "" ; puts -nonewline " " } | |
70 incr i | |
71 puts -nonewline "$str" | |
72 } | |
73 puts "" | |
74 puts " \};" | |
75 puts { | |
76 unsigned int key = (((unsigned int)c)<<3) | 0x00000007; | |
77 int iRes = 0; | |
78 int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; | |
79 int iLo = 0; | |
80 while( iHi>=iLo ){ | |
81 int iTest = (iHi + iLo) / 2; | |
82 if( key >= aDia[iTest] ){ | |
83 iRes = iTest; | |
84 iLo = iTest+1; | |
85 }else{ | |
86 iHi = iTest-1; | |
87 } | |
88 } | |
89 assert( key>=aDia[iRes] ); | |
90 return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);} | |
91 puts "\}" | |
92 } | |
93 | |
94 proc print_isdiacritic {zFunc map} { | |
95 | |
96 set lCode [list] | |
97 foreach m $map { | |
98 foreach {code char} $m {} | |
99 if {$code && $char == ""} { lappend lCode $code } | |
100 } | |
101 set lCode [lsort -integer $lCode] | |
102 set iFirst [lindex $lCode 0] | |
103 set iLast [lindex $lCode end] | |
104 | |
105 set i1 0 | |
106 set i2 0 | |
107 | |
108 foreach c $lCode { | |
109 set i [expr $c - $iFirst] | |
110 if {$i < 32} { | |
111 set i1 [expr {$i1 | (1<<$i)}] | |
112 } else { | |
113 set i2 [expr {$i2 | (1<<($i-32))}] | |
114 } | |
115 } | |
116 | |
117 puts "/*" | |
118 puts "** Return true if the argument interpreted as a unicode codepoint" | |
119 puts "** is a diacritical modifier character." | |
120 puts "*/" | |
121 puts "int ${zFunc}\(int c)\{" | |
122 puts " unsigned int mask0 = [format "0x%08X" $i1];" | |
123 puts " unsigned int mask1 = [format "0x%08X" $i2];" | |
124 | |
125 puts " if( c<$iFirst || c>$iLast ) return 0;" | |
126 puts " return (c < $iFirst+32) ?" | |
127 puts " (mask0 & (1 << (c-$iFirst))) :" | |
128 puts " (mask1 & (1 << (c-$iFirst-32)));" | |
129 puts "\}" | |
130 } | |
131 | |
132 | |
133 #------------------------------------------------------------------------- | |
134 | |
135 proc an_load_separator_ranges {} { | |
136 global unicodedata.txt | |
137 set lSep [an_load_unicodedata_text ${unicodedata.txt}] | |
138 unset -nocomplain iFirst | |
139 unset -nocomplain nRange | |
140 set lRange [list] | |
141 foreach sep $lSep { | |
142 if {0==[info exists iFirst]} { | |
143 set iFirst $sep | |
144 set nRange 1 | |
145 } elseif { $sep == ($iFirst+$nRange) } { | |
146 incr nRange | |
147 } else { | |
148 lappend lRange [list $iFirst $nRange] | |
149 set iFirst $sep | |
150 set nRange 1 | |
151 } | |
152 } | |
153 lappend lRange [list $iFirst $nRange] | |
154 set lRange | |
155 } | |
156 | |
157 proc an_print_range_array {lRange} { | |
158 set iFirstMax 0 | |
159 set nRangeMax 0 | |
160 foreach range $lRange { | |
161 foreach {iFirst nRange} $range {} | |
162 if {$iFirst > $iFirstMax} {set iFirstMax $iFirst} | |
163 if {$nRange > $nRangeMax} {set nRangeMax $nRange} | |
164 } | |
165 if {$iFirstMax >= (1<<22)} {error "first-max is too large for format"} | |
166 if {$nRangeMax >= (1<<10)} {error "range-max is too large for format"} | |
167 | |
168 puts -nonewline " " | |
169 puts [string trim { | |
170 /* Each unsigned integer in the following array corresponds to a contiguous | |
171 ** range of unicode codepoints that are not either letters or numbers (i.e. | |
172 ** codepoints for which this function should return 0). | |
173 ** | |
174 ** The most significant 22 bits in each 32-bit value contain the first | |
175 ** codepoint in the range. The least significant 10 bits are used to store | |
176 ** the size of the range (always at least 1). In other words, the value | |
177 ** ((C<<22) + N) represents a range of N codepoints starting with codepoint | |
178 ** C. It is not possible to represent a range larger than 1023 codepoints | |
179 ** using this format. | |
180 */ | |
181 }] | |
182 puts -nonewline " static const unsigned int aEntry\[\] = \{" | |
183 set i 0 | |
184 foreach range $lRange { | |
185 foreach {iFirst nRange} $range {} | |
186 set u32 [format "0x%08X" [expr ($iFirst<<10) + $nRange]] | |
187 | |
188 if {($i % 5)==0} {puts "" ; puts -nonewline " "} | |
189 puts -nonewline " $u32," | |
190 incr i | |
191 } | |
192 puts "" | |
193 puts " \};" | |
194 } | |
195 | |
196 proc an_print_ascii_bitmap {lRange} { | |
197 foreach range $lRange { | |
198 foreach {iFirst nRange} $range {} | |
199 for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { | |
200 if {$i<=127} { set a($i) 1 } | |
201 } | |
202 } | |
203 | |
204 set aAscii [list 0 0 0 0] | |
205 foreach key [array names a] { | |
206 set idx [expr $key >> 5] | |
207 lset aAscii $idx [expr [lindex $aAscii $idx] | (1 << ($key&0x001F))] | |
208 } | |
209 | |
210 puts " static const unsigned int aAscii\[4\] = \{" | |
211 puts -nonewline " " | |
212 foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] } | |
213 puts "" | |
214 puts " \};" | |
215 } | |
216 | |
217 proc print_isalnum {zFunc lRange} { | |
218 puts "/*" | |
219 puts "** Return true if the argument corresponds to a unicode codepoint" | |
220 puts "** classified as either a letter or a number. Otherwise false." | |
221 puts "**" | |
222 puts "** The results are undefined if the value passed to this function" | |
223 puts "** is less than zero." | |
224 puts "*/" | |
225 puts "int ${zFunc}\(int c)\{" | |
226 an_print_range_array $lRange | |
227 an_print_ascii_bitmap $lRange | |
228 puts { | |
229 if( c<128 ){ | |
230 return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); | |
231 }else if( c<(1<<22) ){ | |
232 unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; | |
233 int iRes = 0; | |
234 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; | |
235 int iLo = 0; | |
236 while( iHi>=iLo ){ | |
237 int iTest = (iHi + iLo) / 2; | |
238 if( key >= aEntry[iTest] ){ | |
239 iRes = iTest; | |
240 iLo = iTest+1; | |
241 }else{ | |
242 iHi = iTest-1; | |
243 } | |
244 } | |
245 assert( aEntry[0]<key ); | |
246 assert( key>=aEntry[iRes] ); | |
247 return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); | |
248 } | |
249 return 1;} | |
250 puts "\}" | |
251 } | |
252 | |
253 proc print_test_isalnum {zFunc lRange} { | |
254 foreach range $lRange { | |
255 foreach {iFirst nRange} $range {} | |
256 for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { set a($i) 1 } | |
257 } | |
258 | |
259 puts "static int isalnum_test(int *piCode)\{" | |
260 puts -nonewline " unsigned char aAlnum\[\] = \{" | |
261 for {set i 0} {$i < 70000} {incr i} { | |
262 if {($i % 32)==0} { puts "" ; puts -nonewline " " } | |
263 set bFlag [expr ![info exists a($i)]] | |
264 puts -nonewline "${bFlag}," | |
265 } | |
266 puts "" | |
267 puts " \};" | |
268 | |
269 puts -nonewline " int aLargeSep\[\] = \{" | |
270 set i 0 | |
271 foreach iSep [lsort -integer [array names a]] { | |
272 if {$iSep<70000} continue | |
273 if {($i % 8)==0} { puts "" ; puts -nonewline " " } | |
274 puts -nonewline " $iSep," | |
275 incr i | |
276 } | |
277 puts "" | |
278 puts " \};" | |
279 puts -nonewline " int aLargeOther\[\] = \{" | |
280 set i 0 | |
281 foreach iSep [lsort -integer [array names a]] { | |
282 if {$iSep<70000} continue | |
283 if {[info exists a([expr $iSep-1])]==0} { | |
284 if {($i % 8)==0} { puts "" ; puts -nonewline " " } | |
285 puts -nonewline " [expr $iSep-1]," | |
286 incr i | |
287 } | |
288 if {[info exists a([expr $iSep+1])]==0} { | |
289 if {($i % 8)==0} { puts "" ; puts -nonewline " " } | |
290 puts -nonewline " [expr $iSep+1]," | |
291 incr i | |
292 } | |
293 } | |
294 puts "" | |
295 puts " \};" | |
296 | |
297 puts [subst -nocommands { | |
298 int i; | |
299 for(i=0; i<sizeof(aAlnum)/sizeof(aAlnum[0]); i++){ | |
300 if( ${zFunc}(i)!=aAlnum[i] ){ | |
301 *piCode = i; | |
302 return 1; | |
303 } | |
304 } | |
305 for(i=0; i<sizeof(aLargeSep)/sizeof(aLargeSep[0]); i++){ | |
306 if( ${zFunc}(aLargeSep[i])!=0 ){ | |
307 *piCode = aLargeSep[i]; | |
308 return 1; | |
309 } | |
310 } | |
311 for(i=0; i<sizeof(aLargeOther)/sizeof(aLargeOther[0]); i++){ | |
312 if( ${zFunc}(aLargeOther[i])!=1 ){ | |
313 *piCode = aLargeOther[i]; | |
314 return 1; | |
315 } | |
316 } | |
317 }] | |
318 puts " return 0;" | |
319 puts "\}" | |
320 } | |
321 | |
322 #------------------------------------------------------------------------- | |
323 | |
324 proc tl_create_records {} { | |
325 global tl_lookup_table | |
326 | |
327 set iFirst "" | |
328 set nOff 0 | |
329 set nRange 0 | |
330 set nIncr 0 | |
331 | |
332 set lRecord [list] | |
333 foreach code [lsort -integer [array names tl_lookup_table]] { | |
334 set mapping $tl_lookup_table($code) | |
335 if {$iFirst == ""} { | |
336 set iFirst $code | |
337 set nOff [expr $mapping - $code] | |
338 set nRange 1 | |
339 set nIncr 1 | |
340 } else { | |
341 set diff [expr $code - ($iFirst + ($nIncr * ($nRange - 1)))] | |
342 if { $nRange==1 && ($diff==1 || $diff==2) } { | |
343 set nIncr $diff | |
344 } | |
345 | |
346 if {$diff != $nIncr || ($mapping - $code)!=$nOff} { | |
347 if { $nRange==1 } {set nIncr 1} | |
348 lappend lRecord [list $iFirst $nIncr $nRange $nOff] | |
349 set iFirst $code | |
350 set nOff [expr $mapping - $code] | |
351 set nRange 1 | |
352 set nIncr 1 | |
353 } else { | |
354 incr nRange | |
355 } | |
356 } | |
357 } | |
358 | |
359 lappend lRecord [list $iFirst $nIncr $nRange $nOff] | |
360 | |
361 set lRecord | |
362 } | |
363 | |
364 proc tl_print_table_header {} { | |
365 puts -nonewline " " | |
366 puts [string trim { | |
367 /* Each entry in the following array defines a rule for folding a range | |
368 ** of codepoints to lower case. The rule applies to a range of nRange | |
369 ** codepoints starting at codepoint iCode. | |
370 ** | |
371 ** If the least significant bit in flags is clear, then the rule applies | |
372 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and | |
373 ** need to be folded). Or, if it is set, then the rule only applies to | |
374 ** every second codepoint in the range, starting with codepoint C. | |
375 ** | |
376 ** The 7 most significant bits in flags are an index into the aiOff[] | |
377 ** array. If a specific codepoint C does require folding, then its lower | |
378 ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). | |
379 ** | |
380 ** The contents of this array are generated by parsing the CaseFolding.txt | |
381 ** file distributed as part of the "Unicode Character Database". See | |
382 ** http://www.unicode.org for details. | |
383 */ | |
384 }] | |
385 puts " static const struct TableEntry \{" | |
386 puts " unsigned short iCode;" | |
387 puts " unsigned char flags;" | |
388 puts " unsigned char nRange;" | |
389 puts " \} aEntry\[\] = \{" | |
390 } | |
391 | |
392 proc tl_print_table_entry {togglevar entry liOff} { | |
393 upvar $togglevar t | |
394 foreach {iFirst nIncr nRange nOff} $entry {} | |
395 | |
396 if {$iFirst > (1<<16)} { return 1 } | |
397 | |
398 if {[info exists t]==0} {set t 0} | |
399 if {$t==0} { puts -nonewline " " } | |
400 | |
401 set flags 0 | |
402 if {$nIncr==2} { set flags 1 ; set nRange [expr $nRange * 2]} | |
403 if {$nOff<0} { incr nOff [expr (1<<16)] } | |
404 | |
405 set idx [lsearch $liOff $nOff] | |
406 if {$idx<0} {error "malfunction generating aiOff"} | |
407 set flags [expr $flags + $idx*2] | |
408 | |
409 set txt "{$iFirst, $flags, $nRange}," | |
410 if {$t==2} { | |
411 puts $txt | |
412 } else { | |
413 puts -nonewline [format "% -23s" $txt] | |
414 } | |
415 set t [expr ($t+1)%3] | |
416 | |
417 return 0 | |
418 } | |
419 | |
420 proc tl_print_table_footer {togglevar} { | |
421 upvar $togglevar t | |
422 if {$t!=0} {puts ""} | |
423 puts " \};" | |
424 } | |
425 | |
426 proc tl_print_if_entry {entry} { | |
427 foreach {iFirst nIncr nRange nOff} $entry {} | |
428 if {$nIncr==2} {error "tl_print_if_entry needs improvement!"} | |
429 | |
430 puts " else if( c>=$iFirst && c<[expr $iFirst+$nRange] )\{" | |
431 puts " ret = c + $nOff;" | |
432 puts " \}" | |
433 } | |
434 | |
435 proc tl_generate_ioff_table {lRecord} { | |
436 foreach entry $lRecord { | |
437 foreach {iFirst nIncr nRange iOff} $entry {} | |
438 if {$iOff<0} { incr iOff [expr (1<<16)] } | |
439 if {[info exists a($iOff)]} continue | |
440 set a($iOff) 1 | |
441 } | |
442 | |
443 set liOff [lsort -integer [array names a]] | |
444 if {[llength $liOff]>128} { error "Too many distinct ioffs" } | |
445 return $liOff | |
446 } | |
447 | |
448 proc tl_print_ioff_table {liOff} { | |
449 puts -nonewline " static const unsigned short aiOff\[\] = \{" | |
450 set i 0 | |
451 foreach off $liOff { | |
452 if {($i % 8)==0} {puts "" ; puts -nonewline " "} | |
453 puts -nonewline [format "% -7s" "$off,"] | |
454 incr i | |
455 } | |
456 puts "" | |
457 puts " \};" | |
458 | |
459 } | |
460 | |
461 proc print_fold {zFunc} { | |
462 | |
463 set lRecord [tl_create_records] | |
464 | |
465 set lHigh [list] | |
466 puts "/*" | |
467 puts "** Interpret the argument as a unicode codepoint. If the codepoint" | |
468 puts "** is an upper case character that has a lower case equivalent," | |
469 puts "** return the codepoint corresponding to the lower case version." | |
470 puts "** Otherwise, return a copy of the argument." | |
471 puts "**" | |
472 puts "** The results are undefined if the value passed to this function" | |
473 puts "** is less than zero." | |
474 puts "*/" | |
475 puts "int ${zFunc}\(int c, int bRemoveDiacritic)\{" | |
476 | |
477 set liOff [tl_generate_ioff_table $lRecord] | |
478 tl_print_table_header | |
479 foreach entry $lRecord { | |
480 if {[tl_print_table_entry toggle $entry $liOff]} { | |
481 lappend lHigh $entry | |
482 } | |
483 } | |
484 tl_print_table_footer toggle | |
485 tl_print_ioff_table $liOff | |
486 | |
487 puts [subst -nocommands { | |
488 int ret = c; | |
489 | |
490 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); | |
491 | |
492 if( c<128 ){ | |
493 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); | |
494 }else if( c<65536 ){ | |
495 const struct TableEntry *p; | |
496 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; | |
497 int iLo = 0; | |
498 int iRes = -1; | |
499 | |
500 assert( c>aEntry[0].iCode ); | |
501 while( iHi>=iLo ){ | |
502 int iTest = (iHi + iLo) / 2; | |
503 int cmp = (c - aEntry[iTest].iCode); | |
504 if( cmp>=0 ){ | |
505 iRes = iTest; | |
506 iLo = iTest+1; | |
507 }else{ | |
508 iHi = iTest-1; | |
509 } | |
510 } | |
511 | |
512 assert( iRes>=0 && c>=aEntry[iRes].iCode ); | |
513 p = &aEntry[iRes]; | |
514 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ | |
515 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; | |
516 assert( ret>0 ); | |
517 } | |
518 | |
519 if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret); | |
520 } | |
521 }] | |
522 | |
523 foreach entry $lHigh { | |
524 tl_print_if_entry $entry | |
525 } | |
526 | |
527 puts "" | |
528 puts " return ret;" | |
529 puts "\}" | |
530 } | |
531 | |
532 proc print_fold_test {zFunc mappings} { | |
533 global tl_lookup_table | |
534 | |
535 foreach m $mappings { | |
536 set c [lindex $m 1] | |
537 if {$c == ""} { | |
538 set extra([lindex $m 0]) 0 | |
539 } else { | |
540 scan $c %c i | |
541 set extra([lindex $m 0]) $i | |
542 } | |
543 } | |
544 | |
545 puts "static int fold_test(int *piCode)\{" | |
546 puts -nonewline " static int aLookup\[\] = \{" | |
547 for {set i 0} {$i < 70000} {incr i} { | |
548 | |
549 set expected $i | |
550 catch { set expected $tl_lookup_table($i) } | |
551 set expected2 $expected | |
552 catch { set expected2 $extra($expected2) } | |
553 | |
554 if {($i % 4)==0} { puts "" ; puts -nonewline " " } | |
555 puts -nonewline "$expected, $expected2, " | |
556 } | |
557 puts " \};" | |
558 puts " int i;" | |
559 puts " for(i=0; i<sizeof(aLookup)/sizeof(aLookup\[0\]); i++)\{" | |
560 puts " int iCode = (i/2);" | |
561 puts " int bFlag = i & 0x0001;" | |
562 puts " if( ${zFunc}\(iCode, bFlag)!=aLookup\[i\] )\{" | |
563 puts " *piCode = iCode;" | |
564 puts " return 1;" | |
565 puts " \}" | |
566 puts " \}" | |
567 puts " return 0;" | |
568 puts "\}" | |
569 } | |
570 | |
571 | |
572 proc print_fileheader {} { | |
573 puts [string trim { | |
574 /* | |
575 ** 2012 May 25 | |
576 ** | |
577 ** The author disclaims copyright to this source code. In place of | |
578 ** a legal notice, here is a blessing: | |
579 ** | |
580 ** May you do good and not evil. | |
581 ** May you find forgiveness for yourself and forgive others. | |
582 ** May you share freely, never taking more than you give. | |
583 ** | |
584 ****************************************************************************** | |
585 */ | |
586 | |
587 /* | |
588 ** DO NOT EDIT THIS MACHINE GENERATED FILE. | |
589 */ | |
590 }] | |
591 puts "" | |
592 if {$::generate_fts5_code} { | |
593 # no-op | |
594 } else { | |
595 puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE" | |
596 puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" | |
597 } | |
598 puts "" | |
599 puts "#include <assert.h>" | |
600 puts "" | |
601 } | |
602 | |
603 proc print_test_main {} { | |
604 puts "" | |
605 puts "#include <stdio.h>" | |
606 puts "" | |
607 puts "int main(int argc, char **argv)\{" | |
608 puts " int r1, r2;" | |
609 puts " int code;" | |
610 puts " r1 = isalnum_test(&code);" | |
611 puts " if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);" | |
612 puts " else printf(\"isalnum(): test passed\\n\");" | |
613 puts " r2 = fold_test(&code);" | |
614 puts " if( r2 ) printf(\"fold(): Problem with code %d\\n\",code);" | |
615 puts " else printf(\"fold(): test passed\\n\");" | |
616 puts " return (r1 || r2);" | |
617 puts "\}" | |
618 } | |
619 | |
620 # Proces the command line arguments. Exit early if they are not to | |
621 # our liking. | |
622 # | |
623 proc usage {} { | |
624 puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? " | |
625 puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>" | |
626 exit 1 | |
627 } | |
628 if {[llength $argv]<2} usage | |
629 set unicodedata.txt [lindex $argv end] | |
630 set casefolding.txt [lindex $argv end-1] | |
631 | |
632 set remove_diacritic remove_diacritic | |
633 set generate_test_code 0 | |
634 set generate_fts5_code 0 | |
635 set function_prefix "sqlite3Fts" | |
636 for {set i 0} {$i < [llength $argv]-2} {incr i} { | |
637 switch -- [lindex $argv $i] { | |
638 -test { | |
639 set generate_test_code 1 | |
640 } | |
641 -fts5 { | |
642 set function_prefix sqlite3Fts5 | |
643 set generate_fts5_code 1 | |
644 set remove_diacritic fts5_remove_diacritic | |
645 } | |
646 default { | |
647 usage | |
648 } | |
649 } | |
650 } | |
651 | |
652 print_fileheader | |
653 | |
654 # Print the isalnum() function to stdout. | |
655 # | |
656 set lRange [an_load_separator_ranges] | |
657 print_isalnum ${function_prefix}UnicodeIsalnum $lRange | |
658 | |
659 # Leave a gap between the two generated C functions. | |
660 # | |
661 puts "" | |
662 puts "" | |
663 | |
664 # Load the fold data. This is used by the [rd_XXX] commands | |
665 # as well as [print_fold]. | |
666 tl_load_casefolding_txt ${casefolding.txt} | |
667 | |
668 set mappings [rd_load_unicodedata_text ${unicodedata.txt}] | |
669 print_rd $mappings | |
670 puts "" | |
671 puts "" | |
672 print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings | |
673 puts "" | |
674 puts "" | |
675 | |
676 # Print the fold() function to stdout. | |
677 # | |
678 print_fold ${function_prefix}UnicodeFold | |
679 | |
680 # Print the test routines and main() function to stdout, if -test | |
681 # was specified. | |
682 # | |
683 if {$::generate_test_code} { | |
684 print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange | |
685 print_fold_test ${function_prefix}UnicodeFold $mappings | |
686 print_test_main | |
687 } | |
688 | |
689 if {$generate_fts5_code} { | |
690 # no-op | |
691 } else { | |
692 puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" | |
693 puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */" | |
694 } | |
OLD | NEW |