third_party/sqlite/sqlite-src-3080704/ext/fts3/unicode/mkunicode.tcl - Issue 2363173002: [sqlite] Remove obsolete reference version 3.8.7.4.

Side by Side Diff: third_party/sqlite/sqlite-src-3080704/ext/fts3/unicode/mkunicode.tcl

Issue 2363173002: [sqlite] Remove obsolete reference version 3.8.7.4. (Closed)

Patch Set: Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/sqlite/sqlite-src-3080704/ext/fts3/unicode/CaseFolding.txt ('k') | third_party/sqlite/sqlite-src-3080704/ext/icu/README.txt » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1

2 #

3 # Parameter $zName must be a path to the file UnicodeData.txt. This command

4 # reads the file and returns a list of mappings required to remove all

5 # diacritical marks from a unicode string. Each mapping is itself a list

6 # consisting of two elements - the unicode codepoint and the single ASCII

7 # character that it should be replaced with, or an empty string if the

8 # codepoint should simply be removed from the input. Examples:

9 #

10 # { 224 a } (replace codepoint 224 to "a")

11 # { 769 "" } (remove codepoint 769 from input)

12 #

13 # Mappings are only returned for non-upper case codepoints. It is assumed

14 # that the input has already been folded to lower case.

15 #

16 proc rd_load_unicodedata_text {zName} {

17 global tl_lookup_table

18

19 set fd [open $zName]

20 set lField {

21 code

22 character_name

23 general_category

24 canonical_combining_classes

25 bidirectional_category

26 character_decomposition_mapping

27 decimal_digit_value

28 digit_value

29 numeric_value

30 mirrored

31 unicode_1_name

32 iso10646_comment_field

33 uppercase_mapping

34 lowercase_mapping

35 titlecase_mapping

36 }

37 set lRet [list]

38

39 while { ![eof $fd] } {

40 set line [gets $fd]

41 if {$line == ""} continue

42

43 set fields [split $line ";"]

44 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

45 foreach $lField $fields {}

46 if { [llength $character_decomposition_mapping]!=2

47 \|\| [string is xdigit [lindex $character_decomposition_mapping 0]]==0

48 } {

49 continue

50 }

51

52 set iCode [expr "0x$code"]

53 set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]

54 set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]

55

56 if {[info exists tl_lookup_table($iCode)]} continue

57

58 if { ($iAscii >= 97 && $iAscii <= 122)

59 \|\| ($iAscii >= 65 && $iAscii <= 90)

60 } {

61 lappend lRet [list $iCode [string tolower [format %c $iAscii]]]

62 set dia($iDia) 1

63 }

64 }

65

66 foreach d [array names dia] {

67 lappend lRet [list $d ""]

68 }

69 set lRet [lsort -integer -index 0 $lRet]

70

71 close $fd

72 set lRet

73 }

74

75

76 proc print_rd {map} {

77 global tl_lookup_table

78 set aChar [list]

79 set lRange [list]

80

81 set nRange 1

82 set iFirst [lindex $map 0 0]

83 set cPrev [lindex $map 0 1]

84

85 foreach m [lrange $map 1 end] {

86 foreach {i c} $m {}

87

88 if {$cPrev == $c} {

89 for {set j [expr $iFirst+$nRange]} {$j<$i} {incr j} {

90 if {[info exists tl_lookup_table($j)]==0} break

91 }

92

93 if {$j==$i} {

94 set nNew [expr {(1 + $i - $iFirst)}]

95 if {$nNew<=8} {

96 set nRange $nNew

97 continue

98 }

99 }

100 }

101

102 lappend lRange [list $iFirst $nRange]

103 lappend aChar $cPrev

104

105 set iFirst $i

106 set cPrev $c

107 set nRange 1

108 }

109 lappend lRange [list $iFirst $nRange]

110 lappend aChar $cPrev

111

112 puts "/*"

113 puts "** If the argument is a codepoint corresponding to a lowercase letter"

114 puts "** in the ASCII range with a diacritic added, return the codepoint"

115 puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN"

116 puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER"

117 puts "** E\"). The resuls of passing a codepoint that corresponds to an"

118 puts "** uppercase letter are undefined."

119 puts "*/"

120 puts "static int remove_diacritic(int c)\{"

121 puts " unsigned short aDia\[\] = \{"

122 puts -nonewline " 0, "

123 set i 1

124 foreach r $lRange {

125 foreach {iCode nRange} $r {}

126 if {($i % 8)==0} {puts "" ; puts -nonewline " " }

127 incr i

128

129 puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]

130 puts -nonewline ", "

131 }

132 puts ""

133 puts " \};"

134 puts " char aChar\[\] = \{"

135 puts -nonewline " '\\0', "

136 set i 1

137 foreach c $aChar {

138 set str "'$c', "

139 if {$c == ""} { set str "'\\0', " }

140

141 if {($i % 12)==0} {puts "" ; puts -nonewline " " }

142 incr i

143 puts -nonewline "$str"

144 }

145 puts ""

146 puts " \};"

147 puts {

148 unsigned int key = (((unsigned int)c)<<3) \| 0x00000007;

149 int iRes = 0;

150 int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;

151 int iLo = 0;

152 while( iHi>=iLo ){

153 int iTest = (iHi + iLo) / 2;

154 if( key >= aDia[iTest] ){

155 iRes = iTest;

156 iLo = iTest+1;

157 }else{

158 iHi = iTest-1;

159 }

160 }

161 assert( key>=aDia[iRes] );

162 return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);}

163 puts "\}"

164 }

165

166 proc print_isdiacritic {zFunc map} {

167

168 set lCode [list]

169 foreach m $map {

170 foreach {code char} $m {}

171 if {$code && $char == ""} { lappend lCode $code }

172 }

173 set lCode [lsort -integer $lCode]

174 set iFirst [lindex $lCode 0]

175 set iLast [lindex $lCode end]

176

177 set i1 0

178 set i2 0

179

180 foreach c $lCode {

181 set i [expr $c - $iFirst]

182 if {$i < 32} {

183 set i1 [expr {$i1 \| (1<<$i)}]

184 } else {

185 set i2 [expr {$i2 \| (1<<($i-32))}]

186 }

187 }

188

189 puts "/*"

190 puts "** Return true if the argument interpreted as a unicode codepoint"

191 puts "** is a diacritical modifier character."

192 puts "*/"

193 puts "int ${zFunc}\(int c)\{"

194 puts " unsigned int mask0 = [format "0x%08X" $i1];"

195 puts " unsigned int mask1 = [format "0x%08X" $i2];"

196

197 puts " if( c<$iFirst \|\| c>$iLast ) return 0;"

198 puts " return (c < $iFirst+32) ?"

199 puts " (mask0 & (1 << (c-$iFirst))) :"

200 puts " (mask1 & (1 << (c-$iFirst-32)));"

201 puts "\}"

202 }

203

204

205 #-------------------------------------------------------------------------

206

207 # Parameter $zName must be a path to the file UnicodeData.txt. This command

208 # reads the file and returns a list of codepoints (integers). The list

209 # contains all codepoints in the UnicodeData.txt assigned to any "General

210 # Category" that is not a "Letter" or "Number".

211 #

212 proc an_load_unicodedata_text {zName} {

213 set fd [open $zName]

214 set lField {

215 code

216 character_name

217 general_category

218 canonical_combining_classes

219 bidirectional_category

220 character_decomposition_mapping

221 decimal_digit_value

222 digit_value

223 numeric_value

224 mirrored

225 unicode_1_name

226 iso10646_comment_field

227 uppercase_mapping

228 lowercase_mapping

229 titlecase_mapping

230 }

231 set lRet [list]

232

233 while { ![eof $fd] } {

234 set line [gets $fd]

235 if {$line == ""} continue

236

237 set fields [split $line ";"]

238 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

239 foreach $lField $fields {}

240

241 set iCode [expr "0x$code"]

242 set bAlnum [expr {

243 [lsearch {L N} [string range $general_category 0 0]] >= 0

244 \|\| $general_category=="Co"

245 }]

246

247 if { !$bAlnum } { lappend lRet $iCode }

248 }

249

250 close $fd

251 set lRet

252 }

253

254 proc an_load_separator_ranges {} {

255 global unicodedata.txt

256 set lSep [an_load_unicodedata_text ${unicodedata.txt}]

257 unset -nocomplain iFirst

258 unset -nocomplain nRange

259 set lRange [list]

260 foreach sep $lSep {

261 if {0==[info exists iFirst]} {

262 set iFirst $sep

263 set nRange 1

264 } elseif { $sep == ($iFirst+$nRange) } {

265 incr nRange

266 } else {

267 lappend lRange [list $iFirst $nRange]

268 set iFirst $sep

269 set nRange 1

270 }

271 }

272 lappend lRange [list $iFirst $nRange]

273 set lRange

274 }

275

276 proc an_print_range_array {lRange} {

277 set iFirstMax 0

278 set nRangeMax 0

279 foreach range $lRange {

280 foreach {iFirst nRange} $range {}

281 if {$iFirst > $iFirstMax} {set iFirstMax $iFirst}

282 if {$nRange > $nRangeMax} {set nRangeMax $nRange}

283 }

284 if {$iFirstMax >= (1<<22)} {error "first-max is too large for format"}

285 if {$nRangeMax >= (1<<10)} {error "range-max is too large for format"}

286

287 puts -nonewline " "

288 puts [string trim {

289 /* Each unsigned integer in the following array corresponds to a contiguous

290 ** range of unicode codepoints that are not either letters or numbers (i.e.

291 ** codepoints for which this function should return 0).

292 **

293 ** The most significant 22 bits in each 32-bit value contain the first

294 ** codepoint in the range. The least significant 10 bits are used to store

295 ** the size of the range (always at least 1). In other words, the value

296 ** ((C<<22) + N) represents a range of N codepoints starting with codepoint

297 ** C. It is not possible to represent a range larger than 1023 codepoints

298 ** using this format.

299 */

300 }]

301 puts -nonewline " static const unsigned int aEntry\[\] = \{"

302 set i 0

303 foreach range $lRange {

304 foreach {iFirst nRange} $range {}

305 set u32 [format "0x%08X" [expr ($iFirst<<10) + $nRange]]

306

307 if {($i % 5)==0} {puts "" ; puts -nonewline " "}

308 puts -nonewline " $u32,"

309 incr i

310 }

311 puts ""

312 puts " \};"

313 }

314

315 proc an_print_ascii_bitmap {lRange} {

316 foreach range $lRange {

317 foreach {iFirst nRange} $range {}

318 for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} {

319 if {$i<=127} { set a($i) 1 }

320 }

321 }

322

323 set aAscii [list 0 0 0 0]

324 foreach key [array names a] {

325 set idx [expr $key >> 5]

326 lset aAscii $idx [expr [lindex $aAscii $idx] \| (1 << ($key&0x001F))]

327 }

328

329 puts " static const unsigned int aAscii\[4\] = \{"

330 puts -nonewline " "

331 foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] }

332 puts ""

333 puts " \};"

334 }

335

336 proc print_isalnum {zFunc lRange} {

337 puts "/*"

338 puts "** Return true if the argument corresponds to a unicode codepoint"

339 puts "** classified as either a letter or a number. Otherwise false."

340 puts "**"

341 puts "** The results are undefined if the value passed to this function"

342 puts "** is less than zero."

343 puts "*/"

344 puts "int ${zFunc}\(int c)\{"

345 an_print_range_array $lRange

346 an_print_ascii_bitmap $lRange

347 puts {

348 if( c<128 ){

349 return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );

350 }else if( c<(1<<22) ){

351 unsigned int key = (((unsigned int)c)<<10) \| 0x000003FF;

352 int iRes = 0;

353 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;

354 int iLo = 0;

355 while( iHi>=iLo ){

356 int iTest = (iHi + iLo) / 2;

357 if( key >= aEntry[iTest] ){

358 iRes = iTest;

359 iLo = iTest+1;

360 }else{

361 iHi = iTest-1;

362 }

363 }

364 assert( aEntry[0]<key );

365 assert( key>=aEntry[iRes] );

366 return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));

367 }

368 return 1;}

369 puts "\}"

370 }

371

372 proc print_test_isalnum {zFunc lRange} {

373 foreach range $lRange {

374 foreach {iFirst nRange} $range {}

375 for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { set a($i) 1 }

376 }

377

378 puts "static int isalnum_test(int *piCode)\{"

379 puts -nonewline " unsigned char aAlnum\[\] = \{"

380 for {set i 0} {$i < 70000} {incr i} {

381 if {($i % 32)==0} { puts "" ; puts -nonewline " " }

382 set bFlag [expr ![info exists a($i)]]

383 puts -nonewline "${bFlag},"

384 }

385 puts ""

386 puts " \};"

387

388 puts -nonewline " int aLargeSep\[\] = \{"

389 set i 0

390 foreach iSep [lsort -integer [array names a]] {

391 if {$iSep<70000} continue

392 if {($i % 8)==0} { puts "" ; puts -nonewline " " }

393 puts -nonewline " $iSep,"

394 incr i

395 }

396 puts ""

397 puts " \};"

398 puts -nonewline " int aLargeOther\[\] = \{"

399 set i 0

400 foreach iSep [lsort -integer [array names a]] {

401 if {$iSep<70000} continue

402 if {[info exists a([expr $iSep-1])]==0} {

403 if {($i % 8)==0} { puts "" ; puts -nonewline " " }

404 puts -nonewline " [expr $iSep-1],"

405 incr i

406 }

407 if {[info exists a([expr $iSep+1])]==0} {

408 if {($i % 8)==0} { puts "" ; puts -nonewline " " }

409 puts -nonewline " [expr $iSep+1],"

410 incr i

411 }

412 }

413 puts ""

414 puts " \};"

415

416 puts [subst -nocommands {

417 int i;

418 for(i=0; i<sizeof(aAlnum)/sizeof(aAlnum[0]); i++){

419 if( ${zFunc}(i)!=aAlnum[i] ){

420 *piCode = i;

421 return 1;

422 }

423 }

424 for(i=0; i<sizeof(aLargeSep)/sizeof(aLargeSep[0]); i++){

425 if( ${zFunc}(aLargeSep[i])!=0 ){

426 *piCode = aLargeSep[i];

427 return 1;

428 }

429 }

430 for(i=0; i<sizeof(aLargeOther)/sizeof(aLargeOther[0]); i++){

431 if( ${zFunc}(aLargeOther[i])!=1 ){

432 *piCode = aLargeOther[i];

433 return 1;

434 }

435 }

436 }]

437 puts " return 0;"

438 puts "\}"

439 }

440

441 #-------------------------------------------------------------------------

442

443 proc tl_load_casefolding_txt {zName} {

444 global tl_lookup_table

445

446 set fd [open $zName]

447 while { ![eof $fd] } {

448 set line [gets $fd]

449 if {[string range $line 0 0] == "#"} continue

450 if {$line == ""} continue

451

452 foreach x {a b c d} {unset -nocomplain $x}

453 foreach {a b c d} [split $line ";"] {}

454

455 set a2 [list]

456 set c2 [list]

457 foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }

458 foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }

459 set b [string trim $b]

460 set d [string trim $d]

461

462 if {$b=="C" \|\| $b=="S"} { set tl_lookup_table($a2) $c2 }

463 }

464 }

465

466 proc tl_create_records {} {

467 global tl_lookup_table

468

469 set iFirst ""

470 set nOff 0

471 set nRange 0

472 set nIncr 0

473

474 set lRecord [list]

475 foreach code [lsort -integer [array names tl_lookup_table]] {

476 set mapping $tl_lookup_table($code)

477 if {$iFirst == ""} {

478 set iFirst $code

479 set nOff [expr $mapping - $code]

480 set nRange 1

481 set nIncr 1

482 } else {

483 set diff [expr $code - ($iFirst + ($nIncr * ($nRange - 1)))]

484 if { $nRange==1 && ($diff==1 \|\| $diff==2) } {

485 set nIncr $diff

486 }

487

488 if {$diff != $nIncr \|\| ($mapping - $code)!=$nOff} {

489 if { $nRange==1 } {set nIncr 1}

490 lappend lRecord [list $iFirst $nIncr $nRange $nOff]

491 set iFirst $code

492 set nOff [expr $mapping - $code]

493 set nRange 1

494 set nIncr 1

495 } else {

496 incr nRange

497 }

498 }

499 }

500

501 lappend lRecord [list $iFirst $nIncr $nRange $nOff]

502

503 set lRecord

504 }

505

506 proc tl_print_table_header {} {

507 puts -nonewline " "

508 puts [string trim {

509 /* Each entry in the following array defines a rule for folding a range

510 ** of codepoints to lower case. The rule applies to a range of nRange

511 ** codepoints starting at codepoint iCode.

512 **

513 ** If the least significant bit in flags is clear, then the rule applies

514 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and

515 ** need to be folded). Or, if it is set, then the rule only applies to

516 ** every second codepoint in the range, starting with codepoint C.

517 **

518 ** The 7 most significant bits in flags are an index into the aiOff[]

519 ** array. If a specific codepoint C does require folding, then its lower

520 ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).

521 **

522 ** The contents of this array are generated by parsing the CaseFolding.txt

523 ** file distributed as part of the "Unicode Character Database". See

524 ** http://www.unicode.org for details.

525 */

526 }]

527 puts " static const struct TableEntry \{"

528 puts " unsigned short iCode;"

529 puts " unsigned char flags;"

530 puts " unsigned char nRange;"

531 puts " \} aEntry\[\] = \{"

532 }

533

534 proc tl_print_table_entry {togglevar entry liOff} {

535 upvar $togglevar t

536 foreach {iFirst nIncr nRange nOff} $entry {}

537

538 if {$iFirst > (1<<16)} { return 1 }

539

540 if {[info exists t]==0} {set t 0}

541 if {$t==0} { puts -nonewline " " }

542

543 set flags 0

544 if {$nIncr==2} { set flags 1 ; set nRange [expr $nRange * 2]}

545 if {$nOff<0} { incr nOff [expr (1<<16)] }

546

547 set idx [lsearch $liOff $nOff]

548 if {$idx<0} {error "malfunction generating aiOff"}

549 set flags [expr $flags + $idx*2]

550

551 set txt "{$iFirst, $flags, $nRange},"

552 if {$t==2} {

553 puts $txt

554 } else {

555 puts -nonewline [format "% -23s" $txt]

556 }

557 set t [expr ($t+1)%3]

558

559 return 0

560 }

561

562 proc tl_print_table_footer {togglevar} {

563 upvar $togglevar t

564 if {$t!=0} {puts ""}

565 puts " \};"

566 }

567

568 proc tl_print_if_entry {entry} {

569 foreach {iFirst nIncr nRange nOff} $entry {}

570 if {$nIncr==2} {error "tl_print_if_entry needs improvement!"}

571

572 puts " else if( c>=$iFirst && c<[expr $iFirst+$nRange] )\{"

573 puts " ret = c + $nOff;"

574 puts " \}"

575 }

576

577 proc tl_generate_ioff_table {lRecord} {

578 foreach entry $lRecord {

579 foreach {iFirst nIncr nRange iOff} $entry {}

580 if {$iOff<0} { incr iOff [expr (1<<16)] }

581 if {[info exists a($iOff)]} continue

582 set a($iOff) 1

583 }

584

585 set liOff [lsort -integer [array names a]]

586 if {[llength $liOff]>128} { error "Too many distinct ioffs" }

587 return $liOff

588 }

589

590 proc tl_print_ioff_table {liOff} {

591 puts -nonewline " static const unsigned short aiOff\[\] = \{"

592 set i 0

593 foreach off $liOff {

594 if {($i % 8)==0} {puts "" ; puts -nonewline " "}

595 puts -nonewline [format "% -7s" "$off,"]

596 incr i

597 }

598 puts ""

599 puts " \};"

600

601 }

602

603 proc print_fold {zFunc} {

604

605 set lRecord [tl_create_records]

606

607 set lHigh [list]

608 puts "/*"

609 puts "** Interpret the argument as a unicode codepoint. If the codepoint"

610 puts "** is an upper case character that has a lower case equivalent,"

611 puts "** return the codepoint corresponding to the lower case version."

612 puts "** Otherwise, return a copy of the argument."

613 puts "**"

614 puts "** The results are undefined if the value passed to this function"

615 puts "** is less than zero."

616 puts "*/"

617 puts "int ${zFunc}\(int c, int bRemoveDiacritic)\{"

618

619 set liOff [tl_generate_ioff_table $lRecord]

620 tl_print_table_header

621 foreach entry $lRecord {

622 if {[tl_print_table_entry toggle $entry $liOff]} {

623 lappend lHigh $entry

624 }

625 }

626 tl_print_table_footer toggle

627 tl_print_ioff_table $liOff

628

629 puts {

630 int ret = c;

631

632 assert( c>=0 );

633 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

634

635 if( c<128 ){

636 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');

637 }else if( c<65536 ){

638 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;

639 int iLo = 0;

640 int iRes = -1;

641

642 while( iHi>=iLo ){

643 int iTest = (iHi + iLo) / 2;

644 int cmp = (c - aEntry[iTest].iCode);

645 if( cmp>=0 ){

646 iRes = iTest;

647 iLo = iTest+1;

648 }else{

649 iHi = iTest-1;

650 }

651 }

652 assert( iRes<0 \|\| c>=aEntry[iRes].iCode );

653

654 if( iRes>=0 ){

655 const struct TableEntry *p = &aEntry[iRes];

656 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){

657 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;

658 assert( ret>0 );

659 }

660 }

661

662 if( bRemoveDiacritic ) ret = remove_diacritic(ret);

663 }

664 }

665

666 foreach entry $lHigh {

667 tl_print_if_entry $entry

668 }

669

670 puts ""

671 puts " return ret;"

672 puts "\}"

673 }

674

675 proc print_fold_test {zFunc mappings} {

676 global tl_lookup_table

677

678 foreach m $mappings {

679 set c [lindex $m 1]

680 if {$c == ""} {

681 set extra([lindex $m 0]) 0

682 } else {

683 scan $c %c i

684 set extra([lindex $m 0]) $i

685 }

686 }

687

688 puts "static int fold_test(int *piCode)\{"

689 puts -nonewline " static int aLookup\[\] = \{"

690 for {set i 0} {$i < 70000} {incr i} {

691

692 set expected $i

693 catch { set expected $tl_lookup_table($i) }

694 set expected2 $expected

695 catch { set expected2 $extra($expected2) }

696

697 if {($i % 4)==0} { puts "" ; puts -nonewline " " }

698 puts -nonewline "$expected, $expected2, "

699 }

700 puts " \};"

701 puts " int i;"

702 puts " for(i=0; i<sizeof(aLookup)/sizeof(aLookup\[0\]); i++)\{"

703 puts " int iCode = (i/2);"

704 puts " int bFlag = i & 0x0001;"

705 puts " if( ${zFunc}\(iCode, bFlag)!=aLookup\[i\] )\{"

706 puts " *piCode = iCode;"

707 puts " return 1;"

708 puts " \}"

709 puts " \}"

710 puts " return 0;"

711 puts "\}"

712 }

713

714

715 proc print_fileheader {} {

716 puts [string trim {

717 /*

718 ** 2012 May 25

719 **

720 ** The author disclaims copyright to this source code. In place of

721 ** a legal notice, here is a blessing:

722 **

723 ** May you do good and not evil.

724 ** May you find forgiveness for yourself and forgive others.

725 ** May you share freely, never taking more than you give.

726 **

727 ******************************************************************************

728 */

729

730 /*

731 ** DO NOT EDIT THIS MACHINE GENERATED FILE.

732 */

733 }]

734 puts ""

735 puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"

736 puts "#if defined(SQLITE_ENABLE_FTS3) \|\| defined(SQLITE_ENABLE_FTS4)"

737 puts ""

738 puts "#include <assert.h>"

739 puts ""

740 }

741

742 proc print_test_main {} {

743 puts ""

744 puts "#include <stdio.h>"

745 puts ""

746 puts "int main(int argc, char **argv)\{"

747 puts " int r1, r2;"

748 puts " int code;"

749 puts " r1 = isalnum_test(&code);"

750 puts " if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);"

751 puts " else printf(\"isalnum(): test passed\\n\");"

752 puts " r2 = fold_test(&code);"

753 puts " if( r2 ) printf(\"fold(): Problem with code %d\\n\",code);"

754 puts " else printf(\"fold(): test passed\\n\");"

755 puts " return (r1 \|\| r2);"

756 puts "\}"

757 }

758

759 # Proces the command line arguments. Exit early if they are not to

760 # our liking.

761 #

762 proc usage {} {

763 puts -nonewline stderr "Usage: $::argv0 ?-test? "

764 puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"

765 exit 1

766 }

767 if {[llength $argv]!=2 && [llength $argv]!=3} usage

768 if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage

769 set unicodedata.txt [lindex $argv end]

770 set casefolding.txt [lindex $argv end-1]

771 set generate_test_code [expr {[llength $argv]==3}]

772

773 print_fileheader

774

775 # Print the isalnum() function to stdout.

776 #

777 set lRange [an_load_separator_ranges]

778 print_isalnum sqlite3FtsUnicodeIsalnum $lRange

779

780 # Leave a gap between the two generated C functions.

781 #

782 puts ""

783 puts ""

784

785 # Load the fold data. This is used by the [rd_XXX] commands

786 # as well as [print_fold].

787 tl_load_casefolding_txt ${casefolding.txt}

788

789 set mappings [rd_load_unicodedata_text ${unicodedata.txt}]

790 print_rd $mappings

791 puts ""

792 puts ""

793 print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings

794 puts ""

795 puts ""

796

797 # Print the fold() function to stdout.

798 #

799 print_fold sqlite3FtsUnicodeFold

800

801 # Print the test routines and main() function to stdout, if -test

802 # was specified.

803 #

804 if {$::generate_test_code} {

805 print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange

806 print_fold_test sqlite3FtsUnicodeFold $mappings

807 print_test_main

808 }

809

810 puts "#endif /* defined(SQLITE_ENABLE_FTS3) \|\| defined(SQLITE_ENABLE_FTS4) */"

811 puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"

OLD	NEW