third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl - Issue 2846743003: [sql] Remove SQLite 3.10.2 reference directory.

Side by Side Diff: third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/parseunicode.tcl

Issue 2846743003: [sql] Remove SQLite 3.10.2 reference directory. (Closed)

Patch Set: Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/sqlite/sqlite-src-3100200/ext/fts3/unicode/mkunicode.tcl ('k') | third_party/sqlite/sqlite-src-3100200/ext/fts5/extract_api_docs.tcl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1

2 #--------------------------------------------------------------------------

3 # Parameter $zName must be a path to the file UnicodeData.txt. This command

4 # reads the file and returns a list of mappings required to remove all

5 # diacritical marks from a unicode string. Each mapping is itself a list

6 # consisting of two elements - the unicode codepoint and the single ASCII

7 # character that it should be replaced with, or an empty string if the

8 # codepoint should simply be removed from the input. Examples:

9 #

10 # { 224 a } (replace codepoint 224 to "a")

11 # { 769 "" } (remove codepoint 769 from input)

12 #

13 # Mappings are only returned for non-upper case codepoints. It is assumed

14 # that the input has already been folded to lower case.

15 #

16 proc rd_load_unicodedata_text {zName} {

17 global tl_lookup_table

18

19 set fd [open $zName]

20 set lField {

21 code

22 character_name

23 general_category

24 canonical_combining_classes

25 bidirectional_category

26 character_decomposition_mapping

27 decimal_digit_value

28 digit_value

29 numeric_value

30 mirrored

31 unicode_1_name

32 iso10646_comment_field

33 uppercase_mapping

34 lowercase_mapping

35 titlecase_mapping

36 }

37 set lRet [list]

38

39 while { ![eof $fd] } {

40 set line [gets $fd]

41 if {$line == ""} continue

42

43 set fields [split $line ";"]

44 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

45 foreach $lField $fields {}

46 if { [llength $character_decomposition_mapping]!=2

47 \|\| [string is xdigit [lindex $character_decomposition_mapping 0]]==0

48 } {

49 continue

50 }

51

52 set iCode [expr "0x$code"]

53 set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]

54 set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]

55

56 if {[info exists tl_lookup_table($iCode)]} continue

57

58 if { ($iAscii >= 97 && $iAscii <= 122)

59 \|\| ($iAscii >= 65 && $iAscii <= 90)

60 } {

61 lappend lRet [list $iCode [string tolower [format %c $iAscii]]]

62 set dia($iDia) 1

63 }

64 }

65

66 foreach d [array names dia] {

67 lappend lRet [list $d ""]

68 }

69 set lRet [lsort -integer -index 0 $lRet]

70

71 close $fd

72 set lRet

73 }

74

75 #-------------------------------------------------------------------------

76 # Parameter $zName must be a path to the file UnicodeData.txt. This command

77 # reads the file and returns a list of codepoints (integers). The list

78 # contains all codepoints in the UnicodeData.txt assigned to any "General

79 # Category" that is not a "Letter" or "Number".

80 #

81 proc an_load_unicodedata_text {zName} {

82 set fd [open $zName]

83 set lField {

84 code

85 character_name

86 general_category

87 canonical_combining_classes

88 bidirectional_category

89 character_decomposition_mapping

90 decimal_digit_value

91 digit_value

92 numeric_value

93 mirrored

94 unicode_1_name

95 iso10646_comment_field

96 uppercase_mapping

97 lowercase_mapping

98 titlecase_mapping

99 }

100 set lRet [list]

101

102 while { ![eof $fd] } {

103 set line [gets $fd]

104 if {$line == ""} continue

105

106 set fields [split $line ";"]

107 if {[llength $fields] != [llength $lField]} { error "parse error: $line" }

108 foreach $lField $fields {}

109

110 set iCode [expr "0x$code"]

111 set bAlnum [expr {

112 [lsearch {L N} [string range $general_category 0 0]] >= 0

113 \|\| $general_category=="Co"

114 }]

115

116 if { !$bAlnum } { lappend lRet $iCode }

117 }

118

119 close $fd

120 set lRet

121 }

122

123 proc tl_load_casefolding_txt {zName} {

124 global tl_lookup_table

125

126 set fd [open $zName]

127 while { ![eof $fd] } {

128 set line [gets $fd]

129 if {[string range $line 0 0] == "#"} continue

130 if {$line == ""} continue

131

132 foreach x {a b c d} {unset -nocomplain $x}

133 foreach {a b c d} [split $line ";"] {}

134

135 set a2 [list]

136 set c2 [list]

137 foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }

138 foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }

139 set b [string trim $b]

140 set d [string trim $d]

141

142 if {$b=="C" \|\| $b=="S"} { set tl_lookup_table($a2) $c2 }

143 }

144 }

145

146

OLD	NEW