OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/perl |
| 2 # Copyright (c) 2001-2009 International Business Machines |
| 3 # Corporation and others. All Rights Reserved. |
| 4 |
| 5 ################################################################################
#### |
| 6 # filterRFC3454.pl: |
| 7 # This tool filters the RFC-3454 txt file for StringPrep tables and creates a ta
ble |
| 8 # to be used in NamePrepProfile |
| 9 # |
| 10 # Author: Ram Viswanadha |
| 11 # |
| 12 ################################################################################
#### |
| 13 |
| 14 use File::Find; |
| 15 use File::Basename; |
| 16 use IO::File; |
| 17 use Cwd; |
| 18 use File::Copy; |
| 19 use Getopt::Long; |
| 20 use File::Path; |
| 21 use File::Copy; |
| 22 use Time::localtime; |
| 23 |
# printf-style banner written first to the output file; main() supplies the
# current year for the %d placeholder.
| 24 $icu_copyright = "##############################################################
#######\n# Copyright (c) %d, International Business Machines Corporation and\n#
others. All Rights Reserved.\n##################################################
###################\n\n"; |
# Attribution banner for the RFC 3454 source data; main() prints it right
# after the ICU banner.
| 25 $copyright = "###################\n# This file was generated from RFC 3454 (http
://www.ietf.org/rfc/rfc3454.txt)\n# Copyright (C) The Internet Society (2002).
All Rights Reserved. \n###################\n\n"; |
# "Generated file" banner. @ARGV is interpolated at this point, i.e. before
# main() hands the command line to GetOptions. The same text is also prepended
# to the B.2 / B.3 table comments by createCaseMapNorm / createCaseMapNoNorm.
| 26 $warning = "###################\n# WARNING: This table is generated by filterRFC
3454.pl tool with\n# options: @ARGV \n###################\n\n"; |
| 27 #run the program) |
| 28 main(); |
| 29 |
| 30 #--------------------------------------------------------------------- |
| 31 # The main program |
| 32 |
# Program entry point.
#
# Parses the command line, recreates <destdir>/<dest-filename> with the
# copyright/warning banners and the optional @normalize / @check-bidi
# directives, then scans <sourcedir>/<src-filename> for "Start Table" marker
# lines and appends every requested table to the output file.  The extra
# profile tables (--iscsi, --xmpp-node, --sasl, --ldap) are appended last.
#
# Calls usage() (which exits) when any of the four path options is missing and
# dies when --B2 and --B3 are both given or when a file cannot be opened.
sub main {
    my ($sourceDir, $destDir, $srcFileName, $destFileName);
    my ($a1, $b1, $b2, $b3);
    my ($c11, $c12, $c21, $c22, $c3, $c4, $c5, $c6, $c7, $c8, $c9);
    my ($writeISCSIProhibitedExtra, $writeXMPPNodeProhibitedExtra);
    my ($writeSASLMap, $writeLDAPMap);
    my ($norm, $checkBidi);

    GetOptions(
        "--sourcedir=s"     => \$sourceDir,
        "--destdir=s"       => \$destDir,
        "--src-filename=s"  => \$srcFileName,
        "--dest-filename=s" => \$destFileName,
        "--A1"              => \$a1,
        "--B1"              => \$b1,
        "--B2"              => \$b2,
        "--B3"              => \$b3,
        "--C11"             => \$c11,
        "--C12"             => \$c12,
        "--C21"             => \$c21,
        "--C22"             => \$c22,
        "--C3"              => \$c3,
        "--C4"              => \$c4,
        "--C5"              => \$c5,
        "--C6"              => \$c6,
        "--C7"              => \$c7,
        "--C8"              => \$c8,
        "--C9"              => \$c9,
        "--iscsi"           => \$writeISCSIProhibitedExtra,
        "--xmpp-node"       => \$writeXMPPNodeProhibitedExtra,
        "--sasl"            => \$writeSASLMap,
        "--ldap"            => \$writeLDAPMap,
        "--normalize"       => \$norm,
        "--check-bidi"      => \$checkBidi,
    );

    # All four path options are mandatory; usage() prints the help and exits.
    for my $required ($sourceDir, $destDir, $srcFileName, $destFileName) {
        usage() unless defined $required;
    }

    # Reject the conflicting case-mapping options before the existing output
    # file is deleted, so a bad command line does not destroy earlier output.
    if (defined $b2 && defined $b3) {
        die "ERROR: --B2 and --B3 are both specified\!\n";
    }

    my $infile = $sourceDir."/".$srcFileName;
    my $inFH = IO::File->new($infile, "r")
        or die "could not open the file $infile for reading: $! \n";
    my $outfile = $destDir."/".$destFileName;

    # Start from a fresh file: every table writer re-opens it in append mode.
    unlink($outfile);
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    printf $outFH $icu_copyright, localtime->year()+1900;
    print $outFH $copyright;
    print $outFH $warning;

    if (defined $norm) {
        print $outFH "\@normalize;;\n";
    }
    if (defined $checkBidi) {
        print $outFH "\@check-bidi;;\n";
    }
    print $outFH "\n";
    close($outFH)
        or die "could not close the file $outfile: $! \n";

    # Mapping tables: table name => [ command-line flag, writer ].
    my %mapTable = (
        'B.1' => [ $b1, \&createMapToNothing ],
        'B.2' => [ $b2, \&createCaseMapNorm ],
        'B.3' => [ $b3, \&createCaseMapNoNorm ],
    );
    # Prohibition tables: table name => command-line flag.
    my %prohibitedFlag = (
        'C.1.1' => $c11, 'C.1.2' => $c12,
        'C.2.1' => $c21, 'C.2.2' => $c22,
        'C.3'   => $c3,  'C.4'   => $c4,  'C.5' => $c5, 'C.6' => $c6,
        'C.7'   => $c7,  'C.8'   => $c8,  'C.9' => $c9,
    );

    # $line is lexical on purpose: the table writers consume further lines from
    # $inFH and must not be able to alter the marker line being dispatched on.
    while (defined(my $line = <$inFH>)) {
        next unless $line =~ /Start\sTable/;

        # Extract the literal table name ("A.1", "C.1.1", ...) so that the dots
        # are matched as dots rather than as regex wildcards.
        my ($table) = $line =~ /([A-C]\.\d(?:\.\d)?)/;
        next unless defined $table;

        if ($table eq 'A.1') {
            # NOTE(review): Table A.1 has always been written regardless of
            # --A1 ($a1 is parsed but never consulted).  Kept as is so existing
            # invocations produce the same output - confirm whether --A1
            # should gate this.
            createUnassignedTable($inFH, $outfile);
        }
        elsif (my $entry = $mapTable{$table}) {
            my ($flag, $writer) = @$entry;
            $writer->($inFH, $outfile) if defined $flag;
        }
        elsif (defined $prohibitedFlag{$table}) {
            createProhibitedTable($inFH, $outfile, $line);
        }
    }

    if (defined $writeISCSIProhibitedExtra) {
        create_iSCSIExtraProhibitedTable($inFH, $outfile);
    }
    # The flag tested here used to be misspelled ("Prohited"), so --xmpp-node
    # was silently ignored.
    if (defined $writeXMPPNodeProhibitedExtra) {
        create_XMPPNodeExtraProhibitedTable($inFH, $outfile);
    }
    if (defined $writeSASLMap) {
        create_SASLMapTable($inFH, $outfile);
    }
    if (defined $writeLDAPMap) {
        create_LDAPMapTable($inFH, $outfile);
    }
    close($inFH);
}
| 154 |
| 155 #----------------------------------------------------------------------- |
# Copies one RFC 3454 table from $inFH to $outFH.
#
# Parameters:
#   $inFH    - input handle positioned just after a "Start Table" marker line
#   $outFH   - output handle the converted table is written to
#   $comment - text printed (followed by a newline) before the table rows
#   $table   - table selector: a value containing "A" yields UNASSIGNED rows,
#              "B.1" / "B.2" / "B.3" yield MAP rows, a value containing "C"
#              yields PROHIBITED rows
#
# Reads until a line containing "End Table", then prints a
# "# Total code points N" trailer and returns.  If the input ends before an
# "End Table" line is seen, no trailer is written.
sub readPrint {
    my ($inFH, $outFH, $comment, $table) = @_;
    our $print;     # package-level debug switch: 1 echoes input rows to STDOUT
    my $count = 0;  # number of code points covered by the rows written so far

    print $outFH $comment."\n";
    while (defined(my $line = <$inFH>)) {
        next if $line =~ /Hoffman\s\&\sBlanchet/;   # ignore heading
        next if $line =~ /RFC\s3454/;               # ignore heading
        next if $line =~ /\f/;                      # ignore form feed
        next if $line eq "\n";                      # ignore blank lines

        if ($line =~ /End\sTable/) {
            print $outFH "\n# Total code points $count\n\n";
            return;
        }
        if (defined $print && $print == 1) {
            print $line;
        }

        $line =~ s/^\s+//;
        # Turn "XXXX-YYYY" into "XXXX..YYYY".  Only a hyphen between two
        # leading hexadecimal code points is a range; hyphens further along
        # the row (e.g. inside a character name) are left alone.
        $line =~ s/^([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/$1..$2/;
        # Rows holding nothing but a code point get a field terminator.
        $line =~ s/$/;/ unless $line =~ /\;/;

        my ($code) = split /;/, $line;
        if ($table =~ /A/) {
            $line = $code."; ; UNASSIGNED\n";
        }
        elsif ($table =~ /B\.1/) {
            $line =~ s/Map to nothing/MAP/;
        }
        elsif ($table =~ /B\.[23]/) {
            $line =~ s/Case map/MAP/;
            $line =~ s/Additional folding/MAP/;
        }
        elsif ($table =~ /C/) {
            $line = $code."; ; PROHIBITED\n";
        }

        if (defined $code && $code =~ /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)/) {
            # hex() understands the digits A-F; the bounds are inclusive.
            my ($first, $last) = (hex($1), hex($2));
            $count += $last - $first + 1 if $last >= $first;
        }
        else {
            $count++;
        }
        print $outFH $line;
    }
    return;
}
| 207 #----------------------------------------------------------------------- |
# Converts a string of hexadecimal digits (e.g. "024F") to its numeric value.
#
# Surrounding whitespace is ignored; an undefined or empty argument yields 0.
# The previous digit-by-digit loop added each character numerically, so the
# digits A-F counted as 0; hex() handles both letter cases.
sub atoi {
    my ($text) = @_;
    return 0 unless defined $text;
    $text =~ s/^\s+|\s+$//g;
    return 0 if $text eq '';
    return hex($text);
}
| 215 #----------------------------------------------------------------------- |
# Appends Table A.1 (unassigned code points) to $outfile.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table A.1" line
#   $outfile - path of the output file, opened here in append mode
#
# Dies if the output file cannot be opened or closed.  All variables are
# lexical so the caller's package variables of the same names stay untouched.
sub createUnassignedTable {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "A");
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
| 224 #----------------------------------------------------------------------- |
# Appends Table B.1 ("map to nothing" code points) to $outfile.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table B.1" line
#   $outfile - path of the output file, opened here in append mode
#
# Dies if the output file cannot be opened or closed.  All variables are
# lexical so the caller's package variables of the same names stay untouched.
sub createMapToNothing {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.1");
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
| 233 #----------------------------------------------------------------------- |
# Appends Table B.2 (case mapping entries) to $outfile.  The package-level
# $warning banner is repeated in front of the table comment.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table B.2" line
#   $outfile - path of the output file, opened here in append mode
#
# Dies if the output file cannot be opened or closed.  All variables other
# than $warning are lexical so the caller's package variables stay untouched.
sub createCaseMapNorm {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.2");
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
| 242 #----------------------------------------------------------------------- |
# Appends Table B.3 (case mapping entries) to $outfile.  The package-level
# $warning banner is repeated in front of the table comment.
#
# Parameters:
#   $inFH    - input handle positioned just after the "Start Table B.3" line
#   $outfile - path of the output file, opened here in append mode
#
# Dies if the output file cannot be opened or closed.  All variables other
# than $warning are lexical so the caller's package variables stay untouched.
sub createCaseMapNoNorm {
    my ($inFH, $outfile) = @_;
    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This table contains code points from Table B.3 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment, "B.3");
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
| 251 #----------------------------------------------------------------------- |
# Appends one of the prohibition tables (C.x) to $outfile.
#
# Parameters:
#   $inFH    - input handle positioned just after the table's start marker
#   $outfile - path of the output file, opened here in append mode
#   $line    - the "Start Table ..." marker line; with "Start" and every
#              hyphen removed it becomes the comment printed above the table
#
# Dies if the output file cannot be opened or closed.  $line is a private
# copy, so editing it here cannot disturb the caller's read loop.
sub createProhibitedTable {
    my ($inFH, $outfile, $line) = @_;
    $line =~ s/Start//;
    $line =~ s/-//g;
    my $comment = "# code points from $line";

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";
    readPrint($inFH, $outFH, $comment, "C");
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
| 263 |
| 264 #----------------------------------------------------------------------- |
# Appends the additional prohibitions of the iSCSI profile to $outfile.
#
# Parameters:
#   $inFH    - accepted for symmetry with the other table writers; not read
#   $outfile - path of the output file, opened here in append mode
#   $line    - accepted but unused
#
# Dies if the output file cannot be opened or closed.  All variables are
# lexical so the caller's package variables of the same names stay untouched.
sub create_iSCSIExtraProhibitedTable {
    my ($inFH, $outfile, $line) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    # Code points / inclusive ranges, 30 code points in total.
    my @prohibited = qw(0021..002C 002F 003B..0040 005B..0060 007B..007E 3002);

    print $outFH "# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";
    for my $codes (@prohibited) {
        print $outFH $codes."; ; PROHIBITED\n";
    }
    print $outFH "\n# Total code points 30\n";
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
| 281 #----------------------------------------------------------------------- |
# Appends the additional prohibitions of the XMPP Nodeprep profile to $outfile.
#
# Parameters:
#   $inFH    - accepted for symmetry with the other table writers; not read
#   $outfile - path of the output file, opened here in append mode
#   $line    - accepted but unused
#
# Dies if the output file cannot be opened or closed.  All variables are
# lexical so the caller's package variables of the same names stay untouched.
sub create_XMPPNodeExtraProhibitedTable {
    my ($inFH, $outfile, $line) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    # The eight individually prohibited code points.
    my @prohibited = qw(0022 0026 0027 002F 003A 003C 003E 0040);

    print $outFH "# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";
    for my $code (@prohibited) {
        print $outFH $code."; ; PROHIBITED\n";
    }
    print $outFH "\n# Total code points 8\n";
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
| 300 #----------------------------------------------------------------------- |
# Appends the mapping table of the SASL profile to $outfile.
#
# Parameters:
#   $inFH    - accepted for symmetry with the other table writers; not read
#   $outfile - path of the output file, opened here in append mode
#   $line    - accepted but unused
#
# Dies if the output file cannot be opened or closed.  All variables are
# lexical so the caller's package variables of the same names stay untouched.
sub create_SASLMapTable {
    my ($inFH, $outfile, $line) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    # non-ASCII space characters [C.1.2] to SPACE (17 code points)
    my @toSpace = (
        0x00A0, 0x1680, 0x2000 .. 0x200B, 0x202F, 0x205F, 0x3000,
    );
    # commonly mapped to nothing characters except U+200B to nothing
    # (26 code points)
    my @toNothing = (
        0x00AD, 0x034F, 0x1806, 0x180B .. 0x180D,
        0x200C, 0x200D, 0x2060,
        0xFE00 .. 0xFE0F, 0xFEFF,
    );

    print $outFH "# Map table for SASL profile (rfc4013.txt)\n\n";
    for my $codePoint (@toSpace) {
        printf {$outFH} "%04X; 0020; MAP\n", $codePoint;
    }
    for my $codePoint (@toNothing) {
        printf {$outFH} "%04X; ; MAP\n", $codePoint;
    }
    print $outFH "\n# Total code points 43\n";
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
| 357 #----------------------------------------------------------------------- |
# Appends the mapping table of the LDAP profile (rfc4518.txt) to $outfile.
#
# Parameters:
#   $inFH    - accepted for symmetry with the other table writers; not read
#   $outfile - path of the output file, opened here in append mode
#   $line    - accepted but unused
#
# Dies if the output file cannot be opened or closed.  All variables are
# lexical so the caller's package variables of the same names stay untouched.
sub create_LDAPMapTable {
    my ($inFH, $outfile, $line) = @_;

    my $outFH = IO::File->new($outfile, "a")
        or die "could not open the file $outfile for writing: $! \n";

    # Each section is [ replacement, code points... ], emitted in this order.
    # An empty replacement means "map to nothing".
    my @sections = (
        # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
        # points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F) and
        # VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also
        # mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
        # mapped to nothing.
        # (The table below uses FE00-FE0F for the variation selectors.)
        [ '', 0x00AD, 0x034F, 0x1806, 0x180B .. 0x180D,
              0xFE00 .. 0xFE0F, 0xFFFC ],

        # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
        # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
        # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
        [ '0020', 0x0009 .. 0x000D, 0x0085 ],

        # All other control code (e.g., Cc) points or code points with a
        # control function (e.g., Cf) are mapped to nothing.  The following is
        # a complete list of these code points: U+0000-0008, 000E-001F, 007F-
        # 0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
        # 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.
        [ '', 0x0000 .. 0x0008, 0x000E .. 0x001F, 0x007F .. 0x0084,
              0x0086 .. 0x009F, 0x06DD, 0x070F, 0x180E,
              0x200C .. 0x200F, 0x202A .. 0x202E, 0x2060 .. 0x2063,
              0x206A .. 0x206F, 0xFEFF, 0xFFF9 .. 0xFFFB,
              0x1D173 .. 0x1D17A, 0xE0001, 0xE0020 .. 0xE007F ],

        # ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
        # points with Separator (space, line, or paragraph) property (e.g., Zs,
        # Zl, or Zp) are mapped to SPACE (U+0020).  The following is a complete
        # list of these code points: U+0020, 00A0, 1680, 2000-200A, 2028-2029,
        # 202F, 205F, 3000.
        [ '', 0x200B ],
        [ '0020', 0x00A0, 0x1680, 0x2000 .. 0x200A, 0x2028, 0x2029,
                  0x202F, 0x205F, 0x3000 ],
    );

    print $outFH "# Map table for LDAP profile (rfc4518.txt)\n\n";
    for my $section (@sections) {
        my ($replacement, @codePoints) = @$section;
        for my $codePoint (@codePoints) {
            # %04X pads to four digits and leaves five-digit code points whole.
            printf {$outFH} "%04X; %s; MAP\n", $codePoint, $replacement;
        }
    }
    print $outFH "\n# Total code points 238\n";
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
| 633 #----------------------------------------------------------------------- |
# Prints the command-line help to STDOUT and terminates the script.
#
# NOTE(review): the exit status is 0 although this is only reached when a
# mandatory option is missing; left unchanged because calling scripts may
# rely on it - confirm whether a non-zero status is wanted.
sub usage {
    print <<"END";
Usage:
filterRFC3454.pl
Options:
--sourcedir=<directory>
--destdir=<directory>
--src-filename=<name of RFC file>
--dest-filename=<name of destination file>
--A1 Generate data for table A.1
--B1 Generate data for table B.1
--B2 Generate data for table B.2
--B3 Generate data for table B.3
--C11 Generate data for table C.1.1
--C12 Generate data for table C.1.2
--C21 Generate data for table C.2.1
--C22 Generate data for table C.2.2
--C3 Generate data for table C.3
--C4 Generate data for table C.4
--C5 Generate data for table C.5
--C6 Generate data for table C.6
--C7 Generate data for table C.7
--C8 Generate data for table C.8
--C9 Generate data for table C.9
--iscsi Generate data for iSCSI extra prohibited table
--xmpp-node Generate data for XMPP extra prohibited table
--sasl Generate data for SASL map table
--ldap Generate data for LDAP map table
--normalize Embed the normalization directive in the output file
--check-bidi Embed the check bidi directive in the output file

Note, --B2 and --B3 are mutually exclusive.

e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --normalize --check-bidi

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
    exit(0);
}
| 675 |
| 676 |
OLD | NEW |