OLD | NEW |
1 # Copyright (c) 2002-2013 International Business Machines Corporation and | 1 # Copyright (c) 2002-2015 International Business Machines Corporation and |
2 # others. All Rights Reserved. | 2 # others. All Rights Reserved. |
3 # | 3 # |
4 # file: line_fi.txt | 4 # file: line_fi.txt |
5 # | 5 # |
6 # Line Breaking Rules | 6 # Line Breaking Rules |
7 # Implement default line breaking as defined by | 7 # Implement default line breaking as defined by |
8 # Unicode Standard Annex #14 Revision 29 for Unicode 6.2 | 8 # Unicode Standard Annex #14 Revision 34 for Unicode 8.0 |
9 # http://www.unicode.org/reports/tr14/ | 9 # http://www.unicode.org/reports/tr14/ |
| 10 # tailored as noted in 2nd paragraph below. |
10 # | 11 # |
11 # TODO: Rule LB 8 remains as it was in Unicode 5.2 | 12 # TODO: Rule LB 8 remains as it was in Unicode 5.2 |
12 # This is only because of a limitation of ICU break engine implementation, | 13 # This is only because of a limitation of ICU break engine implementation, |
13 # not because the older behavior is desirable. | 14 # not because the older behavior is desirable. |
| 15 # |
| 16 # This tailors the line break behavior for Finnish, while otherwise behaving |
| 17 # per UAX 14 which corresponds to CSS line-break=strict (BCP47 -u-lb-strict). |
| 18 # It sets characters of class CJ to behave like NS. |
14 | 19 |
15 # | 20 # |
16 # Character Classes defined by TR 14. | 21 # Character Classes defined by TR 14. |
17 # | 22 # |
18 | 23 |
19 !!chain; | 24 !!chain; |
20 !!LBCMNoChain; | 25 !!LBCMNoChain; |
21 | 26 |
22 | 27 |
23 !!lookAheadHardBreak; | 28 !!lookAheadHardBreak; |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
348 # | 353 # |
349 $HLcm ($HYcm | $BAcm | $HHcm) [^$CB]?; | 354 $HLcm ($HYcm | $BAcm | $HHcm) [^$CB]?; |
350 | 355 |
351 # LB 21b (forward) Don't break between SY and HL | 356 # LB 21b (forward) Don't break between SY and HL |
352 # (break between HL and SY already disallowed by LB 13 above) | 357 # (break between HL and SY already disallowed by LB 13 above) |
353 $SYcm $HLcm; | 358 $SYcm $HLcm; |
354 | 359 |
355 # LB 22 | 360 # LB 22 |
356 ($ALcm | $HLcm) $INcm; | 361 ($ALcm | $HLcm) $INcm; |
357 $CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL | 362 $CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL |
| 363 $EXcm $INcm; |
358 $IDcm $INcm; | 364 $IDcm $INcm; |
359 $INcm $INcm; | 365 $INcm $INcm; |
360 $NUcm $INcm; | 366 $NUcm $INcm; |
361 | 367 |
362 | 368 |
363 # LB 23 | 369 # LB 23 |
364 $IDcm $POcm; | 370 $IDcm $POcm; |
365 $ALcm $NUcm; # includes $LB19 | 371 $ALcm $NUcm; # includes $LB19 |
366 $HLcm $NUcm; | 372 $HLcm $NUcm; |
367 $CM+ $NUcm; # Rule 10, any otherwise unattached CM behaves as AL | 373 $CM+ $NUcm; # Rule 10, any otherwise unattached CM behaves as AL |
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
587 [^$CB] $CM* $BB; # | 593 [^$CB] $CM* $BB; # |
588 | 594 |
589 # LB21a | 595 # LB21a |
590 [^$CB] $CM* ($HY | $BA | $HH) $CM* $HL; | 596 [^$CB] $CM* ($HY | $BA | $HH) $CM* $HL; |
591 | 597 |
592 # LB21b (reverse) | 598 # LB21b (reverse) |
593 $CM* $HL $CM* $SY; | 599 $CM* $HL $CM* $SY; |
594 | 600 |
595 # LB 22 | 601 # LB 22 |
596 $CM* $IN $CM* ($ALPlus | $HL); | 602 $CM* $IN $CM* ($ALPlus | $HL); |
| 603 $CM* $IN $CM* $EX; |
597 $CM* $IN $CM* $ID; | 604 $CM* $IN $CM* $ID; |
598 $CM* $IN $CM* $IN; | 605 $CM* $IN $CM* $IN; |
599 $CM* $IN $CM* $NU; | 606 $CM* $IN $CM* $NU; |
600 | 607 |
601 # LB 23 | 608 # LB 23 |
602 $CM* $PO $CM* $ID; | 609 $CM* $PO $CM* $ID; |
603 $CM* $NU $CM* ($ALPlus | $HL); | 610 $CM* $NU $CM* ($ALPlus | $HL); |
604 $CM* ($ALPlus | $HL) $CM* $NU; | 611 $CM* ($ALPlus | $HL) $CM* $NU; |
605 | 612 |
606 # LB 24 | 613 # LB 24 |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
675 # of context. | 682 # of context. |
676 # | 683 # |
677 # It might be slightly more efficient to have specific rules | 684 # It might be slightly more efficient to have specific rules |
678 # instead of one generic one, but only if we could | 685 # instead of one generic one, but only if we could |
679 # turn off rule chaining. We don't want to move more | 686 # turn off rule chaining. We don't want to move more |
680 # than necessary. | 687 # than necessary. |
681 # | 688 # |
682 [$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary]; | 689 [$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary]; |
683 $dictionary $dictionary; | 690 $dictionary $dictionary; |
684 | 691 |
OLD | NEW |