Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(64)

Side by Side Diff: source/data/brkitr/line_loose_fi.txt

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/data/brkitr/line_loose_cj.txt ('k') | source/data/brkitr/line_normal.txt » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (c) 2002-2013 International Business Machines Corporation and 1 # Copyright (c) 2002-2015 International Business Machines Corporation and
2 # others. All Rights Reserved. 2 # others. All Rights Reserved.
3 # 3 #
4 # file: line_fi.txt 4 # file: line_loose_fi.txt
5 # 5 #
6 # Line Breaking Rules 6 # Line Breaking Rules
7 # Implement default line breaking as defined by 7 # Implement default line breaking as defined by
8 # Unicode Standard Annex #14 Revision 29 for Unicode 6.2 8 # Unicode Standard Annex #14 Revision 34 for Unicode 8.0
9 # http://www.unicode.org/reports/tr14/ 9 # http://www.unicode.org/reports/tr14/
10 # tailored as noted in 2nd paragraph below..
10 # 11 #
11 # TODO: Rule LB 8 remains as it was in Unicode 5.2 12 # TODO: Rule LB 8 remains as it was in Unicode 5.2
12 # This is only because of a limitation of ICU break engine implementation, 13 # This is only because of a limitation of ICU break engine implementation,
13 # not because the older behavior is desirable. 14 # not because the older behavior is desirable.
15 #
16 # This tailors the line break behavior both for Finnish and to correspond to CSS
17 # line-break=loose (BCP47 -u-lb-loose) as defined for languages other than
18 # Chinese & Japanese.
19 # It sets characters of class CJ to behave like ID.
20 # In addition, it allows breaks before 3005, 303B, 309D, 309E, 30FD, 30FE (all NS).
14 21
15 # 22 #
16 # Character Classes defined by TR 14. 23 # Character Classes defined by TR 14.
17 # 24 #
18 25
19 !!chain; 26 !!chain;
20 !!LBCMNoChain; 27 !!LBCMNoChain;
21 28
22 29
23 !!lookAheadHardBreak; 30 !!lookAheadHardBreak;
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
68 $CL = [:LineBreak = Close_Punctuation:]; 75 $CL = [:LineBreak = Close_Punctuation:];
69 $CM = [:LineBreak = Combining_Mark:]; 76 $CM = [:LineBreak = Combining_Mark:];
70 $CP = [:LineBreak = Close_Parenthesis:]; 77 $CP = [:LineBreak = Close_Parenthesis:];
71 $CR = [:LineBreak = Carriage_Return:]; 78 $CR = [:LineBreak = Carriage_Return:];
72 $EX = [:LineBreak = Exclamation:]; 79 $EX = [:LineBreak = Exclamation:];
73 $GL = [:LineBreak = Glue:]; 80 $GL = [:LineBreak = Glue:];
74 $HL = [:LineBreak = Hebrew_Letter:]; 81 $HL = [:LineBreak = Hebrew_Letter:];
75 $HY = [:LineBreak = Hyphen:]; 82 $HY = [:LineBreak = Hyphen:];
76 $H2 = [:LineBreak = H2:]; 83 $H2 = [:LineBreak = H2:];
77 $H3 = [:LineBreak = H3:]; 84 $H3 = [:LineBreak = H3:];
78 $ID = [:LineBreak = Ideographic:]; 85 $ID = [[:LineBreak = Ideographic:] $CJ];
79 $IN = [:LineBreak = Inseperable:]; 86 $IN = [:LineBreak = Inseperable:];
80 $IS = [:LineBreak = Infix_Numeric:]; 87 $IS = [:LineBreak = Infix_Numeric:];
81 $JL = [:LineBreak = JL:]; 88 $JL = [:LineBreak = JL:];
82 $JV = [:LineBreak = JV:]; 89 $JV = [:LineBreak = JV:];
83 $JT = [:LineBreak = JT:]; 90 $JT = [:LineBreak = JT:];
84 $LF = [:LineBreak = Line_Feed:]; 91 $LF = [:LineBreak = Line_Feed:];
85 $NL = [:LineBreak = Next_Line:]; 92 $NL = [:LineBreak = Next_Line:];
86 $NS = [[:LineBreak = Nonstarter:] $CJ]; 93 $NSX = [\u3005 \u303B \u309D \u309E \u30FD \u30FE];
94 $NS = [[:LineBreak = Nonstarter:] - $NSX];
87 $NU = [:LineBreak = Numeric:]; 95 $NU = [:LineBreak = Numeric:];
88 $OP = [:LineBreak = Open_Punctuation:]; 96 $OP = [:LineBreak = Open_Punctuation:];
89 $PO = [:LineBreak = Postfix_Numeric:]; 97 $PO = [:LineBreak = Postfix_Numeric:];
90 $PR = [:LineBreak = Prefix_Numeric:]; 98 $PR = [:LineBreak = Prefix_Numeric:];
91 $QU = [:LineBreak = Quotation:]; 99 $QU = [:LineBreak = Quotation:];
92 $RI = [:LineBreak = Regional_Indicator:]; 100 $RI = [:LineBreak = Regional_Indicator:];
93 $SA = [:LineBreak = Complex_Context:]; 101 $SA = [:LineBreak = Complex_Context:];
94 $SG = [:LineBreak = Surrogate:]; 102 $SG = [:LineBreak = Surrogate:];
95 $SP = [:LineBreak = Space:]; 103 $SP = [:LineBreak = Space:];
96 $SY = [:LineBreak = Break_Symbols:]; 104 $SY = [:LineBreak = Break_Symbols:];
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
130 $HYcm = $HY $CM*; 138 $HYcm = $HY $CM*;
131 $H2cm = $H2 $CM*; 139 $H2cm = $H2 $CM*;
132 $H3cm = $H3 $CM*; 140 $H3cm = $H3 $CM*;
133 $IDcm = $ID $CM*; 141 $IDcm = $ID $CM*;
134 $INcm = $IN $CM*; 142 $INcm = $IN $CM*;
135 $IScm = $IS $CM*; 143 $IScm = $IS $CM*;
136 $JLcm = $JL $CM*; 144 $JLcm = $JL $CM*;
137 $JVcm = $JV $CM*; 145 $JVcm = $JV $CM*;
138 $JTcm = $JT $CM*; 146 $JTcm = $JT $CM*;
139 $NScm = $NS $CM*; 147 $NScm = $NS $CM*;
148 $NSXcm = $NSX $CM*;
140 $NUcm = $NU $CM*; 149 $NUcm = $NU $CM*;
141 $OPcm = $OP $CM*; 150 $OPcm = $OP $CM*;
142 $POcm = $PO $CM*; 151 $POcm = $PO $CM*;
143 $PRcm = $PR $CM*; 152 $PRcm = $PR $CM*;
144 $QUcm = $QU $CM*; 153 $QUcm = $QU $CM*;
145 $RIcm = $RI $CM*; 154 $RIcm = $RI $CM*;
146 $SYcm = $SY $CM*; 155 $SYcm = $SY $CM*;
147 $WJcm = $WJ $CM*; 156 $WJcm = $WJ $CM*;
148 157
149 ## ------------------------------------------------- 158 ## -------------------------------------------------
(...skipping 16 matching lines...) Expand all
166 $HY $CM+; 175 $HY $CM+;
167 $H2 $CM+; 176 $H2 $CM+;
168 $H3 $CM+; 177 $H3 $CM+;
169 $ID $CM+; 178 $ID $CM+;
170 $IN $CM+; 179 $IN $CM+;
171 $IS $CM+; 180 $IS $CM+;
172 $JL $CM+; 181 $JL $CM+;
173 $JV $CM+; 182 $JV $CM+;
174 $JT $CM+; 183 $JT $CM+;
175 $NS $CM+; 184 $NS $CM+;
185 $NSX $CM+;
176 $NU $CM+; 186 $NU $CM+;
177 $OP $CM+; 187 $OP $CM+;
178 $PO $CM+; 188 $PO $CM+;
179 $PR $CM+; 189 $PR $CM+;
180 $QU $CM+; 190 $QU $CM+;
181 $RI $CM+; 191 $RI $CM+;
182 $SY $CM+; 192 $SY $CM+;
183 $WJ $CM+; 193 $WJ $CM+;
184 194
185 # 195 #
186 # CAN_CM is the set of characters that may combine with CM combining chars. 196 # CAN_CM is the set of characters that may combine with CM combining chars.
187 # Note that Linebreak UAX 14's concept of a combining char and the rules 197 # Note that Linebreak UAX 14's concept of a combining char and the rules
188 # for what they can combine with are _very_ different from the rest of Unicode. 198 # for what they can combine with are _very_ different from the rest of Unicode.
189 # 199 #
190 # Note that $CM itself is left out of this set. If CM is needed as a base 200 # Note that $CM itself is left out of this set. If CM is needed as a base
191 # it must be listed separately in the rule. 201 # it must be listed separately in the rule.
192 # 202 #
193 $CAN_CM = [^$SP $BK $CR $LF $NL $ZW $CM]; # Bases that can take CMs 203 $CAN_CM = [^$SP $BK $CR $LF $NL $ZW $CM]; # Bases that can take CMs
194 $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM]; # Bases that can't take CMs 204 $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM]; # Bases that can't take CMs
195 205
196 # 206 #
197 # AL_FOLLOW set of chars that can unconditionally follow an AL 207 # AL_FOLLOW set of chars that can unconditionally follow an AL
198 # Needed in rules where stand-alone $CM s are treated as AL. 208 # Needed in rules where stand-alone $CM s are treated as AL.
199 # Chaining is disabled with CM because it causes other failures, 209 # Chaining is disabled with CM because it causes other failures,
200 # so for this one case we need to manually list out longer sequences. 210 # so for this one case we need to manually list out longer sequences.
201 # 211 #
202 $AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP]; 212 $AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP];
203 $AL_FOLLOW_CM = [$CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HH $HY $NS $IN $ NU $ALPlus]; 213 $AL_FOLLOW_CM = [$CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HH $HY $NS $NSX $IN $NU $ALPlus];
204 $AL_FOLLOW = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM]; 214 $AL_FOLLOW = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM];
205 215
206 216
207 # 217 #
208 # Rule LB 4, 5 Mandatory (Hard) breaks. 218 # Rule LB 4, 5 Mandatory (Hard) breaks.
209 # 219 #
210 $LB4Breaks = [$BK $CR $LF $NL]; 220 $LB4Breaks = [$BK $CR $LF $NL];
211 $LB4NonBreaks = [^$BK $CR $LF $NL]; 221 $LB4NonBreaks = [^$BK $CR $LF $NL];
212 $CR $LF {100}; 222 $CR $LF {100};
213 223
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
296 # 306 #
297 $OPcm $SP* $CAN_CM $CM*; 307 $OPcm $SP* $CAN_CM $CM*;
298 $OPcm $SP* $CANT_CM; 308 $OPcm $SP* $CANT_CM;
299 309
300 $OPcm $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL 310 $OPcm $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL
301 311
302 # LB 15 312 # LB 15
303 $QUcm $SP* $OPcm; 313 $QUcm $SP* $OPcm;
304 314
305 # LB 16 315 # LB 16
316 # Do not break between closing punctuation and $NS, even with intervening spaces
317 # But DO allow a break between closing punctuation and $NSX, don't include it here
306 ($CLcm | $CPcm) $SP* $NScm; 318 ($CLcm | $CPcm) $SP* $NScm;
307 319
308 # LB 17 320 # LB 17
309 $B2cm $SP* $B2cm; 321 $B2cm $SP* $B2cm;
310 322
311 # 323 #
312 # LB 18 Break after spaces. 324 # LB 18 Break after spaces.
313 # 325 #
314 $LB18NonBreaks = [$LB8NonBreaks - [$SP]]; 326 $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
315 $LB18Breaks = [$LB8Breaks $SP]; 327 $LB18Breaks = [$LB8Breaks $SP];
(...skipping 13 matching lines...) Expand all
329 # LB 20 341 # LB 20
330 # <break> $CB 342 # <break> $CB
331 # $CB <break> 343 # $CB <break>
332 344
333 $LB20NonBreaks = [$LB18NonBreaks - $CB]; 345 $LB20NonBreaks = [$LB18NonBreaks - $CB];
334 346
335 # LB 20.09 added rule for Finnish tailoring 347 # LB 20.09 added rule for Finnish tailoring
336 # LB 21 x (BA | HY | NS) 348 # LB 21 x (BA | HY | NS)
337 # BB x 349 # BB x
338 # 350 #
351 # DO allow breaks here before NSXcm, so don't include it
339 $LB20NonBreaks $CM* ($BAcm | $HHcm | $HYcm | $NScm) / $AL; 352 $LB20NonBreaks $CM* ($BAcm | $HHcm | $HYcm | $NScm) / $AL;
340 $LB20NonBreaks $CM* ($BAcm | $HHcm | $HYcm | $NScm); 353 $LB20NonBreaks $CM* ($BAcm | $HHcm | $HYcm | $NScm);
341 ($HY | $HH) $AL; 354 ($HY | $HH) $AL;
342 355
343 $BBcm [^$CB]; # $BB x 356 $BBcm [^$CB]; # $BB x
344 $BBcm $LB20NonBreaks $CM*; 357 $BBcm $LB20NonBreaks $CM*;
345 358
346 # LB 21a Don't break after Hebrew + Hyphen 359 # LB 21a Don't break after Hebrew + Hyphen
347 # HL (HY | BA) x 360 # HL (HY | BA) x
348 # 361 #
349 $HLcm ($HYcm | $BAcm | $HHcm) [^$CB]?; 362 $HLcm ($HYcm | $BAcm | $HHcm) [^$CB]?;
350 363
351 # LB 21b (forward) Don't break between SY and HL 364 # LB 21b (forward) Don't break between SY and HL
352 # (break between HL and SY already disallowed by LB 13 above) 365 # (break between HL and SY already disallowed by LB 13 above)
353 $SYcm $HLcm; 366 $SYcm $HLcm;
354 367
355 # LB 22 368 # LB 22
356 ($ALcm | $HLcm) $INcm; 369 ($ALcm | $HLcm) $INcm;
357 $CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL 370 $CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL
371 $EXcm $INcm;
358 $IDcm $INcm; 372 $IDcm $INcm;
359 $INcm $INcm; 373 $INcm $INcm;
360 $NUcm $INcm; 374 $NUcm $INcm;
361 375
362 376
363 # $LB 23 377 # $LB 23
364 $IDcm $POcm; 378 $IDcm $POcm;
365 $ALcm $NUcm; # includes $LB19 379 $ALcm $NUcm; # includes $LB19
366 $HLcm $NUcm; 380 $HLcm $NUcm;
367 $CM+ $NUcm; # Rule 10, any otherwise unattached CM behaves as AL 381 $CM+ $NUcm; # Rule 10, any otherwise unattached CM behaves as AL
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
428 $CM+ $HY; 442 $CM+ $HY;
429 $CM+ $H2; 443 $CM+ $H2;
430 $CM+ $H3; 444 $CM+ $H3;
431 $CM+ $ID; 445 $CM+ $ID;
432 $CM+ $IN; 446 $CM+ $IN;
433 $CM+ $IS; 447 $CM+ $IS;
434 $CM+ $JL; 448 $CM+ $JL;
435 $CM+ $JV; 449 $CM+ $JV;
436 $CM+ $JT; 450 $CM+ $JT;
437 $CM+ $NS; 451 $CM+ $NS;
452 $CM+ $NSX;
438 $CM+ $NU; 453 $CM+ $NU;
439 $CM+ $OP; 454 $CM+ $OP;
440 $CM+ $PO; 455 $CM+ $PO;
441 $CM+ $PR; 456 $CM+ $PR;
442 $CM+ $QU; 457 $CM+ $QU;
443 $CM+ $RI; 458 $CM+ $RI;
444 $CM+ $SY; 459 $CM+ $SY;
445 $CM+ $WJ; 460 $CM+ $WJ;
446 $CM+; 461 $CM+;
447 462
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
546 $AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP; 561 $AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP;
547 $CM* $AL_FOLLOW_CM $CM+ $SP+ $CM* $OP; 562 $CM* $AL_FOLLOW_CM $CM+ $SP+ $CM* $OP;
548 $SY $CM $SP+ $OP; # TODO: Experiment. Remove. 563 $SY $CM $SP+ $OP; # TODO: Experiment. Remove.
549 564
550 565
551 566
552 # LB 15 567 # LB 15
553 $CM* $OP $SP* $CM* $QU; 568 $CM* $OP $SP* $CM* $QU;
554 569
555 # LB 16 570 # LB 16
571 # Don't include $NSX here
556 $CM* $NS $SP* $CM* ($CL | $CP); 572 $CM* $NS $SP* $CM* ($CL | $CP);
557 573
558 # LB 17 574 # LB 17
559 $CM* $B2 $SP* $CM* $B2; 575 $CM* $B2 $SP* $CM* $B2;
560 576
561 # LB 18 break after spaces 577 # LB 18 break after spaces
562 # Nothing explicit needed here. 578 # Nothing explicit needed here.
563 579
564 580
565 # 581 #
566 # LB 19 582 # LB 19
567 # 583 #
568 $CM* $QU $CM* $CAN_CM; # . x QU 584 $CM* $QU $CM* $CAN_CM; # . x QU
569 $CM* $QU $LB18NonBreaks; 585 $CM* $QU $LB18NonBreaks;
570 586
571 587
572 $CM* $CAN_CM $CM* $QU; # QU x . 588 $CM* $CAN_CM $CM* $QU; # QU x .
573 $CANT_CM $CM* $QU; 589 $CANT_CM $CM* $QU;
574 590
575 # 591 #
576 # LB 20 Break before and after CB. 592 # LB 20 Break before and after CB.
577 # nothing needed here. 593 # nothing needed here.
578 # 594 #
579 595
580 # LB 20.09 added rule for Finnish tailoring 596 # LB 20.09 added rule for Finnish tailoring
581 $AL ($HY | $HH) / $SP; 597 $AL ($HY | $HH) / $SP;
582 598
583 # LB 21 599 # LB 21
600 # Don't include $NSX here
584 $CM* ($BA | $HH | $HY | $NS) $CM* [$LB20NonBreaks-$CM]; # . x (BA | HY | NS) 601 $CM* ($BA | $HH | $HY | $NS) $CM* [$LB20NonBreaks-$CM]; # . x (BA | HY | NS)
585 602
586 $CM* [$LB20NonBreaks-$CM] $CM* $BB; # BB x . 603 $CM* [$LB20NonBreaks-$CM] $CM* $BB; # BB x .
587 [^$CB] $CM* $BB; # 604 [^$CB] $CM* $BB; #
588 605
589 # LB21a 606 # LB21a
590 [^$CB] $CM* ($HY | $BA | $HH) $CM* $HL; 607 [^$CB] $CM* ($HY | $BA | $HH) $CM* $HL;
591 608
592 # LB21b (reverse) 609 # LB21b (reverse)
593 $CM* $HL $CM* $SY; 610 $CM* $HL $CM* $SY;
594 611
595 # LB 22 612 # LB 22
596 $CM* $IN $CM* ($ALPlus | $HL); 613 $CM* $IN $CM* ($ALPlus | $HL);
614 $CM* $IN $CM* $EX;
597 $CM* $IN $CM* $ID; 615 $CM* $IN $CM* $ID;
598 $CM* $IN $CM* $IN; 616 $CM* $IN $CM* $IN;
599 $CM* $IN $CM* $NU; 617 $CM* $IN $CM* $NU;
600 618
601 # LB 23 619 # LB 23
602 $CM* $PO $CM* $ID; 620 $CM* $PO $CM* $ID;
603 $CM* $NU $CM* ($ALPlus | $HL); 621 $CM* $NU $CM* ($ALPlus | $HL);
604 $CM* ($ALPlus | $HL) $CM* $NU; 622 $CM* ($ALPlus | $HL) $CM* $NU;
605 623
606 # LB 24 624 # LB 24
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
675 # of context. 693 # of context.
676 # 694 #
677 # It might be slightly more efficient to have specific rules 695 # It might be slightly more efficient to have specific rules
678 # instead of one generic one, but only if we could 696 # instead of one generic one, but only if we could
679 # turn off rule chaining. We don't want to move more 697 # turn off rule chaining. We don't want to move more
680 # than necessary. 698 # than necessary.
681 # 699 #
682 [$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary]; 700 [$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary];
683 $dictionary $dictionary; 701 $dictionary $dictionary;
684 702
OLDNEW
« no previous file with comments | « source/data/brkitr/line_loose_cj.txt ('k') | source/data/brkitr/line_normal.txt » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698