source/data/brkitr/line_loose_fi.txt - Issue 1621843002: ICU 56 update step 1

Unified Diff: source/data/brkitr/line_loose_fi.txt

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/data/brkitr/line_loose_fi.txt

diff --git a/source/data/brkitr/line_fi.txt b/source/data/brkitr/line_loose_fi.txt

similarity index 93%

copy from source/data/brkitr/line_fi.txt

copy to source/data/brkitr/line_loose_fi.txt

index adf78bd3887404c8c429c6f4fb16167f11501db9..c5dae9f85eea0fb59b5ceadc421cc621c77ecb67 100644

--- a/source/data/brkitr/line_fi.txt

+++ b/source/data/brkitr/line_loose_fi.txt

@@ -1,16 +1,23 @@

-# file: line_fi.txt

+# file: line_loose_fi.txt

# Line Breaking Rules

# Implement default line breaking as defined by

-# Unicode Standard Annex #14 Revision 29 for Unicode 6.2

+# Unicode Standard Annex #14 Revision 34 for Unicode 8.0

# http://www.unicode.org/reports/tr14/

+# tailored as noted in 2nd paragraph below.

# TODO: Rule LB 8 remains as it was in Unicode 5.2

# This is only because of a limitation of ICU break engine implementation,

# not because the older behavior is desirable.

+# This tailors the line break behavior both for Finnish and to correspond to CSS

+# line-break=loose (BCP47 -u-lb-loose) as defined for languages other than

+# Chinese & Japanese.

+# It sets characters of class CJ to behave like ID.

+# In addition, it allows breaks before 3005, 303B, 309D, 309E, 30FD, 30FE (all NS).

# Character Classes defined by TR 14.

@@ -75,7 +82,7 @@ $HL = [:LineBreak = Hebrew_Letter:];

$HY = [:LineBreak = Hyphen:];

$H2 = [:LineBreak = H2:];

$H3 = [:LineBreak = H3:];

-$ID = [:LineBreak = Ideographic:];

+$ID = [[:LineBreak = Ideographic:] $CJ];

$IN = [:LineBreak = Inseperable:];

$IS = [:LineBreak = Infix_Numeric:];

$JL = [:LineBreak = JL:];

@@ -83,7 +90,8 @@ $JV = [:LineBreak = JV:];

$JT = [:LineBreak = JT:];

$LF = [:LineBreak = Line_Feed:];

$NL = [:LineBreak = Next_Line:];

-$NS = [[:LineBreak = Nonstarter:] $CJ];

+$NSX = [\u3005 \u303B \u309D \u309E \u30FD \u30FE];

+$NS = [[:LineBreak = Nonstarter:] - $NSX];

$NU = [:LineBreak = Numeric:];

$OP = [:LineBreak = Open_Punctuation:];

$PO = [:LineBreak = Postfix_Numeric:];

@@ -137,6 +145,7 @@ $JLcm = $JL $CM*;

$JVcm = $JV $CM*;

$JTcm = $JT $CM*;

$NScm = $NS $CM*;

+$NSXcm = $NSX $CM*;

$NUcm = $NU $CM*;

$OPcm = $OP $CM*;

$POcm = $PO $CM*;

@@ -173,6 +182,7 @@ $JL $CM+;

$JV $CM+;

$JT $CM+;

$NS $CM+;

+$NSX $CM+;

$NU $CM+;

$OP $CM+;

$PO $CM+;

@@ -200,7 +210,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM]; # Bases that can't take CMs

# so for this one case we need to manually list out longer sequences.

$AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP];

-$AL_FOLLOW_CM = [$CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HH $HY $NS $IN $NU $ALPlus];

+$AL_FOLLOW_CM = [$CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HH $HY $NS $NSX $IN $NU $ALPlus];

$AL_FOLLOW = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM];

@@ -303,6 +313,8 @@ $OPcm $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL

$QUcm $SP* $OPcm;

# LB 16

+# Do not break between closing punctuation and $NS, even with intervening spaces.

+# But DO allow a break between closing punctuation and $NSX, so $NSX is not included here.

($CLcm | $CPcm) $SP* $NScm;

# LB 17

@@ -336,6 +348,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];

# LB 21 x (BA | HY | NS)

# BB x

+# DO allow breaks here before NSXcm, so don't include it

$LB20NonBreaks $CM* ($BAcm | $HHcm | $HYcm | $NScm) / $AL;

$LB20NonBreaks $CM* ($BAcm | $HHcm | $HYcm | $NScm);

($HY | $HH) $AL;

@@ -355,6 +368,7 @@ $SYcm $HLcm;

# LB 22

($ALcm | $HLcm) $INcm;

$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL

+$EXcm $INcm;

$IDcm $INcm;

$INcm $INcm;

$NUcm $INcm;

@@ -435,6 +449,7 @@ $CM+ $JL;

$CM+ $JV;

$CM+ $JT;

$CM+ $NS;

+$CM+ $NSX;

$CM+ $NU;

$CM+ $OP;

$CM+ $PO;

@@ -553,6 +568,7 @@ $SY $CM $SP+ $OP; # TODO: Experiment. Remove.

$CM* $OP $SP* $CM* $QU;

# LB 16

+# Don't include $NSX here

$CM* $NS $SP* $CM* ($CL | $CP);

# LB 17

@@ -581,6 +597,7 @@ $CM* $CAN_CM $CM* $QU; # QU x .

$AL ($HY | $HH) / $SP;

# LB 21

+# Don't include $NSX here

$CM* ($BA | $HH | $HY | $NS) $CM* [$LB20NonBreaks-$CM]; # . x (BA | HY | NS)

$CM* [$LB20NonBreaks-$CM] $CM* $BB; # BB x .

@@ -594,6 +611,7 @@ $CM* $HL $CM* $SY;

# LB 22

$CM* $IN $CM* ($ALPlus | $HL);

+$CM* $IN $CM* $EX;

$CM* $IN $CM* $ID;

$CM* $IN $CM* $IN;

$CM* $IN $CM* $NU;

« no previous file with comments | « source/data/brkitr/line_loose_cj.txt ('k') | source/data/brkitr/line_normal.txt » ('j') | no next file with comments »