source/data/brkitr/line_loose.txt - Issue 1621843002: ICU 56 update step 1

Unified Diff: source/data/brkitr/line_loose.txt

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/data/brkitr/line_loose.txt

diff --git a/source/data/brkitr/line_ja.txt b/source/data/brkitr/line_loose.txt

similarity index 93%

copy from source/data/brkitr/line_ja.txt

copy to source/data/brkitr/line_loose.txt

index 70b203d1b0d26afae1aa80d16797c0c40be68a8b..2732b2b9ab758015c9adc50f19db5ac176fb4a77 100644

--- a/source/data/brkitr/line_ja.txt

+++ b/source/data/brkitr/line_loose.txt

@@ -1,16 +1,25 @@

-# file: line_ja.txt

+# file: line_loose.txt

# Line Breaking Rules

# Implement default line breaking as defined by

-# Unicode Standard Annex #14 Revision 29 for Unicode 6.2

+# Unicode Standard Annex #14 Revision 34 for Unicode 8.0

# http://www.unicode.org/reports/tr14/

+# tailored as noted in the 2nd paragraph below.

# TODO: Rule LB 8 remains as it was in Unicode 5.2

# This is only because of a limitation of ICU break engine implementation,

# not because the older behavior is desirable.

+# This tailors the line break behavior to correspond to CSS

+# line-break=loose (BCP47 -u-lb-loose) as defined for languages other than

+# Chinese & Japanese.

+# It sets characters of class CJ to behave like ID.

+# In addition, it allows breaks:

+# * before iteration marks U+3005, U+303B, U+309D, U+309E, U+30FD, U+30FE (all NS)

+# * between characters of LineBreak class IN

# Character Classes defined by TR 14.

@@ -82,7 +91,8 @@ $JV = [:LineBreak = JV:];

$JT = [:LineBreak = JT:];

$LF = [:LineBreak = Line_Feed:];

$NL = [:LineBreak = Next_Line:];

-$NS = [:LineBreak = Nonstarter:];

+$NSX = [\u3005 \u303B \u309D \u309E \u30FD \u30FE];

+$NS = [[:LineBreak = Nonstarter:] - $NSX];

$NU = [:LineBreak = Numeric:];

$OP = [:LineBreak = Open_Punctuation:];

$PO = [:LineBreak = Postfix_Numeric:];

@@ -135,6 +145,7 @@ $JLcm = $JL $CM*;

$JVcm = $JV $CM*;

$JTcm = $JT $CM*;

$NScm = $NS $CM*;

+$NSXcm = $NSX $CM*;

$NUcm = $NU $CM*;

$OPcm = $OP $CM*;

$POcm = $PO $CM*;

@@ -170,6 +181,7 @@ $JL $CM+;

$JV $CM+;

$JT $CM+;

$NS $CM+;

+$NSX $CM+;

$NU $CM+;

$OP $CM+;

$PO $CM+;

@@ -300,6 +312,8 @@ $OPcm $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL

$QUcm $SP* $OPcm;

# LB 16

+# Do not break between closing punctuation and $NS, even with intervening spaces

+# But DO allow a break between closing punctuation and $NSX, so $NSX is not included here

($CLcm | $CPcm) $SP* $NScm;

# LB 17

@@ -332,6 +346,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];

# LB 21 x (BA | HY | NS)

# BB x

+# DO allow breaks here before $NSXcm, so $NSXcm is not included

$LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm);

$BBcm [^$CB]; # $BB x

@@ -349,8 +364,9 @@ $SYcm $HLcm;

# LB 22

($ALcm | $HLcm) $INcm;

$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL

+$EXcm $INcm;

$IDcm $INcm;

-$INcm $INcm;

+# $INcm $INcm; # delete this rule for CSS loose

$NUcm $INcm;

@@ -428,6 +444,7 @@ $CM+ $JL;

$CM+ $JV;

$CM+ $JT;

$CM+ $NS;

+$CM+ $NSX;

$CM+ $NU;

$CM+ $OP;

$CM+ $PO;

@@ -546,6 +563,7 @@ $SY $CM $SP+ $OP; # TODO: Experiment. Remove.

$CM* $OP $SP* $CM* $QU;

# LB 16

+# Don't include $NSX here

$CM* $NS $SP* $CM* ($CL | $CP);

# LB 17

@@ -571,6 +589,7 @@ $CM* $CAN_CM $CM* $QU; # QU x .

# LB 21

+# Don't include $NSX here

$CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM]; # . x (BA | HY | NS)

$CM* [$LB20NonBreaks-$CM] $CM* $BB; # BB x .

@@ -584,8 +603,9 @@ $CM* $HL $CM* $SY;

# LB 22

$CM* $IN $CM* ($ALPlus | $HL);

+$CM* $IN $CM* $EX;

$CM* $IN $CM* $ID;

-$CM* $IN $CM* $IN;

+# $CM* $IN $CM* $IN; # delete this rule for CSS loose

$CM* $IN $CM* $NU;

# LB 23

« no previous file with comments | « source/data/brkitr/line_ja.txt ('k') | source/data/brkitr/line_loose_cj.txt » ('j') | no next file with comments »