source/data/brkitr/line_normal_cj.txt - Issue 1621843002: ICU 56 update step 1

Unified Diff: source/data/brkitr/line_normal_cj.txt

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/data/brkitr/line_normal_cj.txt

diff --git a/source/data/brkitr/line_ja.txt b/source/data/brkitr/line_normal_cj.txt

similarity index 91%

rename from source/data/brkitr/line_ja.txt

rename to source/data/brkitr/line_normal_cj.txt

index 70b203d1b0d26afae1aa80d16797c0c40be68a8b..908a41017fc5d2f4824139e9ea7917d5b81cd032 100644

--- a/source/data/brkitr/line_ja.txt

+++ b/source/data/brkitr/line_normal_cj.txt

@@ -1,16 +1,23 @@

-# file: line_ja.txt

+# file: line_normal_cj.txt

# Line Breaking Rules

# Implement default line breaking as defined by

-# Unicode Standard Annex #14 Revision 29 for Unicode 6.2

+# Unicode Standard Annex #14 Revision 34 for Unicode 8.0

# http://www.unicode.org/reports/tr14/

+# tailored as noted in the 2nd paragraph below.

# TODO: Rule LB 8 remains as it was in Unicode 5.2

# This is only because of a limitation of ICU break engine implementation,

# not because the older behavior is desirable.

+# This tailors the line break behavior to correspond to CSS

+# line-break=normal (BCP47 -u-lb-normal) as defined for Chinese & Japanese.

+# It sets characters of class CJ to behave like ID.

+# In addition, it allows breaks:

+# * before hyphens U+2010 & U+2013 (both BA) and before U+301C & U+30A0 (both NS)

# Character Classes defined by TR 14.

@@ -58,7 +65,8 @@

$AI = [:LineBreak = Ambiguous:];

$AL = [:LineBreak = Alphabetic:];

-$BA = [:LineBreak = Break_After:];

+$BAX = [\u2010 \u2013];

+$BA = [[:LineBreak = Break_After:] - $BAX];

$BB = [:LineBreak = Break_Before:];

$BK = [:LineBreak = Mandatory_Break:];

$B2 = [:LineBreak = Break_Both:];

@@ -82,7 +90,8 @@ $JV = [:LineBreak = JV:];

$JT = [:LineBreak = JT:];

$LF = [:LineBreak = Line_Feed:];

$NL = [:LineBreak = Next_Line:];

-$NS = [:LineBreak = Nonstarter:];

+$NSX = [\u301C \u30A0];

+$NS = [[:LineBreak = Nonstarter:] - $NSX];

$NU = [:LineBreak = Numeric:];

$OP = [:LineBreak = Open_Punctuation:];

$PO = [:LineBreak = Postfix_Numeric:];

@@ -118,6 +127,7 @@ $ALPlus = [$AL $AI $SA $SG $XX];

$ALcm = $ALPlus $CM*;

$BAcm = $BA $CM*;

+$BAXcm = $BAX $CM*;

$BBcm = $BB $CM*;

$B2cm = $B2 $CM*;

$CLcm = $CL $CM*;

@@ -135,6 +145,7 @@ $JLcm = $JL $CM*;

$JVcm = $JV $CM*;

$JTcm = $JT $CM*;

$NScm = $NS $CM*;

+$NSXcm = $NSX $CM*;

$NUcm = $NU $CM*;

$OPcm = $OP $CM*;

$POcm = $PO $CM*;

@@ -153,6 +164,7 @@ $WJcm = $WJ $CM*;

$ALPlus $CM+;

$BA $CM+;

+$BAX $CM+;

$BB $CM+;

$B2 $CM+;

$CL $CM+;

@@ -170,6 +182,7 @@ $JL $CM+;

$JV $CM+;

$JT $CM+;

$NS $CM+;

+$NSX $CM+;

$NU $CM+;

$OP $CM+;

$PO $CM+;

@@ -259,7 +272,7 @@ $GLcm $CANT_CM;

# LB 12a Do not break before NBSP and related characters ...

# [^SP BA HY] x GL

-[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GLcm;

+[[$LB8NonBreaks] - [$SP $BA $BAX $HY]] $CM* $GLcm;

$CM+ GLcm;

@@ -300,6 +313,8 @@ $OPcm $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL

$QUcm $SP* $OPcm;

# LB 16

+# Do not break between closing punctuation and $NS, even with intervening spaces

+# But DO allow a break between closing punctuation and $NSX, so $NSX is not included here

($CLcm | $CPcm) $SP* $NScm;

# LB 17

@@ -332,6 +347,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];

# LB 21 x (BA | HY | NS)

# BB x

+# DO allow breaks here before $BAXcm and $NSXcm, so don't include them

$LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm);

$BBcm [^$CB]; # $BB x

@@ -340,7 +356,7 @@ $BBcm $LB20NonBreaks $CM*;

# LB 21a Don't break after Hebrew + Hyphen

# HL (HY | BA) x

-$HLcm ($HYcm | $BAcm) [^$CB]?;

+$HLcm ($HYcm | $BAcm | $BAXcm) [^$CB]?;

# LB 21b (forward) Don't break between SY and HL

# (break between HL and SY already disallowed by LB 13 above)

@@ -349,6 +365,7 @@ $SYcm $HLcm;

# LB 22

($ALcm | $HLcm) $INcm;

$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL

+$EXcm $INcm;

$IDcm $INcm;

$INcm $INcm;

$NUcm $INcm;

@@ -411,6 +428,7 @@ $RIcm $RIcm;

$CM+ $ALPlus;

$CM+ $BA;

+$CM+ $BAX;

$CM+ $BB;

$CM+ $B2;

$CM+ $CL;

@@ -428,6 +446,7 @@ $CM+ $JL;

$CM+ $JV;

$CM+ $JT;

$CM+ $NS;

+$CM+ $NSX;

$CM+ $NU;

$CM+ $OP;

$CM+ $PO;

@@ -501,7 +520,7 @@ $CM* $CAN_CM $CM* $WJ;

# LB 12a

# [^SP BA HY] x GL

-$CM* $GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HY]];

+$CM* $GL $CM* [$LB8NonBreaks-[$CM $SP $BA $BAX $HY]];

# LB 12

# GL x

@@ -546,6 +565,7 @@ $SY $CM $SP+ $OP; # TODO: Experiment. Remove.

$CM* $OP $SP* $CM* $QU;

# LB 16

+# Don't include $NSX here

$CM* $NS $SP* $CM* ($CL | $CP);

# LB 17

@@ -571,19 +591,21 @@ $CM* $CAN_CM $CM* $QU; # QU x .

# LB 21

+# Don't include $BAX or $NSX here

$CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM]; # . x (BA | HY | NS)

$CM* [$LB20NonBreaks-$CM] $CM* $BB; # BB x .

[^$CB] $CM* $BB; #

# LB21a

-[^$CB] $CM* ($HY | $BA) $CM* $HL;

+[^$CB]? $CM* ($HY | $BA | $BAX) $CM* $HL;

# LB21b (reverse)

$CM* $HL $CM* $SY;

# LB 22

$CM* $IN $CM* ($ALPlus | $HL);

+$CM* $IN $CM* $EX;

$CM* $IN $CM* $ID;

$CM* $IN $CM* $IN;

$CM* $IN $CM* $NU;

@@ -647,7 +669,7 @@ $SP+ $CM* ($CL | $CP);

$SP+ $CM* $B2;

# LB 21

-$CM* ($HY | $BA) $CM* $HL;

+$CM* ($HY | $BA | $BAX) $CM* $HL;

# LB 25

($CM* ($IS | $SY))+ $CM* $NU;

@@ -669,6 +691,6 @@ $dictionary $dictionary;

# turn off rule chaining. We don't want to move more

# than necessary.

-[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary];

+[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $BAX $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $BAX $dictionary];

$dictionary $dictionary;

« no previous file with comments | « source/data/brkitr/line_normal.txt ('k') | source/data/brkitr/line_normal_fi.txt » ('j') | no next file with comments »