Index: patches/wordbrk.patch |
diff --git a/patches/wordbrk.patch b/patches/wordbrk.patch |
index bf1cadf1f065f0193c93e5fbacef8955c098ec4a..0bf6b03bffe0b743fb150c4418a1df24d7f3ed35 100644 |
--- a/patches/wordbrk.patch |
+++ b/patches/wordbrk.patch |
@@ -1,8 +1,8 @@ |
-diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt |
-index f89a2fe..c74da4c 100644 |
---- a/source/data/brkitr/word.txt |
-+++ b/source/data/brkitr/word.txt |
-@@ -35,10 +35,16 @@ $Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}]; |
+diff --git a/source/data/brkitr/rules/word.txt b/source/data/brkitr/rules/word.txt |
+index aa7c47c..9c93dd5 100644 |
+--- a/source/data/brkitr/rules/word.txt |
++++ b/source/data/brkitr/rules/word.txt |
+@@ -39,10 +39,16 @@ $Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}]; |
$ALetter = [\p{Word_Break = ALetter}]; |
$Single_Quote = [\p{Word_Break = Single_Quote}]; |
$Double_Quote = [\p{Word_Break = Double_Quote}]; |
@@ -20,5 +20,5 @@ index f89a2fe..c74da4c 100644 |
+$MidNum = [\p{Word_Break = MidNum}[\u002E \uFF0E]]; |
+$Numeric = [\p{Word_Break = Numeric}[\uff10-\uff19]]; #includes fullwidth digits |
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; |
- |
- $Han = [:Han:]; |
+ $E_Base = [\p{Word_Break = EB}\U0001F3C2\U0001F3C7\U0001F3CC\U0001F46A-\U0001F46D\U0001F46F\U0001F574\U0001F6CC]; |
+ $E_Modifier = [\p{Word_Break = EM}]; |