Index: icu46/source/i18n/bocsu.c |
=================================================================== |
--- icu46/source/i18n/bocsu.c (revision 0) |
+++ icu46/source/i18n/bocsu.c (revision 0) |
@@ -0,0 +1,191 @@ |
+/* |
+******************************************************************************* |
+* Copyright (C) 2001-2003, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+******************************************************************************* |
+* file name: bocsu.c |
+* encoding: US-ASCII |
+* tab size: 8 (not used) |
+* indentation:4 |
+* |
+* Author: Markus W. Scherer |
+* |
+* Modification history: |
+* 05/18/2001 weiv Made into separate module |
+*/ |
+ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_COLLATION |
+ |
+#include "bocsu.h" |
+ |
+/* Encode one difference value -0x10ffff..+0x10ffff at p in 1..4 bytes, |
+ * preserving lexical order. Returns the position after the last byte |
+ * written; up to 4 bytes are stored starting at p. */ |
+U_CFUNC uint8_t * |
+u_writeDiff(int32_t diff, uint8_t *p) { |
+    int32_t m; /* filled in by NEGDIVMOD on the negative side */ |
+    if(diff>=SLOPE_REACH_NEG_1 && diff<=SLOPE_REACH_POS_1) { |
+        p[0]=(uint8_t)(SLOPE_MIDDLE+diff); |
+        return p+1; |
+    } |
+    if(diff>=SLOPE_REACH_NEG_1) { /* above the single-byte range */ |
+        if(diff<=SLOPE_REACH_POS_2) { |
+            p[0]=(uint8_t)(SLOPE_START_POS_2+(diff/SLOPE_TAIL_COUNT)); |
+            p[1]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); |
+            return p+2; |
+        } |
+        if(diff<=SLOPE_REACH_POS_3) { /* trail bytes are stored back to front */ |
+            p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); |
+            diff/=SLOPE_TAIL_COUNT; |
+            p[1]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); |
+            p[0]=(uint8_t)(SLOPE_START_POS_3+(diff/SLOPE_TAIL_COUNT)); |
+            return p+3; |
+        } |
+        p[3]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); |
+        diff/=SLOPE_TAIL_COUNT; |
+        p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); |
+        diff/=SLOPE_TAIL_COUNT; |
+        p[1]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); |
+        p[0]=SLOPE_MAX; /* fixed lead byte of the longest positive form */ |
+        return p+4; |
+    } |
+    if(diff>=SLOPE_REACH_NEG_2) { /* below the single-byte range from here on */ |
+        NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m); |
+        p[0]=(uint8_t)(SLOPE_START_NEG_2+diff); |
+        p[1]=(uint8_t)(SLOPE_MIN+m); |
+        return p+2; |
+    } |
+    if(diff>=SLOPE_REACH_NEG_3) { |
+        NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m); |
+        p[2]=(uint8_t)(SLOPE_MIN+m); |
+        NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m); |
+        p[1]=(uint8_t)(SLOPE_MIN+m); |
+        p[0]=(uint8_t)(SLOPE_START_NEG_3+diff); |
+        return p+3; |
+    } |
+    NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m); |
+    p[3]=(uint8_t)(SLOPE_MIN+m); |
+    NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m); |
+    p[2]=(uint8_t)(SLOPE_MIN+m); |
+    NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m); |
+    p[1]=(uint8_t)(SLOPE_MIN+m); |
+    p[0]=SLOPE_MIN; /* fixed lead byte of the longest negative form */ |
+    return p+4; |
+} |
+ |
+/* |
+ * Return how many bytes (1..4) u_writeDiff() would write for diff. |
+ * The range checks here mirror the branches of u_writeDiff(). |
+ */ |
+static int32_t |
+lengthOfDiff(int32_t diff) { |
+    if(diff<SLOPE_REACH_NEG_1) { |
+        /* below the single-byte range: 2, 3 or 4 bytes */ |
+        if(diff>=SLOPE_REACH_NEG_2) { |
+            return 2; |
+        } |
+        return (diff>=SLOPE_REACH_NEG_3) ? 3 : 4; |
+    } |
+ |
+    /* single-byte range and above */ |
+    if(diff<=SLOPE_REACH_POS_1) { |
+        return 1; |
+    } |
+    if(diff<=SLOPE_REACH_POS_2) { |
+        return 2; |
+    } |
+    return (diff<=SLOPE_REACH_POS_3) ? 3 : 4; |
+} |
+ |
+/* |
+ * Encode the code points of the string s[0..length) at p as a sequence |
+ * of byte-encoded differences (slope detection), preserving lexical |
+ * order. Returns the number of bytes written. |
+ * |
+ * The difference-taking is optimized for runs of Unicode text within |
+ * small scripts: each code point is encoded relative to an adjusted base |
+ * that is derived from the previous code point (initially 0), not |
+ * relative to the previous code point itself. |
+ * |
+ * - Most small scripts are allocated within aligned 128-blocks of |
+ *   Unicode code points, so the base is normally computed from the start |
+ *   of the previous code point's 128-block. |
+ * - After a previous code point in 0x4e00..0x9fff (Unihan area), one |
+ *   fixed base near the upper end of that area is used. |
+ * |
+ * Note that the identical-level run in a sort key is generated from |
+ * NFD text - there are never Hangul characters included. |
+ */ |
+U_CFUNC int32_t |
+u_writeIdenticalLevelRun(const UChar *s, int32_t length, uint8_t *p) { |
+    uint8_t *start; |
+    int32_t cp, last, base; |
+    int32_t pos; |
+ |
+    start=p; |
+    last=0; |
+    pos=0; |
+    while(pos<length) { |
+        if(last>=0x4e00 && last<0xa000) { |
+            /* Unihan: double-bytes down from the upper end */ |
+            base=0x9fff-SLOPE_REACH_POS_2; |
+        } else { |
+            /* 128-block start of the previous code point, less SLOPE_REACH_NEG_1 */ |
+            base=(last&~0x7f)-SLOPE_REACH_NEG_1; |
+        } |
+ |
+        UTF_NEXT_CHAR(s, pos, length, cp); |
+        p=u_writeDiff(cp-base, p); |
+        last=cp; |
+    } |
+    return (int32_t)(p-start); |
+} |
+ |
+/* |
+ * Write the difference between second and the adjusted base derived from |
+ * first (same adjustment as in u_writeIdenticalLevelRun()) to p, and |
+ * return the number of bytes written. first itself is not encoded. |
+ */ |
+U_CFUNC int32_t |
+u_writeIdenticalLevelRunTwoChars(UChar32 first, UChar32 second, uint8_t *p) { |
+    UChar32 base; |
+ |
+    if(first>=0x4e00 && first<0xa000) { |
+        base=0x9fff-SLOPE_REACH_POS_2; /* Unihan: double-bytes down from the upper end */ |
+    } else { |
+        base=(first&~0x7f)-SLOPE_REACH_NEG_1; /* relative to first's 128-block */ |
+    } |
+    return (int32_t)(u_writeDiff(second-base, p)-p); |
+} |
+ |
+/* |
+ * Return the number of bytes that u_writeIdenticalLevelRun() would |
+ * write for s[0..length). Nothing is written; the per-code-point |
+ * lengths from lengthOfDiff() are added up using the same base |
+ * adjustment as in u_writeIdenticalLevelRun(). |
+ */ |
+U_CFUNC int32_t |
+u_lengthOfIdenticalLevelRun(const UChar *s, int32_t length) { |
+    int32_t cp, last=0, base; |
+    int32_t pos=0, total=0; |
+ |
+    while(pos<length) { |
+        if(last>=0x4e00 && last<0xa000) { |
+            /* Unihan: double-bytes down from the upper end */ |
+            base=0x9fff-SLOPE_REACH_POS_2; |
+        } else { |
+            /* 128-block start of the previous code point, less SLOPE_REACH_NEG_1 */ |
+            base=(last&~0x7f)-SLOPE_REACH_NEG_1; |
+        } |
+ |
+        UTF_NEXT_CHAR(s, pos, length, cp); |
+        total+=lengthOfDiff(cp-base); |
+        last=cp; |
+    } |
+    return total; |
+} |
+ |
+#endif /* #if !UCONFIG_NO_COLLATION */ |
Property changes on: icu46/source/i18n/bocsu.c |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |