OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2000-2003, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * |
| 9 * File writejava.c |
| 10 * |
| 11 * Modification History: |
| 12 * |
| 13 * Date Name Description |
| 14 * 01/11/02 Ram Creation. |
| 15 ******************************************************************************* |
| 16 */ |
| 17 #include "rle.h" |
| 18 /** |
| 19 * ESCAPE is the marker unit for run-length encoding of 16-bit values. It |
| 20 * introduces either a run (ESCAPE n c) or a literal ESCAPE (ESCAPE ESCAPE). |
| 21 */ |
| 22 static const uint16_t ESCAPE = 0xA5A5; |
| 23 |
| 24 /** |
| 25 * ESCAPE_BYTE is the marker byte for run-length encoding of bytes. It |
| 26 * introduces either a run (ESCAPE_BYTE n b) or a literal ESCAPE_BYTE. |
| 27 */ |
| 28 static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5; |
| 29 |
| 30 /** |
| 31 * Append a byte to the given StringBuffer, packing two bytes into each |
| 32 * character. The state parameter maintains intermediary data between |
| 33 * calls. |
| 34 * @param state A two-element array, with state[0] == 0 if this is the |
| 35 * first byte of a pair, or state[0] != 0 if this is the second byte |
| 36 * of a pair, in which case state[1] is the first byte. |
| 37 */ |
| 38 static uint16_t* |
| 39 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t
state[],UErrorCode* status) { |
| 40 if(!status || U_FAILURE(*status)){ |
| 41 return NULL; |
| 42 } |
| 43 if (state[0] != 0) { |
| 44 uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF)); |
| 45 if(buffer < buffLimit){ |
| 46 *buffer++ = c; |
| 47 }else{ |
| 48 *status = U_BUFFER_OVERFLOW_ERROR; |
| 49 } |
| 50 state[0] = 0; |
| 51 return buffer; |
| 52 } |
| 53 else { |
| 54 state[0] = 1; |
| 55 state[1] = value; |
| 56 return buffer; |
| 57 } |
| 58 } |
| 59 /** |
| 60 * Encode a run, possibly a degenerate run (of < 4 values). |
| 61 * @param length The length of the run; must be > 0 && <= 0xFF. |
| 62 */ |
| 63 static uint16_t* |
| 64 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length
, uint8_t state[], UErrorCode* status) { |
| 65 if(!status || U_FAILURE(*status)){ |
| 66 return NULL; |
| 67 } |
| 68 if (length < 4) { |
| 69 int32_t j=0; |
| 70 for (; j<length; ++j) { |
| 71 if (value == ESCAPE_BYTE) { |
| 72 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,s
tatus); |
| 73 } |
| 74 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); |
| 75 } |
| 76 } |
| 77 else { |
| 78 if (length == ESCAPE_BYTE) { |
| 79 if (value == ESCAPE_BYTE){ |
| 80 buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,s
tatus); |
| 81 } |
| 82 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); |
| 83 --length; |
| 84 } |
| 85 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); |
| 86 buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status)
; |
| 87 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Do
n't need to escape this value*/ |
| 88 } |
| 89 return buffer; |
| 90 } |
| 91 |
/**
 * Append one 16-bit unit to an output buffer.
 *
 * If buffer has not reached bufLimit, value is stored and buffer is advanced;
 * otherwise *status is set to U_BUFFER_OVERFLOW_ERROR and nothing is written.
 * num is incremented in both cases. buffer and num must be modifiable lvalues.
 *
 * The body is wrapped in do { } while(0) so that "APPEND(...);" behaves as a
 * single statement, including inside an unbraced if/else.
 */
#define APPEND(buffer, bufLimit, value, num, status) \
    do { \
        if((buffer)<(bufLimit)){ \
            *(buffer)++=(value); \
        }else{ \
            *(status) = U_BUFFER_OVERFLOW_ERROR; \
        } \
        (num)++; \
    } while(0)
| 100 |
| 101 /** |
| 102 * Encode a run, possibly a degenerate run (of < 4 values). |
| 103 * @param length The length of the run; must be > 0 && <= 0xFFFF. |
| 104 */ |
| 105 static uint16_t* |
| 106 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t leng
th,UErrorCode* status) { |
| 107 int32_t num=0; |
| 108 if (length < 4) { |
| 109 int j=0; |
| 110 for (; j<length; ++j) { |
| 111 if (value == (int32_t) ESCAPE){ |
| 112 APPEND(buffer,bufLimit,ESCAPE, num, status); |
| 113 |
| 114 } |
| 115 APPEND(buffer,bufLimit,value,num, status); |
| 116 } |
| 117 } |
| 118 else { |
| 119 if (length == (int32_t) ESCAPE) { |
| 120 if (value == (int32_t) ESCAPE){ |
| 121 APPEND(buffer,bufLimit,ESCAPE,num,status); |
| 122 |
| 123 } |
| 124 APPEND(buffer,bufLimit,value,num,status); |
| 125 --length; |
| 126 } |
| 127 APPEND(buffer,bufLimit,ESCAPE,num,status); |
| 128 APPEND(buffer,bufLimit,(uint16_t) length, num,status); |
| 129 APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to e
scape this value */ |
| 130 } |
| 131 return buffer; |
| 132 } |
| 133 |
| 134 /** |
| 135 * Construct a string representing a char array. Use run-length encoding. |
| 136 * A character represents itself, unless it is the ESCAPE character. Then |
| 137 * the following notations are possible: |
| 138 * ESCAPE ESCAPE ESCAPE literal |
| 139 * ESCAPE n c n instances of character c |
| 140 * Since an encoded run occupies 3 characters, we only encode runs of 4 or |
| 141 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. |
| 142 * If we encounter a run where n == ESCAPE, we represent this as: |
| 143 * c ESCAPE n-1 c |
| 144 * The ESCAPE value is chosen so as not to collide with commonly |
| 145 * seen values. |
| 146 */ |
| 147 int32_t |
| 148 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t
bufLen,UErrorCode* status) { |
| 149 uint16_t* bufLimit = buffer+bufLen; |
| 150 uint16_t* saveBuffer = buffer; |
| 151 if(buffer < bufLimit){ |
| 152 *buffer++ = (uint16_t)(srcLen>>16); |
| 153 if(buffer<bufLimit){ |
| 154 uint16_t runValue = src[0]; |
| 155 int32_t runLength = 1; |
| 156 int i=1; |
| 157 *buffer++ = (uint16_t) srcLen; |
| 158 |
| 159 for (; i<srcLen; ++i) { |
| 160 uint16_t s = src[i]; |
| 161 if (s == runValue && runLength < 0xFFFF){ |
| 162 ++runLength; |
| 163 }else { |
| 164 buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue,
runLength,status); |
| 165 runValue = s; |
| 166 runLength = 1; |
| 167 } |
| 168 } |
| 169 buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength
,status); |
| 170 }else{ |
| 171 *status = U_BUFFER_OVERFLOW_ERROR; |
| 172 } |
| 173 }else{ |
| 174 *status = U_BUFFER_OVERFLOW_ERROR; |
| 175 } |
| 176 return (int32_t)(buffer - saveBuffer); |
| 177 } |
| 178 |
| 179 /** |
| 180 * Construct a string representing a byte array. Use run-length encoding. |
| 181 * Two bytes are packed into a single char, with a single extra zero byte at |
| 182 * the end if needed. A byte represents itself, unless it is the |
| 183 * ESCAPE_BYTE. Then the following notations are possible: |
| 184 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal |
| 185 * ESCAPE_BYTE n b n instances of byte b |
| 186 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or |
| 187 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. |
| 188 * If we encounter a run where n == ESCAPE_BYTE, we represent this as: |
| 189 * b ESCAPE_BYTE n-1 b |
| 190 * The ESCAPE_BYTE value is chosen so as not to collide with commonly |
| 191 * seen values. |
| 192 */ |
| 193 int32_t |
| 194 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t
bufLen, UErrorCode* status) { |
| 195 const uint16_t* saveBuf = buffer; |
| 196 uint16_t* bufLimit = buffer+bufLen; |
| 197 if(buffer < bufLimit){ |
| 198 *buffer++ = ((uint16_t) (srcLen >> 16)); |
| 199 |
| 200 if(buffer<bufLimit){ |
| 201 uint8_t runValue = src[0]; |
| 202 int runLength = 1; |
| 203 uint8_t state[2]= {0}; |
| 204 int i=1; |
| 205 *buffer++=((uint16_t) srcLen); |
| 206 for (; i<srcLen; ++i) { |
| 207 uint8_t b = src[i]; |
| 208 if (b == runValue && runLength < 0xFF){ |
| 209 ++runLength; |
| 210 } |
| 211 else { |
| 212 buffer = encodeRunByte(buffer, bufLimit,runValue, runLength,
state,status); |
| 213 runValue = b; |
| 214 runLength = 1; |
| 215 } |
| 216 } |
| 217 buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state,
status); |
| 218 |
| 219 /* We must save the final byte, if there is one, by padding |
| 220 * an extra zero. |
| 221 */ |
| 222 if (state[0] != 0) { |
| 223 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status); |
| 224 } |
| 225 }else{ |
| 226 *status = U_BUFFER_OVERFLOW_ERROR; |
| 227 } |
| 228 }else{ |
| 229 *status = U_BUFFER_OVERFLOW_ERROR; |
| 230 } |
| 231 return (int32_t) (buffer - saveBuf); |
| 232 } |
| 233 |
| 234 |
| 235 /** |
| 236 * Construct an array of shorts from a run-length encoded string. |
| 237 */ |
| 238 int32_t |
| 239 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t t
gtLen, UErrorCode* status) { |
| 240 int32_t length = 0; |
| 241 int32_t ai = 0; |
| 242 int i=2; |
| 243 |
| 244 if(!status || U_FAILURE(*status)){ |
| 245 return 0; |
| 246 } |
| 247 /* the source is null terminated */ |
| 248 if(srcLen == -1){ |
| 249 srcLen = u_strlen(src); |
| 250 } |
| 251 if(srcLen <= 2){ |
| 252 return 2; |
| 253 } |
| 254 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); |
| 255 |
| 256 if(target == NULL){ |
| 257 return length; |
| 258 } |
| 259 if(tgtLen < length){ |
| 260 *status = U_BUFFER_OVERFLOW_ERROR; |
| 261 return length; |
| 262 } |
| 263 |
| 264 for (; i<srcLen; ++i) { |
| 265 uint16_t c = src[i]; |
| 266 if (c == ESCAPE) { |
| 267 c = src[++i]; |
| 268 if (c == ESCAPE) { |
| 269 target[ai++] = c; |
| 270 } else { |
| 271 int32_t runLength = (int32_t) c; |
| 272 uint16_t runValue = src[++i]; |
| 273 int j=0; |
| 274 for (; j<runLength; ++j) { |
| 275 target[ai++] = runValue; |
| 276 } |
| 277 } |
| 278 } |
| 279 else { |
| 280 target[ai++] = c; |
| 281 } |
| 282 } |
| 283 |
| 284 if (ai != length){ |
| 285 *status = U_INTERNAL_PROGRAM_ERROR; |
| 286 } |
| 287 |
| 288 return length; |
| 289 } |
| 290 |
| 291 /** |
| 292 * Construct an array of bytes from a run-length encoded string. |
| 293 */ |
| 294 int32_t |
| 295 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgt
Len, UErrorCode* status) { |
| 296 |
| 297 int32_t length = 0; |
| 298 UBool nextChar = TRUE; |
| 299 uint16_t c = 0; |
| 300 int32_t node = 0; |
| 301 int32_t runLength = 0; |
| 302 int32_t i = 2; |
| 303 int32_t ai=0; |
| 304 |
| 305 if(!status || U_FAILURE(*status)){ |
| 306 return 0; |
| 307 } |
| 308 /* the source is null terminated */ |
| 309 if(srcLen == -1){ |
| 310 srcLen = u_strlen(src); |
| 311 } |
| 312 if(srcLen <= 2){ |
| 313 return 2; |
| 314 } |
| 315 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); |
| 316 |
| 317 if(target == NULL){ |
| 318 return length; |
| 319 } |
| 320 if(tgtLen < length){ |
| 321 *status = U_BUFFER_OVERFLOW_ERROR; |
| 322 return length; |
| 323 } |
| 324 |
| 325 for (; ai<tgtLen; ) { |
| 326 /* This part of the loop places the next byte into the local |
| 327 * variable 'b' each time through the loop. It keeps the |
| 328 * current character in 'c' and uses the boolean 'nextChar' |
| 329 * to see if we've taken both bytes out of 'c' yet. |
| 330 */ |
| 331 uint8_t b; |
| 332 if (nextChar) { |
| 333 c = src[i++]; |
| 334 b = (uint8_t) (c >> 8); |
| 335 nextChar = FALSE; |
| 336 } |
| 337 else { |
| 338 b = (uint8_t) (c & 0xFF); |
| 339 nextChar = TRUE; |
| 340 } |
| 341 |
| 342 /* This part of the loop is a tiny state machine which handles |
| 343 * the parsing of the run-length encoding. This would be simpler |
| 344 * if we could look ahead, but we can't, so we use 'node' to |
| 345 * move between three nodes in the state machine. |
| 346 */ |
| 347 switch (node) { |
| 348 case 0: |
| 349 /* Normal idle node */ |
| 350 if (b == ESCAPE_BYTE) { |
| 351 node = 1; |
| 352 } |
| 353 else { |
| 354 target[ai++] = b; |
| 355 } |
| 356 break; |
| 357 case 1: |
| 358 /* We have seen one ESCAPE_BYTE; we expect either a second |
| 359 * one, or a run length and value. |
| 360 */ |
| 361 if (b == ESCAPE_BYTE) { |
| 362 target[ai++] = ESCAPE_BYTE; |
| 363 node = 0; |
| 364 } |
| 365 else { |
| 366 runLength = b; |
| 367 node = 2; |
| 368 } |
| 369 break; |
| 370 case 2: |
| 371 { |
| 372 int j=0; |
| 373 /* We have seen an ESCAPE_BYTE and length byte. We interpret |
| 374 * the next byte as the value to be repeated. |
| 375 */ |
| 376 for (; j<runLength; ++j){ |
| 377 if(ai<tgtLen){ |
| 378 target[ai++] = b; |
| 379 }else{ |
| 380 *status = U_BUFFER_OVERFLOW_ERROR; |
| 381 return ai; |
| 382 } |
| 383 } |
| 384 node = 0; |
| 385 break; |
| 386 } |
| 387 } |
| 388 } |
| 389 |
| 390 if (node != 0){ |
| 391 *status = U_INTERNAL_PROGRAM_ERROR; |
| 392 /*("Bad run-length encoded byte array")*/ |
| 393 return 0; |
| 394 } |
| 395 |
| 396 |
| 397 if (i != srcLen){ |
| 398 /*("Excess data in RLE byte array string");*/ |
| 399 *status = U_INTERNAL_PROGRAM_ERROR; |
| 400 return ai; |
| 401 } |
| 402 |
| 403 return ai; |
| 404 } |
| 405 |
OLD | NEW |