| Index: icu46/source/tools/genrb/rle.c
|
| ===================================================================
|
| --- icu46/source/tools/genrb/rle.c (revision 0)
|
| +++ icu46/source/tools/genrb/rle.c (revision 0)
|
| @@ -0,0 +1,405 @@
|
| +/*
|
| +*******************************************************************************
|
| +*
|
| +* Copyright (C) 2000-2003, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*
|
| +*******************************************************************************
|
| +*
|
| +* File rle.c
|
| +*
|
| +* Modification History:
|
| +*
|
| +* Date Name Description
|
| +* 01/11/02 Ram Creation.
|
| +*******************************************************************************
|
| +*/
|
| +#include "rle.h"
|
| +/**
|
| + * ESCAPE is the marker unit of the run-length encoding for 16-bit arrays:
|
| + * it is doubled for a literal 0xA5A5 and otherwise introduces a run.
|
| + */
|
| +static const uint16_t ESCAPE = 0xA5A5;
|
| +
|
| +/**
|
| + * ESCAPE_BYTE plays the same role in the run-length encoding of byte
|
| + * arrays, where the encoded units are bytes rather than 16-bit values.
|
| + */
|
| +static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
|
| +
|
| +/**
|
| + * Append a byte to the output, packing two bytes into each 16-bit unit
|
| + * (first byte in the high half). The state parameter maintains
|
| + * intermediary data between calls.
|
| + * @param state A two-element array, with state[0] == 0 if this is the first
|
| + * byte of a pair; otherwise state[1] holds the first byte of the pair.
|
| + * @return the write pointer, advanced if a unit was stored. If status is NULL
|
| + * or already a failure, the pointer is returned unchanged rather than NULL.
|
| + */
|
| +static uint16_t*
|
| +appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
|
| +    if(!status || U_FAILURE(*status)){
|
| +        return buffer; /* NULL here would corrupt the caller's length arithmetic */
|
| +    }
|
| +    if (state[0] == 0) {
|
| +        /* first byte of a pair: only remember it */
|
| +        state[0] = 1;
|
| +        state[1] = value;
|
| +        return buffer;
|
| +    }
|
| +    /* second byte: store the completed unit, or report that it does not fit */
|
| +    if(buffer < buffLimit){
|
| +        *buffer++ = (uint16_t) ((state[1] << 8) | value);
|
| +    }else{
|
| +        *status = U_BUFFER_OVERFLOW_ERROR;
|
| +    }
|
| +    state[0] = 0;
|
| +    return buffer;
|
| +}
|
| +/**
|
| + * Encode a run, possibly a degenerate run (of < 4 values).
|
| + * @param length The length of the run; must be > 0 && <= 0xFF.
|
| + * @return the write pointer; unchanged (not NULL) if status is NULL or failed.
|
| + */
|
| +static uint16_t*
|
| +encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
|
| +    if(!status || U_FAILURE(*status)){
|
| +        return buffer;
|
| +    }
|
| +    if (length < 4) {
|
| +        int32_t j=0;
|
| +        for (; j<length; ++j) {
|
| +            if (value == ESCAPE_BYTE) {
|
| +                buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
|
| +            }
|
| +            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
|
| +        }
|
| +        return buffer;
|
| +    }
|
| +    if (length == ESCAPE_BYTE) {
|
| +        if (value == ESCAPE_BYTE){
|
| +            buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
|
| +        }
|
| +        buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
|
| +        --length; /* one literal was emitted, so the run count is never ESCAPE_BYTE */
|
| +    }
|
| +    buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
|
| +    buffer = appendEncodedByte(buffer,bufLimit, (uint8_t)length, state, status);
|
| +    buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
|
| +    return buffer;
|
| +}
|
| +
|
| +/* Store value at *buffer and advance it; if the buffer is full, set *status
|
| + * to U_BUFFER_OVERFLOW_ERROR instead. num is incremented either way.
|
| + * Wrapped in do/while(0) so that APPEND(...); is a single statement. */
|
| +#define APPEND( buffer, bufLimit, value, num, status) do { \
|
| +    if((buffer)<(bufLimit)){ *(buffer)++=(value); } \
|
| +    else{ *(status) = U_BUFFER_OVERFLOW_ERROR; } \
|
| +    (num)++; \
|
| +} while (0)
|
| +
|
| +/**
|
| + * Write one run of 16-bit values. A run shorter than 4 is copied out
|
| + * value by value, with every ESCAPE doubled; a longer run is written as
|
| + * ESCAPE, count, value. A count equal to ESCAPE is avoided by emitting
|
| + * one (escaped) literal first and then a run of length - 1.
|
| + * @param length The length of the run; must be > 0 && <= 0xFFFF.
|
| + * @return the write pointer after the data appended here.
|
| + */
|
| +static uint16_t*
|
| +encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
|
| +    int32_t written=0; /* bumped by APPEND; not read afterwards */
|
| +    if (length >= 4) {
|
| +        if (length == (int32_t) ESCAPE) {
|
| +            if (value == ESCAPE){
|
| +                APPEND(buffer,bufLimit,ESCAPE,written,status);
|
| +            }
|
| +            APPEND(buffer,bufLimit,value,written,status);
|
| +            --length;
|
| +        }
|
| +        APPEND(buffer,bufLimit,ESCAPE,written,status);
|
| +        APPEND(buffer,bufLimit,(uint16_t) length,written,status);
|
| +        APPEND(buffer,bufLimit,value,written,status); /* the run value is never escaped */
|
| +        return buffer;
|
| +    }
|
| +    while (length-- > 0) {
|
| +        if (value == ESCAPE){
|
| +            APPEND(buffer,bufLimit,ESCAPE,written,status);
|
| +        }
|
| +        APPEND(buffer,bufLimit,value,written,status);
|
| +    }
|
| +    return buffer;
|
| +}
|
| +
|
| +/**
|
| + * Construct a string representing a char array. Use run-length encoding.
|
| + * The output begins with srcLen in two units, high half first. After that
|
| + * a character represents itself, unless it is ESCAPE, which starts one of:
|
| + *   ESCAPE ESCAPE   ESCAPE literal
|
| + *   ESCAPE n c      n instances of character c
|
| + * Since an encoded run occupies 3 characters, we only encode runs of 4 or
|
| + * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
|
| + * A run where n == ESCAPE is represented as: c ESCAPE n-1 c
|
| + * @return on success the number of units written (just the length for an empty
|
| + * src); 0 if status is NULL or a failure. Overflow sets U_BUFFER_OVERFLOW_ERROR.
|
| + */
|
| +int32_t
|
| +usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
|
| +    uint16_t* bufLimit = buffer+bufLen;
|
| +    uint16_t* saveBuffer = buffer;
|
| +    int32_t i=1;
|
| +    if(!status || U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +    if(buffer < bufLimit){
|
| +        *buffer++ = (uint16_t)(srcLen>>16);
|
| +    }
|
| +    if(buffer >= bufLimit){ /* the two-unit length does not fit */
|
| +        *status = U_BUFFER_OVERFLOW_ERROR;
|
| +        return (int32_t)(buffer - saveBuffer);
|
| +    }
|
| +    *buffer++ = (uint16_t) srcLen;
|
| +    if(srcLen > 0){ /* src[0] may only be read for a non-empty array */
|
| +        uint16_t runValue = src[0];
|
| +        int32_t runLength = 1;
|
| +        for (; i<srcLen; ++i) {
|
| +            if (src[i] == runValue && runLength < 0xFFFF){
|
| +                ++runLength;
|
| +            }else {
|
| +                buffer = encodeRunShort(buffer,bufLimit, runValue, runLength,status);
|
| +                runValue = src[i];
|
| +                runLength = 1;
|
| +            }
|
| +        }
|
| +        buffer= encodeRunShort(buffer,bufLimit,runValue, runLength,status);
|
| +    }
|
| +    return (int32_t)(buffer - saveBuffer);
|
| +}
|
| +
|
| +/**
|
| + * Construct a string representing a byte array. Use run-length encoding.
|
| + * The output begins with srcLen in two units, high half first. After that
|
| + * two bytes are packed into a single char, with a single extra zero byte at
|
| + * the end if needed. A byte represents itself, unless it is the
|
| + * ESCAPE_BYTE. Then the following notations are possible:
|
| + *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
|
| + *   ESCAPE_BYTE n b           n instances of byte b
|
| + * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
|
| + * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
|
| + * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
|
| + *   b ESCAPE_BYTE n-1 b
|
| + * @return on success the number of units written (just the length for an empty
|
| + * src); 0 if status is NULL or a failure. Overflow sets U_BUFFER_OVERFLOW_ERROR.
|
| + */
|
| +int32_t
|
| +byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
|
| +    const uint16_t* saveBuf = buffer;
|
| +    uint16_t* bufLimit = buffer+bufLen;
|
| +    uint8_t state[2]= {0};
|
| +    int32_t i=1;
|
| +    if(!status || U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +    if(buffer < bufLimit){
|
| +        *buffer++ = ((uint16_t) (srcLen >> 16));
|
| +    }
|
| +    if(buffer >= bufLimit){ /* the two-unit length does not fit */
|
| +        *status = U_BUFFER_OVERFLOW_ERROR;
|
| +        return (int32_t) (buffer - saveBuf);
|
| +    }
|
| +    *buffer++=((uint16_t) srcLen);
|
| +    if(srcLen > 0){ /* src[0] may only be read for a non-empty array */
|
| +        uint8_t runValue = src[0];
|
| +        int32_t runLength = 1;
|
| +        for (; i<srcLen; ++i) {
|
| +            if (src[i] == runValue && runLength < 0xFF){
|
| +                ++runLength;
|
| +            }else {
|
| +                buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
|
| +                runValue = src[i];
|
| +                runLength = 1;
|
| +            }
|
| +        }
|
| +        buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
|
| +    }
|
| +    /* We must save the final byte, if there is one, by padding
|
| +     * an extra zero.
|
| +     */
|
| +    if (state[0] != 0) {
|
| +        buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
|
| +    }
|
| +    return (int32_t) (buffer - saveBuf);
|
| +}
|
| +
|
| +
|
| +/**
|
| + * Construct an array of shorts from a run-length encoded string.
|
| + * @param srcLen length of src, or -1 to have it measured with u_strlen.
|
| + * @param target receives the values; if NULL only the length is returned.
|
| + * @return the length stored in the first two units of src (0 if status is
|
| + * NULL or failed, 2 if srcLen <= 2). U_INTERNAL_PROGRAM_ERROR is set
|
| + * if src is truncated or does not decode to exactly that length.
|
| + */
|
| +int32_t
|
| +rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
|
| +    int32_t length = 0;
|
| +    int32_t ai = 0;
|
| +    int32_t i=2;
|
| +    if(!status || U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +    /* the source is null terminated */
|
| +    if(srcLen == -1){
|
| +        srcLen = u_strlen(src);
|
| +    }
|
| +    if(srcLen <= 2){
|
| +        return 2;
|
| +    }
|
| +    length = (int32_t) ((((uint32_t) src[0]) << 16) | src[1]); /* unsigned shift: no overflow */
|
| +    if(target == NULL){
|
| +        return length;
|
| +    }
|
| +    if(tgtLen < length){
|
| +        *status = U_BUFFER_OVERFLOW_ERROR;
|
| +        return length;
|
| +    }
|
| +    for (; i<srcLen; ++i) {
|
| +        uint16_t c = src[i];
|
| +        int32_t runLength = 1;
|
| +        if (c == ESCAPE && i+1 < srcLen && src[i+1] == ESCAPE) {
|
| +            ++i; /* ESCAPE ESCAPE: one literal ESCAPE */
|
| +        } else if (c == ESCAPE && i+2 < srcLen) {
|
| +            runLength = (int32_t) src[i+1]; /* ESCAPE n c: n copies of c */
|
| +            c = src[i+2];
|
| +            i += 2;
|
| +        } else if (c == ESCAPE) {
|
| +            break; /* escape sequence cut off by the end of src */
|
| +        }
|
| +        if (runLength > length - ai) {
|
| +            break; /* more data than announced: never write past length */
|
| +        }
|
| +        for (; runLength > 0; --runLength) {
|
| +            target[ai++] = c;
|
| +        }
|
| +    }
|
| +    if (i < srcLen || ai != length){
|
| +        *status = U_INTERNAL_PROGRAM_ERROR;
|
| +    }
|
| +    return length;
|
| +}
|
| +
|
| +/**
|
| + * Construct an array of bytes from a run-length encoded string.
|
| + * @param srcLen length of src, or -1 to have it measured with u_strlen.
|
| + * @param target receives the bytes; if NULL only the length is returned.
|
| + * @return the number of bytes written to target. Early exits return the
|
| + * length from the header (target NULL or tgtLen too small), 2
|
| + * (srcLen <= 2) or 0 (failed status, or data ending inside an escape).
|
| + */
|
| +int32_t
|
| +rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
|
| +    int32_t length = 0;
|
| +    UBool nextChar = TRUE;
|
| +    uint16_t c = 0;
|
| +    int32_t node = 0;
|
| +    int32_t runLength = 0;
|
| +    int32_t i = 2;
|
| +    int32_t ai=0;
|
| +    if(!status || U_FAILURE(*status)){
|
| +        return 0;
|
| +    }
|
| +    /* the source is null terminated */
|
| +    if(srcLen == -1){
|
| +        srcLen = u_strlen(src);
|
| +    }
|
| +    if(srcLen <= 2){
|
| +        return 2;
|
| +    }
|
| +    length = (int32_t) ((((uint32_t) src[0]) << 16) | src[1]); /* unsigned shift: no overflow */
|
| +    if(target == NULL){
|
| +        return length;
|
| +    }
|
| +    if(tgtLen < length){
|
| +        *status = U_BUFFER_OVERFLOW_ERROR;
|
| +        return length;
|
| +    }
|
| +    for (; ai<length; ) {
|
| +        /* This part of the loop places the next byte into the local
|
| +         * variable 'b' each time through the loop. It keeps the
|
| +         * current character in 'c' and uses the boolean 'nextChar'
|
| +         * to see if we've taken both bytes out of 'c' yet.
|
| +         */
|
| +        uint8_t b;
|
| +        if (nextChar) {
|
| +            if (i >= srcLen) {
|
| +                break; /* src ended before 'length' bytes were decoded */
|
| +            }
|
| +            c = src[i++];
|
| +            b = (uint8_t) (c >> 8);
|
| +            nextChar = FALSE;
|
| +        }
|
| +        else {
|
| +            b = (uint8_t) (c & 0xFF);
|
| +            nextChar = TRUE;
|
| +        }
|
| +
|
| +        /* This part of the loop is a tiny state machine which handles
|
| +         * the parsing of the run-length encoding. This would be simpler
|
| +         * if we could look ahead, but we can't, so we use 'node' to
|
| +         * move between three nodes in the state machine.
|
| +         */
|
| +        switch (node) {
|
| +        case 0:
|
| +            /* Normal idle node */
|
| +            if (b == ESCAPE_BYTE) {
|
| +                node = 1;
|
| +            }
|
| +            else {
|
| +                target[ai++] = b;
|
| +            }
|
| +            break;
|
| +        case 1:
|
| +            /* We have seen one ESCAPE_BYTE; we expect either a second
|
| +             * one, or a run length and value.
|
| +             */
|
| +            if (b == ESCAPE_BYTE) {
|
| +                target[ai++] = ESCAPE_BYTE;
|
| +                node = 0;
|
| +            }
|
| +            else {
|
| +                runLength = b;
|
| +                node = 2;
|
| +            }
|
| +            break;
|
| +        case 2:
|
| +            {
|
| +                int j=0;
|
| +                /* We have seen an ESCAPE_BYTE and length byte. We interpret
|
| +                 * the next byte as the value to be repeated, up to 'length'.
|
| +                 */
|
| +                for (; j<runLength; ++j){
|
| +                    if(ai<length){
|
| +                        target[ai++] = b;
|
| +                    }else{
|
| +                        *status = U_BUFFER_OVERFLOW_ERROR;
|
| +                        return ai;
|
| +                    }
|
| +                }
|
| +                node = 0;
|
| +                break;
|
| +            }
|
| +        }
|
| +    }
|
| +    if (node != 0){
|
| +        *status = U_INTERNAL_PROGRAM_ERROR;
|
| +        /*("Bad run-length encoded byte array")*/
|
| +        return 0;
|
| +    }
|
| +    if (i != srcLen || ai != length){
|
| +        /* excess data in src, or fewer bytes than the header announced */
|
| +        *status = U_INTERNAL_PROGRAM_ERROR;
|
| +        return ai;
|
| +    }
|
| +    return ai;
|
| +}
|
| +
|
|
|
| Property changes on: icu46/source/tools/genrb/rle.c
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|