icu46/source/tools/genrb/rle.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/tools/genrb/rle.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/tools/genrb/rle.c

===================================================================

--- icu46/source/tools/genrb/rle.c (revision 0)

+++ icu46/source/tools/genrb/rle.c (revision 0)

@@ -0,0 +1,405 @@

+/*

+*******************************************************************************

+* File writejava.c

+* Modification History:

+* Date Name Description

+* 01/11/02 Ram Creation.

+*******************************************************************************

+*/

+#include "rle.h"

+/**

+ * The ESCAPE character is used during run-length encoding. It signals

+ * a run of identical chars.

+ */

+static const uint16_t ESCAPE = 0xA5A5;

+/**

+ * The ESCAPE_BYTE character is used during run-length encoding. It signals

+ * a run of identical bytes.

+ */

+static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;

+/**

+ * Append a byte to the given StringBuffer, packing two bytes into each

+ * character. The state parameter maintains intermediary data between

+ * calls.

+ * @param state A two-element array, with state[0] == 0 if this is the

+ * first byte of a pair, or state[0] != 0 if this is the second byte

+ * of a pair, in which case state[1] is the first byte.

+ */

+static uint16_t*

+appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {

+ if(!status || U_FAILURE(*status)){

+ return NULL;

+ }

+ if (state[0] != 0) {

+ uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));

+ if(buffer < buffLimit){

+ *buffer++ = c;

+ }else{

+ *status = U_BUFFER_OVERFLOW_ERROR;

+ }

+ state[0] = 0;

+ return buffer;

+ }

+ else {

+ state[0] = 1;

+ state[1] = value;

+ return buffer;

+ }

+/**

+ * Encode a run, possibly a degenerate run (of < 4 values).

+ * @param length The length of the run; must be > 0 && <= 0xFF.

+ */

+static uint16_t*

+encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {

+ if(!status || U_FAILURE(*status)){

+ return NULL;

+ }

+ if (length < 4) {

+ int32_t j=0;

+ for (; j<length; ++j) {

+ if (value == ESCAPE_BYTE) {

+ buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);

+ }

+ buffer = appendEncodedByte(buffer,bufLimit, value, state, status);

+ }

+ else {

+ if (length == ESCAPE_BYTE) {

+ if (value == ESCAPE_BYTE){

+ buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);

+ }

+ buffer = appendEncodedByte(buffer,bufLimit, value, state, status);

+ --length;

+ }

+ buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);

+ buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);

+ buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/

+ }

+ return buffer;

+#define APPEND( buffer, bufLimit, value, num, status){ \

+ if(buffer<bufLimit){ \

+ *buffer++=(value); \

+ }else{ \

+ *status = U_BUFFER_OVERFLOW_ERROR; \

+ } \

+ num++; \

+/**

+ * Encode a run, possibly a degenerate run (of < 4 values).

+ * @param length The length of the run; must be > 0 && <= 0xFFFF.

+ */

+static uint16_t*

+encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {

+ int32_t num=0;

+ if (length < 4) {

+ int j=0;

+ for (; j<length; ++j) {

+ if (value == (int32_t) ESCAPE){

+ APPEND(buffer,bufLimit,ESCAPE, num, status);

+ }

+ APPEND(buffer,bufLimit,value,num, status);

+ }

+ else {

+ if (length == (int32_t) ESCAPE) {

+ if (value == (int32_t) ESCAPE){

+ APPEND(buffer,bufLimit,ESCAPE,num,status);

+ }

+ APPEND(buffer,bufLimit,value,num,status);

+ --length;

+ }

+ APPEND(buffer,bufLimit,ESCAPE,num,status);

+ APPEND(buffer,bufLimit,(uint16_t) length, num,status);

+ APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */

+ }

+ return buffer;

+/**

+ * Construct a string representing a char array. Use run-length encoding.

+ * A character represents itself, unless it is the ESCAPE character. Then

+ * the following notations are possible:

+ * ESCAPE ESCAPE ESCAPE literal

+ * ESCAPE n c n instances of character c

+ * Since an encoded run occupies 3 characters, we only encode runs of 4 or

+ * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.

+ * If we encounter a run where n == ESCAPE, we represent this as:

+ * c ESCAPE n-1 c

+ * The ESCAPE value is chosen so as not to collide with commonly

+ * seen values.

+ */

+int32_t

+usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {

+ uint16_t* bufLimit = buffer+bufLen;

+ uint16_t* saveBuffer = buffer;

+ if(buffer < bufLimit){

+ *buffer++ = (uint16_t)(srcLen>>16);

+ if(buffer<bufLimit){

+ uint16_t runValue = src[0];

+ int32_t runLength = 1;

+ int i=1;

+ *buffer++ = (uint16_t) srcLen;

+ for (; i<srcLen; ++i) {

+ uint16_t s = src[i];

+ if (s == runValue && runLength < 0xFFFF){

+ ++runLength;

+ }else {

+ buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);

+ runValue = s;

+ runLength = 1;

+ }

+ buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);

+ }else{

+ *status = U_BUFFER_OVERFLOW_ERROR;

+ }

+ }else{

+ *status = U_BUFFER_OVERFLOW_ERROR;

+ }

+ return (int32_t)(buffer - saveBuffer);

+/**

+ * Construct a string representing a byte array. Use run-length encoding.

+ * Two bytes are packed into a single char, with a single extra zero byte at

+ * the end if needed. A byte represents itself, unless it is the

+ * ESCAPE_BYTE. Then the following notations are possible:

+ * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal

+ * ESCAPE_BYTE n b n instances of byte b

+ * Since an encoded run occupies 3 bytes, we only encode runs of 4 or

+ * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.

+ * If we encounter a run where n == ESCAPE_BYTE, we represent this as:

+ * b ESCAPE_BYTE n-1 b

+ * The ESCAPE_BYTE value is chosen so as not to collide with commonly

+ * seen values.

+ */

+int32_t

+byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {

+ const uint16_t* saveBuf = buffer;

+ uint16_t* bufLimit = buffer+bufLen;

+ if(buffer < bufLimit){

+ *buffer++ = ((uint16_t) (srcLen >> 16));

+ if(buffer<bufLimit){

+ uint8_t runValue = src[0];

+ int runLength = 1;

+ uint8_t state[2]= {0};

+ int i=1;

+ *buffer++=((uint16_t) srcLen);

+ for (; i<srcLen; ++i) {

+ uint8_t b = src[i];

+ if (b == runValue && runLength < 0xFF){

+ ++runLength;

+ }

+ else {

+ buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);

+ runValue = b;

+ runLength = 1;

+ }

+ buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);

+ /* We must save the final byte, if there is one, by padding

+ * an extra zero.

+ */

+ if (state[0] != 0) {

+ buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);

+ }

+ }else{

+ *status = U_BUFFER_OVERFLOW_ERROR;

+ }

+ }else{

+ *status = U_BUFFER_OVERFLOW_ERROR;

+ }

+ return (int32_t) (buffer - saveBuf);

+/**

+ * Construct an array of shorts from a run-length encoded string.

+ */

+int32_t

+rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {

+ int32_t length = 0;

+ int32_t ai = 0;

+ int i=2;

+ if(!status || U_FAILURE(*status)){

+ return 0;

+ }

+ /* the source is null terminated */

+ if(srcLen == -1){

+ srcLen = u_strlen(src);

+ }

+ if(srcLen <= 2){

+ return 2;

+ }

+ length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);

+ if(target == NULL){

+ return length;

+ }

+ if(tgtLen < length){

+ *status = U_BUFFER_OVERFLOW_ERROR;

+ return length;

+ }

+ for (; i<srcLen; ++i) {

+ uint16_t c = src[i];

+ if (c == ESCAPE) {

+ c = src[++i];

+ if (c == ESCAPE) {

+ target[ai++] = c;

+ } else {

+ int32_t runLength = (int32_t) c;

+ uint16_t runValue = src[++i];

+ int j=0;

+ for (; j<runLength; ++j) {

+ target[ai++] = runValue;

+ }

+ else {

+ target[ai++] = c;

+ }

+ if (ai != length){

+ *status = U_INTERNAL_PROGRAM_ERROR;

+ }

+ return length;

+/**

+ * Construct an array of bytes from a run-length encoded string.

+ */

+int32_t

+rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {

+ int32_t length = 0;

+ UBool nextChar = TRUE;

+ uint16_t c = 0;

+ int32_t node = 0;

+ int32_t runLength = 0;

+ int32_t i = 2;

+ int32_t ai=0;

+ if(!status || U_FAILURE(*status)){

+ return 0;

+ }

+ /* the source is null terminated */

+ if(srcLen == -1){

+ srcLen = u_strlen(src);

+ }

+ if(srcLen <= 2){

+ return 2;

+ }

+ length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);

+ if(target == NULL){

+ return length;

+ }

+ if(tgtLen < length){

+ *status = U_BUFFER_OVERFLOW_ERROR;

+ return length;

+ }

+ for (; ai<tgtLen; ) {

+ /* This part of the loop places the next byte into the local

+ * variable 'b' each time through the loop. It keeps the

+ * current character in 'c' and uses the boolean 'nextChar'

+ * to see if we've taken both bytes out of 'c' yet.

+ */

+ uint8_t b;

+ if (nextChar) {

+ c = src[i++];

+ b = (uint8_t) (c >> 8);

+ nextChar = FALSE;

+ }

+ else {

+ b = (uint8_t) (c & 0xFF);

+ nextChar = TRUE;

+ }

+ /* This part of the loop is a tiny state machine which handles

+ * the parsing of the run-length encoding. This would be simpler

+ * if we could look ahead, but we can't, so we use 'node' to

+ * move between three nodes in the state machine.

+ */

+ switch (node) {

+ case 0:

+ /* Normal idle node */

+ if (b == ESCAPE_BYTE) {

+ node = 1;

+ }

+ else {

+ target[ai++] = b;

+ }

+ break;

+ case 1:

+ /* We have seen one ESCAPE_BYTE; we expect either a second

+ * one, or a run length and value.

+ */

+ if (b == ESCAPE_BYTE) {

+ target[ai++] = ESCAPE_BYTE;

+ node = 0;

+ }

+ else {

+ runLength = b;

+ node = 2;

+ }

+ break;

+ case 2:

+ {

+ int j=0;

+ /* We have seen an ESCAPE_BYTE and length byte. We interpret

+ * the next byte as the value to be repeated.

+ */

+ for (; j<runLength; ++j){

+ if(ai<tgtLen){

+ target[ai++] = b;

+ }else{

+ *status = U_BUFFER_OVERFLOW_ERROR;

+ return ai;

+ }

+ node = 0;

+ break;

+ }

+ if (node != 0){

+ *status = U_INTERNAL_PROGRAM_ERROR;

+ /*("Bad run-length encoded byte array")*/

+ return 0;

+ }

+ if (i != srcLen){

+ /*("Excess data in RLE byte array string");*/

+ *status = U_INTERNAL_PROGRAM_ERROR;

+ return ai;

+ }

+ return ai;

Property changes on: icu46/source/tools/genrb/rle.c

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/tools/genrb/rle.h ('k') | icu46/source/tools/genrb/ustr.h » ('j') | no next file with comments »