Index: source/tools/genrb/reslist.c |
diff --git a/source/tools/genrb/reslist.c b/source/tools/genrb/reslist.c |
index 829dc22f7e57a245aba8672e1d002f2f9ef89702..4e63d4fb647527221964cedea0c123fa1f23f544 100644 |
--- a/source/tools/genrb/reslist.c |
+++ b/source/tools/genrb/reslist.c |
@@ -1,7 +1,7 @@ |
/* |
******************************************************************************* |
* |
-* Copyright (C) 2000-2012, International Business Machines |
+* Copyright (C) 2000-2014, International Business Machines |
* Corporation and others. All Rights Reserved. |
* |
******************************************************************************* |
@@ -25,6 +25,7 @@ |
#include "uarrsort.h" |
#include "uelement.h" |
+#include "uhash.h" |
#include "uinvchar.h" |
#include "ustr_imp.h" |
#include "unicode/utf16.h" |
@@ -105,6 +106,19 @@ bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status); |
/* Writing Functions */ |
/* |
+ * Preflight strings. |
+ * Find duplicates and count the total number of string code units |
+ * so that they can be written first to the 16-bit array, |
+ * for minimal string and container storage. |
+ * |
+ * We walk the final parse tree, rather than collecting this information while building it, |
+ * so that we need not deal with changes to the parse tree (especially removing resources). |
+ */ |
+static void |
+res_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet, |
+ UErrorCode *status); |
+ |
+/* |
* type_write16() functions write resource values into f16BitUnits |
* and determine the resource item word, if possible. |
*/ |
@@ -141,6 +155,92 @@ res_write(UNewDataMemory *mem, uint32_t *byteOffset, |
struct SRBRoot *bundle, struct SResource *res, |
UErrorCode *status); |
+static void |
+string_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet, |
+ UErrorCode *status) { /* Preflight one string resource: record whether it duplicates an earlier-visited string and, if it does not, count the 16-bit units it will need. */ |
+ res->u.fString.fSame = uhash_get(stringSet, res); /* entry already registered in stringSet for this key, or NULL if there is none */ |
+ if (res->u.fString.fSame != NULL) { |
+ return; /* This is a duplicate of an earlier-visited string. */ |
+ } |
+ /* Put this string into the set for finding duplicates. The resource is both key and value. *status is not tested here; bundle_compactStrings() checks it after the tree walk. */ |
+ uhash_put(stringSet, res, res, status); |
+ |
+ if (bundle->fStringsForm != STRINGS_UTF16_V1) { /* only the non-V1 string forms are counted in f16BitUnitsLength */ |
+ const UChar *s = res->u.fString.fChars; |
+ int32_t len = res->u.fString.fLength; |
+ if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(s[0]) && len == u_strlen(s)) { |
+ /* |
+ * This string will be stored without an explicit length. |
+ * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen(). |
+ * The len == u_strlen(s) test excludes strings with an embedded NUL, |
+ * for which u_strlen() would report a shorter length. |
+ */ |
+ res->u.fString.fNumCharsForLength = 0; |
+ } else if (len <= 0x3ee) { /* explicit length: one extra unit is counted below */ |
+ res->u.fString.fNumCharsForLength = 1; |
+ } else if (len <= 0xfffff) { /* explicit length: two extra units are counted below */ |
+ res->u.fString.fNumCharsForLength = 2; |
+ } else { /* explicit length: three extra units are counted below */ |
+ res->u.fString.fNumCharsForLength = 3; |
+ } |
+ bundle->f16BitUnitsLength += res->u.fString.fNumCharsForLength + len + 1; /* length units + contents, +1 for the NUL */ |
+ } |
+} |
+ |
+static void |
+array_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet, |
+ UErrorCode *status) { /* Preflight every element of an array resource by recursing through res_preflightStrings(). */ |
+ struct SResource *current; |
+ |
+ if (U_FAILURE(*status)) { /* an earlier step already failed: leave the tree untouched */ |
+ return; |
+ } |
+ for (current = res->u.fArray.fFirst; current != NULL; current = current->fNext) { /* elements are chained through fNext */ |
+ res_preflightStrings(bundle, current, stringSet, status); |
+ } |
+} |
+ |
+static void |
+table_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet, |
+ UErrorCode *status) { /* Preflight every item of a table resource by recursing through res_preflightStrings(). */ |
+ struct SResource *current; |
+ |
+ if (U_FAILURE(*status)) { /* an earlier step already failed: leave the tree untouched */ |
+ return; |
+ } |
+ for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) { /* items are chained through fNext */ |
+ res_preflightStrings(bundle, current, stringSet, status); |
+ } |
+} |
+ |
+static void |
+res_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet, |
+ UErrorCode *status) { /* Dispatch the string preflight by resource type; recursion into containers comes back through this function. */ |
+ if (U_FAILURE(*status) || res == NULL) { /* nothing to do on a prior failure or for a missing resource */ |
+ return; |
+ } |
+ if (res->fRes != RES_BOGUS) { |
+ /* |
+ * The resource item word was already precomputed, which means |
+ * no further data needs to be written. |
+ * This might be an integer, or an empty string/binary/etc. |
+ * Such resources are neither registered in stringSet nor counted. |
+ */ |
+ return; |
+ } |
+ switch (res->fType) { |
+ case URES_STRING: |
+ string_preflightStrings(bundle, res, stringSet, status); |
+ break; |
+ case URES_ARRAY: |
+ array_preflightStrings(bundle, res, stringSet, status); |
+ break; |
+ case URES_TABLE: |
+ table_preflightStrings(bundle, res, stringSet, status); |
+ break; |
+ default: |
+ /* Neither a string nor a container. Every other type is left untouched by the preflight. */ |
+ break; |
+ } |
+} |
+ |
static uint16_t * |
reserve16BitUnits(struct SRBRoot *bundle, int32_t length, UErrorCode *status) { |
if (U_FAILURE(*status)) { |
@@ -221,10 +321,7 @@ string_write16(struct SRBRoot *bundle, struct SResource *res, UErrorCode *status |
struct SResource *same; |
if ((same = res->u.fString.fSame) != NULL) { |
/* This is a duplicate. */ |
- if (same->fRes == RES_BOGUS) { |
- /* The original has not been visited yet. */ |
- string_write16(bundle, same, status); |
- } |
+ assert(same->fRes != RES_BOGUS && same->fWritten); |
res->fRes = same->fRes; |
res->fWritten = same->fWritten; |
} |
@@ -900,98 +997,43 @@ string_comp(const UElement key1, const UElement key2) { |
FALSE); |
} |
-struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { |
+static struct SResource * |
+stringbase_open(struct SRBRoot *bundle, const char *tag, int8_t type, |
+ const UChar *value, int32_t len, const struct UString* comment, |
+ UErrorCode *status) { /* Shared constructor behind string_open() and alias_open(): type is stored in fType (URES_STRING or URES_ALIAS from those callers). Returns NULL on failure. */ |
struct SResource *res = res_open(bundle, tag, comment, status); |
if (U_FAILURE(*status)) { |
return NULL; |
} |
- res->fType = URES_STRING; |
- |
- if (len == 0 && gFormatVersion > 1) { |
- res->u.fString.fChars = &gEmptyString; |
- res->fRes = 0; |
- res->fWritten = TRUE; |
- return res; |
- } |
- |
- res->u.fString.fLength = len; |
- |
- if (gFormatVersion > 1) { |
- /* check for duplicates */ |
- res->u.fString.fChars = (UChar *)value; |
- if (bundle->fStringSet == NULL) { |
- UErrorCode localStatus = U_ZERO_ERROR; /* if failure: just don't detect dups */ |
- bundle->fStringSet = uhash_open(string_hash, string_comp, string_comp, &localStatus); |
- } else { |
- res->u.fString.fSame = uhash_get(bundle->fStringSet, res); |
- } |
- } |
- if (res->u.fString.fSame == NULL) { |
- /* this is a new string */ |
- res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1)); |
- |
- if (res->u.fString.fChars == NULL) { |
- *status = U_MEMORY_ALLOCATION_ERROR; |
- uprv_free(res); |
- return NULL; |
- } |
- |
- uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * len); |
- res->u.fString.fChars[len] = 0; |
- if (bundle->fStringSet != NULL) { |
- /* put it into the set for finding duplicates */ |
- uhash_put(bundle->fStringSet, res, res, status); |
- } |
- |
- if (bundle->fStringsForm != STRINGS_UTF16_V1) { |
- if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(value[0]) && len == u_strlen(value)) { |
- /* |
- * This string will be stored without an explicit length. |
- * Runtime will detect !U16_IS_TRAIL(value[0]) and call u_strlen(). |
- */ |
- res->u.fString.fNumCharsForLength = 0; |
- } else if (len <= 0x3ee) { |
- res->u.fString.fNumCharsForLength = 1; |
- } else if (len <= 0xfffff) { |
- res->u.fString.fNumCharsForLength = 2; |
- } else { |
- res->u.fString.fNumCharsForLength = 3; |
- } |
- bundle->f16BitUnitsLength += res->u.fString.fNumCharsForLength + len + 1; /* +1 for the NUL */ |
- } |
- } else { |
- /* this is a duplicate of fSame */ |
- struct SResource *same = res->u.fString.fSame; |
- res->u.fString.fChars = same->u.fString.fChars; |
- } |
- return res; |
-} |
+ res->fType = type; |
-/* TODO: make alias_open and string_open use the same code */ |
-struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { |
- struct SResource *res = res_open(bundle, tag, comment, status); |
- if (U_FAILURE(*status)) { |
- return NULL; |
- } |
- res->fType = URES_ALIAS; |
if (len == 0 && gFormatVersion > 1) { /* empty value: point at the shared gEmptyString and precompute the resource word; nothing is allocated */ |
res->u.fString.fChars = &gEmptyString; |
- res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_ALIAS); |
+ res->fRes = URES_MAKE_EMPTY_RESOURCE(type); |
res->fWritten = TRUE; |
return res; |
} |
res->u.fString.fLength = len; |
- res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1)); |
+ res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1)); /* every resource now owns its own copy: len units plus the terminating NUL */ |
if (res->u.fString.fChars == NULL) { |
*status = U_MEMORY_ALLOCATION_ERROR; |
uprv_free(res); /* NOTE(review): frees only the struct; whether res_open() allocated anything else for res is not visible here -- confirm */ |
return NULL; |
} |
- uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * (len + 1)); |
+ uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * len); /* copy exactly len units from value */ |
+ res->u.fString.fChars[len] = 0; /* terminate explicitly instead of copying a NUL from value */ |
return res; |
} |
+struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { /* Create a URES_STRING resource; all work is delegated to stringbase_open(). */ |
+ return stringbase_open(bundle, tag, URES_STRING, value, len, comment, status); |
+} |
+ |
+struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { /* Create a URES_ALIAS resource; all work is delegated to stringbase_open(). */ |
+ return stringbase_open(bundle, tag, URES_ALIAS, value, len, comment, status); |
+} |
+ |
struct SResource* intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { |
struct SResource *res = res_open(bundle, tag, comment, status); |
@@ -1142,9 +1184,7 @@ static void array_close(struct SResource *array) { |
static void string_close(struct SResource *string) { |
if (string->u.fString.fChars != NULL && |
- string->u.fString.fChars != &gEmptyString && |
- string->u.fString.fSame == NULL |
- ) { |
+ string->u.fString.fChars != &gEmptyString) { |
uprv_free(string->u.fString.fChars); |
string->u.fString.fChars =NULL; |
} |
@@ -1218,18 +1258,10 @@ void bundle_close(struct SRBRoot *bundle, UErrorCode *status) { |
uprv_free(bundle->fLocale); |
uprv_free(bundle->fKeys); |
uprv_free(bundle->fKeyMap); |
- uhash_close(bundle->fStringSet); |
uprv_free(bundle->f16BitUnits); |
uprv_free(bundle); |
} |
-void bundle_closeString(struct SRBRoot *bundle, struct SResource *string) { |
- if (bundle->fStringSet != NULL) { |
- uhash_remove(bundle->fStringSet, string); |
- } |
- string_close(string); |
-} |
- |
/* Adding Functions */ |
void table_add(struct SResource *table, struct SResource *res, int linenumber, UErrorCode *status) { |
struct SResource *current = NULL; |
@@ -1664,14 +1696,22 @@ string_writeUTF16v2(struct SRBRoot *bundle, struct SResource *res, int32_t utf16 |
static void |
bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) { |
+ UHashtable *stringSet; |
+ if (gFormatVersion > 1) { |
+ stringSet = uhash_open(string_hash, string_comp, string_comp, status); |
+ res_preflightStrings(bundle, bundle->fRoot, stringSet, status); |
+ } else { |
+ stringSet = NULL; |
+ } |
if (U_FAILURE(*status)) { |
+ uhash_close(stringSet); |
return; |
} |
switch(bundle->fStringsForm) { |
case STRINGS_UTF16_V2: |
if (bundle->f16BitUnitsLength > 0) { |
struct SResource **array; |
- int32_t count = uhash_count(bundle->fStringSet); |
+ int32_t count = uhash_count(stringSet); |
int32_t i, pos; |
/* |
* Allocate enough space for the initial NUL and the UTF-16 v2 strings, |
@@ -1685,6 +1725,7 @@ bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) { |
uprv_free(bundle->f16BitUnits); |
bundle->f16BitUnits = NULL; |
uprv_free(array); |
+ uhash_close(stringSet); |
*status = U_MEMORY_ALLOCATION_ERROR; |
return; |
} |
@@ -1694,7 +1735,7 @@ bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) { |
utf16Length = 1; |
++bundle->f16BitUnitsLength; |
for (pos = -1, i = 0; i < count; ++i) { |
- array[i] = (struct SResource *)uhash_nextElement(bundle->fStringSet, &pos)->key.pointer; |
+ array[i] = (struct SResource *)uhash_nextElement(stringSet, &pos)->key.pointer; |
} |
/* Sort the strings so that each one is immediately followed by all of its suffixes. */ |
uprv_sortArray(array, count, (int32_t)sizeof(struct SResource **), |
@@ -1769,4 +1810,5 @@ bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) { |
default: |
break; |
} |
+ uhash_close(stringSet); |
} |