Index: icu46/patches/segmentation.patch |
=================================================================== |
--- icu46/patches/segmentation.patch (revision 69841) |
+++ icu46/patches/segmentation.patch (working copy) |
@@ -1,14 +1,6 @@ |
---- source/common/brkeng.cpp 2007-09-11 20:53:13.000000000 -0700 |
-+++ source/common/brkeng.cpp 2009-07-29 12:57:49.973382000 -0700 |
-@@ -24,6 +24,7 @@ |
- #include "umutex.h" |
- #include "uresimp.h" |
- #include "ubrkimpl.h" |
-+#include <stdio.h> |
- |
- U_NAMESPACE_BEGIN |
- |
-@@ -226,6 +227,30 @@ |
+--- source/common/brkeng.cpp 2009-11-11 07:47:22.000000000 -0800 |
++++ source/common/brkeng.cpp 2011-01-21 14:12:45.479922000 -0800 |
+@@ -226,6 +226,30 @@ |
case USCRIPT_THAI: |
engine = new ThaiBreakEngine(dict, status); |
break; |
@@ -39,7 +31,7 @@ |
default: |
break; |
} |
-@@ -281,6 +306,13 @@ |
+@@ -281,6 +305,13 @@ |
dict = NULL; |
} |
return dict; |
@@ -54,20 +46,18 @@ |
return NULL; |
} |
--- source/common/dictbe.cpp 2008-06-13 12:21:12.000000000 -0700 |
-+++ source/common/dictbe.cpp 2009-11-11 12:58:40.199829000 -0800 |
-@@ -16,6 +16,11 @@ |
++++ source/common/dictbe.cpp 2011-01-21 14:12:45.468928000 -0800 |
+@@ -16,6 +16,9 @@ |
#include "unicode/ubrk.h" |
#include "uvector.h" |
#include "triedict.h" |
+#include "uassert.h" |
+#include "unicode/normlzr.h" |
+#include "cmemory.h" |
-+ |
-+#include <stdio.h> |
U_NAMESPACE_BEGIN |
-@@ -422,6 +427,294 @@ |
+@@ -422,6 +425,294 @@ |
return wordsFound; |
} |
@@ -363,7 +353,7 @@ |
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
--- source/common/dictbe.h 2006-09-29 17:37:45.000000000 -0700 |
-+++ source/common/dictbe.h 2009-07-27 13:01:17.704415000 -0700 |
++++ source/common/dictbe.h 2011-01-21 14:12:45.492920000 -0800 |
@@ -1,8 +1,8 @@ |
/** |
- ******************************************************************************* |
@@ -371,13 +361,13 @@ |
- * All Rights Reserved. * |
- ******************************************************************************* |
+ ********************************************************************************** |
-+ * Copyright (C) 2006,2007, International Business Machines Corporation and others. |
++ * Copyright (C) 2006-2010, International Business Machines Corporation and others. |
+ * All Rights Reserved. |
+ ********************************************************************************** |
*/ |
#ifndef DICTBE_H |
-@@ -65,37 +65,37 @@ |
+@@ -65,31 +65,31 @@ |
*/ |
virtual ~DictionaryBreakEngine(); |
@@ -430,19 +420,8 @@ |
+ * @return The number of breaks found. |
+ */ |
virtual int32_t findBreaks( UText *text, |
-- int32_t startPos, |
-- int32_t endPos, |
-- UBool reverse, |
-- int32_t breakType, |
-- UStack &foundBreaks ) const; |
-+ int32_t startPos, |
-+ int32_t endPos, |
-+ UBool reverse, |
-+ int32_t breakType, |
-+ UStack &foundBreaks ) const; |
- |
- protected: |
- |
+ int32_t startPos, |
+ int32_t endPos, |
@@ -114,7 +114,7 @@ |
// virtual void setBreakTypes( uint32_t breakTypes ); |
@@ -461,16 +440,7 @@ |
* |
* @param text A UText representing the text |
* @param rangeStart The start of the range of dictionary characters |
-@@ -180,12 +180,72 @@ |
- * @return The number of breaks found |
- */ |
- virtual int32_t divideUpDictionaryRange( UText *text, |
-- int32_t rangeStart, |
-- int32_t rangeEnd, |
-- UStack &foundBreaks ) const; |
-+ int32_t rangeStart, |
-+ int32_t rangeEnd, |
-+ UStack &foundBreaks ) const; |
+@@ -186,6 +186,66 @@ |
}; |
@@ -537,34 +507,24 @@ |
U_NAMESPACE_END |
---- source/common/rbbi.cpp 2008-09-24 22:48:27.000000000 -0700 |
-+++ source/common/rbbi.cpp 2009-07-27 13:01:17.710416000 -0700 |
-@@ -29,6 +29,7 @@ |
- |
- #include "uassert.h" |
- #include "uvector.h" |
-+#include <stdio.h> |
- |
- // if U_LOCAL_SERVICE_HOOK is defined, then localsvc.cpp is expected to be included. |
- #if U_LOCAL_SERVICE_HOOK |
-@@ -1552,10 +1553,14 @@ |
+--- source/common/rbbi.cpp 2010-07-22 17:15:37.000000000 -0700 |
++++ source/common/rbbi.cpp 2011-01-21 14:12:45.457938000 -0800 |
+@@ -1555,10 +1555,12 @@ |
int32_t endPos, |
UBool reverse) { |
// Reset the old break cache first. |
- uint32_t dictionaryCount = fDictionaryCharCount; |
-+// uint32_t dictionaryCount = fDictionaryCharCount; |
reset(); |
- if (dictionaryCount <= 1 || (endPos - startPos) <= 1) { |
+ // note: code segment below assumes that dictionary chars are in the |
+ // startPos-endPos range |
+ // value returned should be next character in sequence |
-+// if (dictionaryCount <= 1 || (endPos - startPos) <= 1) { |
+ if ((endPos - startPos) <= 1) { |
return (reverse ? startPos : endPos); |
} |
-@@ -1684,7 +1689,7 @@ |
+@@ -1711,7 +1713,7 @@ |
// proposed break by one of the breaks we found. Use following() and |
// preceding() to do the work. They should never recurse in this case. |
if (reverse) { |
@@ -574,7 +534,7 @@ |
else { |
return following(startPos); |
--- source/common/triedict.cpp 2008-02-13 01:35:50.000000000 -0800 |
-+++ source/common/triedict.cpp 2009-07-27 13:01:17.718409000 -0700 |
++++ source/common/triedict.cpp 2011-01-21 14:12:45.271006000 -0800 |
@@ -20,6 +20,7 @@ |
#include "uvector.h" |
#include "uvectr32.h" |
@@ -613,7 +573,7 @@ |
-MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status ) { |
+MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status, |
-+ UBool containsValue /* = FALSE */ ) { |
++ UBool containsValue /* = FALSE */ ) { |
// Start the trie off with something. Having the root node already present |
// cuts a special case out of the search/insertion functions. |
// Making it a median character cuts the worse case for searches from |
@@ -627,7 +587,7 @@ |
-MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status ) { |
+MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status, |
-+ UBool containsValue /* = false */ ) { |
++ UBool containsValue /* = false */ ) { |
fTrie = NULL; |
fIter = utext_openUChars(NULL, NULL, 0, &status); |
if (U_SUCCESS(status) && fIter == NULL) { |
@@ -638,13 +598,23 @@ |
} |
MutableTrieDictionary::~MutableTrieDictionary() { |
-@@ -113,7 +130,8 @@ |
- int &count, |
- int limit, |
- TernaryNode *&parent, |
+@@ -108,12 +125,13 @@ |
+ |
+ int32_t |
+ MutableTrieDictionary::search( UText *text, |
+- int32_t maxLength, |
+- int32_t *lengths, |
+- int &count, |
+- int limit, |
+- TernaryNode *&parent, |
- UBool &pMatched ) const { |
-+ UBool &pMatched, |
-+ uint16_t *values /*=NULL*/) const { |
++ int32_t maxLength, |
++ int32_t *lengths, |
++ int &count, |
++ int limit, |
++ TernaryNode *&parent, |
++ UBool &pMatched, |
++ uint16_t *values /*=NULL*/) const { |
// TODO: current implementation works in UTF-16 space |
const TernaryNode *up = NULL; |
const TernaryNode *p = fTrie; |
@@ -700,20 +670,31 @@ |
U_ASSERT(uc != U_SENTINEL); |
TernaryNode *newNode = new TernaryNode(uc); |
if (newNode == NULL) { |
-@@ -199,7 +226,11 @@ |
+@@ -199,30 +226,23 @@ |
parent = newNode; |
} |
- parent->flags |= kEndsWord; |
+-} |
+- |
+-#if 0 |
+-void |
+-MutableTrieDictionary::addWords( UEnumeration *words, |
+- UErrorCode &status ) { |
+- int32_t length; |
+- const UChar *word; |
+- while ((word = uenum_unext(words, &length, &status)) && U_SUCCESS(status)) { |
+- addWord(word, length, status); |
+ if(fValued && value > 0){ |
+ parent->flags = value; |
+ } else { |
+ parent->flags |= kEndsWord; |
-+ } |
+ } |
} |
+-#endif |
- #if 0 |
-@@ -219,10 +250,11 @@ |
+ int32_t |
+ MutableTrieDictionary::matches( UText *text, |
int32_t maxLength, |
int32_t *lengths, |
int &count, |
@@ -727,7 +708,7 @@ |
} |
// Implementation of iteration for MutableTrieDictionary |
-@@ -277,7 +309,7 @@ |
+@@ -277,7 +297,7 @@ |
break; |
} |
case kEqual: |
@@ -736,7 +717,7 @@ |
equal = (node->equal != NULL); |
// If this node should be part of the next emitted string, append |
// the UChar to the string, and make sure we pop it when we come |
-@@ -299,7 +331,7 @@ |
+@@ -299,7 +319,7 @@ |
} |
case kGreaterThan: |
// If this node's character is in the string, remove it. |
@@ -745,11 +726,12 @@ |
unistr.truncate(unistr.length()-1); |
} |
if (node->high != NULL) { |
-@@ -354,12 +386,74 @@ |
+@@ -354,12 +374,75 @@ |
* CompactTrieDictionary |
*/ |
-+//TODO if time permits: minimise size of trie with logprobs by storing values |
++//TODO further optimization: |
++// minimise size of trie with logprobs by storing values |
+// for terminal nodes directly in offsets[] |
+// --> calculating from next offset *might* be simpler, but would have to add |
+// one last offset for logprob of last node |
@@ -821,7 +803,7 @@ |
}; |
// Note that to avoid platform-specific alignment issues, all members of the node |
-@@ -375,10 +469,14 @@ |
+@@ -375,10 +458,14 @@ |
enum CompactTrieNodeFlags { |
kVerticalNode = 0x1000, // This is a vertical node |
kParentEndsWord = 0x2000, // The node whose equal link points to this ends a word |
@@ -839,7 +821,7 @@ |
}; |
// The two node types are distinguished by the kVerticalNode flag. |
-@@ -402,63 +500,177 @@ |
+@@ -402,63 +489,177 @@ |
uint16_t chars[1]; // Code units |
}; |
@@ -914,23 +896,23 @@ |
CompactTrieDictionary::data() const { |
- return fData; |
+ return fInfo->address; |
- } |
- |
--// This function finds the address of a node for us, given its node ID |
++} |
++ |
+//This function finds the address of a node for us, given its node ID |
- static inline const CompactTrieNode * |
--getCompactNode(const CompactTrieHeader *header, uint16_t node) { |
-- return (const CompactTrieNode *)((const uint8_t *)header + header->offsets[node]); |
++static inline const CompactTrieNode * |
+getCompactNode(const CompactTrieInfo *info, uint32_t node) { |
+ if(node < info->root-1) { |
+ return (const CompactTrieNode *)(&info->offsets[node]); |
+ } else { |
+ return (const CompactTrieNode *)(info->address + info->offsets[node]); |
+ } |
-+} |
-+ |
+ } |
+ |
+-// This function finds the address of a node for us, given its node ID |
+//this version of getCompactNode is currently only used in compactMutableTrieDictionary() |
-+static inline const CompactTrieNode * |
+ static inline const CompactTrieNode * |
+-getCompactNode(const CompactTrieHeader *header, uint16_t node) { |
+- return (const CompactTrieNode *)((const uint8_t *)header + header->offsets[node]); |
+getCompactNode(const CompactTrieHeader *header, uint32_t node) { |
+ if(node < header->root-1) { |
+ return (const CompactTrieNode *)(&header->offsets[node]); |
@@ -1037,7 +1019,7 @@ |
} |
int32_t |
-@@ -466,17 +678,38 @@ |
+@@ -466,17 +667,38 @@ |
int32_t maxLength, |
int32_t *lengths, |
int &count, |
@@ -1078,7 +1060,7 @@ |
lengths[mycount++] = i; |
--limit; |
} |
-@@ -487,7 +720,7 @@ |
+@@ -487,7 +709,7 @@ |
break; |
} |
@@ -1087,7 +1069,7 @@ |
if (nodeCount == 0) { |
// Special terminal node; return now |
break; |
-@@ -507,35 +740,27 @@ |
+@@ -507,35 +729,27 @@ |
// To get here we must have come through the whole list successfully; |
// go on to the next node. Note that a word cannot end in the middle |
// of a vertical node. |
@@ -1137,7 +1119,7 @@ |
count = mycount; |
return i; |
} |
-@@ -545,16 +770,16 @@ |
+@@ -545,16 +759,16 @@ |
private: |
UVector32 fNodeStack; // Stack of nodes to process |
UVector32 fIndexStack; // Stack of where in node we are |
@@ -1158,7 +1140,7 @@ |
fIndexStack.push(0, status); |
unistr.remove(); |
} |
-@@ -564,14 +789,14 @@ |
+@@ -564,14 +778,14 @@ |
virtual StringEnumeration *clone() const { |
UErrorCode status = U_ZERO_ERROR; |
@@ -1175,7 +1157,7 @@ |
int32_t result = 0; |
while (counter.snext(status) != NULL && U_SUCCESS(status)) { |
++result; |
-@@ -582,7 +807,7 @@ |
+@@ -582,7 +796,7 @@ |
virtual void reset(UErrorCode &status) { |
fNodeStack.removeAllElements(); |
fIndexStack.removeAllElements(); |
@@ -1184,7 +1166,7 @@ |
fIndexStack.push(0, status); |
unistr.remove(); |
} |
-@@ -595,26 +820,34 @@ |
+@@ -595,26 +809,34 @@ |
if (fNodeStack.empty() || U_FAILURE(status)) { |
return NULL; |
} |
@@ -1225,7 +1207,7 @@ |
where = fIndexStack.push(0, status); |
goingDown = TRUE; |
} |
-@@ -623,7 +856,7 @@ |
+@@ -623,7 +845,7 @@ |
unistr.truncate(unistr.length()-nodeCount); |
fNodeStack.popi(); |
fIndexStack.popi(); |
@@ -1234,7 +1216,7 @@ |
where = fIndexStack.peeki(); |
} |
} |
-@@ -638,7 +871,7 @@ |
+@@ -638,7 +860,7 @@ |
// Push on next node |
unistr.append((UChar)hnode->entries[where].ch); |
fIndexStack.setElementAt(where+1, fIndexStack.size()-1); |
@@ -1243,7 +1225,7 @@ |
where = fIndexStack.push(0, status); |
goingDown = TRUE; |
} |
-@@ -646,12 +879,14 @@ |
+@@ -646,12 +868,14 @@ |
// Going up |
fNodeStack.popi(); |
fIndexStack.popi(); |
@@ -1259,7 +1241,7 @@ |
if (goingDown && (node->flagscount & kParentEndsWord)) { |
return &unistr; |
} |
-@@ -664,7 +899,7 @@ |
+@@ -664,7 +888,7 @@ |
if (U_FAILURE(status)) { |
return NULL; |
} |
@@ -1268,7 +1250,7 @@ |
} |
// |
-@@ -672,21 +907,36 @@ |
+@@ -672,21 +896,36 @@ |
// and back again |
// |
@@ -1311,7 +1293,7 @@ |
nodes.push(this, status); |
} |
-@@ -694,87 +944,225 @@ |
+@@ -694,87 +933,225 @@ |
} |
virtual uint32_t size() { |
@@ -1553,7 +1535,7 @@ |
} |
void addChar(UChar ch) { |
-@@ -784,60 +1172,85 @@ |
+@@ -784,60 +1161,85 @@ |
void setLink(BuildCompactTrieNode *node) { |
fEqual = node; |
} |
@@ -1651,16 +1633,16 @@ |
} |
result = vResult; |
} |
-@@ -849,19 +1262,28 @@ |
+@@ -849,19 +1251,28 @@ |
// Uses recursion. |
static void walkHorizontal(const TernaryNode *node, |
- BuildCompactTrieHorizontalNode *building, |
- UStack &nodes, |
- UErrorCode &status) { |
-+ BuildCompactTrieHorizontalNode *building, |
-+ UStack &nodes, |
-+ UErrorCode &status, Hashtable *values = NULL) { |
++ BuildCompactTrieHorizontalNode *building, |
++ UStack &nodes, |
++ UErrorCode &status, Hashtable *values = NULL) { |
while (U_SUCCESS(status) && node != NULL) { |
if (node->low != NULL) { |
- walkHorizontal(node->low, building, nodes, status); |
@@ -1687,7 +1669,7 @@ |
} |
if (U_SUCCESS(status) && link != NULL) { |
building->addNode(node->ch, link, status); |
-@@ -881,13 +1303,15 @@ |
+@@ -881,13 +1292,15 @@ |
_sortBuildNodes(const void * /*context*/, const void *voidl, const void *voidr) { |
BuildCompactTrieNode *left = *(BuildCompactTrieNode **)voidl; |
BuildCompactTrieNode *right = *(BuildCompactTrieNode **)voidr; |
@@ -1705,7 +1687,7 @@ |
} |
// Next, the "parent ends word" flag. If that differs, we cannot coalesce. |
if (left->fParentEndsWord != right->fParentEndsWord) { |
-@@ -898,12 +1322,19 @@ |
+@@ -898,12 +1311,19 @@ |
if (result != 0) { |
return result; |
} |
@@ -1728,7 +1710,7 @@ |
// We need to compare the links vectors. They should be the |
// same size because the strings were equal. |
// We compare the node IDs instead of the pointers, to handle |
-@@ -914,9 +1345,10 @@ |
+@@ -914,9 +1334,10 @@ |
int32_t count = hleft->fLinks.size(); |
for (int32_t i = 0; i < count && result == 0; ++i) { |
result = ((BuildCompactTrieNode *)(hleft->fLinks[i]))->fNodeID - |
@@ -1740,7 +1722,7 @@ |
// If they are equal to each other, mark them (speeds coalescing) |
if (result == 0) { |
left->fHasDuplicate = TRUE; |
-@@ -1031,20 +1463,25 @@ |
+@@ -1031,20 +1452,25 @@ |
// Add node 0, used as the NULL pointer/sentinel. |
nodes.addElement((int32_t)0, status); |
@@ -1770,7 +1752,7 @@ |
#ifdef DEBUG_TRIE_DICT |
(void) ::times(&timing); |
fprintf(stderr, "Compact trie built, %d nodes, time user %f system %f\n", |
-@@ -1077,21 +1514,37 @@ |
+@@ -1077,21 +1503,37 @@ |
return NULL; |
} |
@@ -1814,7 +1796,7 @@ |
status = U_ILLEGAL_ARGUMENT_ERROR; |
return NULL; |
} |
-@@ -1111,9 +1564,14 @@ |
+@@ -1111,9 +1553,14 @@ |
status = U_MEMORY_ALLOCATION_ERROR; |
return NULL; |
} |
@@ -1831,7 +1813,7 @@ |
header->nodeCount = nodeCount; |
header->offsets[0] = 0; // Sentinel |
header->root = translate.elementAti(root->fNodeID); |
-@@ -1123,23 +1581,40 @@ |
+@@ -1123,23 +1570,40 @@ |
} |
#endif |
uint32_t offset = offsetof(CompactTrieHeader,offsets)+(nodeCount*sizeof(uint32_t)); |
@@ -1875,7 +1857,7 @@ |
// Collect statistics on node types and sizes |
int hCount = 0; |
int vCount = 0; |
-@@ -1148,68 +1623,85 @@ |
+@@ -1148,68 +1612,85 @@ |
size_t hItemCount = 0; |
size_t vItemCount = 0; |
uint32_t previousOff = offset; |
@@ -1981,7 +1963,7 @@ |
if (nodeCount == 0 || U_FAILURE(status)) { |
// Failure, or terminal node |
return NULL; |
-@@ -1234,29 +1726,41 @@ |
+@@ -1234,29 +1715,41 @@ |
previous = latest; |
} |
if (latest != NULL) { |
@@ -2029,7 +2011,7 @@ |
if (U_FAILURE(status)) { |
delete root; // Clean up |
delete result; |
-@@ -1270,8 +1774,8 @@ |
+@@ -1270,8 +1763,8 @@ |
U_CAPI int32_t U_EXPORT2 |
triedict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, |
@@ -2040,7 +2022,7 @@ |
if (status == NULL || U_FAILURE(*status)) { |
return 0; |
} |
-@@ -1286,14 +1790,14 @@ |
+@@ -1286,14 +1779,14 @@ |
// |
const UDataInfo *pInfo = (const UDataInfo *)((const uint8_t *)inData+4); |
if(!( pInfo->dataFormat[0]==0x54 && /* dataFormat="TrDc" */ |
@@ -2062,7 +2044,7 @@ |
*status=U_UNSUPPORTED_ERROR; |
return 0; |
} |
-@@ -1311,8 +1815,10 @@ |
+@@ -1311,8 +1804,10 @@ |
// |
const uint8_t *inBytes =(const uint8_t *)inData+headerSize; |
const CompactTrieHeader *header = (const CompactTrieHeader *)inBytes; |
@@ -2075,7 +2057,7 @@ |
{ |
udata_printError(ds, "triedict_swap(): CompactTrieHeader is invalid.\n"); |
*status=U_UNSUPPORTED_ERROR; |
-@@ -1333,10 +1839,10 @@ |
+@@ -1333,10 +1828,10 @@ |
// |
if (length < sizeWithUData) { |
udata_printError(ds, "triedict_swap(): too few bytes (%d after ICU Data header) for trie data.\n", |
@@ -2088,7 +2070,7 @@ |
// |
// Swap the Data. Do the data itself first, then the CompactTrieHeader, because |
-@@ -1355,20 +1861,38 @@ |
+@@ -1355,20 +1850,38 @@ |
} |
// We need to loop through all the nodes in the offset table, and swap each one. |
@@ -2133,7 +2115,7 @@ |
uint16_t equal = ds->readUInt16(inBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal); |
ds->writeUInt16(outBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal)); |
} |
-@@ -1381,26 +1905,62 @@ |
+@@ -1381,26 +1894,62 @@ |
word = ds->readUInt16(inHNode->entries[j].equal); |
ds->writeUInt16(&outHNode->entries[j].equal, word); |
} |
@@ -2209,7 +2191,7 @@ |
return sizeWithUData; |
} |
--- source/common/triedict.h 2006-06-06 15:38:49.000000000 -0700 |
-+++ source/common/triedict.h 2009-07-27 13:01:17.723390000 -0700 |
++++ source/common/triedict.h 2011-01-21 14:12:45.496927000 -0800 |
@@ -47,7 +47,6 @@ |
U_NAMESPACE_BEGIN |
@@ -2448,27 +2430,22 @@ |
- /* TRIEDICT_H */ |
+/* TRIEDICT_H */ |
#endif |
---- source/data/brkitr/brkfiles.mk 2009-04-21 15:42:37.000000000 -0700 |
-+++ source/data/brkitr/brkfiles.mk 2009-07-27 13:01:17.730379000 -0700 |
-@@ -34,13 +34,12 @@ |
+--- source/data/Makefile.in 2010-10-29 13:21:33.000000000 -0700 |
++++ source/data/Makefile.in 2011-01-26 16:24:24.856798000 -0800 |
+@@ -509,8 +520,9 @@ |
+ #################################################### CTD |
+ # CTD FILES |
+-$(BRKBLDDIR)/%.ctd: $(BRKSRCDIR)/%.txt $(TOOLBINDIR)/genctd$(TOOLEXEEXT) $(DAT_FILES) |
+- $(INVOKE) $(TOOLBINDIR)/genctd -c -i $(BUILDDIR) -o $@ $< |
++# .ctd file now generated regardless of whether dictionary file exists |
++$(BRKBLDDIR)/%.ctd: $(TOOLBINDIR)/genctd$(TOOLEXEEXT) $(DAT_FILES) |
++ $(INVOKE) $(TOOLBINDIR)/genctd -c -i $(BUILDDIR) -o $@ $(BRKSRCDIR)/$(*F).txt |
- # List of compact trie dictionary files (ctd). |
--BRK_CTD_SOURCE = thaidict.txt |
-+BRK_CTD_SOURCE = thaidict.txt cjdict.txt |
- |
- |
- # List of break iterator files (brk). |
--BRK_SOURCE = word_POSIX.txt word_ja.txt sent_el.txt char_th.txt char.txt word.txt line.txt sent.txt title.txt |
-+BRK_SOURCE = word_POSIX.txt sent_el.txt char_th.txt char.txt word.txt line.txt sent.txt title.txt |
- |
- |
- # Ordinary resources |
--BRK_RES_SOURCE = el.txt en.txt en_US.txt en_US_POSIX.txt ja.txt th.txt |
-- |
-+BRK_RES_SOURCE = el.txt en.txt en_US.txt en_US_POSIX.txt th.txt |
---- source/data/brkitr/root.txt 2009-06-24 14:06:38.000000000 -0700 |
-+++ source/data/brkitr/root.txt 2009-07-27 13:01:17.733382000 -0700 |
+ #################################################### CFU |
+ # CFU FILES |
+--- source/data/brkitr/root.txt 2010-07-28 17:18:28.000000000 -0700 |
++++ source/data/brkitr/root.txt 2011-01-21 14:12:45.653922000 -0800 |
@@ -17,5 +17,8 @@ |
} |
dictionaries{ |
@@ -2478,173 +2455,8 @@ |
+ Kata:process(dependency){"cjdict.ctd"} |
} |
} |
---- source/data/brkitr/word.txt 2009-06-24 14:06:38.000000000 -0700 |
-+++ source/data/brkitr/word.txt 2010-08-27 16:24:25.969372000 -0700 |
-@@ -29,29 +29,49 @@ |
- $Newline = [\p{Word_Break = Newline}]; |
- $Extend = [\p{Word_Break = Extend}]; |
- $Format = [\p{Word_Break = Format}]; |
-+$Hiragana = [:Hiragana:]; |
- $Katakana = [\p{Word_Break = Katakana}]; |
-+$Han = [:Han:]; |
- $ALetter = [\p{Word_Break = ALetter}]; |
--$MidNumLet = [\p{Word_Break = MidNumLet}]; |
-+# Remove two full stop characters from $MidNumLet and add them to $MidNum |
-+# to break a hostname into its components at the cost of breaking |
-+# 'e.g.' and 'i.e.' as well. |
-+# $MidNumLet is used in rules 6/7 (rules of our interest) and rules 11/12. |
-+# Because it's OR'd with $MidNum in rules 11/12, rules 11/12 are not affected |
-+# while rules 6/7 are reverted to the old behavior we want. |
-+$MidNumLet = [[\p{Word_Break = MidNumLet}] - [\u002E \uFF0E]]; |
- $MidLetter = [\p{Word_Break = MidLetter}]; |
--$MidNum = [\p{Word_Break = MidNum}]; |
--$Numeric = [\p{Word_Break = Numeric}]; |
-+$MidNum = [\p{Word_Break = MidNum}[\u002E \uFF0E]]; |
-+$Numeric = [\p{Word_Break = Numeric}[\uff10-\uff19]]; #includes fullwidth digits |
- $ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; |
- |
-+# Extra sets not to break 'HebrewLetter U+0022 HebrewLetter'. |
-+$HebrewLet = [\p{Word_Break = ALetter} & \p{Script = Hebrew} - [\u05F3]]; |
-+# U+05F3 is ALetter and U+05F4 is MidLetter so that they're covered by |
-+# the current rule 6/7. |
-+$HebrewMidLet = [\u0022]; |
- |
- # Dictionary character set, for triggering language-based break engines. Currently |
--# limited to LineBreak=Complex_Context. Note that this set only works in Unicode |
--# 5.0 or later as the definition of Complex_Context was corrected to include all |
-+# limited to LineBreak=Complex_Context and CJK. Note that this set only works |
-+# in Unicode 5.0 or later as the definition of Complex_Context was corrected to include all |
- # characters requiring dictionary break. |
- |
--$dictionary = [:LineBreak = Complex_Context:]; |
- $Control = [\p{Grapheme_Cluster_Break = Control}]; |
--$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; # Note: default ALetter does not |
-- # include the dictionary characters. |
-+$HangulSyllable = [\uac00-\ud7a3]; |
-+$ComplexContext = [:LineBreak = Complex_Context:]; |
-+$KanaKanji = [$Han $Hiragana $Katakana]; |
-+$dictionaryCJK = [$KanaKanji $HangulSyllable]; |
-+$dictionary = [$ComplexContext $dictionaryCJK]; |
-+ |
-+# leave CJK scripts out of ALetterPlus |
-+$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; |
-+ |
- |
- # |
- # Rules 4 Ignore Format and Extend characters, |
- # except when they appear at the beginning of a region of text. |
- # |
-+# TODO: check if handling of katakana in dictionary makes rules incorrect/void. |
- $KatakanaEx = $Katakana ($Extend | $Format)*; |
- $ALetterEx = $ALetterPlus ($Extend | $Format)*; |
- $MidNumLetEx = $MidNumLet ($Extend | $Format)*; |
-@@ -59,8 +79,8 @@ |
- $MidNumEx = $MidNum ($Extend | $Format)*; |
- $NumericEx = $Numeric ($Extend | $Format)*; |
- $ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*; |
-+$HebrewLetEx = $HebrewLet ($Extend | $Format)*; |
- |
--$Hiragana = [\p{script=Hiragana}]; |
- $Ideographic = [\p{Ideographic}]; |
- $HiraganaEx = $Hiragana ($Extend | $Format)*; |
- $IdeographicEx = $Ideographic ($Extend | $Format)*; |
-@@ -79,12 +99,14 @@ |
- # begins with a group of Format chars, or with a "word" consisting of a single |
- # char that is not in any of the listed word break categories followed by |
- # format char(s). |
--[^$CR $LF $Newline]? ($Extend | $Format)+; |
-+ # format char(s), or is not a CJK dictionary character. |
-+[^$CR $LF $Newline $dictionaryCJK]? ($Extend | $Format)+; |
- |
- $NumericEx {100}; |
- $ALetterEx {200}; |
--$KatakanaEx {300}; # note: these status values override those from rule 5 |
--$HiraganaEx {300}; # by virtual of being numerically larger. |
-+$HangulSyllable {200}; |
-+$KatakanaEx {400}; #originally 300 |
-+$HiraganaEx {400}; #originally 300 |
- $IdeographicEx {400}; # |
- |
- # |
-@@ -96,6 +118,9 @@ |
- # rule 6 and 7 |
- $ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200}; |
- |
-+# Chrome addition |
-+$HebrewLetEx $HebrewMidLet $HebrewLetEx {200}; |
-+ |
- # rule 8 |
- |
- $NumericEx $NumericEx {100}; |
-@@ -114,19 +139,25 @@ |
- |
- # rule 13 |
- |
--$KatakanaEx $KatakanaEx {300}; |
-+# To be consistent with '$KanaKanji $KanaKanji', changed |
-+# from 300 to 400. |
-+# See also TestRuleStatus in intltest/rbbiapts.cpp |
-+$KatakanaEx $KatakanaEx {400}; |
- |
- # rule 13a/b |
- |
- $ALetterEx $ExtendNumLetEx {200}; # (13a) |
- $NumericEx $ExtendNumLetEx {100}; # (13a) |
--$KatakanaEx $ExtendNumLetEx {300}; # (13a) |
-+$KatakanaEx $ExtendNumLetEx {400}; # (13a) |
- $ExtendNumLetEx $ExtendNumLetEx {200}; # (13a) |
- |
- $ExtendNumLetEx $ALetterEx {200}; # (13b) |
- $ExtendNumLetEx $NumericEx {100}; # (13b) |
--$ExtendNumLetEx $KatakanaEx {300}; # (13b) |
-- |
-+$ExtendNumLetEx $KatakanaEx {400}; # (13b) |
-+ |
-+# special handling for CJK characters: chain for later dictionary segmentation |
-+$HangulSyllable $HangulSyllable {200}; |
-+$KanaKanji $KanaKanji {400}; #different rule status if both kanji and kana found |
- |
- |
- ## ------------------------------------------------- |
-@@ -139,13 +170,15 @@ |
- $BackMidNumEx = ($Format | $Extend)* $MidNum; |
- $BackMidLetterEx = ($Format | $Extend)* $MidLetter; |
- $BackKatakanaEx = ($Format | $Extend)* $Katakana; |
-+$BackHiraganaEx = ($Extend | $Format)* $Hiragana; |
- $BackExtendNumLetEx= ($Format | $Extend)* $ExtendNumLet; |
-+$BackHebrewLetEx = ($Format | $Extend)* $HebrewLet; |
- |
- # rule 3 |
- $LF $CR; |
- |
- # rule 4 |
--($Format | $Extend)* [^$CR $LF $Newline]?; |
-+($Format | $Extend)* [^$CR $LF $Newline $dictionaryCJK]?; |
- |
- # rule 5 |
- |
-@@ -155,6 +188,8 @@ |
- |
- $BackALetterEx ($BackMidLetterEx | $BackMidNumLetEx) $BackALetterEx; |
- |
-+# Chrome addition |
-+$BackHebrewLetEx $HebrewMidLet $BackHebrewLetEx; |
- |
- # rule 8 |
- |
-@@ -181,6 +216,10 @@ |
- $BackExtendNumLetEx ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx); |
- ($BackALetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx; |
- |
-+# special handling for CJK characters: chain for later dictionary segmentation |
-+$HangulSyllable $HangulSyllable; |
-+$KanaKanji $KanaKanji; #different rule status if both kanji and kana found |
-+ |
- ## ------------------------------------------------- |
- |
- !!safe_reverse; |
---- source/data/xml/brkitr/root.xml 2007-08-28 23:10:43.000000000 -0700 |
-+++ source/data/xml/brkitr/root.xml 2009-07-27 13:01:17.746367000 -0700 |
+--- source/data/xml/brkitr/root.xml 2010-03-01 15:13:18.000000000 -0800 |
++++ source/data/xml/brkitr/root.xml 2011-01-21 14:12:45.735922000 -0800 |
@@ -25,6 +25,9 @@ |
</icu:boundaries> |
<icu:dictionaries> |
@@ -2655,9 +2467,9 @@ |
</icu:dictionaries> |
</icu:breakIteratorData> |
</special> |
---- source/test/cintltst/creststn.c 2009-06-26 09:49:55.000000000 -0700 |
-+++ source/test/cintltst/creststn.c 2009-07-29 12:46:05.997405000 -0700 |
-@@ -2181,21 +2181,21 @@ |
+--- source/test/cintltst/creststn.c 2010-10-28 10:44:02.000000000 -0700 |
++++ source/test/cintltst/creststn.c 2011-01-21 14:12:44.995020000 -0800 |
+@@ -2188,21 +2188,21 @@ |
{ |
@@ -2684,13 +2496,8 @@ |
status = U_ZERO_ERROR; |
} |
/* simple alias */ |
-@@ -3024,4 +3024,3 @@ |
- } |
- |
- } |
-- |
---- source/test/intltest/rbbiapts.cpp 2009-06-26 09:49:55.000000000 -0700 |
-+++ source/test/intltest/rbbiapts.cpp 2009-07-28 13:56:30.208042000 -0700 |
+--- source/test/intltest/rbbiapts.cpp 2010-07-12 11:03:29.000000000 -0700 |
++++ source/test/intltest/rbbiapts.cpp 2011-01-21 14:12:45.033014000 -0800 |
@@ -156,9 +156,13 @@ |
if(*a!=*b){ |
errln("Failed: boilerplate method operator!= does not return correct results"); |
@@ -2716,7 +2523,7 @@ |
} |
void RBBIAPITest::TestgetRules() |
-@@ -643,21 +648,21 @@ |
+@@ -635,21 +640,21 @@ |
// |
void RBBIAPITest::TestRuleStatus() { |
UChar str[30]; |
@@ -2746,7 +2553,7 @@ |
UErrorCode status=U_ZERO_ERROR; |
-@@ -896,9 +901,11 @@ |
+@@ -888,9 +893,11 @@ |
URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); |
{ |
@@ -2758,9 +2565,9 @@ |
} |
{ |
---- source/test/intltest/rbbitst.cpp 2009-06-26 09:49:55.000000000 -0700 |
-+++ source/test/intltest/rbbitst.cpp 2009-07-28 15:35:18.933226000 -0700 |
-@@ -33,6 +33,8 @@ |
+--- source/test/intltest/rbbitst.cpp 2010-10-08 18:23:28.000000000 -0700 |
++++ source/test/intltest/rbbitst.cpp 2011-01-21 14:12:45.180030000 -0800 |
+@@ -35,6 +35,8 @@ |
#include <string.h> |
#include <stdio.h> |
#include <stdlib.h> |
@@ -2769,25 +2576,32 @@ |
#define TEST_ASSERT(x) {if (!(x)) { \ |
errln("Failure in file %s, line %d", __FILE__, __LINE__);}} |
-@@ -108,6 +110,8 @@ |
+@@ -138,11 +140,13 @@ |
if (exec) TestThaiBreaks(); break; |
case 23: name = "TestTailoredBreaks"; |
if (exec) TestTailoredBreaks(); break; |
+ case 24: name = "TestTrieDictWithValue"; |
+ if(exec) TestTrieDictWithValue(); break; |
+ #else |
+- case 21: case 22: case 23: name = "skip"; |
++ case 21: case 22: case 23: case 24: name = "skip"; |
+ break; |
+ #endif |
+- case 24: name = "TestDictRules"; |
++ case 25: name = "TestDictRules"; |
+ if (exec) TestDictRules(); break; |
+ case 25: name = "TestBug5532"; |
+ if (exec) TestBug5532(); break; |
+@@ -607,6 +611,8 @@ |
- default: name = ""; break; //needed to end loop |
- } |
-@@ -570,6 +574,8 @@ |
- |
void RBBITest::TestJapaneseWordBreak() { |
+// TODO: Rewrite this test for a dictionary-based word breaking. |
+#if 0 |
UErrorCode status = U_ZERO_ERROR; |
BITestData japaneseWordSelection(status); |
-@@ -591,6 +597,7 @@ |
+@@ -628,6 +634,7 @@ |
generalIteratorTest(*e, japaneseWordSelection); |
delete e; |
@@ -2795,7 +2609,7 @@ |
} |
void RBBITest::TestTrieDict() { |
-@@ -812,6 +819,372 @@ |
+@@ -849,6 +856,372 @@ |
delete compact2; |
} |
@@ -3168,7 +2982,7 @@ |
//---------------------------------------------------------------------------- |
// |
-@@ -1832,8 +2205,15 @@ |
+@@ -1870,8 +2243,15 @@ |
// Don't break in runs of hiragana or runs of ideograph, where the latter includes \u3005 \u3007 \u303B (cldrbug #2009). |
static const char jaWordText[] = "\\u79C1\\u9054\\u306B\\u4E00\\u3007\\u3007\\u3007\\u306E\\u30B3\\u30F3\\u30D4\\u30E5\\u30FC\\u30BF" |
"\\u304C\\u3042\\u308B\\u3002\\u5948\\u3005\\u306F\\u30EF\\u30FC\\u30C9\\u3067\\u3042\\u308B\\u3002"; |
@@ -3184,7 +2998,7 @@ |
// UBreakIteratorType UBRK_SENTENCE, Locale "el" |
// Add break after Greek question mark (cldrbug #2069). |
-@@ -2580,6 +2960,8 @@ |
+@@ -2672,6 +3052,8 @@ |
UnicodeSet *fNewlineSet; |
UnicodeSet *fKatakanaSet; |
UnicodeSet *fALetterSet; |
@@ -3193,7 +3007,7 @@ |
UnicodeSet *fMidNumLetSet; |
UnicodeSet *fMidLetterSet; |
UnicodeSet *fMidNumSet; |
-@@ -2588,6 +2970,7 @@ |
+@@ -2680,6 +3062,7 @@ |
UnicodeSet *fOtherSet; |
UnicodeSet *fExtendSet; |
UnicodeSet *fExtendNumLetSet; |
@@ -3201,7 +3015,7 @@ |
RegexMatcher *fMatcher; |
-@@ -2604,12 +2987,24 @@ |
+@@ -2696,12 +3079,24 @@ |
fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status); |
fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status); |
fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status); |
@@ -3228,7 +3042,7 @@ |
fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"), status); |
fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status); |
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status); |
-@@ -2633,13 +3028,14 @@ |
+@@ -2725,13 +3120,14 @@ |
fOtherSet->removeAll(*fFormatSet); |
fOtherSet->removeAll(*fExtendSet); |
// Inhibit dictionary characters from being tested at all. |
@@ -3244,7 +3058,7 @@ |
fSets->addElement(fMidLetterSet, status); |
fSets->addElement(fMidNumLetSet, status); |
fSets->addElement(fMidNumSet, status); |
-@@ -3871,6 +4267,7 @@ |
+@@ -3978,6 +4374,7 @@ |
for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) { |
count --; |
if (forward[count] != i) { |
@@ -3252,7 +3066,7 @@ |
test->errln("happy break test previous() failed: expected %d but got %d", |
forward[count], i); |
break; |
-@@ -3904,23 +4301,25 @@ |
+@@ -4011,23 +4408,25 @@ |
UErrorCode status = U_ZERO_ERROR; |
// BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status); |
BreakIterator *bi = BreakIterator::createWordInstance(locale, status); |
@@ -3282,7 +3096,7 @@ |
"\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7", |
"\\u0027\\u11af\\U000e0057\\u0602", |
"\\U0001d7f2\\U000e007\\u0004\\u0589", |
-@@ -3932,7 +4331,7 @@ |
+@@ -4039,7 +4438,7 @@ |
"\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7", |
"\\u0233\\U000e0020\\u0a69\\u0d6a", |
"\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019", |
@@ -3291,7 +3105,7 @@ |
"\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe", |
"\\ua183\\u102d\\u0bec\\u003a", |
"\\u17e8\\u06e7\\u002e\\u096d\\u003b", |
-@@ -3942,7 +4341,7 @@ |
+@@ -4049,7 +4448,7 @@ |
"\\U000e005d\\u2044\\u0731\\u0650\\u0061", |
"\\u003a\\u0664\\u00b7\\u1fba", |
"\\u003b\\u0027\\u00b7\\u47a3", |
@@ -3300,7 +3114,7 @@ |
"\\u0027\\u003a\\U0001d70f\\U0001d7df\\ubf4a\\U0001d7f5\\U0001d177\\u003a\\u0e51\\u1058\\U000e0058\\u00b7\\u0673", |
"\\uc30d\\u002e\\U000e002c\\u0c48\\u003a\\ub5a1\\u0661\\u002c", |
}; |
-@@ -3997,12 +4396,12 @@ |
+@@ -4104,12 +4503,12 @@ |
"\\U0001d7f2\\U000e007d\\u0004\\u0589", |
"\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d", |
"\\u0e01\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959", |
@@ -3315,19 +3129,19 @@ |
"\\ua183\\u102d\\u0bec\\u003a", |
"\\u17e8\\u06e7\\u002e\\u096d\\u003b", |
"\\u003a\\u0e57\\u0fad\\u002e", |
---- source/test/intltest/rbbitst.h 2009-04-22 00:53:50.000000000 -0700 |
-+++ source/test/intltest/rbbitst.h 2009-07-27 13:01:17.767342000 -0700 |
+--- source/test/intltest/rbbitst.h 2010-07-22 17:15:37.000000000 -0700 |
++++ source/test/intltest/rbbitst.h 2011-01-21 14:12:45.152007000 -0800 |
@@ -70,6 +70,7 @@ |
void TestBug5775(); |
void TestThaiBreaks(); |
void TestTailoredBreaks(); |
+ void TestTrieDictWithValue(); |
+ void TestDictRules(); |
+ void TestBug5532(); |
- void TestDebug(); |
- |
---- source/test/testdata/rbbitst.txt 2009-06-24 14:06:38.000000000 -0700 |
-+++ source/test/testdata/rbbitst.txt 2009-07-29 12:56:31.483710000 -0700 |
-@@ -162,7 +162,23 @@ |
+--- source/test/testdata/rbbitst.txt 2010-07-28 17:18:28.000000000 -0700 |
++++ source/test/testdata/rbbitst.txt 2011-01-21 14:12:45.221011000 -0800 |
+@@ -161,7 +161,23 @@ |
<data>•abc<200>\U0001D800•def<200>\U0001D3FF• •</data> |
# Hiragana & Katakana stay together, but separates from each other and Latin. |
@@ -3352,7 +3166,7 @@ |
# Words with interior formatting characters |
<data>•def\N{COMBINING ACUTE ACCENT}\N{SYRIAC ABBREVIATION MARK}ghi<200> •</data> |
-@@ -170,6 +186,8 @@ |
+@@ -169,6 +185,8 @@ |
# to test for bug #4097779 |
<data>•aa\N{COMBINING GRAVE ACCENT}a<200> •</data> |
@@ -3361,7 +3175,7 @@ |
# to test for bug #4098467 |
# What follows is a string of Korean characters (I found it in the Yellow Pages |
-@@ -179,9 +197,15 @@ |
+@@ -178,9 +196,15 @@ |
# precomposed syllables... |
<data>•\uc0c1\ud56d<200> •\ud55c\uc778<200> •\uc5f0\ud569<200> •\uc7a5\ub85c\uad50\ud68c<200> •\u1109\u1161\u11bc\u1112\u1161\u11bc<200> •\u1112\u1161\u11ab\u110b\u1175\u11ab<200> •\u110b\u1167\u11ab\u1112\u1161\u11b8<200> •\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c<200> •</data> |
@@ -3379,7 +3193,7 @@ |
# |
# Try some words from other scripts. |
-@@ -492,8 +516,7 @@ |
+@@ -491,8 +515,7 @@ |
<data>•\uc0c1•\ud56d •\ud55c•\uc778 •\uc5f0•\ud569 •\uc7a5•\ub85c•\uad50•\ud68c•</data> |
# conjoining jamo... |
@@ -3389,8 +3203,8 @@ |
# to test for bug #4117554: Fullwidth .!? should be treated as postJwrd |
<data>•\u4e01\uff0e•\u4e02\uff01•\u4e03\uff1f•</data> |
---- source/test/testdata/testaliases.txt 2009-06-24 14:06:38.000000000 -0700 |
-+++ source/test/testdata/testaliases.txt 2009-07-28 17:07:26.251120000 -0700 |
+--- source/test/testdata/testaliases.txt 2009-11-12 13:53:42.000000000 -0800 |
++++ source/test/testdata/testaliases.txt 2011-01-21 14:12:45.204005000 -0800 |
@@ -28,7 +28,7 @@ |
LocaleScript:alias { "/ICUDATA/ja/LocaleScript" } |
@@ -3400,13 +3214,13 @@ |
// aliasing arrays |
zoneTests { |
---- source/tools/genctd/genctd.cpp 2006-09-04 09:28:24.000000000 -0700 |
-+++ source/tools/genctd/genctd.cpp 2009-07-27 13:01:17.776335000 -0700 |
+--- source/tools/genctd/genctd.cpp 2009-08-04 14:09:17.000000000 -0700 |
++++ source/tools/genctd/genctd.cpp 2011-01-21 14:12:45.564923000 -0800 |
@@ -1,6 +1,6 @@ |
/* |
********************************************************************** |
--* Copyright (C) 2002-2006, International Business Machines |
-+* Copyright (C) 2002-2006,2008, International Business Machines |
+-* Copyright (C) 2002-2009, International Business Machines |
++* Copyright (C) 2002-2010, International Business Machines |
* Corporation and others. All Rights Reserved. |
********************************************************************** |
* |
@@ -3426,7 +3240,7 @@ |
#include <stdio.h> |
#include <stdlib.h> |
-@@ -198,147 +201,191 @@ |
+@@ -199,147 +202,191 @@ |
long wordFileSize; |
FILE *file; |
char *wordBufferC; |
@@ -3748,13 +3562,13 @@ |
// Now, create a CompactTrieDictionary from the mutable dictionary |
CompactTrieDictionary *ctd = new CompactTrieDictionary(*mtd, status); |
if (U_FAILURE(status)) { |
-@@ -392,4 +439,3 @@ |
+@@ -393,4 +440,3 @@ |
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
} |
- |
--- source/tools/genctd/Makefile.in 2006-12-16 13:07:01.000000000 -0800 |
-+++ source/tools/genctd/Makefile.in 2009-07-27 13:01:17.782326000 -0700 |
++++ source/tools/genctd/Makefile.in 2011-01-21 14:12:45.555920000 -0800 |
@@ -23,13 +23,13 @@ |
## Extra files to remove for 'make clean' |
CLEANFILES = *~ $(DEPS) $(MAN_FILES) |
@@ -3771,17 +3585,3 @@ |
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M) |
OBJECTS = genctd.o |
---- source/data/Makefile.in 2009-05-20 23:03:54.000000000 -0700 |
-+++ source/data/Makefile.in 2009-10-21 15:43:18.235201000 -0700 |
-@@ -452,8 +452,9 @@ |
- #################################################### CTD |
- # CTD FILES |
- |
--$(BRKBLDDIR)/%.ctd: $(BRKSRCDIR)/%.txt $(TOOLBINDIR)/genctd$(TOOLEXEEXT) $(DAT_FILES) |
-- $(INVOKE) $(TOOLBINDIR)/genctd -c -i $(BUILDDIR) -o $@ $< |
-+# .ctd file now generated regardless of whether dictionary file exists |
-+$(BRKBLDDIR)/%.ctd: $(TOOLBINDIR)/genctd$(TOOLEXEEXT) $(DAT_FILES) |
-+ $(INVOKE) $(TOOLBINDIR)/genctd -c -i $(BUILDDIR) -o $@ $(BRKSRCDIR)/$(*F).txt |
- |
- #################################################### CFU |
- # CFU FILES |