| OLD | NEW |
| (Empty) |
| 1 diff --git a/source/common/utext.cpp b/source/common/utext.cpp | |
| 2 index 6dc31a0..0c7dd1e 100644 | |
| 3 --- a/source/common/utext.cpp | |
| 4 +++ b/source/common/utext.cpp | |
| 5 @@ -1,7 +1,7 @@ | |
| 6 /* | |
| 7 ******************************************************************************* | |
| 8 * | |
| 9 -* Copyright (C) 2005-2015, International Business Machines | |
| 10 +* Copyright (C) 2005-2016, International Business Machines | |
| 11 * Corporation and others. All Rights Reserved. | |
| 12 * | |
| 13 ******************************************************************************* | |
| 14 @@ -2524,6 +2524,7 @@ ucstrTextExtract(UText *ut, | |
| 15 ut->chunkLength = si; | |
| 16 ut->nativeIndexingLimit = si; | |
| 17 strLength = si; | |
| 18 + limit32 = si; | |
| 19 break; | |
| 20 } | |
| 21 U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must no
t happen logically */ | |
| 22 @@ -2545,16 +2546,21 @@ ucstrTextExtract(UText *ut, | |
| 23 // If the limit index points to a lead surrogate of a pair, | |
| 24 // add the corresponding trail surrogate to the destination. | |
| 25 if (si>0 && U16_IS_LEAD(s[si-1]) && | |
| 26 - ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si]))) | |
| 27 + ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si]))) | |
| 28 { | |
| 29 if (di<destCapacity) { | |
| 30 // store only if there is space in the output buffer. | |
| 31 - dest[di++] = s[si++]; | |
| 32 + dest[di++] = s[si]; | |
| 33 } | |
| 34 + si++; | |
| 35 } | |
| 36 | |
| 37 // Put iteration position at the point just following the extracted text | |
| 38 - ut->chunkOffset = uprv_min(strLength, start32 + destCapacity); | |
| 39 + if (si <= ut->chunkNativeLimit) { | |
| 40 + ut->chunkOffset = si; | |
| 41 + } else { | |
| 42 + ucstrTextAccess(ut, si, TRUE); | |
| 43 + } | |
| 44 | |
| 45 // Add a terminating NUL if space in the buffer permits, | |
| 46 // and set the error status as required. | |
| 47 diff --git a/source/test/intltest/utxttest.cpp b/source/test/intltest/utxttest.c
pp | |
| 48 index d8bedb6..801337c 100644 | |
| 49 --- a/source/test/intltest/utxttest.cpp | |
| 50 +++ b/source/test/intltest/utxttest.cpp | |
| 51 @@ -1,6 +1,6 @@ | |
| 52 /******************************************************************** | |
| 53 * COPYRIGHT: | |
| 54 - * Copyright (c) 2005-2014, International Business Machines Corporation and | |
| 55 + * Copyright (c) 2005-2016, International Business Machines Corporation and | |
| 56 * others. All Rights Reserved. | |
| 57 ********************************************************************/ | |
| 58 /************************************************************************ | |
| 59 @@ -16,6 +16,8 @@ | |
| 60 #include "unicode/utf8.h" | |
| 61 #include "unicode/ustring.h" | |
| 62 #include "unicode/uchriter.h" | |
| 63 +#include "cmemory.h" | |
| 64 +#include "cstr.h" | |
| 65 #include "utxttest.h" | |
| 66 | |
| 67 static UBool gFailed = FALSE; | |
| 68 @@ -61,6 +63,8 @@ UTextTest::runIndexedTest(int32_t index, UBool exec, | |
| 69 if (exec) Ticket10562(); break; | |
| 70 case 6: name = "Ticket10983"; | |
| 71 if (exec) Ticket10983(); break; | |
| 72 + case 7: name = "Ticket12130"; | |
| 73 + if (exec) Ticket12130(); break; | |
| 74 default: name = ""; break; | |
| 75 } | |
| 76 } | |
| 77 @@ -1501,3 +1505,79 @@ void UTextTest::Ticket10983() { | |
| 78 | |
| 79 utext_close(ut); | |
| 80 } | |
| 81 + | |
| 82 +// Ticket 12130 - extract on a UText wrapping a null terminated UChar * string | |
| 83 +// leaves the iteration position set incorrectly when the | |
| 84 +// actual string length is not yet known. | |
| 85 +// | |
| 86 +// The test text needs to be long enough that UText defers getti
ng the length. | |
| 87 + | |
| 88 +void UTextTest::Ticket12130() { | |
| 89 + UErrorCode status = U_ZERO_ERROR; | |
| 90 + | |
| 91 + const char *text8 = | |
| 92 + "Fundamentally, computers just deal with numbers. They store letters an
d other characters " | |
| 93 + "by assigning a number for each one. Before Unicode was invented, there
were hundreds " | |
| 94 + "of different encoding systems for assigning these numbers. No single e
ncoding could " | |
| 95 + "contain enough characters: for example, the European Union alone requi
res several " | |
| 96 + "different encodings to cover all its languages. Even for a single lang
uage like " | |
| 97 + "English no single encoding was adequate for all the letters, punctuati
on, and technical " | |
| 98 + "symbols in common use."; | |
| 99 + | |
| 100 + UnicodeString str(text8); | |
| 101 + const UChar *ustr = str.getTerminatedBuffer(); | |
| 102 + UText ut = UTEXT_INITIALIZER; | |
| 103 + utext_openUChars(&ut, ustr, -1, &status); | |
| 104 + UChar extractBuffer[50]; | |
| 105 + | |
| 106 + for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) { | |
| 107 + int32_t endIdx = startIdx + 20; | |
| 108 + | |
| 109 + u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer)); | |
| 110 + utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extra
ctBuffer), &status); | |
| 111 + if (U_FAILURE(status)) { | |
| 112 + errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status)); | |
| 113 + return; | |
| 114 + } | |
| 115 + int64_t ni = utext_getNativeIndex(&ut); | |
| 116 + int64_t expectedni = startIdx + 20; | |
| 117 + if (expectedni > str.length()) { | |
| 118 + expectedni = str.length(); | |
| 119 + } | |
| 120 + if (expectedni != ni) { | |
| 121 + errln("%s:%d utext_getNativeIndex() expected %lld, got %lld", __FILE__,
__LINE__, static_cast<long long>(expectedni), static_cast<long long>(ni)); | |
| 122 + } | |
| 123 + if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) { | |
| 124 + errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"", | |
| 125 + __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))()
, CStr(UnicodeString(extractBuffer))()); | |
| 126 + } | |
| 127 + } | |
| 128 + utext_close(&ut); | |
| 129 + | |
| 130 + // Similar utext extract, this time with the string length provided to the
UText in advance, | |
| 131 + // and a buffer larger than the required capacity. | |
| 132 + | |
| 133 + utext_openUChars(&ut, ustr, str.length(), &status); | |
| 134 + for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) { | |
| 135 + int32_t endIdx = startIdx + 20; | |
| 136 + u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer)); | |
| 137 + utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extra
ctBuffer), &status); | |
| 138 + if (U_FAILURE(status)) { | |
| 139 + errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status)); | |
| 140 + return; | |
| 141 + } | |
| 142 + int64_t ni = utext_getNativeIndex(&ut); | |
| 143 + int64_t expectedni = startIdx + 20; | |
| 144 + if (expectedni > str.length()) { | |
| 145 + expectedni = str.length(); | |
| 146 + } | |
| 147 + if (expectedni != ni) { | |
| 148 + errln("%s:%d utext_getNativeIndex() expected %lld, got %lld", __FILE__,
__LINE__, static_cast<long long>(expectedni), static_cast<long long>(ni)); | |
| 149 + } | |
| 150 + if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) { | |
| 151 + errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"", | |
| 152 + __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))()
, CStr(UnicodeString(extractBuffer))()); | |
| 153 + } | |
| 154 + } | |
| 155 + utext_close(&ut); | |
| 156 +} | |
| 157 diff --git a/source/test/intltest/utxttest.h b/source/test/intltest/utxttest.h | |
| 158 index 7091655..346563d 100644 | |
| 159 --- a/source/test/intltest/utxttest.h | |
| 160 +++ b/source/test/intltest/utxttest.h | |
| 161 @@ -1,6 +1,6 @@ | |
| 162 /******************************************************************** | |
| 163 * COPYRIGHT: | |
| 164 - * Copyright (c) 2005-2014, International Business Machines Corporation and | |
| 165 + * Copyright (c) 2005-2016, International Business Machines Corporation and | |
| 166 * others. All Rights Reserved. | |
| 167 ********************************************************************/ | |
| 168 /************************************************************************ | |
| 169 @@ -35,6 +35,7 @@ public: | |
| 170 void Ticket6847(); | |
| 171 void Ticket10562(); | |
| 172 void Ticket10983(); | |
| 173 + void Ticket12130(); | |
| 174 | |
| 175 private: | |
| 176 struct m { // Map between native indices & cod
e points. | |
| OLD | NEW |