OLD | NEW |
| (Empty) |
1 diff --git a/source/common/utext.cpp b/source/common/utext.cpp | |
2 index 6dc31a0..0c7dd1e 100644 | |
3 --- a/source/common/utext.cpp | |
4 +++ b/source/common/utext.cpp | |
5 @@ -1,7 +1,7 @@ | |
6 /* | |
7 ******************************************************************************* | |
8 * | |
9 -* Copyright (C) 2005-2015, International Business Machines | |
10 +* Copyright (C) 2005-2016, International Business Machines | |
11 * Corporation and others. All Rights Reserved. | |
12 * | |
13 ******************************************************************************* | |
14 @@ -2524,6 +2524,7 @@ ucstrTextExtract(UText *ut, | |
15 ut->chunkLength = si; | |
16 ut->nativeIndexingLimit = si; | |
17 strLength = si; | |
18 + limit32 = si; | |
19 break; | |
20 } | |
21 U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must no
t happen logically */ | |
22 @@ -2545,16 +2546,21 @@ ucstrTextExtract(UText *ut, | |
23 // If the limit index points to a lead surrogate of a pair, | |
24 // add the corresponding trail surrogate to the destination. | |
25 if (si>0 && U16_IS_LEAD(s[si-1]) && | |
26 - ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si]))) | |
27 + ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si]))) | |
28 { | |
29 if (di<destCapacity) { | |
30 // store only if there is space in the output buffer. | |
31 - dest[di++] = s[si++]; | |
32 + dest[di++] = s[si]; | |
33 } | |
34 + si++; | |
35 } | |
36 | |
37 // Put iteration position at the point just following the extracted text | |
38 - ut->chunkOffset = uprv_min(strLength, start32 + destCapacity); | |
39 + if (si <= ut->chunkNativeLimit) { | |
40 + ut->chunkOffset = si; | |
41 + } else { | |
42 + ucstrTextAccess(ut, si, TRUE); | |
43 + } | |
44 | |
45 // Add a terminating NUL if space in the buffer permits, | |
46 // and set the error status as required. | |
47 diff --git a/source/test/intltest/utxttest.cpp b/source/test/intltest/utxttest.c
pp | |
48 index d8bedb6..801337c 100644 | |
49 --- a/source/test/intltest/utxttest.cpp | |
50 +++ b/source/test/intltest/utxttest.cpp | |
51 @@ -1,6 +1,6 @@ | |
52 /******************************************************************** | |
53 * COPYRIGHT: | |
54 - * Copyright (c) 2005-2014, International Business Machines Corporation and | |
55 + * Copyright (c) 2005-2016, International Business Machines Corporation and | |
56 * others. All Rights Reserved. | |
57 ********************************************************************/ | |
58 /************************************************************************ | |
59 @@ -16,6 +16,8 @@ | |
60 #include "unicode/utf8.h" | |
61 #include "unicode/ustring.h" | |
62 #include "unicode/uchriter.h" | |
63 +#include "cmemory.h" | |
64 +#include "cstr.h" | |
65 #include "utxttest.h" | |
66 | |
67 static UBool gFailed = FALSE; | |
68 @@ -61,6 +63,8 @@ UTextTest::runIndexedTest(int32_t index, UBool exec, | |
69 if (exec) Ticket10562(); break; | |
70 case 6: name = "Ticket10983"; | |
71 if (exec) Ticket10983(); break; | |
72 + case 7: name = "Ticket12130"; | |
73 + if (exec) Ticket12130(); break; | |
74 default: name = ""; break; | |
75 } | |
76 } | |
77 @@ -1501,3 +1505,79 @@ void UTextTest::Ticket10983() { | |
78 | |
79 utext_close(ut); | |
80 } | |
81 + | |
82 +// Ticket 12130 - extract on a UText wrapping a null terminated UChar * string | |
83 +// leaves the iteration position set incorrectly when the | |
84 +// actual string length is not yet known. | |
85 +// | |
86 +// The test text needs to be long enough that UText defers getti
ng the length. | |
87 + | |
88 +void UTextTest::Ticket12130() { | |
89 + UErrorCode status = U_ZERO_ERROR; | |
90 + | |
91 + const char *text8 = | |
92 + "Fundamentally, computers just deal with numbers. They store letters an
d other characters " | |
93 + "by assigning a number for each one. Before Unicode was invented, there
were hundreds " | |
94 + "of different encoding systems for assigning these numbers. No single e
ncoding could " | |
95 + "contain enough characters: for example, the European Union alone requi
res several " | |
96 + "different encodings to cover all its languages. Even for a single lang
uage like " | |
97 + "English no single encoding was adequate for all the letters, punctuati
on, and technical " | |
98 + "symbols in common use."; | |
99 + | |
100 + UnicodeString str(text8); | |
101 + const UChar *ustr = str.getTerminatedBuffer(); | |
102 + UText ut = UTEXT_INITIALIZER; | |
103 + utext_openUChars(&ut, ustr, -1, &status); // length -1: not supplied up
 front | |
104 + UChar extractBuffer[50]; | |
105 + | |
106 + for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) { | |
107 + int32_t endIdx = startIdx + 20; | |
108 + | |
109 + u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer)); | |
110 + utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extra
ctBuffer), &status); | |
111 + if (U_FAILURE(status)) { | |
112 + errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status)); | |
113 + return; | |
114 + } | |
115 + int64_t ni = utext_getNativeIndex(&ut); // 64-bit; narrowed for %d belo
w | |
116 + int64_t expectedni = startIdx + 20; | |
117 + if (expectedni > str.length()) { // clamp to the end of the text | |
118 + expectedni = str.length(); | |
119 + } | |
120 + if (expectedni != ni) { | |
121 + errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__,
__LINE__, static_cast<int32_t>(expectedni), static_cast<int32_t>(ni)); | |
122 + } | |
123 + if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) { | |
124 + errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"", | |
125 + __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))()
, CStr(UnicodeString(extractBuffer))()); | |
126 + } | |
127 + } | |
128 + utext_close(&ut); | |
129 + | |
130 + // Similar utext extract, this time with the string length provided to the
UText in advance, | |
131 + // and a buffer larger than the required capacity. | |
132 + | |
133 + utext_openUChars(&ut, ustr, str.length(), &status); | |
134 + for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) { | |
135 + int32_t endIdx = startIdx + 20; | |
136 + u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer)); | |
137 + utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extra
ctBuffer), &status); | |
138 + if (U_FAILURE(status)) { | |
139 + errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status)); | |
140 + return; | |
141 + } | |
142 + int64_t ni = utext_getNativeIndex(&ut); // 64-bit; narrowed for %d belo
w | |
143 + int64_t expectedni = startIdx + 20; | |
144 + if (expectedni > str.length()) { // clamp to the end of the text | |
145 + expectedni = str.length(); | |
146 + } | |
147 + if (expectedni != ni) { | |
148 + errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__,
__LINE__, static_cast<int32_t>(expectedni), static_cast<int32_t>(ni)); | |
149 + } | |
150 + if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) { | |
151 + errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"", | |
152 + __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))()
, CStr(UnicodeString(extractBuffer))()); | |
153 + } | |
154 + } | |
155 + utext_close(&ut); | |
156 +} | |
157 diff --git a/source/test/intltest/utxttest.h b/source/test/intltest/utxttest.h | |
158 index 7091655..346563d 100644 | |
159 --- a/source/test/intltest/utxttest.h | |
160 +++ b/source/test/intltest/utxttest.h | |
161 @@ -1,6 +1,6 @@ | |
162 /******************************************************************** | |
163 * COPYRIGHT: | |
164 - * Copyright (c) 2005-2014, International Business Machines Corporation and | |
165 + * Copyright (c) 2005-2016, International Business Machines Corporation and | |
166 * others. All Rights Reserved. | |
167 ********************************************************************/ | |
168 /************************************************************************ | |
169 @@ -35,6 +35,7 @@ public: | |
170 void Ticket6847(); | |
171 void Ticket10562(); | |
172 void Ticket10983(); | |
173 + void Ticket12130(); | |
174 | |
175 private: | |
176 struct m { // Map between native indices & cod
e points. | |
OLD | NEW |