OLD | NEW |
1 /** | 1 /** |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2006-2014, International Business Machines Corporation | 3 * Copyright (C) 2006-2015, International Business Machines Corporation |
4 * and others. All Rights Reserved. | 4 * and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 */ | 6 */ |
7 | 7 |
8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
9 | 9 |
10 #if !UCONFIG_NO_BREAK_ITERATION | 10 #if !UCONFIG_NO_BREAK_ITERATION |
11 | 11 |
12 #include "brkeng.h" | 12 #include "brkeng.h" |
13 #include "dictbe.h" | 13 #include "dictbe.h" |
(...skipping 811 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
825 | 825 |
826 /* | 826 /* |
827 ****************************************************************** | 827 ****************************************************************** |
828 * KhmerBreakEngine | 828 * KhmerBreakEngine |
829 */ | 829 */ |
830 | 830 |
831 // How many words in a row are "good enough"? | 831 // How many words in a row are "good enough"? |
832 static const int32_t KHMER_LOOKAHEAD = 3; | 832 static const int32_t KHMER_LOOKAHEAD = 3; |
833 | 833 |
834 // Will not combine a non-word with a preceding dictionary word longer than this | 834 // Will not combine a non-word with a preceding dictionary word longer than this |
835 static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 10; | 835 static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3; |
836 | 836 |
837 // Will not combine a non-word that shares at least this much prefix with a | 837 // Will not combine a non-word that shares at least this much prefix with a |
838 // dictionary word, with a preceding word | 838 // dictionary word, with a preceding word |
839 static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 5; | 839 static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3; |
840 | 840 |
841 // Minimum word size | 841 // Minimum word size |
842 static const int32_t KHMER_MIN_WORD = 2; | 842 static const int32_t KHMER_MIN_WORD = 2; |
843 | 843 |
844 // Minimum number of characters for two words | 844 // Minimum number of characters for two words |
845 static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; | 845 static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; |
846 | 846 |
847 KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
e &status) | 847 KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
e &status) |
848 : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), | 848 : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), |
849 fDictionary(adoptDictionary) | 849 fDictionary(adoptDictionary) |
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1131 */ | 1131 */ |
1132 int32_t | 1132 int32_t |
1133 CjkBreakEngine::divideUpDictionaryRange( UText *inText, | 1133 CjkBreakEngine::divideUpDictionaryRange( UText *inText, |
1134 int32_t rangeStart, | 1134 int32_t rangeStart, |
1135 int32_t rangeEnd, | 1135 int32_t rangeEnd, |
1136 UStack &foundBreaks ) const { | 1136 UStack &foundBreaks ) const { |
1137 if (rangeStart >= rangeEnd) { | 1137 if (rangeStart >= rangeEnd) { |
1138 return 0; | 1138 return 0; |
1139 } | 1139 } |
1140 | 1140 |
1141 // UnicodeString version of input UText, NFKC normalized in necessary. | 1141 // UnicodeString version of input UText, NFKC normalized if necessary. |
1142 UnicodeString *inString; | 1142 UnicodeString inString; |
1143 | 1143 |
1144 // inputMap[inStringIndex] = corresponding native index from UText inText. | 1144 // inputMap[inStringIndex] = corresponding native index from UText inText. |
1145 // If NULL then mapping is 1:1 | 1145 // If NULL then mapping is 1:1 |
1146 UVector32 *inputMap = NULL; | 1146 LocalPointer<UVector32> inputMap; |
1147 | 1147 |
1148 UErrorCode status = U_ZERO_ERROR; | 1148 UErrorCode status = U_ZERO_ERROR; |
1149 | 1149 |
1150 | 1150 |
1151 // if UText has the input string as one contiguous UTF-16 chunk | 1151 // if UText has the input string as one contiguous UTF-16 chunk |
1152 if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNK
S)) && | 1152 if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNK
S)) && |
1153 inText->chunkNativeStart <= rangeStart && | 1153 inText->chunkNativeStart <= rangeStart && |
1154 inText->chunkNativeLimit >= rangeEnd && | 1154 inText->chunkNativeLimit >= rangeEnd && |
1155 inText->nativeIndexingLimit >= rangeEnd - inText->chunkNativeStart) { | 1155 inText->nativeIndexingLimit >= rangeEnd - inText->chunkNativeStart) { |
1156 | 1156 |
1157 // Input UTtxt is in one contiguous UTF-16 chunk. | 1157 // Input UText is in one contiguous UTF-16 chunk. |
1158 // Use Read-only aliasing UnicodeString constructor on it. | 1158 // Use Read-only aliasing UnicodeString. |
1159 inString = new UnicodeString(FALSE, | 1159 inString.setTo(FALSE, |
1160 inText->chunkContents + rangeStart - inText->chunk
NativeStart, | 1160 inText->chunkContents + rangeStart - inText->chunkNativeS
tart, |
1161 rangeEnd - rangeStart); | 1161 rangeEnd - rangeStart); |
1162 } else { | 1162 } else { |
1163 // Copy the text from the original inText (UText) to inString (UnicodeSt
ring). | 1163 // Copy the text from the original inText (UText) to inString (UnicodeSt
ring). |
1164 // Create a map from UnicodeString indices -> UText offsets. | 1164 // Create a map from UnicodeString indices -> UText offsets. |
1165 utext_setNativeIndex(inText, rangeStart); | 1165 utext_setNativeIndex(inText, rangeStart); |
1166 int32_t limit = rangeEnd; | 1166 int32_t limit = rangeEnd; |
1167 U_ASSERT(limit <= utext_nativeLength(inText)); | 1167 U_ASSERT(limit <= utext_nativeLength(inText)); |
1168 if (limit > utext_nativeLength(inText)) { | 1168 if (limit > utext_nativeLength(inText)) { |
1169 limit = utext_nativeLength(inText); | 1169 limit = utext_nativeLength(inText); |
1170 } | 1170 } |
1171 inString = new UnicodeString; | 1171 inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status); |
1172 inputMap = new UVector32(status); | 1172 if (U_FAILURE(status)) { |
| 1173 return 0; |
| 1174 } |
1173 while (utext_getNativeIndex(inText) < limit) { | 1175 while (utext_getNativeIndex(inText) < limit) { |
1174 int32_t nativePosition = utext_getNativeIndex(inText); | 1176 int32_t nativePosition = utext_getNativeIndex(inText); |
1175 UChar32 c = utext_next32(inText); | 1177 UChar32 c = utext_next32(inText); |
1176 U_ASSERT(c != U_SENTINEL); | 1178 U_ASSERT(c != U_SENTINEL); |
1177 inString->append(c); | 1179 inString.append(c); |
1178 while (inputMap->size() < inString->length()) { | 1180 while (inputMap->size() < inString.length()) { |
1179 inputMap->addElement(nativePosition, status); | 1181 inputMap->addElement(nativePosition, status); |
1180 } | 1182 } |
1181 } | 1183 } |
1182 inputMap->addElement(limit, status); | 1184 inputMap->addElement(limit, status); |
1183 } | 1185 } |
1184 | 1186 |
1185 | 1187 |
1186 if (!nfkcNorm2->isNormalized(*inString, status)) { | 1188 if (!nfkcNorm2->isNormalized(inString, status)) { |
1187 UnicodeString *normalizedInput = new UnicodeString(); | 1189 UnicodeString normalizedInput; |
1188 // normalizedMap[normalizedInput position] == original UText position. | 1190 // normalizedMap[normalizedInput position] == original UText position. |
1189 UVector32 *normalizedMap = new UVector32(status); | 1191 LocalPointer<UVector32> normalizedMap(new UVector32(status), status); |
1190 if (U_FAILURE(status)) { | 1192 if (U_FAILURE(status)) { |
1191 return 0; | 1193 return 0; |
1192 } | 1194 } |
1193 | 1195 |
1194 UnicodeString fragment; | 1196 UnicodeString fragment; |
1195 UnicodeString normalizedFragment; | 1197 UnicodeString normalizedFragment; |
1196 for (int32_t srcI = 0; srcI < inString->length();) { //
Once per normalization chunk | 1198 for (int32_t srcI = 0; srcI < inString.length();) { // Once per normali
zation chunk |
1197 fragment.remove(); | 1199 fragment.remove(); |
1198 int32_t fragmentStartI = srcI; | 1200 int32_t fragmentStartI = srcI; |
1199 UChar32 c = inString->char32At(srcI); | 1201 UChar32 c = inString.char32At(srcI); |
1200 for (;;) { | 1202 for (;;) { |
1201 fragment.append(c); | 1203 fragment.append(c); |
1202 srcI = inString->moveIndex32(srcI, 1); | 1204 srcI = inString.moveIndex32(srcI, 1); |
1203 if (srcI == inString->length()) { | 1205 if (srcI == inString.length()) { |
1204 break; | 1206 break; |
1205 } | 1207 } |
1206 c = inString->char32At(srcI); | 1208 c = inString.char32At(srcI); |
1207 if (nfkcNorm2->hasBoundaryBefore(c)) { | 1209 if (nfkcNorm2->hasBoundaryBefore(c)) { |
1208 break; | 1210 break; |
1209 } | 1211 } |
1210 } | 1212 } |
1211 nfkcNorm2->normalize(fragment, normalizedFragment, status); | 1213 nfkcNorm2->normalize(fragment, normalizedFragment, status); |
1212 normalizedInput->append(normalizedFragment); | 1214 normalizedInput.append(normalizedFragment); |
1213 | 1215 |
1214 // Map every position in the normalized chunk to the start of the ch
unk | 1216 // Map every position in the normalized chunk to the start of the ch
unk |
1215 // in the original input. | 1217 // in the original input. |
1216 int32_t fragmentOriginalStart = inputMap? inputMap->elementAti(fragm
entStartI) : fragmentStartI+rangeStart; | 1218 int32_t fragmentOriginalStart = inputMap.isValid() ? |
1217 while (normalizedMap->size() < normalizedInput->length()) { | 1219 inputMap->elementAti(fragmentStartI) : fragmentStartI+rangeS
tart; |
| 1220 while (normalizedMap->size() < normalizedInput.length()) { |
1218 normalizedMap->addElement(fragmentOriginalStart, status); | 1221 normalizedMap->addElement(fragmentOriginalStart, status); |
1219 if (U_FAILURE(status)) { | 1222 if (U_FAILURE(status)) { |
1220 break; | 1223 break; |
1221 } | 1224 } |
1222 } | 1225 } |
1223 } | 1226 } |
1224 U_ASSERT(normalizedMap->size() == normalizedInput->length()); | 1227 U_ASSERT(normalizedMap->size() == normalizedInput.length()); |
1225 int32_t nativeEnd = inputMap? inputMap->elementAti(inString->length()) :
inString->length()+rangeStart; | 1228 int32_t nativeEnd = inputMap.isValid() ? |
| 1229 inputMap->elementAti(inString.length()) : inString.length()+rang
eStart; |
1226 normalizedMap->addElement(nativeEnd, status); | 1230 normalizedMap->addElement(nativeEnd, status); |
1227 | 1231 |
1228 delete inputMap; | 1232 inputMap.moveFrom(normalizedMap); |
1229 inputMap = normalizedMap; | 1233 inString.moveFrom(normalizedInput); |
1230 delete inString; | |
1231 inString = normalizedInput; | |
1232 } | 1234 } |
1233 | 1235 |
1234 int32_t numCodePts = inString->countChar32(); | 1236 int32_t numCodePts = inString.countChar32(); |
1235 if (numCodePts != inString->length()) { | 1237 if (numCodePts != inString.length()) { |
1236 // There are supplementary characters in the input. | 1238 // There are supplementary characters in the input. |
1237 // The dictionary will produce boundary positions in terms of code point
indexes, | 1239 // The dictionary will produce boundary positions in terms of code point
indexes, |
1238 // not in terms of code unit string indexes. | 1240 // not in terms of code unit string indexes. |
1239 // Use the inputMap mechanism to take care of this in addition to indexi
ng differences | 1241 // Use the inputMap mechanism to take care of this in addition to indexi
ng differences |
1240 // from normalization and/or UTF-8 input. | 1242 // from normalization and/or UTF-8 input. |
1241 UBool hadExistingMap = (inputMap != NULL); | 1243 UBool hadExistingMap = inputMap.isValid(); |
1242 if (!hadExistingMap) { | 1244 if (!hadExistingMap) { |
1243 inputMap = new UVector32(status); | 1245 inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status
); |
| 1246 if (U_FAILURE(status)) { |
| 1247 return 0; |
| 1248 } |
1244 } | 1249 } |
1245 int32_t cpIdx = 0; | 1250 int32_t cpIdx = 0; |
1246 for (int32_t cuIdx = 0; ; cuIdx = inString->moveIndex32(cuIdx, 1)) { | 1251 for (int32_t cuIdx = 0; ; cuIdx = inString.moveIndex32(cuIdx, 1)) { |
1247 U_ASSERT(cuIdx >= cpIdx); | 1252 U_ASSERT(cuIdx >= cpIdx); |
1248 if (hadExistingMap) { | 1253 if (hadExistingMap) { |
1249 inputMap->setElementAt(inputMap->elementAti(cuIdx), cpIdx); | 1254 inputMap->setElementAt(inputMap->elementAti(cuIdx), cpIdx); |
1250 } else { | 1255 } else { |
1251 inputMap->addElement(cuIdx+rangeStart, status); | 1256 inputMap->addElement(cuIdx+rangeStart, status); |
1252 } | 1257 } |
1253 cpIdx++; | 1258 cpIdx++; |
1254 if (cuIdx == inString->length()) { | 1259 if (cuIdx == inString.length()) { |
1255 break; | 1260 break; |
1256 } | 1261 } |
1257 } | 1262 } |
1258 } | 1263 } |
1259 | 1264 |
1260 // bestSnlp[i] is the snlp of the best segmentation of the first i | 1265 // bestSnlp[i] is the snlp of the best segmentation of the first i |
1261 // code points in the range to be matched. | 1266 // code points in the range to be matched. |
1262 UVector32 bestSnlp(numCodePts + 1, status); | 1267 UVector32 bestSnlp(numCodePts + 1, status); |
1263 bestSnlp.addElement(0, status); | 1268 bestSnlp.addElement(0, status); |
1264 for(int32_t i = 1; i <= numCodePts; i++) { | 1269 for(int32_t i = 1; i <= numCodePts; i++) { |
1265 bestSnlp.addElement(kuint32max, status); | 1270 bestSnlp.addElement(kuint32max, status); |
1266 } | 1271 } |
1267 | 1272 |
1268 | 1273 |
1269 // prev[i] is the index of the last CJK code point in the previous word in | 1274 // prev[i] is the index of the last CJK code point in the previous word in |
1270 // the best segmentation of the first i characters. | 1275 // the best segmentation of the first i characters. |
1271 UVector32 prev(numCodePts + 1, status); | 1276 UVector32 prev(numCodePts + 1, status); |
1272 for(int32_t i = 0; i <= numCodePts; i++){ | 1277 for(int32_t i = 0; i <= numCodePts; i++){ |
1273 prev.addElement(-1, status); | 1278 prev.addElement(-1, status); |
1274 } | 1279 } |
1275 | 1280 |
1276 const int32_t maxWordSize = 20; | 1281 const int32_t maxWordSize = 20; |
1277 UVector32 values(numCodePts, status); | 1282 UVector32 values(numCodePts, status); |
1278 values.setSize(numCodePts); | 1283 values.setSize(numCodePts); |
1279 UVector32 lengths(numCodePts, status); | 1284 UVector32 lengths(numCodePts, status); |
1280 lengths.setSize(numCodePts); | 1285 lengths.setSize(numCodePts); |
1281 | 1286 |
1282 UText fu = UTEXT_INITIALIZER; | 1287 UText fu = UTEXT_INITIALIZER; |
1283 utext_openUnicodeString(&fu, inString, &status); | 1288 utext_openUnicodeString(&fu, &inString, &status); |
1284 | 1289 |
1285 // Dynamic programming to find the best segmentation. | 1290 // Dynamic programming to find the best segmentation. |
1286 | 1291 |
1287 // In outer loop, i is the code point index, | 1292 // In outer loop, i is the code point index, |
1288 // ix is the corresponding string (code unit) index. | 1293 // ix is the corresponding string (code unit) index. |
1289 // They differ when the string contains supplementary characters. | 1294 // They differ when the string contains supplementary characters. |
1290 int32_t ix = 0; | 1295 int32_t ix = 0; |
1291 for (int32_t i = 0; i < numCodePts; ++i, ix = inString->moveIndex32(ix, 1)
) { | 1296 for (int32_t i = 0; i < numCodePts; ++i, ix = inString.moveIndex32(ix, 1))
{ |
1292 if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) { | 1297 if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) { |
1293 continue; | 1298 continue; |
1294 } | 1299 } |
1295 | 1300 |
1296 int32_t count; | 1301 int32_t count; |
1297 utext_setNativeIndex(&fu, ix); | 1302 utext_setNativeIndex(&fu, ix); |
1298 count = fDictionary->matches(&fu, maxWordSize, numCodePts, | 1303 count = fDictionary->matches(&fu, maxWordSize, numCodePts, |
1299 NULL, lengths.getBuffer(), values.getBuffer(), NULL
); | 1304 NULL, lengths.getBuffer(), values.getBuffer(), NULL
); |
1300 // Note: lengths is filled with code point lengths | 1305 // Note: lengths is filled with code point lengths |
1301 // The NULL parameter is the ignored code uni
t lengths. | 1306 // The NULL parameter is the ignored code uni
t lengths. |
1302 | 1307 |
1303 // if there are no single character matches found in the dictionary | 1308 // if there are no single character matches found in the dictionary |
1304 // starting with this character, treat character as a 1-character word | 1309 // starting with this character, treat character as a 1-character word |
1305 // with the highest value possible, i.e. the least likely to occur. | 1310 // with the highest value possible, i.e. the least likely to occur. |
1306 // Exclude Korean characters from this treatment, as they should be left | 1311 // Exclude Korean characters from this treatment, as they should be left |
1307 // together by default. | 1312 // together by default. |
1308 if ((count == 0 || lengths.elementAti(0) != 1) && | 1313 if ((count == 0 || lengths.elementAti(0) != 1) && |
1309 !fHangulWordSet.contains(inString->char32At(ix))) { | 1314 !fHangulWordSet.contains(inString.char32At(ix))) { |
1310 values.setElementAt(maxSnlp, count); // 255 | 1315 values.setElementAt(maxSnlp, count); // 255 |
1311 lengths.setElementAt(1, count++); | 1316 lengths.setElementAt(1, count++); |
1312 } | 1317 } |
1313 | 1318 |
1314 for (int32_t j = 0; j < count; j++) { | 1319 for (int32_t j = 0; j < count; j++) { |
1315 uint32_t newSnlp = (uint32_t)bestSnlp.elementAti(i) + (uint32_t)valu
es.elementAti(j); | 1320 uint32_t newSnlp = (uint32_t)bestSnlp.elementAti(i) + (uint32_t)valu
es.elementAti(j); |
1316 int32_t ln_j_i = lengths.elementAti(j) + i; | 1321 int32_t ln_j_i = lengths.elementAti(j) + i; |
1317 if (newSnlp < (uint32_t)bestSnlp.elementAti(ln_j_i)) { | 1322 if (newSnlp < (uint32_t)bestSnlp.elementAti(ln_j_i)) { |
1318 bestSnlp.setElementAt(newSnlp, ln_j_i); | 1323 bestSnlp.setElementAt(newSnlp, ln_j_i); |
1319 prev.setElementAt(i, ln_j_i); | 1324 prev.setElementAt(i, ln_j_i); |
1320 } | 1325 } |
1321 } | 1326 } |
1322 | 1327 |
1323 // In Japanese, | 1328 // In Japanese, |
1324 // Katakana word in single character is pretty rare. So we apply | 1329 // Katakana word in single character is pretty rare. So we apply |
1325 // the following heuristic to Katakana: any continuous run of Katakana | 1330 // the following heuristic to Katakana: any continuous run of Katakana |
1326 // characters is considered a candidate word with a default cost | 1331 // characters is considered a candidate word with a default cost |
1327 // specified in the katakanaCost table according to its length. | 1332 // specified in the katakanaCost table according to its length. |
1328 | 1333 |
1329 bool is_prev_katakana = false; | 1334 bool is_prev_katakana = false; |
1330 bool is_katakana = isKatakana(inString->char32At(ix)); | 1335 bool is_katakana = isKatakana(inString.char32At(ix)); |
1331 int32_t katakanaRunLength = 1; | 1336 int32_t katakanaRunLength = 1; |
1332 if (!is_prev_katakana && is_katakana) { | 1337 if (!is_prev_katakana && is_katakana) { |
1333 int32_t j = inString->moveIndex32(ix, 1); | 1338 int32_t j = inString.moveIndex32(ix, 1); |
1334 // Find the end of the continuous run of Katakana characters | 1339 // Find the end of the continuous run of Katakana characters |
1335 while (j < inString->length() && katakanaRunLength < kMaxKatakanaGro
upLength && | 1340 while (j < inString.length() && katakanaRunLength < kMaxKatakanaGrou
pLength && |
1336 isKatakana(inString->char32At(j))) { | 1341 isKatakana(inString.char32At(j))) { |
1337 j = inString->moveIndex32(j, 1); | 1342 j = inString.moveIndex32(j, 1); |
1338 katakanaRunLength++; | 1343 katakanaRunLength++; |
1339 } | 1344 } |
1340 if (katakanaRunLength < kMaxKatakanaGroupLength) { | 1345 if (katakanaRunLength < kMaxKatakanaGroupLength) { |
1341 uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(kata
kanaRunLength); | 1346 uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(kata
kanaRunLength); |
1342 if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) { | 1347 if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) { |
1343 bestSnlp.setElementAt(newSnlp, j); | 1348 bestSnlp.setElementAt(newSnlp, j); |
1344 prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i; | 1349 prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i; |
1345 } | 1350 } |
1346 } | 1351 } |
1347 } | 1352 } |
(...skipping 25 matching lines...) Expand all Loading... |
1373 if (foundBreaks.size() == 0 || foundBreaks.peeki() < rangeStart) { | 1378 if (foundBreaks.size() == 0 || foundBreaks.peeki() < rangeStart) { |
1374 t_boundary.addElement(0, status); | 1379 t_boundary.addElement(0, status); |
1375 numBreaks++; | 1380 numBreaks++; |
1376 } | 1381 } |
1377 | 1382 |
1378 // Now that we're done, convert positions in t_boundary[] (indices in | 1383 // Now that we're done, convert positions in t_boundary[] (indices in |
1379 // the normalized input string) back to indices in the original input UText | 1384 // the normalized input string) back to indices in the original input UText |
1380 // while reversing t_boundary and pushing values to foundBreaks. | 1385 // while reversing t_boundary and pushing values to foundBreaks. |
1381 for (int32_t i = numBreaks-1; i >= 0; i--) { | 1386 for (int32_t i = numBreaks-1; i >= 0; i--) { |
1382 int32_t cpPos = t_boundary.elementAti(i); | 1387 int32_t cpPos = t_boundary.elementAti(i); |
1383 int32_t utextPos = inputMap ? inputMap->elementAti(cpPos) : cpPos + ran
geStart; | 1388 int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : c
pPos + rangeStart; |
1384 // Boundaries are added to foundBreaks output in ascending order. | 1389 // Boundaries are added to foundBreaks output in ascending order. |
1385 U_ASSERT(foundBreaks.size() == 0 ||foundBreaks.peeki() < utextPos); | 1390 U_ASSERT(foundBreaks.size() == 0 ||foundBreaks.peeki() < utextPos); |
1386 foundBreaks.push(utextPos, status); | 1391 foundBreaks.push(utextPos, status); |
1387 } | 1392 } |
1388 | 1393 |
1389 delete inString; | 1394 // inString goes out of scope |
1390 delete inputMap; | 1395 // inputMap goes out of scope |
1391 return numBreaks; | 1396 return numBreaks; |
1392 } | 1397 } |
1393 #endif | 1398 #endif |
1394 | 1399 |
1395 U_NAMESPACE_END | 1400 U_NAMESPACE_END |
1396 | 1401 |
1397 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1402 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
1398 | 1403 |
OLD | NEW |