OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * | |
4 * Copyright (C) 2001-2008, International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 ******************************************************************************* | |
8 * file name: ucol_cnt.cpp | |
9 * encoding: US-ASCII | |
10 * tab size: 8 (not used) | |
11 * indentation:4 | |
12 * | |
13 * created 02/22/2001 | |
14 * created by: Vladimir Weinstein | |
15 * | |
16 * This module maintains a contraction table structure in expanded form | |
17 * and provides means to flatten this structure | |
18 * | |
19 */ | |
20 | |
21 #include "unicode/utypes.h" | |
22 | |
23 #if !UCONFIG_NO_COLLATION | |
24 | |
25 #include "unicode/uchar.h" | |
26 #include "ucol_cnt.h" | |
27 #include "cmemory.h" | |
28 | |
29 static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) { | |
30 if(tbl->position == tbl->size) { | |
31 uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeo
f(uint32_t)); | |
32 if(newData == NULL) { | |
33 *status = U_MEMORY_ALLOCATION_ERROR; | |
34 return; | |
35 } | |
36 UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeo
f(UChar)); | |
37 if(newCPs == NULL) { | |
38 uprv_free(newData); | |
39 *status = U_MEMORY_ALLOCATION_ERROR; | |
40 return; | |
41 } | |
42 tbl->CEs = newData; | |
43 tbl->codePoints = newCPs; | |
44 tbl->size *= 2; | |
45 } | |
46 } | |
47 | |
48 U_CAPI CntTable* U_EXPORT2 | |
49 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/ | |
50 uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) { | |
51 if(U_FAILURE(*status)) { | |
52 return 0; | |
53 } | |
54 CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable)); | |
55 if(tbl == NULL) { | |
56 *status = U_MEMORY_ALLOCATION_ERROR; | |
57 return NULL; | |
58 } | |
59 tbl->mapping = mapping; | |
60 tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(
ContractionTable *)); | |
61 if(tbl->elements == NULL) { | |
62 *status = U_MEMORY_ALLOCATION_ERROR; | |
63 uprv_free(tbl); | |
64 return NULL; | |
65 } | |
66 tbl->capacity = INIT_EXP_TABLE_SIZE; | |
67 uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *)
); | |
68 tbl->size = 0; | |
69 tbl->position = 0; | |
70 tbl->CEs = NULL; | |
71 tbl->codePoints = NULL; | |
72 tbl->offsets = NULL; | |
73 tbl->currentTag = NOT_FOUND_TAG; | |
74 return tbl; | |
75 } | |
76 | |
77 static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UError
Code *status) { | |
78 ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTab
le)); | |
79 if(el == NULL) { | |
80 goto outOfMemory; | |
81 } | |
82 el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t)); | |
83 if(el->CEs == NULL) { | |
84 goto outOfMemory; | |
85 } | |
86 | |
87 el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar)); | |
88 if(el->codePoints == NULL) { | |
89 uprv_free(el->CEs); | |
90 goto outOfMemory; | |
91 } | |
92 | |
93 el->position = 0; | |
94 el->size = INIT_EXP_TABLE_SIZE; | |
95 uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t)); | |
96 uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar)); | |
97 | |
98 table->elements[table->size] = el; | |
99 | |
100 //uhash_put(table->elements, (void *)table->size, el, status); | |
101 | |
102 *key = table->size++; | |
103 | |
104 if(table->size == table->capacity) { | |
105 ContractionTable **newElements = (ContractionTable **)uprv_malloc(table-
>capacity*2*sizeof(ContractionTable *)); | |
106 // do realloc | |
107 /* table->elements = (ContractionTable **)realloc(table->elements
, table->capacity*2*sizeof(ContractionTable *));*/ | |
108 if(newElements == NULL) { | |
109 uprv_free(el->codePoints); | |
110 uprv_free(el->CEs); | |
111 goto outOfMemory; | |
112 } | |
113 ContractionTable **oldElements = table->elements; | |
114 uprv_memcpy(newElements, oldElements, table->capacity*sizeof(Contraction
Table *)); | |
115 uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(Contr
actionTable *)); | |
116 table->capacity *= 2; | |
117 table->elements = newElements; | |
118 uprv_free(oldElements); | |
119 } | |
120 | |
121 return el; | |
122 | |
123 outOfMemory: | |
124 *status = U_MEMORY_ALLOCATION_ERROR; | |
125 if (el) uprv_free(el); | |
126 return NULL; | |
127 } | |
128 | |
129 U_CAPI int32_t U_EXPORT2 | |
130 uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *sta
tus) { | |
131 int32_t i = 0, j = 0; | |
132 if(U_FAILURE(*status) || table->size == 0) { | |
133 return 0; | |
134 } | |
135 | |
136 table->position = 0; | |
137 | |
138 if(table->offsets != NULL) { | |
139 uprv_free(table->offsets); | |
140 } | |
141 table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t)); | |
142 if(table->offsets == NULL) { | |
143 *status = U_MEMORY_ALLOCATION_ERROR; | |
144 return 0; | |
145 } | |
146 | |
147 | |
148 /* See how much memory we need */ | |
149 for(i = 0; i<table->size; i++) { | |
150 table->offsets[i] = table->position+mainOffset; | |
151 table->position += table->elements[i]->position; | |
152 } | |
153 | |
154 /* Allocate it */ | |
155 if(table->CEs != NULL) { | |
156 uprv_free(table->CEs); | |
157 } | |
158 table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t)); | |
159 if(table->CEs == NULL) { | |
160 *status = U_MEMORY_ALLOCATION_ERROR; | |
161 uprv_free(table->offsets); | |
162 table->offsets = NULL; | |
163 return 0; | |
164 } | |
165 uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t)); | |
166 | |
167 if(table->codePoints != NULL) { | |
168 uprv_free(table->codePoints); | |
169 } | |
170 table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar)); | |
171 if(table->codePoints == NULL) { | |
172 *status = U_MEMORY_ALLOCATION_ERROR; | |
173 uprv_free(table->offsets); | |
174 table->offsets = NULL; | |
175 uprv_free(table->CEs); | |
176 table->CEs = NULL; | |
177 return 0; | |
178 } | |
179 uprv_memset(table->codePoints, '?', table->position*sizeof(UChar)); | |
180 | |
181 /* Now stuff the things in*/ | |
182 | |
183 UChar *cpPointer = table->codePoints; | |
184 uint32_t *CEPointer = table->CEs; | |
185 for(i = 0; i<table->size; i++) { | |
186 int32_t size = table->elements[i]->position; | |
187 uint8_t ccMax = 0, ccMin = 255, cc = 0; | |
188 for(j = 1; j<size; j++) { | |
189 cc = u_getCombiningClass(table->elements[i]->codePoints[j]); | |
190 if(cc>ccMax) { | |
191 ccMax = cc; | |
192 } | |
193 if(cc<ccMin) { | |
194 ccMin = cc; | |
195 } | |
196 *(cpPointer+j) = table->elements[i]->codePoints[j]; | |
197 } | |
198 *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax; | |
199 | |
200 uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t)); | |
201 for(j = 0; j<size; j++) { | |
202 if(isCntTableElement(*(CEPointer+j))) { | |
203 *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), t
able->offsets[getContractOffset(*(CEPointer+j))]); | |
204 } | |
205 } | |
206 cpPointer += size; | |
207 CEPointer += size; | |
208 } | |
209 | |
210 // TODO: this one apparently updates the contraction CEs to point to a real
address (relative to the | |
211 // start of the flat file). However, what is done below is just wrong and it
affects building of | |
212 // tailorings that have constructions in a bad way. At least, one should enu
merate the trie. Also, | |
213 // keeping a list of code points that are contractions might be smart, altho
ugh I'm not sure if it's | |
214 // feasible. | |
215 uint32_t CE; | |
216 for(i = 0; i<=0x10FFFF; i++) { | |
217 /*CE = ucmpe32_get(table->mapping, i);*/ | |
218 CE = utrie_get32(table->mapping, i, NULL); | |
219 if(isCntTableElement(CE)) { | |
220 CE = constructContractCE(getCETag(CE), table->offsets[getContractOff
set(CE)]); | |
221 /*ucmpe32_set(table->mapping, i, CE);*/ | |
222 utrie_set32(table->mapping, i, CE); | |
223 } | |
224 } | |
225 | |
226 | |
227 return table->position; | |
228 } | |
229 | |
230 static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErro
rCode *status) { | |
231 ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTabl
e)); | |
232 if(r == NULL) { | |
233 goto outOfMemory; | |
234 } | |
235 | |
236 r->position = t->position; | |
237 r->size = t->size; | |
238 | |
239 r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size); | |
240 if(r->codePoints == NULL) { | |
241 goto outOfMemory; | |
242 } | |
243 r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size); | |
244 if(r->CEs == NULL) { | |
245 uprv_free(r->codePoints); | |
246 goto outOfMemory; | |
247 } | |
248 uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size); | |
249 uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size); | |
250 | |
251 return r; | |
252 | |
253 outOfMemory: | |
254 *status = U_MEMORY_ALLOCATION_ERROR; | |
255 if (r) uprv_free(r); | |
256 return NULL; | |
257 } | |
258 | |
259 U_CAPI CntTable* U_EXPORT2 | |
260 uprv_cnttab_clone(CntTable *t, UErrorCode *status) { | |
261 if(U_FAILURE(*status)) { | |
262 return NULL; | |
263 } | |
264 int32_t i = 0; | |
265 CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable)); | |
266 /* test for NULL */ | |
267 if (r == NULL) { | |
268 goto outOfMemory; | |
269 } | |
270 r->position = t->position; | |
271 r->size = t->size; | |
272 r->capacity = t->capacity; | |
273 | |
274 r->mapping = t->mapping; | |
275 | |
276 r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(Contractio
nTable *)); | |
277 /* test for NULL */ | |
278 if (r->elements == NULL) { | |
279 goto outOfMemory; | |
280 } | |
281 //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable
*)); | |
282 | |
283 for(i = 0; i<t->size; i++) { | |
284 r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status); | |
285 } | |
286 | |
287 if(t->CEs != NULL) { | |
288 r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t)); | |
289 /* test for NULL */ | |
290 if (r->CEs == NULL) { | |
291 uprv_free(r->elements); | |
292 goto outOfMemory; | |
293 } | |
294 uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t)); | |
295 } else { | |
296 r->CEs = NULL; | |
297 } | |
298 | |
299 if(t->codePoints != NULL) { | |
300 r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar)); | |
301 /* test for NULL */ | |
302 if (r->codePoints == NULL) { | |
303 uprv_free(r->CEs); | |
304 uprv_free(r->elements); | |
305 goto outOfMemory; | |
306 } | |
307 uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar)); | |
308 } else { | |
309 r->codePoints = NULL; | |
310 } | |
311 | |
312 if(t->offsets != NULL) { | |
313 r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t)); | |
314 /* test for NULL */ | |
315 if (r->offsets == NULL) { | |
316 uprv_free(r->codePoints); | |
317 uprv_free(r->CEs); | |
318 uprv_free(r->elements); | |
319 goto outOfMemory; | |
320 } | |
321 uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t)); | |
322 } else { | |
323 r->offsets = NULL; | |
324 } | |
325 | |
326 return r; | |
327 | |
328 outOfMemory: | |
329 *status = U_MEMORY_ALLOCATION_ERROR; | |
330 if (r) uprv_free(r); | |
331 return NULL; | |
332 } | |
333 | |
334 U_CAPI void U_EXPORT2 | |
335 uprv_cnttab_close(CntTable *table) { | |
336 int32_t i = 0; | |
337 for(i = 0; i<table->size; i++) { | |
338 uprv_free(table->elements[i]->CEs); | |
339 uprv_free(table->elements[i]->codePoints); | |
340 uprv_free(table->elements[i]); | |
341 } | |
342 uprv_free(table->elements); | |
343 uprv_free(table->CEs); | |
344 uprv_free(table->offsets); | |
345 uprv_free(table->codePoints); | |
346 uprv_free(table); | |
347 } | |
348 | |
349 /* this is for adding non contractions */ | |
350 U_CAPI uint32_t U_EXPORT2 | |
351 uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErr
orCode *status) { | |
352 element &= 0xFFFFFF; | |
353 | |
354 ContractionTable *tbl = NULL; | |
355 if(U_FAILURE(*status)) { | |
356 return 0; | |
357 } | |
358 | |
359 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { | |
360 return 0; | |
361 } | |
362 | |
363 tbl->CEs[tbl->position-1] = value; | |
364 | |
365 return(constructContractCE(table->currentTag, element)); | |
366 } | |
367 | |
368 | |
369 /* inserts a part of contraction sequence in table. Sequences behind the offset
are moved back. If element is non existent, it creates on. Returns element handl
e */ | |
370 U_CAPI uint32_t U_EXPORT2 | |
371 uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint
, uint32_t value, UErrorCode *status) { | |
372 | |
373 ContractionTable *tbl = NULL; | |
374 | |
375 if(U_FAILURE(*status)) { | |
376 return 0; | |
377 } | |
378 element &= 0xFFFFFF; | |
379 | |
380 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { | |
381 tbl = addATableElement(table, &element, status); | |
382 if (U_FAILURE(*status)) { | |
383 return 0; | |
384 } | |
385 } | |
386 | |
387 uprv_growTable(tbl, status); | |
388 | |
389 uint32_t offset = 0; | |
390 | |
391 | |
392 while(tbl->codePoints[offset] < codePoint && offset<tbl->position) { | |
393 offset++; | |
394 } | |
395 | |
396 uint32_t i = tbl->position; | |
397 for(i = tbl->position; i > offset; i--) { | |
398 tbl->CEs[i] = tbl->CEs[i-1]; | |
399 tbl->codePoints[i] = tbl->codePoints[i-1]; | |
400 } | |
401 | |
402 tbl->CEs[offset] = value; | |
403 tbl->codePoints[offset] = codePoint; | |
404 | |
405 tbl->position++; | |
406 | |
407 return(constructContractCE(table->currentTag, element)); | |
408 } | |
409 | |
410 | |
411 /* adds more contractions in table. If element is non existant, it creates on. R
eturns element handle */ | |
412 U_CAPI uint32_t U_EXPORT2 | |
413 uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, u
int32_t value, UErrorCode *status) { | |
414 | |
415 element &= 0xFFFFFF; | |
416 | |
417 ContractionTable *tbl = NULL; | |
418 | |
419 if(U_FAILURE(*status)) { | |
420 return 0; | |
421 } | |
422 | |
423 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { | |
424 tbl = addATableElement(table, &element, status); | |
425 if (U_FAILURE(*status)) { | |
426 return 0; | |
427 } | |
428 } | |
429 | |
430 uprv_growTable(tbl, status); | |
431 | |
432 tbl->CEs[tbl->position] = value; | |
433 tbl->codePoints[tbl->position] = codePoint; | |
434 | |
435 tbl->position++; | |
436 | |
437 return(constructContractCE(table->currentTag, element)); | |
438 } | |
439 | |
440 /* sets a part of contraction sequence in table. If element is non existant, it
creates on. Returns element handle */ | |
441 U_CAPI uint32_t U_EXPORT2 | |
442 uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, U
Char codePoint, uint32_t value, UErrorCode *status) { | |
443 | |
444 element &= 0xFFFFFF; | |
445 ContractionTable *tbl = NULL; | |
446 | |
447 if(U_FAILURE(*status)) { | |
448 return 0; | |
449 } | |
450 | |
451 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { | |
452 tbl = addATableElement(table, &element, status); | |
453 if (U_FAILURE(*status)) { | |
454 return 0; | |
455 } | |
456 | |
457 } | |
458 | |
459 if(offset >= tbl->size) { | |
460 *status = U_INDEX_OUTOFBOUNDS_ERROR; | |
461 return 0; | |
462 } | |
463 tbl->CEs[offset] = value; | |
464 tbl->codePoints[offset] = codePoint; | |
465 | |
466 //return(offset); | |
467 return(constructContractCE(table->currentTag, element)); | |
468 } | |
469 | |
470 static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t e
lement) { | |
471 element &= 0xFFFFFF; | |
472 ContractionTable *tbl = NULL; | |
473 | |
474 if(element != 0xFFFFFF) { | |
475 tbl = table->elements[element]; /* This could also return NULL */ | |
476 } | |
477 return tbl; | |
478 } | |
479 | |
480 static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) { | |
481 uint32_t position = 0; | |
482 if(tbl == NULL) { | |
483 return -1; | |
484 } | |
485 | |
486 while(codePoint > tbl->codePoints[position]) { | |
487 position++; | |
488 if(position > tbl->position) { | |
489 return -1; | |
490 } | |
491 } | |
492 if (codePoint == tbl->codePoints[position]) { | |
493 return position; | |
494 } else { | |
495 return -1; | |
496 } | |
497 } | |
498 | |
499 static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) { | |
500 if(tbl == NULL) { | |
501 return UCOL_NOT_FOUND; | |
502 } | |
503 if((uint32_t)position > tbl->position || position == -1) { | |
504 return UCOL_NOT_FOUND; | |
505 } else { | |
506 return tbl->CEs[position]; | |
507 } | |
508 } | |
509 | |
510 U_CAPI int32_t U_EXPORT2 | |
511 uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCod
e *status) { | |
512 | |
513 if(U_FAILURE(*status)) { | |
514 return 0; | |
515 } | |
516 | |
517 return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint
); | |
518 } | |
519 | |
520 U_CAPI uint32_t U_EXPORT2 | |
521 uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCo
de *status) { | |
522 if(U_FAILURE(*status)) { | |
523 return UCOL_NOT_FOUND; | |
524 } | |
525 | |
526 return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position))
; | |
527 } | |
528 | |
529 U_CAPI uint32_t U_EXPORT2 | |
530 uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCod
e *status) { | |
531 if(U_FAILURE(*status)) { | |
532 return UCOL_NOT_FOUND; | |
533 } | |
534 ContractionTable *tbl = _cnttab_getContractionTable(table, element); | |
535 return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint)); | |
536 } | |
537 | |
538 U_CAPI UBool U_EXPORT2 | |
539 uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErro
rCode *status) { | |
540 if(U_FAILURE(*status)) { | |
541 return FALSE; | |
542 } | |
543 | |
544 while(*(ztString)!=0) { | |
545 element = uprv_cnttab_findCE(table, element, *(ztString), status); | |
546 if(element == UCOL_NOT_FOUND) { | |
547 return FALSE; | |
548 } | |
549 if(!isCntTableElement(element)) { | |
550 return TRUE; | |
551 } | |
552 ztString++; | |
553 } | |
554 return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUN
D); | |
555 } | |
556 | |
557 U_CAPI uint32_t U_EXPORT2 | |
558 uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint
, uint32_t newCE, UErrorCode *status) { | |
559 | |
560 element &= 0xFFFFFF; | |
561 ContractionTable *tbl = NULL; | |
562 | |
563 if(U_FAILURE(*status)) { | |
564 return 0; | |
565 } | |
566 | |
567 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { | |
568 return 0; | |
569 } | |
570 | |
571 uint32_t position = 0; | |
572 | |
573 while(codePoint > tbl->codePoints[position]) { | |
574 position++; | |
575 if(position > tbl->position) { | |
576 return UCOL_NOT_FOUND; | |
577 } | |
578 } | |
579 if (codePoint == tbl->codePoints[position]) { | |
580 tbl->CEs[position] = newCE; | |
581 return element; | |
582 } else { | |
583 return UCOL_NOT_FOUND; | |
584 } | |
585 } | |
586 | |
587 #endif /* #if !UCONFIG_NO_COLLATION */ | |
OLD | NEW |