| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ********************************************************************** | |
| 3 * Copyright (C) 2002-2014, International Business Machines | |
| 4 * Corporation and others. All Rights Reserved. | |
| 5 ********************************************************************** | |
| 6 * file name: utfperf.cpp | |
| 7 * encoding: US-ASCII | |
| 8 * tab size: 8 (not used) | |
| 9 * indentation:4 | |
| 10 * | |
| 11 * created on: 2005Nov17 | |
| 12 * created by: Raymond Yang | |
| 13 * | |
| 14 * Ported from utfper.c created by Markus W. Scherer | |
| 15 * Performance test program for Unicode converters | |
| 16 */ | |
| 17 | |
| 18 #include <stdio.h> | |
| 19 #include <stdlib.h> | |
| 20 #include "unicode/uperf.h" | |
| 21 #include "cmemory.h" // for UPRV_LENGTHOF | |
| 22 #include "uoptions.h" | |
| 23 | |
| 24 /* definitions and text buffers */ | |
| 25 | |
| 26 #define INPUT_CAPACITY (1024*1024) | |
| 27 #define INTERMEDIATE_CAPACITY 4096 | |
| 28 #define INTERMEDIATE_SMALL_CAPACITY 20 | |
| 29 #define PIVOT_CAPACITY 1024 | |
| 30 #define OUTPUT_CAPACITY INPUT_CAPACITY | |
| 31 | |
| 32 static char utf8[INPUT_CAPACITY]; | |
| 33 static UChar pivot[INTERMEDIATE_CAPACITY]; | |
| 34 | |
| 35 static UChar output[OUTPUT_CAPACITY]; | |
| 36 static char intermediate[OUTPUT_CAPACITY]; | |
| 37 | |
| 38 static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints; | |
| 39 | |
| 40 static int32_t fromUCallbackCount; | |
| 41 | |
| 42 // Command-line options specific to utfperf. | |
| 43 // Options do not have abbreviations: Force readable command lines. | |
| 44 // (Using U+0001 for abbreviation characters.) | |
| 45 enum { | |
| 46 CHARSET, | |
| 47 CHUNK_LENGTH, | |
| 48 PIVOT_LENGTH, | |
| 49 UTFPERF_OPTIONS_COUNT | |
| 50 }; | |
| 51 | |
| 52 static UOption options[UTFPERF_OPTIONS_COUNT]={ | |
| 53 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG), | |
| 54 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG), | |
| 55 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG) | |
| 56 }; | |
| 57 | |
// Usage text for the utfperf-specific options; passed to the UPerfTest
// constructor by UtfPerformanceTest. Bracketed values are the defaults.
static const char *const utfperf_usage =
    // --charset
    "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
    "\t Default: UTF-8\n"
    // --chunk
    "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
    // --pivot
    "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
    "\t [1024]\n";
| 64 | |
| 65 // Test object. | |
| 66 class UtfPerformanceTest : public UPerfTest{ | |
| 67 public: | |
| 68 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) | |
| 69 : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usa
ge, status) { | |
| 70 if (U_SUCCESS(status)) { | |
| 71 charset = options[CHARSET].value; | |
| 72 | |
| 73 chunkLength = atoi(options[CHUNK_LENGTH].value); | |
| 74 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) { | |
| 75 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OU
TPUT_CAPACITY); | |
| 76 status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 77 } | |
| 78 | |
| 79 pivotLength = atoi(options[PIVOT_LENGTH].value); | |
| 80 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) { | |
| 81 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PI
VOT_CAPACITY); | |
| 82 status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 83 } | |
| 84 | |
| 85 int32_t inputLength; | |
| 86 UPerfTest::getBuffer(inputLength, status); | |
| 87 countInputCodePoints = u_countChar32(buffer, bufferLen); | |
| 88 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, buffer
Len, &status); | |
| 89 } | |
| 90 } | |
| 91 | |
| 92 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char*
&name, char* par = NULL); | |
| 93 | |
| 94 const UChar *getBuffer() const { return buffer; } | |
| 95 int32_t getBufferLen() const { return bufferLen; } | |
| 96 | |
| 97 const char *charset; | |
| 98 int32_t chunkLength, pivotLength; | |
| 99 }; | |
| 100 | |
| 101 U_CDECL_BEGIN | |
| 102 // Custom callback for counting callback calls. | |
| 103 static void U_CALLCONV | |
| 104 fromUCallback(const void *context, | |
| 105 UConverterFromUnicodeArgs *fromUArgs, | |
| 106 const UChar *codeUnits, | |
| 107 int32_t length, | |
| 108 UChar32 codePoint, | |
| 109 UConverterCallbackReason reason, | |
| 110 UErrorCode *pErrorCode) { | |
| 111 if (reason <= UCNV_IRREGULAR) { | |
| 112 ++fromUCallbackCount; | |
| 113 } | |
| 114 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codeP
oint, reason, pErrorCode); | |
| 115 } | |
| 116 U_CDECL_END | |
| 117 | |
| 118 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup. | |
| 119 class Command : public UPerfFunction { | |
| 120 protected: | |
| 121 Command(const UtfPerformanceTest &testcase) | |
| 122 : testcase(testcase), | |
| 123 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()), | |
| 124 errorCode(U_ZERO_ERROR) { | |
| 125 cnv=ucnv_open(testcase.charset, &errorCode); | |
| 126 if (U_FAILURE(errorCode)) { | |
| 127 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcas
e.charset, u_errorName(errorCode)); | |
| 128 } | |
| 129 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode); | |
| 130 } | |
| 131 public: | |
| 132 virtual ~Command(){ | |
| 133 if(U_SUCCESS(errorCode)) { | |
| 134 ucnv_close(cnv); | |
| 135 } | |
| 136 } | |
| 137 // virtual void call(UErrorCode* pErrorCode) { ... } | |
| 138 virtual long getOperationsPerIteration(){ | |
| 139 return countInputCodePoints; | |
| 140 } | |
| 141 | |
| 142 const UtfPerformanceTest &testcase; | |
| 143 const UChar *input; | |
| 144 int32_t inputLength; | |
| 145 UErrorCode errorCode; | |
| 146 UConverter *cnv; | |
| 147 }; | |
| 148 | |
| 149 // Test roundtrip UTF-16->encoding->UTF-16. | |
| 150 class Roundtrip : public Command { | |
| 151 protected: | |
| 152 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {} | |
| 153 public: | |
| 154 static UPerfFunction* get(const UtfPerformanceTest &testcase) { | |
| 155 Roundtrip * t = new Roundtrip(testcase); | |
| 156 if (U_SUCCESS(t->errorCode)){ | |
| 157 return t; | |
| 158 } else { | |
| 159 delete t; | |
| 160 return NULL; | |
| 161 } | |
| 162 } | |
| 163 virtual void call(UErrorCode* pErrorCode){ | |
| 164 const UChar *pIn, *pInLimit; | |
| 165 UChar *pOut, *pOutLimit; | |
| 166 char *pInter, *pInterLimit; | |
| 167 const char *p; | |
| 168 UBool flush; | |
| 169 | |
| 170 ucnv_reset(cnv); | |
| 171 fromUCallbackCount=0; | |
| 172 | |
| 173 pIn=input; | |
| 174 pInLimit=input+inputLength; | |
| 175 | |
| 176 pOut=output; | |
| 177 pOutLimit=output+OUTPUT_CAPACITY; | |
| 178 | |
| 179 pInterLimit=intermediate+testcase.chunkLength; | |
| 180 | |
| 181 encodedLength=outputLength=0; | |
| 182 flush=FALSE; | |
| 183 | |
| 184 do { | |
| 185 /* convert a block of [pIn..pInLimit[ to the encoding in intermediat
e[] */ | |
| 186 pInter=intermediate; | |
| 187 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TR
UE, pErrorCode); | |
| 188 encodedLength+=(int32_t)(pInter-intermediate); | |
| 189 | |
| 190 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
| 191 /* make sure that we convert once more to really flush */ | |
| 192 *pErrorCode=U_ZERO_ERROR; | |
| 193 } else if(U_FAILURE(*pErrorCode)) { | |
| 194 return; | |
| 195 } else if(pIn==pInLimit) { | |
| 196 flush=TRUE; | |
| 197 } | |
| 198 | |
| 199 /* convert the block [intermediate..pInter[ back to UTF-16 */ | |
| 200 p=intermediate; | |
| 201 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCo
de); | |
| 202 if(U_FAILURE(*pErrorCode)) { | |
| 203 return; | |
| 204 } | |
| 205 /* intermediate must have been consumed (p==pInter) because of the c
onverter semantics */ | |
| 206 } while(!flush); | |
| 207 | |
| 208 outputLength=pOut-output; | |
| 209 if(inputLength!=outputLength) { | |
| 210 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLeng
th %d\n", inputLength, outputLength); | |
| 211 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
| 212 } | |
| 213 } | |
| 214 }; | |
| 215 | |
| 216 // Test one-way conversion UTF-16->encoding. | |
| 217 class FromUnicode : public Command { | |
| 218 protected: | |
| 219 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {} | |
| 220 public: | |
| 221 static UPerfFunction* get(const UtfPerformanceTest &testcase) { | |
| 222 FromUnicode * t = new FromUnicode(testcase); | |
| 223 if (U_SUCCESS(t->errorCode)){ | |
| 224 return t; | |
| 225 } else { | |
| 226 delete t; | |
| 227 return NULL; | |
| 228 } | |
| 229 } | |
| 230 virtual void call(UErrorCode* pErrorCode){ | |
| 231 const UChar *pIn, *pInLimit; | |
| 232 char *pInter, *pInterLimit; | |
| 233 | |
| 234 ucnv_resetFromUnicode(cnv); | |
| 235 fromUCallbackCount=0; | |
| 236 | |
| 237 pIn=input; | |
| 238 pInLimit=input+inputLength; | |
| 239 | |
| 240 pInterLimit=intermediate+testcase.chunkLength; | |
| 241 | |
| 242 encodedLength=0; | |
| 243 | |
| 244 for(;;) { | |
| 245 pInter=intermediate; | |
| 246 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TR
UE, pErrorCode); | |
| 247 encodedLength+=(int32_t)(pInter-intermediate); | |
| 248 | |
| 249 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
| 250 /* make sure that we convert once more to really flush */ | |
| 251 *pErrorCode=U_ZERO_ERROR; | |
| 252 } else if(U_FAILURE(*pErrorCode)) { | |
| 253 return; | |
| 254 } else { | |
| 255 break; // all done | |
| 256 } | |
| 257 } | |
| 258 } | |
| 259 }; | |
| 260 | |
| 261 // Test one-way conversion UTF-8->encoding. | |
| 262 class FromUTF8 : public Command { | |
| 263 protected: | |
| 264 FromUTF8(const UtfPerformanceTest &testcase) | |
| 265 : Command(testcase), | |
| 266 utf8Cnv(NULL), | |
| 267 input8(utf8), input8Length(utf8Length) { | |
| 268 utf8Cnv=ucnv_open("UTF-8", &errorCode); | |
| 269 } | |
| 270 public: | |
| 271 static UPerfFunction* get(const UtfPerformanceTest &testcase) { | |
| 272 FromUTF8 * t = new FromUTF8(testcase); | |
| 273 if (U_SUCCESS(t->errorCode)){ | |
| 274 return t; | |
| 275 } else { | |
| 276 delete t; | |
| 277 return NULL; | |
| 278 } | |
| 279 } | |
| 280 ~FromUTF8() { | |
| 281 ucnv_close(utf8Cnv); | |
| 282 } | |
| 283 virtual void call(UErrorCode* pErrorCode){ | |
| 284 const char *pIn, *pInLimit; | |
| 285 char *pInter, *pInterLimit; | |
| 286 UChar *pivotSource, *pivotTarget, *pivotLimit; | |
| 287 | |
| 288 ucnv_resetToUnicode(utf8Cnv); | |
| 289 ucnv_resetFromUnicode(cnv); | |
| 290 fromUCallbackCount=0; | |
| 291 | |
| 292 pIn=input8; | |
| 293 pInLimit=input8+input8Length; | |
| 294 | |
| 295 pInterLimit=intermediate+testcase.chunkLength; | |
| 296 | |
| 297 pivotSource=pivotTarget=pivot; | |
| 298 pivotLimit=pivot+testcase.pivotLength; | |
| 299 | |
| 300 encodedLength=0; | |
| 301 | |
| 302 for(;;) { | |
| 303 pInter=intermediate; | |
| 304 ucnv_convertEx(cnv, utf8Cnv, | |
| 305 &pInter, pInterLimit, | |
| 306 &pIn, pInLimit, | |
| 307 pivot, &pivotSource, &pivotTarget, pivotLimit, | |
| 308 FALSE, TRUE, pErrorCode); | |
| 309 encodedLength+=(int32_t)(pInter-intermediate); | |
| 310 | |
| 311 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
| 312 /* make sure that we convert once more to really flush */ | |
| 313 *pErrorCode=U_ZERO_ERROR; | |
| 314 } else if(U_FAILURE(*pErrorCode)) { | |
| 315 return; | |
| 316 } else { | |
| 317 break; // all done | |
| 318 } | |
| 319 } | |
| 320 } | |
| 321 protected: | |
| 322 UConverter *utf8Cnv; | |
| 323 const char *input8; | |
| 324 int32_t input8Length; | |
| 325 }; | |
| 326 | |
| 327 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, con
st char* &name, char* par) { | |
| 328 switch (index) { | |
| 329 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this);
break; | |
| 330 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this)
; break; | |
| 331 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); b
reak; | |
| 332 default: name = ""; break; | |
| 333 } | |
| 334 return NULL; | |
| 335 } | |
| 336 | |
| 337 int main(int argc, const char *argv[]) | |
| 338 { | |
| 339 // Default values for command-line options. | |
| 340 options[CHARSET].value = "UTF-8"; | |
| 341 options[CHUNK_LENGTH].value = "4096"; | |
| 342 options[PIVOT_LENGTH].value = "1024"; | |
| 343 | |
| 344 UErrorCode status = U_ZERO_ERROR; | |
| 345 UtfPerformanceTest test(argc, argv, status); | |
| 346 | |
| 347 if (U_FAILURE(status)){ | |
| 348 printf("The error is %s\n", u_errorName(status)); | |
| 349 test.usage(); | |
| 350 return status; | |
| 351 } | |
| 352 | |
| 353 if (test.run() == FALSE){ | |
| 354 fprintf(stderr, "FAILED: Tests could not be run please check the " | |
| 355 "arguments.\n"); | |
| 356 return -1; | |
| 357 } | |
| 358 | |
| 359 if (fromUCallbackCount > 0) { | |
| 360 printf("Number of fromUnicode callback calls in the last iteration: %ld\
n", (long)fromUCallbackCount); | |
| 361 } | |
| 362 | |
| 363 return 0; | |
| 364 } | |
| OLD | NEW |