OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 2002-2007, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 * file name: utfperf.cpp |
| 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) |
| 9 * indentation:4 |
| 10 * |
| 11 * created on: 2005Nov17 |
| 12 * created by: Raymond Yang |
| 13 * |
| 14 * Ported from utfper.c created by Markus W. Scherer |
| 15 * Performance test program for Unicode converters |
| 16 */ |
| 17 |
| 18 #include <stdio.h> |
| 19 #include <stdlib.h> |
| 20 #include "unicode/uperf.h" |
| 21 #include "uoptions.h" |
| 22 |
| 23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |
| 24 |
| 25 /* definitions and text buffers */ |
| 26 |
| 27 #define INPUT_CAPACITY (1024*1024) |
| 28 #define INTERMEDIATE_CAPACITY 4096 |
| 29 #define INTERMEDIATE_SMALL_CAPACITY 20 |
| 30 #define PIVOT_CAPACITY 1024 |
| 31 #define OUTPUT_CAPACITY INPUT_CAPACITY |
| 32 |
| 33 static char utf8[INPUT_CAPACITY]; |
| 34 static UChar pivot[INTERMEDIATE_CAPACITY]; |
| 35 |
| 36 static UChar output[OUTPUT_CAPACITY]; |
| 37 static char intermediate[OUTPUT_CAPACITY]; |
| 38 |
| 39 static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints; |
| 40 |
| 41 static int32_t fromUCallbackCount; |
| 42 |
// Indexes of the utfperf-specific command-line options in options[].
// The options deliberately have no usable one-letter abbreviation
// (U+0001 is used as the abbreviation character) to force readable command lines.
enum {
    CHARSET = 0,
    CHUNK_LENGTH = 1,
    PIVOT_LENGTH = 2,
    UTFPERF_OPTIONS_COUNT = 3
};
| 52 |
| 53 static UOption options[UTFPERF_OPTIONS_COUNT]={ |
| 54 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG), |
| 55 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG), |
| 56 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG) |
| 57 }; |
| 58 |
// Usage text for the utfperf-specific options; passed to the UPerfTest base class.
// Each literal is kept on a single source line: a string literal must not
// contain a raw line break.
static const char *const utfperf_usage =
    "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
    "\t Default: UTF-8\n"
    "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
    "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
    "\t [1024]\n";
| 65 |
| 66 // Test object. |
| 67 class UtfPerformanceTest : public UPerfTest{ |
| 68 public: |
| 69 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) |
| 70 : UPerfTest(argc, argv, options, LENGTHOF(options), utfperf_usage, s
tatus) { |
| 71 if (U_SUCCESS(status)) { |
| 72 charset = options[CHARSET].value; |
| 73 |
| 74 chunkLength = atoi(options[CHUNK_LENGTH].value); |
| 75 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) { |
| 76 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OU
TPUT_CAPACITY); |
| 77 status = U_ILLEGAL_ARGUMENT_ERROR; |
| 78 } |
| 79 |
| 80 pivotLength = atoi(options[PIVOT_LENGTH].value); |
| 81 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) { |
| 82 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PI
VOT_CAPACITY); |
| 83 status = U_ILLEGAL_ARGUMENT_ERROR; |
| 84 } |
| 85 |
| 86 int32_t inputLength; |
| 87 UPerfTest::getBuffer(inputLength, status); |
| 88 countInputCodePoints = u_countChar32(buffer, bufferLen); |
| 89 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, buffer
Len, &status); |
| 90 } |
| 91 } |
| 92 |
| 93 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char*
&name, char* par = NULL); |
| 94 |
| 95 const UChar *getBuffer() const { return buffer; } |
| 96 int32_t getBufferLen() const { return bufferLen; } |
| 97 |
| 98 const char *charset; |
| 99 int32_t chunkLength, pivotLength; |
| 100 }; |
| 101 |
| 102 U_CDECL_BEGIN |
| 103 // Custom callback for counting callback calls. |
| 104 static void U_CALLCONV |
| 105 fromUCallback(const void *context, |
| 106 UConverterFromUnicodeArgs *fromUArgs, |
| 107 const UChar *codeUnits, |
| 108 int32_t length, |
| 109 UChar32 codePoint, |
| 110 UConverterCallbackReason reason, |
| 111 UErrorCode *pErrorCode) { |
| 112 if (reason <= UCNV_IRREGULAR) { |
| 113 ++fromUCallbackCount; |
| 114 } |
| 115 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codeP
oint, reason, pErrorCode); |
| 116 } |
| 117 U_CDECL_END |
| 118 |
| 119 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup. |
| 120 class Command : public UPerfFunction { |
| 121 protected: |
| 122 Command(const UtfPerformanceTest &testcase) |
| 123 : testcase(testcase), |
| 124 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()), |
| 125 errorCode(U_ZERO_ERROR) { |
| 126 cnv=ucnv_open(testcase.charset, &errorCode); |
| 127 if (U_FAILURE(errorCode)) { |
| 128 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcas
e.charset, u_errorName(errorCode)); |
| 129 } |
| 130 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode); |
| 131 } |
| 132 public: |
| 133 virtual ~Command(){ |
| 134 if(U_SUCCESS(errorCode)) { |
| 135 ucnv_close(cnv); |
| 136 } |
| 137 } |
| 138 // virtual void call(UErrorCode* pErrorCode) { ... } |
| 139 virtual long getOperationsPerIteration(){ |
| 140 return countInputCodePoints; |
| 141 } |
| 142 |
| 143 const UtfPerformanceTest &testcase; |
| 144 const UChar *input; |
| 145 int32_t inputLength; |
| 146 UErrorCode errorCode; |
| 147 UConverter *cnv; |
| 148 }; |
| 149 |
| 150 // Test roundtrip UTF-16->encoding->UTF-16. |
| 151 class Roundtrip : public Command { |
| 152 protected: |
| 153 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {} |
| 154 public: |
| 155 static UPerfFunction* get(const UtfPerformanceTest &testcase) { |
| 156 Roundtrip * t = new Roundtrip(testcase); |
| 157 if (U_SUCCESS(t->errorCode)){ |
| 158 return t; |
| 159 } else { |
| 160 delete t; |
| 161 return NULL; |
| 162 } |
| 163 } |
| 164 virtual void call(UErrorCode* pErrorCode){ |
| 165 const UChar *pIn, *pInLimit; |
| 166 UChar *pOut, *pOutLimit; |
| 167 char *pInter, *pInterLimit; |
| 168 const char *p; |
| 169 UBool flush; |
| 170 |
| 171 ucnv_reset(cnv); |
| 172 fromUCallbackCount=0; |
| 173 |
| 174 pIn=input; |
| 175 pInLimit=input+inputLength; |
| 176 |
| 177 pOut=output; |
| 178 pOutLimit=output+OUTPUT_CAPACITY; |
| 179 |
| 180 pInterLimit=intermediate+testcase.chunkLength; |
| 181 |
| 182 encodedLength=outputLength=0; |
| 183 flush=FALSE; |
| 184 |
| 185 do { |
| 186 /* convert a block of [pIn..pInLimit[ to the encoding in intermediat
e[] */ |
| 187 pInter=intermediate; |
| 188 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TR
UE, pErrorCode); |
| 189 encodedLength+=(int32_t)(pInter-intermediate); |
| 190 |
| 191 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 192 /* make sure that we convert once more to really flush */ |
| 193 *pErrorCode=U_ZERO_ERROR; |
| 194 } else if(U_FAILURE(*pErrorCode)) { |
| 195 return; |
| 196 } else if(pIn==pInLimit) { |
| 197 flush=TRUE; |
| 198 } |
| 199 |
| 200 /* convert the block [intermediate..pInter[ back to UTF-16 */ |
| 201 p=intermediate; |
| 202 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCo
de); |
| 203 if(U_FAILURE(*pErrorCode)) { |
| 204 return; |
| 205 } |
| 206 /* intermediate must have been consumed (p==pInter) because of the c
onverter semantics */ |
| 207 } while(!flush); |
| 208 |
| 209 outputLength=pOut-output; |
| 210 if(inputLength!=outputLength) { |
| 211 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLeng
th %d\n", inputLength, outputLength); |
| 212 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 213 } |
| 214 } |
| 215 }; |
| 216 |
| 217 // Test one-way conversion UTF-16->encoding. |
| 218 class FromUnicode : public Command { |
| 219 protected: |
| 220 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {} |
| 221 public: |
| 222 static UPerfFunction* get(const UtfPerformanceTest &testcase) { |
| 223 FromUnicode * t = new FromUnicode(testcase); |
| 224 if (U_SUCCESS(t->errorCode)){ |
| 225 return t; |
| 226 } else { |
| 227 delete t; |
| 228 return NULL; |
| 229 } |
| 230 } |
| 231 virtual void call(UErrorCode* pErrorCode){ |
| 232 const UChar *pIn, *pInLimit; |
| 233 char *pInter, *pInterLimit; |
| 234 |
| 235 ucnv_resetFromUnicode(cnv); |
| 236 fromUCallbackCount=0; |
| 237 |
| 238 pIn=input; |
| 239 pInLimit=input+inputLength; |
| 240 |
| 241 pInterLimit=intermediate+testcase.chunkLength; |
| 242 |
| 243 encodedLength=0; |
| 244 |
| 245 for(;;) { |
| 246 pInter=intermediate; |
| 247 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TR
UE, pErrorCode); |
| 248 encodedLength+=(int32_t)(pInter-intermediate); |
| 249 |
| 250 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 251 /* make sure that we convert once more to really flush */ |
| 252 *pErrorCode=U_ZERO_ERROR; |
| 253 } else if(U_FAILURE(*pErrorCode)) { |
| 254 return; |
| 255 } else { |
| 256 break; // all done |
| 257 } |
| 258 } |
| 259 } |
| 260 }; |
| 261 |
| 262 // Test one-way conversion UTF-8->encoding. |
| 263 class FromUTF8 : public Command { |
| 264 protected: |
| 265 FromUTF8(const UtfPerformanceTest &testcase) |
| 266 : Command(testcase), |
| 267 utf8Cnv(NULL), |
| 268 input8(utf8), input8Length(utf8Length) { |
| 269 utf8Cnv=ucnv_open("UTF-8", &errorCode); |
| 270 } |
| 271 public: |
| 272 static UPerfFunction* get(const UtfPerformanceTest &testcase) { |
| 273 FromUTF8 * t = new FromUTF8(testcase); |
| 274 if (U_SUCCESS(t->errorCode)){ |
| 275 return t; |
| 276 } else { |
| 277 delete t; |
| 278 return NULL; |
| 279 } |
| 280 } |
| 281 ~FromUTF8() { |
| 282 ucnv_close(utf8Cnv); |
| 283 } |
| 284 virtual void call(UErrorCode* pErrorCode){ |
| 285 const char *pIn, *pInLimit; |
| 286 char *pInter, *pInterLimit; |
| 287 UChar *pivotSource, *pivotTarget, *pivotLimit; |
| 288 |
| 289 ucnv_resetToUnicode(utf8Cnv); |
| 290 ucnv_resetFromUnicode(cnv); |
| 291 fromUCallbackCount=0; |
| 292 |
| 293 pIn=input8; |
| 294 pInLimit=input8+input8Length; |
| 295 |
| 296 pInterLimit=intermediate+testcase.chunkLength; |
| 297 |
| 298 pivotSource=pivotTarget=pivot; |
| 299 pivotLimit=pivot+testcase.pivotLength; |
| 300 |
| 301 encodedLength=0; |
| 302 |
| 303 for(;;) { |
| 304 pInter=intermediate; |
| 305 ucnv_convertEx(cnv, utf8Cnv, |
| 306 &pInter, pInterLimit, |
| 307 &pIn, pInLimit, |
| 308 pivot, &pivotSource, &pivotTarget, pivotLimit, |
| 309 FALSE, TRUE, pErrorCode); |
| 310 encodedLength+=(int32_t)(pInter-intermediate); |
| 311 |
| 312 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 313 /* make sure that we convert once more to really flush */ |
| 314 *pErrorCode=U_ZERO_ERROR; |
| 315 } else if(U_FAILURE(*pErrorCode)) { |
| 316 return; |
| 317 } else { |
| 318 break; // all done |
| 319 } |
| 320 } |
| 321 } |
| 322 protected: |
| 323 UConverter *utf8Cnv; |
| 324 const char *input8; |
| 325 int32_t input8Length; |
| 326 }; |
| 327 |
| 328 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, con
st char* &name, char* par) { |
| 329 switch (index) { |
| 330 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this);
break; |
| 331 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this)
; break; |
| 332 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); b
reak; |
| 333 default: name = ""; break; |
| 334 } |
| 335 return NULL; |
| 336 } |
| 337 |
| 338 int main(int argc, const char *argv[]) |
| 339 { |
| 340 // Default values for command-line options. |
| 341 options[CHARSET].value = "UTF-8"; |
| 342 options[CHUNK_LENGTH].value = "4096"; |
| 343 options[PIVOT_LENGTH].value = "1024"; |
| 344 |
| 345 UErrorCode status = U_ZERO_ERROR; |
| 346 UtfPerformanceTest test(argc, argv, status); |
| 347 |
| 348 if (U_FAILURE(status)){ |
| 349 printf("The error is %s\n", u_errorName(status)); |
| 350 test.usage(); |
| 351 return status; |
| 352 } |
| 353 |
| 354 if (test.run() == FALSE){ |
| 355 fprintf(stderr, "FAILED: Tests could not be run please check the " |
| 356 "arguments.\n"); |
| 357 return -1; |
| 358 } |
| 359 |
| 360 if (fromUCallbackCount > 0) { |
| 361 printf("Number of fromUnicode callback calls in the last iteration: %ld\
n", (long)fromUCallbackCount); |
| 362 } |
| 363 |
| 364 return 0; |
| 365 } |
OLD | NEW |