icu46/source/test/intltest/wbnf.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/test/intltest/wbnf.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 ******************************************************************************

	3 * Copyright (C) 2005-2007, International Business Machines Corporation and *

	4 * others. All Rights Reserved. *

	5 ******************************************************************************

	6 */

	7

	8 #include <stdio.h>

	9 #include <string.h>

	10 #include <stdlib.h>

	11 #include <time.h>

	12

	13 #include "wbnf.h"

	14

	15 // Most of this code is meant to test the test code. It's a self test.

	16 // Normally this isn't run.

	17 #define TEST_WBNF_TEST 0

	18

	19 ///////////////////////////////////////////////////////////

	20 //

	21 // Constants and the most basic helper classes

	22 //

	23

	24 static const char DIGIT_CHAR[] = "0123456789";

	25 static const char WHITE_SPACE[] = {'\t', ' ', '\r', '\n', 0};

	26 static const char ALPHABET[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV WXYZ";

	27 static const char SPECIAL[] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{\|}~";

	28

	29 static inline UBool isInList(const char c /in/, const char list[] /in/){

	30 const char * p = list;

	31 for (;p != 0 && p != c; p++);

	32 return *p?TRUE:FALSE;

	33 }

	34 static inline UBool isDigit(char c) {return isInList(c, DIGIT_CHAR);}

	35 static inline UBool isWhiteSpace(char c) {return isInList(c, WHITE_SPACE);}

	36 static inline UBool isAlphabet(char c) {return isInList(c, ALPHABET);}

	37 static inline UBool isSpecialAsciiChar(char c) {return isInList(c,SPECIAL);}

	38

	39

	40

	41 ///////////////////////////////////////////////////////////

	42 //

	43 // Helper classes

	44 //

	45

	class Buffer_byte{
	// Utility class, can be treated as an auto expanded array. no boundary check.
	//
	// Storage is a single malloc-family allocation [start, start + buffer_size).
	// `current` marks the end of the appended content. Bytes that have never
	// been written are kept at zero (on growth and on reset()).

	    typedef char byte;
	    byte * start;
	    byte * current;
	    int buffer_size; // size unit is byte
	public:
	    // Number of bytes appended since construction or the last reset().
	    inline int content_size(){return static_cast<int>(current - start);} // size unit is byte

	private:
	    // Grows the allocation by add_size bytes and zero-fills the new region.
	    inline void expand(int add_size = 100){ // size unit is byte
	        int new_size = buffer_size + add_size;

	        int cs_snap = content_size();
	        // Keep the old pointer until realloc is known to have succeeded.
	        byte * grown = (byte *) realloc(start, new_size);
	        if (grown == NULL) {
	            fprintf(stderr, "Buffer_byte: out of memory\n");
	            abort();
	        }
	        start = grown; // may differ from the previous value of start
	        current = start + cs_snap;

	        // Zero exactly the bytes that were just added.
	        memset(start + buffer_size, 0, add_size);
	        buffer_size = new_size;
	    }

	    // Makes sure the buffer can hold `size` bytes in total.
	    inline void expand_to(int size){
	        int r = size - buffer_size;
	        if (r > 0) {
	            expand(r); // simply expand, no block alignment
	        }
	    }
	    // Not copyable: declared but intentionally not defined.
	    Buffer_byte(const Buffer_byte &);
	    Buffer_byte & operator = (const Buffer_byte &);
	public:
	    Buffer_byte():start(NULL),current(start),buffer_size(0){
	        expand();
	    }
	    ~Buffer_byte(){
	        free(start);
	    }

	    // Zeroes the whole buffer and forgets the content; capacity is kept.
	    inline void reset(){
	        if (start != NULL) {
	            memset(start, 0, buffer_size);
	        }
	        current = start;
	    }

	    // Using memory copy method to append a C array to buffer,
	    inline void append(const void * c, int size){ // size unit is byte
	        expand_to(content_size() + size) ;
	        memcpy(current, c, size);
	        current = current + size;
	    }

	    // Start of the storage. The pointer is invalidated by any later append
	    // that has to grow the buffer.
	    byte * buffer(){
	        return start;
	    }
	};

	/*
	 The class(es) try to work as built-in array, so it overloads these two operators
	 operator type *();
	 type & operator[];
	 The first is used to auto type convert, the latter is used to select member.

	 A small trick is the class does not overload the address-of operator. This
	 behavior is different from built-in array, but it gives us the opportunity
	 to get the address of the class itself.
	*/
	//template<typename type>
	// class BUFFER{
	// typedef BUFFER name;
	//
	// BUFFER(type, name) defines class `name`, a typed view over a Buffer_byte:
	//   reset()              - drop the content, returns *this
	//   append(c)            - append one element, returns *this
	//   append_array(p, n)   - append n elements starting at p, returns *this
	//   operator[](i)        - element i, no boundary check
	//   operator type *()    - pointer to the first element
	//   content_size()       - number of elements appended
	// The macro body ends without ';' so uses are written BUFFER(T, Name);
	#define BUFFER(type, name)\
	class name {\
	private:\
	    Buffer_byte buf;\
	public:\
	    name & reset() {buf.reset(); return *this;}\
	    name & append(type c) {buf.append(&c, sizeof(type)); return *this;}\
	    name & append_array(const type * p, int size) {buf.append(p, sizeof(type)*size); return *this;}\
	    type & operator [] (int i) { return ((type *) buf.buffer())[i];}\
	    operator type *(){return (type *) buf.buffer();}\
	    int content_size(){return buf.content_size() / sizeof(type);}\
	}

	126

	127

	class Pick{
	/* The Pick is the basic language generator element.
	 * Every node produced by the parser derives from it. */
	public:
	    // Generate a string according to the syntax.
	    // Return a null-terminated c-string. The buffer is owned by callee.
	    virtual const char* next() = 0;
	    // Virtual so that nodes can be deleted through a Pick pointer.
	    virtual ~Pick(){}
	};

	136

	137 //typedef BUFFER<char> Buffer_char;

	138 //typedef BUFFER<int> Buffer_int;

	139 //typedef BUFFER<Pick *> Buffer_pPick;

	140 BUFFER(char, Buffer_char);

	141 BUFFER(int, Buffer_int);

	142 BUFFER(Pick *, Buffer_pPick);

	143

	144 class SymbolTable{

	145 /* Helper class.

	146 * It's a mapping table between 'variable name' and its 'active Pick object'

	147 */

	148 private:

	149 Buffer_char name_buffer; // var names storage space

	150

	151 Buffer_int names; // points to name (offset in name_buffer)

	152 Buffer_pPick refs; // points to Pick

	153

	154 int get_index(const char *const var_name){

	155 int len = names.content_size();

	156 for (int i=0; i< len; i++){

	157 if (strcmp(var_name, name_buffer + names[i]) == 0){

	158 return i;

	159 }

	160 }

	161 return -1;

	162 }

	163

	164 public:

	165 enum RESULT {EMPTY, NO_VAR, NO_REF, HAS_REF};

	166

	167 RESULT find(const char const var_name /[in] c-string/, Pick * ref = NUL L /[out] Pick */){

	168 if (!var_name) return EMPTY; // NULL name

	169

	170 int i = get_index(var_name);

	171 if (i == -1){

	172 return NO_VAR; // new name

	173 }

	174 if (!refs[i]){ // exist name, no ref

	175 return NO_REF;

	176 } else {

	177 if (ref) {

	178 *ref = refs[i];

	179 }

	180 return HAS_REF; // exist name, has ref

	181 }

	182 }

	183

	184 void put(const char const var_name, Pick const var_ref = NULL){

	185 int i = get_index(var_name);

	186 switch(find(var_name)){

	187 case EMPTY: // NULL name

	188 break;

	189 case NO_VAR: // new name

	190 int offset;

	191 offset = name_buffer.content_size();

	192 name_buffer.append_array(var_name, strlen(var_name) + 1);

	193 names.append(offset);

	194 refs.append(var_ref);

	195 break;

	196 case NO_REF: // exist name, no ref

	197 refs[i] = var_ref; // link definition with variable

	198 break;

	199 case HAS_REF: // exist name, has ref

	200 if (var_ref){

	201 refs[i] = var_ref;

	202 }

	203 break;

	204 default:

	205 ; // ASSERT(FALSE);

	206 }

	207 return;

	208 }

	209

	210 UBool is_complete(){

	211 int n = names.content_size();

	212 for (int i=0; i<n; ++i){

	213 if (refs[i] == NULL){

	214 return FALSE;

	215 }

	216 }

	217 return TRUE;

	218 }

	219

	220 void reset(){

	221 names.reset();

	222 name_buffer.reset();

	223

	224 // release memory here

	225 int s = refs.content_size();

	226 for (int i=0; i < s; i++){

	227 delete refs[i]; // TOFIX: point alias/recursion problem

	228 }

	229 refs.reset();

	230 }

	231

	232 ~SymbolTable(){

	233 reset();

	234 }

	235 };

	236

	237

	238 /*

	239 // Document of class Escaper

	240 //

	241 // ATTENTION:

	242 // From http://icu-project.org/userguide/Collate_Customization.html.

	243 // We get the precedence of escape/quote operations

	244 //

	245 // (highest) 1. backslash \

	246 // 2. two single quotes ''

	247 // 3. quoting ' '

	248 //

	249 // ICU Collation should accept following as the same string.

	250 //

	251 // 1) 'ab'c _

	252 // 2) a\bc \

	253 // 3) a'b'\c \|- They are equal.

	254 // 4) abc _/

	255 //

	256 // From "two single quotes", we have following deductions

	257 // D1. empty quoting is illegal. (obviously)

	258 // D2. no contact operation between two quotings

	259 // '.''.' is not .. it is .'.

	260 // D3. "two single quotes" cannot contact two quoting simultaneously

	261 // '..''''.' is not ..'. it is ..''.

	262 // NOTICE:

	263 // "two single quotes" can contact before one quoting

	264 // '''.' is '.

	265 // "two single quotes" can literally contact after one quoting

	266 // But, from syntax, it's one quoting including a "two single quotes"

	267 // '.''' is .'

	268 // D4. "two single quotes" cannot solely be included in quoting

	269 // '''' is not ' it is ''

	270 // NOTICE: These are legal

	271 // '.''.' is .'.

	272 // '.''' is .'

	273 //

	274 // decision

	275 // /\

	276 // /__\

	277 // output buffer input buffer

	278 //

	279 // To make our decision (within an atom operation) without caring about the input and output buffers,

	280 // following calling pattern (within an atom operation) shall be avoided

	281 //

	282 // P1 open_quoting() then close_quoting() (direct violation) D1

	283 // P2 close_quoting() then open_quoting() (direct violation) D2

	284 // P3 empty open_quoting() (indirect violation) D1, D4

	285 // P4 empty close_quoting() (indirect violation) D2, D3

	286 // P5 open_quoting() then two single quotes (indirect violation) D4

	287 // P6 close_quoting() then two single quotes (indirect violation) D3

	288 //

	289 // two single quotes escaping will not open_ or close_ quoting()

	290 // The choice will not lose some quoting forms.

	291 //

	292 // For open_quoting(),

	293 // we may get this form quoting ''' P5

	294 // It may raise a bug ''''x

	295 // If we expect

	296 // '''.' let the next char open the quoting

	297 // '.''.' the quoting is already opened by preceding char

	298 //

	299 // For close_quoting()

	300 // we will get this form quoting '.''' P6

	301 // It may raise a bug '.''''.'

	302 // If we expect

	303 // '.'''\. let the next char close the quoting

	304 // '.''''.' the expectation is wrong! using '.'\''.' instead

	305 //

	306 // It's hard work to re-adjust the generation opportunity for the various escaping forms.

	307 // We just simply ignore it.

	308 */

	309 class Escaper{

	310 public:

	311 enum CHOICE {YES, NO, RAND};

	312 enum ESCAPE_FORM {BSLASH_ONLY, QUOTE_ONLY, QUOTE_AND_BSLAH, RAND_ESC};

	313 private:

	314 class Bool{ // A wrapper class for CHOICE, to auto adapter UBool class

	315 private:

	316 const CHOICE tag;

	317 public:

	318 Bool(CHOICE flag=RAND):tag(flag){}

	319 operator UBool() { // conversion operator

	320 return tag == RAND ? rand()%2 : tag == YES;

	321 //if (tag == RAND){

	322 // return rand()%2 == 1;

	323 //} else {

	324 // return tag == YES ? TRUE : FALSE;

	325 //}

	326 }

	327 };

	328 public:

	329 Escaper(CHOICE escapeLiteral = RAND,

	330 CHOICE twoQuotesEscape = RAND,

	331 ESCAPE_FORM escapeForm = RAND_ESC):

	332 escape_form(escapeForm),

	333 escape_literal(escapeLiteral),

	334 two_quotes_escape(twoQuotesEscape),

	335 is_quoting(FALSE){}

	336 private:

	337 Buffer_char str;

	338 ESCAPE_FORM escape_form;

	339 Bool escape_literal;

	340 Bool two_quotes_escape;

	341 UBool quote_escape;

	342 UBool bslash_escape;

	343 UBool is_quoting;

	344

	345 void set_options(){

	346 ESCAPE_FORM t = escape_form == RAND_ESC ? (ESCAPE_FORM) (rand()%3) : esc ape_form;

	347 switch (t){

	348 case BSLASH_ONLY :

	349 bslash_escape = TRUE; quote_escape = FALSE; break;

	350 case QUOTE_ONLY:

	351 bslash_escape = FALSE;quote_escape = TRUE; break;

	352 case QUOTE_AND_BSLAH:

	353 bslash_escape = TRUE; quote_escape = TRUE; break;

	354 default:

	355 ;// error

	356 }

	357 }

	358

	359 void reset(){

	360 str.reset();

	361 is_quoting = FALSE;

	362 }

	363

	364 inline void open_quoting(){

	365 if(is_quoting){

	366 // do nothing

	367 } else {

	368 str.append('\'');

	369 is_quoting = TRUE;

	370 }

	371 }

	372 inline void close_quoting(){

	373 if(is_quoting){

	374 str.append('\'');

	375 is_quoting = FALSE;

	376 } else {

	377 // do nothing

	378 }

	379 }

	380

	381 // str [in] null-terminated c-string

	382 void append(const char * strToAppend){

	383 for(;*strToAppend != 0; strToAppend++){

	384 append(*strToAppend);

	385 }

	386 }

	387

	388 inline void append(const char c){

	389 set_options();

	390

	391 if (c == '\\'){

	392 quote_escape ? open_quoting() : close_quoting();

	393 //bslash_escape always true here

	394 str.append('\\');

	395 str.append('\\');

	396 } else if (c == '\''){

	397 if (two_quotes_escape){ // quoted using two single quotes

	398 // See documents in anonymous.design

	399 str.append('\'');

	400 str.append('\'');

	401 } else{

	402 quote_escape ? open_quoting() : close_quoting();

	403 //bslash_escape always true here

	404 str.append('\\');

	405 str.append('\'');

	406 }

	407 } else if (isSpecialAsciiChar(c) \|\| isWhiteSpace(c)){

	408 quote_escape ? open_quoting() : close_quoting();

	409 if (bslash_escape) str.append('\\');

	410 str.append(c);

	411 } else { //if (isAlphabet(c) \|\| isDigit(c) \|\| TRUE){ // treat others as literal

	412 if (escape_literal){

	413 quote_escape ? open_quoting() : close_quoting();

	414 if (bslash_escape) str.append('\\');

	415 str.append(c);

	416 } else {

	417 close_quoting();

	418 str.append(c);

	419 }

	420 }

	421 }

	422

	423 public:

	424 // Return a null-terminate c-string. The buffer is owned by callee.

	425 char * operator()(const char * literal /c-string/){

	426 str.reset();

	427 for(;*literal != 0; literal++){

	428 append(*literal);

	429 }

	430 close_quoting(); // P4 exception, to close whole quoting

	431 return str;

	432 }

	433 };

	434

	435 class WeightedRand{

	436 // Return a random number in [0, size)

	437 // Every number has different chance (aka weight) to be selected.

	438 private:

	439 Buffer_int weights;

	440 double total;

	441 WeightedRand(const WeightedRand &);

	442 WeightedRand & operator = (const WeightedRand &);

	443 public:

	444 WeightedRand(Buffer_int * weight_list = NULL, int size = 0){

	445 if ( weight_list == NULL){

	446 for (int i=0; i<size; ++i) weights.append(DEFAULT_WEIGHT);

	447 } else {

	448 int s = weight_list->content_size();

	449 if (s < size){

	450 weights.append_array( (*weight_list),s);

	451 for (int i=s; i<size; ++i) weights.append(DEFAULT_WEIGHT);

	452 } else { // s >= size

	453 weights.append_array( (*weight_list),size);

	454 }

	455 }

	456 total = 0;

	457 int c = weights.content_size();

	458 for (int i=0; i<c; ++i){

	459 total += weights[i];

	460 }

	461 }

	462

	463 void append(int weight){

	464 weights.append(weight);

	465 total += weight;

	466 }

	467

	468 // Give a random number with the consideration of weight.

	469 // Every random number is associated with a weight.

	470 // It identifies the chance to be selected,

	471 // larger weight has more chance to be selected.

	472 //

	473 //

	474 // ______________________ every slot has equal chance

	475 //

	476 // [____][_][___][______] each item has different slots, hence different chance

	477 //

	478 //

	479 // The algorithms to generate the number is illustrated by preceding figure .

	480 // First, a slot is selected by rand(). Then we translate the slot to corre sponding item.

	481 //

	482 int next(){

	483 // get a random in [0,1]

	484 double reference_mark = (double)rand() / (double)RAND_MAX;

	485

	486 // get the slot's index, 0 <= mark <= total;

	487 double mark = total * reference_mark;

	488

	489 // translate the slot to corresponding item

	490 int i=0;

	491 for (;;){

	492 mark -= weights[i]; // 0 <= mark <= total

	493 if (mark <= 0)

	494 break;

	495 i++;

	496 }

	497 return i;

	498 }

	499 };

	500

	501 ///////////////////////////////////////////////////////////

	502 //

	503 // The parser result nodes

	504 //

	505

	506 class Literal : public Pick {

	507 public:

	508 virtual const char* next(){

	509 return str;

	510 }

	511 Literal(const char * s /c-string/){

	512 str.append_array(s, strlen(s) + 1);

	513 }

	514 private:

	515 Buffer_char str; //null-terminated c-string

	516 };

	517

	518 class Variable : public Pick {

	519 public:

	520 Variable(SymbolTable * symbols, const char * varName, Pick * varRef = NULL){

	521 this->var_name.append_array(varName, strlen(varName) + 1);

	522 if ((symbol_table = symbols)){

	523 symbol_table->put(varName, varRef);

	524 }

	525 }

	526

	527 operator const char *(){

	528 return var_name;

	529 }

	530

	531 virtual const char* next(){

	532 if (symbol_table){

	533 Pick * var_ref = NULL;

	534 symbol_table->find(var_name, &var_ref);

	535 if (var_ref) {

	536 return var_ref->next();

	537 }

	538 }

	539 return ""; // dumb string

	540 }

	541 private:

	542 Buffer_char var_name;

	543 SymbolTable * symbol_table;

	544 };

	545

	546 class Quote : public Pick{

	547 public:

	548 Quote(Pick & base):item(base),e(Escaper::NO, Escaper::NO, Escaper::BSLASH_ON LY){

	549 }

	550 virtual const char* next(){

	551 return e(item.next());

	552 }

	553 private:

	554 Pick & item;

	555 Buffer_char str;

	556 Escaper e;

	557 };

	558

	559

	560 class Morph : public Pick{

	561 /*

	562 The difference between morph and an arbitrary random string is that

	563 a morph changes slowly. When we build collation rules, for example,

	564 it is a much better test if the strings we use are all in the same

	565 'neighborhood'; they share many common characters.

	566 */

	567 public:

	568 Morph(Pick & base):item(base){}

	569

	570 virtual const char* next(){

	571 current.reset();

	572 const char * s = item.next();

	573 current.append_array(s, strlen(s) + 1);

	574 if (last.content_size() == 0) {

	575 str.reset();

	576 last.reset();

	577 str.append_array(current, current.content_size());

	578 last.append_array(current, current.content_size());

	579 } else {

	580 morph();

	581 }

	582 return str;

	583 }

	584 private:

	585 Pick & item;

	586 Buffer_char str;

	587 Buffer_char last;

	588 Buffer_char current;

	589

	590 char * p_last;

	591 char * p_curr;

	592

	593 void copy_curr(){

	594 if (*p_curr) {

	595 str.append(*p_curr);

	596 p_curr++;

	597 }

	598 }

	599

	600 void copy_last(){

	601 if (*p_last) {

	602 str.append(*p_last);

	603 p_last++;

	604 }

	605 }

	606

	607 // copy 0, 1, or 2 character(s) to str

	608 void copy(){

	609 static WeightedRand wr(& Buffer_int().append(DEFAULT_WEIGHT * 10), 5);

	610

	611 switch (wr.next()){

	612 case 0: // copy last -- has 10 times chance than others

	613 copy_last();

	614 break;

	615 case 1: // copy both

	616 copy_curr();

	617 copy_last();

	618 break;

	619 case 2: // copy both

	620 copy_last();

	621 copy_curr();

	622 break;

	623 case 3:

	624 copy_curr();

	625 break;

	626 case 4: // copy nothing

	627 break;

	628 default:

	629 // ASSERT(FALSE);

	630 ;

	631 }

	632 }

	633

	634 void morph(void){

	635 int min = strlen(last);

	636 int max = strlen(current);

	637 if (min > max){

	638 int temp = min;

	639 min = max;

	640 max = temp;

	641 }

	642

	643 int len = min + rand()%(max - min + 1); // min + [0, diff]

	644 p_curr = current;

	645 p_last = last;

	646 str.reset();

	647

	648 for (; str.content_size()<len && p_curr && p_last;){

	649 copy(); // copy 0, 1, or 2 character(s) to str

	650 }

	651

	652 if (str.content_size() == len) {

	653 str.append(0);

	654 final();

	655 return;

	656 }

	657

	658 if (str.content_size() > len) { // if the last copy copied two character s

	659 str[len]=0;

	660 final();

	661 return;

	662 }

	663

	664 // str.content_size() < len

	665 if (*p_last) {

	666 for (; str.content_size() < len; copy_last());

	667 } else if (*p_curr){

	668 for (; str.content_size() < len; copy_curr());

	669 }

	670

	671 int last_len = last.content_size();

	672 for (;str.content_size() < len;){

	673 str.append(last[rand()%last_len]);

	674 }

	675 str.append(0);

	676 final();

	677 }

	678

	679 void final(){

	680 last.reset();

	681 last.append_array(current, current.content_size());

	682 }

	683 };

	684

	685 class Sequence : public Pick {

	686 public:

	687 virtual const char* next(){

	688 str.reset();

	689 int s = items.content_size();

	690 for(int i=0; i < s; i++){

	691 const char * t = items[i]->next();

	692 str.append_array(t, strlen(t));

	693 }

	694 str.append(0); // terminal null

	695 return str;

	696 }

	697

	698 void append (Pick * node){

	699 items.append(node);

	700 }

	701

	702 virtual ~Sequence(){

	703 int s = items.content_size();

	704 for(int i=0; i < s; i++){

	705 //How can assure the item is got from heap?

	706 //Let's assume it.

	707 delete items[i]; // TOFIX: point alias/recursion problem

	708 items[i] = NULL;

	709 }

	710 }

	711 private:

	712 Buffer_pPick items;

	713 Buffer_char str; //null-terminated c-string

	714 };

	715

	716 class Repeat : public Pick {

	717 private:

	718 Pick * item;

	719 Buffer_char str;

	720 WeightedRand wr;

	721 int min;

	722 int max;

	723 int select_a_count(){

	724 return min + wr.next();

	725 }

	726 public:

	727 virtual const char* next(){

	728 str.reset();

	729 int c = select_a_count();

	730 for(int i=0; i< c; i++){

	731 const char * t = item->next();

	732 str.append_array(t, strlen(t));

	733 }

	734 str.append(0);

	735 return str;

	736 }

	737

	738 Repeat(Pick * base, int minCount =0, int maxCount = 1, Buffer_int * weights = NULL):

	739 wr(weights, maxCount-minCount +1) {

	740 this->item = base;

	741 this->min = minCount;

	742 this->max = maxCount;

	743 }

	744 virtual ~Repeat(){

	745 delete item; // TOFIX: point alias/recursion problem

	746 item = NULL;

	747 }

	748 };

	749

	750

	751 class Alternation : public Pick {

	752 public:

	753 virtual const char* next(){

	754 str.reset();

	755 int i = wr.next();

	756 const char * t = items[i]->next();

	757 str.append_array(t, strlen(t) + 1);

	758 return str;

	759 }

	760 virtual ~Alternation(){

	761 int s = items.content_size();

	762 for(int i=0; i < s; i++){

	763 delete items[i]; // TOFIX: point alias/recursion problem

	764 items[i] = NULL;

	765 }

	766 }

	767

	768 Alternation & append (Pick * node, int weight = DEFAULT_WEIGHT){

	769 items.append(node);

	770 wr.append(weight);

	771 return *this;

	772 }

	773 private:

	774 Buffer_pPick items;

	775 Buffer_char str; // null-terminated c-string

	776 WeightedRand wr;

	777 };

	778

	779 ///////////////////////////////////////////////////////////

	780 //

	781 // The parser

	782 //

	783

	// Token kinds produced by the Scanner. STRING, VAR and NUMBER carry text in
	// Scanner::token; the remaining kinds after ERROR are the single-character
	// operators ? * + { } ( ) ; = , | @ ~ % in that order.
	enum TokenType {STRING, VAR, NUMBER, STREAM_END, ERROR, QUESTION, STAR, PLUS, LBRACE, RBRACE, LPAR, RPAR, SEMI, EQ, COMMA, BAR, AT, WAVE, PERCENT};

	785

	786 class Scanner{

	787 friend int DumpScanner(Scanner & s, UBool dumb);

	788 private:

	789 const char * source;

	790 const char * working;

	791 const char * history; // for debug

	792 enum StateType {START, IN_NUM, IN_VAR_FIRST, IN_VAR, IN_QUOTE, IN_QUOTE_BSLA SH, IN_BSLASH, IN_STRING, DONE};

	793 StateType state;

	794 void terminated(TokenType t){

	795 working--; // return the peeked character

	796 tokenType = t;

	797 token.append(0); // close buffer

	798 state = DONE;

	799 }

	800 public:

	801 // the buffer of "source" is owned by caller

	802 Scanner(const char src/[in] c-string*/ = NULL):source(src){

	803 working = src;

	804 history = working;

	805 state = DONE;

	806 tokenType = ERROR;

	807 }

	808

	809 //void setSource(const char const src /[in] c-string*/){

	810 // (&const_cast<const char >(source)) = src;

	811 //}

	812

	813 Buffer_char token;

	814 TokenType tokenType;

	815

	816 TokenType getNextToken(){

	817 token.reset();

	818 state = START;

	819 history = working; // for debug

	820 while (state != DONE){

	821 char c = *working++;

	822 if (c == 0 && state != START){//avoid buffer overflow. for IN_QUOE, IN_ESCAPE

	823 terminated(ERROR);

	824 break; // while

	825 }

	826 switch(state){

	827 case START:

	828 tokenType = ERROR;

	829 switch(c){

	830 case '?' : tokenType = QUESTION; break;

	831 case '*' : tokenType = STAR; break;

	832 case '+' : tokenType = PLUS; break;

	833 case '{' : tokenType = LBRACE; break;

	834 case '}' : tokenType = RBRACE; break;

	835 case '(' : tokenType = LPAR; break;

	836 case ')' : tokenType = RPAR; break;

	837 case ';' : tokenType = SEMI; break;

	838 case '=' : tokenType = EQ; break;

	839 case ',' : tokenType = COMMA; break;

	840 case '\|' : tokenType = BAR; break;

	841 case '@' : tokenType = AT; break;

	842 case '~' : tokenType = WAVE; break;

	843 case '%' : tokenType = PERCENT; break;

	844 case 0 : tokenType = STREAM_END; working-- /avoid bu ffer overflow/; break;

	845 }

	846 if (tokenType != ERROR){

	847 token.append(c);

	848 token.append(0);

	849 state = DONE;

	850 break; // START

	851 }

	852 switch(c){

	853 case '$' : state = IN_VAR_FIRST; token.append(c); break ;

	854 case '\'' : state = IN_QUOTE; break;

	855 case '\\' : state = IN_BSLASH; break;

	856 default:

	857 if (isWhiteSpace(c)){ // state = START; //do no thing

	858 } else if (isDigit(c)){ state = IN_NUM; token .append(c);

	859 } else if (isAlphabet(c)){ state = IN_STRING; token .append(c);

	860 } else {terminated(ERROR);}

	861 }

	862 break;//START

	863 case IN_NUM:

	864 if (isDigit(c)){

	865 token.append(c);

	866 } else {

	867 terminated(NUMBER);

	868 }

	869 break;//IN_NUM

	870 case IN_VAR_FIRST:

	871 if (isAlphabet(c)){

	872 token.append(c);

	873 state = IN_VAR;

	874 } else {

	875 terminated(ERROR);

	876 }

	877 break; // IN_VAR_FISRT

	878 case IN_VAR:

	879 if (isAlphabet(c) \|\| isDigit(c)){

	880 token.append(c);

	881 } else {

	882 terminated(VAR);

	883 }

	884 break;//IN_VAR

	885 case IN_STRING:

	886 // About the scanner's behavior for STRING, AT, and ESCAPE:

	887 // All of them can be contacted with each other.

	888 // This means the scanner will eat up as much as possible st rings

	889 // (STRING, AT, and ESCAPE) at one time, with no regard of their

	890 // combining sequence.

	891 //

	892 if (c == '\''){

	893 state = IN_QUOTE; // the first time we see single quote

	894 } else if (c =='\\'){ // back slash character

	895 state = IN_BSLASH;

	896 } else if (isAlphabet(c) \|\| isDigit(c)){

	897 token.append(c);

	898 } else{

	899 terminated(STRING);

	900 }

	901 break;//IN_STRING

	902 case IN_QUOTE:

	903 if (c == '\''){ // the second time we see single quote

	904 state = IN_STRING; // see document in IN_STRING

	905 } else if ( c== '\\') { // backslah escape in quote

	906 state = IN_QUOTE_BSLASH;

	907 } else {

	908 token.append(c); // eat up everything, includes back sl ash

	909 }

	910 break;//IN_QUOTE

	911 case IN_QUOTE_BSLASH:

	912 case IN_BSLASH:

	913 switch (c){

	914 case 'n' : token.append('\n'); break;

	915 case 'r' : token.append('\r'); break;

	916 case 't' : token.append('\t'); break;

	917 case '\'' : token.append('\''); break;

	918 case '\\' : token.append('\\'); break;

	919 default: token.append(c); // unknown escaping, treat it as literal

	920 }

	921 if (state == IN_BSLASH){

	922 state = IN_STRING; // see document in IN_STRING

	923 } else { // state == IN_QUOTE_BSLASH

	924 state = IN_QUOTE;

	925 }

	926 break;//IN_BSLASH

	927 case DONE: /* should never happen */

	928 default:

	929 working--;

	930 tokenType = ERROR;

	931 state = DONE;

	932 break;

	933 }//switch(state)

	934 }//while (state != DONE)

	935

	936 return tokenType;

	937 }

	938 };//class Scanner

	939

	940 class Parser{

	941 friend UBool TestParser();

	942 friend class TestParserT;

	943 friend class LanguageGenerator_impl;

	944 private:

	945 Scanner s;

	946 TokenType & token;

	947 int min_max; // for the evil infinite

	948

	949 UBool match(TokenType expected){

	950 if (token == expected) {

	951 token = s.getNextToken();

	952 return TRUE;

	953 } else {

	954 //s.dumpCurrentPoint();

	955 return FALSE;

	956 }

	957 }

	958

	959 UBool weight(int & value){

	960 if (token == NUMBER){

	961 int temp = atoi(s.token);

	962 match(NUMBER);

	963 if (match(PERCENT)){

	964 value = temp;

	965 return TRUE;

	966 }

	967 }

	968 return FALSE;

	969 }

	970

	971 UBool repeat (Pick* &node /in,out/){

	972 if (node == NULL) return FALSE;

	973

	974 int count = -2;

	975 int min = -2;

	976 int max = -2;

	977 UBool question = FALSE;

	978 switch (token){

	979 case QUESTION:

	980 match(QUESTION);

	981 min = 0;

	982 max = 1;

	983 count = 2;

	984 question = TRUE;

	985 break;

	986 case STAR:

	987 match(STAR);

	988 min = 0;

	989 max = -1;

	990 count = -1;

	991 break;

	992 case PLUS:

	993 match(PLUS);

	994 min = 1;

	995 max = -1;

	996 count = -1;

	997 break;

	998 case LBRACE:

	999 match(LBRACE);

	1000 if (token != NUMBER){

	1001 return FALSE;

	1002 }else {

	1003 min = atoi(s.token);

	1004 match(NUMBER);

	1005 if (token == RBRACE){

	1006 match(RBRACE);

	1007 max = min;

	1008 count = 1;

	1009 } else if (token == COMMA) {

	1010 match(COMMA);

	1011 if (token == RBRACE){

	1012 match(RBRACE);

	1013 max = -1;

	1014 count = -1;

	1015 } else if (token == NUMBER) {

	1016 max = atoi(s.token);

	1017 match(NUMBER);

	1018 count = max - min + 1;

	1019 if (!match(RBRACE)) {

	1020 return FALSE;

	1021 }

	1022 } else {

	1023 return FALSE;

	1024 }

	1025 } else {

	1026 return FALSE;

	1027 }

	1028 }

	1029 break;

	1030 default:

	1031 return FALSE;

	1032 }

	1033

	1034 if (count == -2 \|\| min == -2 \|\| max == -2){

	1035 //ASSERT(FALSE);

	1036 return FALSE;

	1037 }

	1038

	1039 // eat up following weights

	1040 Buffer_int weights;

	1041 int w;

	1042 while (weight(w)){

	1043 weights.append(w);

	1044 }

	1045

	1046 // for the evil infinite

	1047 min_max = min_max > min ? min_max : min;

	1048 min_max = min_max > max ? min_max : max;

	1049 if (min_max > PSEUDO_INFINIT){

	1050 return FALSE; // PSEUDO_INFINIT is less than the real maximum

	1051 }

	1052 if (max == -1){ // the evil infinite

	1053 max = PSEUDO_INFINIT;

	1054 }

	1055 // for the strange question mark

	1056 if (question && weights.content_size() > 0){

	1057 Buffer_int w2;

	1058 w2.append(DEFAULT_WEIGHT - weights[0]).append(weights[0]);

	1059 node = new Repeat(node,min,max,&w2);

	1060 return TRUE;

	1061 }

	1062 node = new Repeat(node,min,max,&weights);

	1063 return TRUE;

	1064 }

	1065

	1066 UBool core(Pick* &node /out/){

	1067 if (node != NULL) return FALSE; //assert node == NULL

	1068

	1069 switch(token){

	1070 case LPAR:

	1071 match(LPAR);

	1072 if(defination(node) && match(RPAR)){

	1073 return TRUE;

	1074 }

	1075 return FALSE;

	1076 case VAR:

	1077 node = new Variable(&symbols, s.token);

	1078 match(VAR);

	1079 return TRUE;

	1080 case STRING:

	1081 node = new Literal(s.token);

	1082 match(STRING);

	1083 return TRUE;

	1084 default:

	1085 return FALSE;

	1086 }

	1087 }

	1088 UBool modified(Pick* &node /out/){

	1089 if (node != NULL) return FALSE; //assert node == NULL

	1090

	1091 if (!core(node)) {

	1092 return FALSE;

	1093 }

	1094

	1095 for (;;){

	1096 switch(token){

	1097 case WAVE:

	1098 match(WAVE);

	1099 node = new Morph(*node);

	1100 break;

	1101 case AT:

	1102 match(AT);

	1103 node = new Quote(*node);

	1104 break;

	1105 case QUESTION:

	1106 case STAR:

	1107 case PLUS:

	1108 case LBRACE:

	1109 if (!repeat(node)) return FALSE;

	1110 break;

	1111 case SEMI: // rule definiation closed

	1112 case RPAR: // within parenthesis (core closed)

	1113 case BAR: // in alternation

	1114 case NUMBER: // in alternation, with weight

	1115 case LPAR: // in sequence

	1116 case VAR: // in sequence

	1117 case STRING: // in sequence

	1118 return TRUE;

	1119 default:

	1120 return FALSE;

	1121 }

	1122 }

	1123 }

	1124

	1125

	1126 UBool sequence_list(Pick* &node /in,out/){

	1127 if (node == NULL) return FALSE; // assert node != NULL

	1128

	1129 Sequence* seq = new Sequence();

	1130 Pick * n = node;

	1131

	1132 while (token == VAR \|\| token == STRING \|\| token == LPAR){

	1133 seq->append(n);

	1134 n = NULL;

	1135 if (modified(n)){

	1136 // go on

	1137 } else {

	1138 goto FAIL;

	1139 }

	1140 }

	1141

	1142 if (token == SEMI \|\| token == RPAR \|\| token == BAR){

	1143 seq->append(n);

	1144 node = seq;

	1145 return TRUE;

	1146 }

	1147 FAIL:

	1148 delete seq;

	1149 return FALSE;

	1150

	1151 }

	1152

	1153 UBool sequence(Pick* &node /out/){

	1154 if (node != NULL) return FALSE; //assert node == NULL

	1155

	1156 if (!modified(node)) {

	1157 return FALSE;

	1158 }

	1159

	1160 if (token == VAR \|\| token == STRING \|\| token == LPAR){

	1161 return sequence_list(node);

	1162 } else {

	1163 return TRUE; // just a modified

	1164 }

	1165 }

	1166

	1167 UBool alternation_list(Pick* &node /in,out/){

	1168 if (node == NULL) return FALSE; // assert node != NULL

	1169

	1170 Alternation * alt = new Alternation();

	1171 Pick * n = node;

	1172 int w = DEFAULT_WEIGHT;

	1173

	1174 while (token == NUMBER \|\| token == BAR){

	1175 if(token == NUMBER) {

	1176 if (weight(w)){

	1177 if (token == BAR){

	1178 // the middle item, go on

	1179 } else {

	1180 // the last item or encounter error

	1181 break; //while

	1182 }

	1183 } else {

	1184 goto FAIL;

	1185 }

	1186 } // else token == BAR

	1187 match(BAR);

	1188 alt->append(n,w);

	1189

	1190 n = NULL;

	1191 w = DEFAULT_WEIGHT;

	1192 if (sequence(n)){

	1193 // go on

	1194 } else {

	1195 goto FAIL;

	1196 }

	1197 }

	1198

	1199 if (token == SEMI \|\| token == RPAR) {

	1200 alt->append(n,w);

	1201 node = alt;

	1202 return TRUE;

	1203 }

	1204 FAIL:

	1205 delete alt;

	1206 return FALSE;

	1207 }

	1208

	1209 UBool alternation(Pick* &node /out/){

	1210 if (node != NULL) return FALSE; //assert node == NULL

	1211

	1212 // 'sequence' has higher precedence than 'alternation'

	1213 if (!sequence(node)){

	1214 return FALSE;

	1215 }

	1216

	1217 if (token == BAR \|\| token == NUMBER){ // find a real alternation1, creat e it.

	1218 return alternation_list(node);

	1219 } else {

	1220 return TRUE; // just a sequence_old

	1221 }

	1222 }

	1223

	1224

	1225 UBool defination(Pick* &node /out/){

	1226 if (node != NULL) return FALSE; //assert node == NULL

	1227 return alternation(node);

	1228 }

	1229

	1230 UBool rule(){

	1231 if (token == VAR){

	1232 Buffer_char name;

	1233 name.append_array(s.token, strlen(s.token) + 1);

	1234 match(VAR);

	1235

	1236 if (match(EQ)){

	1237 Pick * t = NULL;

	1238 if(defination(t)){

	1239 symbols.put(name, t);

	1240 return match(SEMI);

	1241 }

	1242 }

	1243 }

	1244 return FALSE;

	1245 }

	1246 public:

	1247 UBool rules(){

	1248 symbols.reset();

	1249 token = s.getNextToken();

	1250 while (rule()){

	1251 }

	1252 if (token == STREAM_END){

	1253 return TRUE;

	1254 } else {

	1255 //s.dumpCurrentPoint();

	1256 return FALSE;

	1257 }

	1258 }

	1259

	1260 public:

	1261 SymbolTable symbols;

	1262

	1263 Parser(const char *const source):s(source), token(s.tokenType){

	1264 min_max = -2;

	1265 }

	1266 UBool parse(){

	1267 return rules();

	1268 }

	1269

	1270 }; // class Parser

	1271

	1272

	1273 ///////////////////////////////////////////////////////////

	1274 //

	1275 //

	1276 //

	1277

	1278 int DumpScanner(Scanner & s, UBool dump = TRUE){

	1279 int len = strlen(s.source);

	1280 int error_start_offset = s.history - s.source;

	1281 if (dump){

	1282 printf("\n=================== DumpScanner ================\n");

	1283 fwrite(s.source, len, 1, stdout);

	1284 printf("\n-----parsed-------------------------------------\n");

	1285 fwrite(s.source, s.history - s.source, 1, stdout);

	1286 printf("\n-----current------------------------------------\n");

	1287 fwrite(s.history, s.working - s.history, 1, stdout);

	1288 printf("\n-----unparsed-----------------------------------\n");

	1289 fwrite(s.working, (s.source + len - s.working), 1, stdout);

	1290 printf("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");

	1291 }

	1292 return error_start_offset;

	1293 }

	1294

	1295 class LanguageGenerator_impl{

	1296 public:

	1297 LanguageGenerator_impl(const char const bnf_definition, const char const t op_node)

	1298 :par(bnf_definition), top_node_name(top_node){

	1299 srand((unsigned)time( NULL ));

	1300 }

	1301

	1302 LanguageGenerator::PARSE_RESULT parseBNF(UBool debug = TRUE){

	1303 if (par.parse()){

	1304 if (par.symbols.find(top_node_name, &top_node_ref) == SymbolTable::H AS_REF) {

	1305 if (par.symbols.is_complete()) {

	1306 return LanguageGenerator::OK;

	1307 } else {

	1308 if (debug) printf("The bnf definition is incomplete.\n");

	1309 return LanguageGenerator::INCOMPLETE;

	1310 }

	1311 } else {

	1312 if (debug) printf("No top node is found.\n");

	1313 return LanguageGenerator::NO_TOP_NODE;

	1314 }

	1315 } else {

	1316 if(debug) {

	1317 printf("The bnf definition is wrong\n");

	1318 DumpScanner(par.s, TRUE);

	1319 }

	1320 return LanguageGenerator::BNF_DEF_WRONG;

	1321 }

	1322 }

	1323 const char * next(){

	1324 return top_node_ref->next();

	1325 }

	1326

	1327 private:

	1328 Parser par;

	1329 const char *const top_node_name;

	1330 Pick * top_node_ref;

	1331 };

	1332

	1333 LanguageGenerator::LanguageGenerator():lang_gen(NULL){

	1334 }

	1335

	1336 LanguageGenerator::~LanguageGenerator(){

	1337 delete lang_gen;

	1338 }

	1339

	1340 LanguageGenerator::PARSE_RESULT LanguageGenerator::parseBNF(const char const bn f_definition /in/, const char const top_node/in/, UBool debug){

	1341 if (lang_gen){

	1342 delete lang_gen;

	1343 }

	1344 lang_gen = new LanguageGenerator_impl(bnf_definition, top_node);

	1345 PARSE_RESULT r = lang_gen->parseBNF(debug);

	1346 if (r != OK){

	1347 delete lang_gen;

	1348 lang_gen = NULL;

	1349 return r;

	1350 } else {

	1351 return r;

	1352 }

	1353 }

	1354 const char *LanguageGenerator::next(){ // Return a null-terminated c-string. The buffer is owned by callee.

	1355 if (lang_gen){

	1356 return lang_gen->next();

	1357 }else {

	1358 return "";

	1359 }

	1360 }

	1361

	1362 ///////////////////////////////////////////////////////////

	1363 //

	1364 // The test code for WBNF

	1365 //

	1366

	// Calls the zero-argument predicate `fun` and prints "Pass: <fun>" when
	// it returns true, "FAILED: !!! <fun> !!!" otherwise.
	// Wrapped in do { } while (0) so that `CALL(x);` is exactly one
	// statement and can sit in an unbraced if/else.
	#define CALL(fun) \
		do { \
			if (fun()) { \
				printf("Pass: " #fun "\n"); \
			} else { \
				printf("FAILED: !!! " #fun " !!!\n"); \
			} \
		} while (0)

	1373

	// Prints a banner naming `fun`, then `times` results of `var.next()`,
	// one per line, then a closing rule.
	// Wrapped in do { } while (0) so it is a single statement. The arguments
	// are parenthesised, and the internal names carry a dump_r_ prefix so
	// they cannot hide a caller's `i` or `t` passed as `var` or `times`.
	#define DUMP_R(fun, var, times) \
		do { \
			printf("\n========= " #fun " =============\n"); \
			for (int dump_r_i = 0; dump_r_i < (times); dump_r_i++) { \
				const char * dump_r_t = (var).next(); \
				fwrite(dump_r_t, strlen(dump_r_t), 1, stdout); \
				printf("\n"); \
			} \
			printf("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"); \
		} while (0)

	1382

	1383

	1384

	1385 #if TEST_WBNF_TEST

	1386 static UBool TestQuote(){

	1387 const char *const str = "This ' A !,z\| qq [] .new\tline";

	1388 //const char *const str_r = "This \\' A '!,'z'\|' qq '[]' '.'new\tline";

	1389 ////

	1390 //// :( we must quote our string to following C syntax

	1391 //// cannot type the literal here, it makes our code rather human unread able

	1392 //// very very unconformable!

	1393 ////

	1394 ///*

	1395 //*/

	1396

	1397 //const char *const s1 = "ab'c";

	1398 //const char (* s1_r1) [] = { "ab''c", // ab''c

	1399 // "ab\\'c", // ab\'c

	1400 // };//

	1401 ///*

	1402 // . '.' \.

	1403 // .. \.\. '.'\. '.'\. '..' // '.''.' wrong

	1404 //*/

	1405

	1406 //const char *const s2 = "a..'.b"; // a..'.b

	1407 //const char (*s2_r) [] = { "a'..''.'b" // a'..''.'b

	1408 // ,"a'..\\'.'b" // a'..\'.'b

	1409 // ,"a'..'\\''.'b" // a'..'\''.'b

	1410 // };//

	1411

	1412 //const char *const s3 = "a..\\.b"; // a..\.b

	1413 //const char (*s3_r) [] = { "a'..\\\\.'b" // a'..\\.'b

	1414 // ,"a'..'\\\\'.'b" // a'..'\\'.'b

	1415 // };//

	1416

	1417 // // no catact operation, no choice, must be com pact

	1418

	1419 srand((unsigned)time( NULL ));

	1420

	1421 //Escaper l(Escaper::NO, Escaper::NO, Escaper::RAND_ESC);

	1422 Pick *p = new Literal(str);

	1423 Quote q(*p);

	1424

	1425 DUMP_R(TestQuote, (*p), 1);

	1426 DUMP_R(TestQuote, q, 20);

	1427 return FALSE;

	1428 }

	1429 static UBool TestLiteral(){

	1430 const char * s = "test string99.";

	1431 Literal n(s);

	1432 const char * r = n.next();

	1433 return strcmp(s,r) == 0;

	1434 }

	1435

	1436 static UBool TestSequence(){

	1437 Sequence seq;

	1438 seq.append(new Literal("abc "));

	1439 seq.append(new Literal(", s"));

	1440

	1441 return strcmp(seq.next(), "abc , s") == 0;

	1442 }

	1443 static UBool TestAlternation(){

	1444 srand((unsigned)time( NULL ));

	1445 Alternation alt;

	1446 alt.append(new Literal("aaa_10%"),10);

	1447 alt.append(new Literal("bbb_0%"),0);

	1448 alt.append(new Literal("ccc_10%"),10);

	1449 alt.append(new Literal("ddddddd_50%"),50);

	1450

	1451 DUMP_R(TestAlternation, alt, 50);

	1452

	1453 return FALSE;

	1454 }

	1455

	1456 static UBool TestBuffer(){

	1457 Buffer_int t;

	1458 t.append(1).append(0).append(5);

	1459 int s = t.content_size();

	1460 for (int i=0; i<s; ++i){

	1461 printf("%d\n", t[i]);

	1462 }

	1463 return FALSE;

	1464 }

	1465

	1466 static UBool TestWeightedRand(){

	1467 srand((unsigned)time( NULL ));

	1468 Buffer_int t;

	1469 t.append(1).append(0).append(5);

	1470 WeightedRand wr(&Buffer_int().append(10).append(0).append(50),4);

	1471 // WeightedRand wr(&t,3);

	1472 for (int i=0; i< 50; ++i){

	1473 printf("%d\n", wr.next());

	1474 }

	1475 return FALSE;

	1476 }

	1477

	1478 static UBool TestRepeat(){

	1479 srand((unsigned)time( NULL ));

	1480 Repeat rep(new Literal("aaa1-5 "), 1, 5);

	1481 DUMP_R(TestRepeat, rep, 50);

	1482

	1483 Repeat r2(new Literal("b{1,3}1%0%5% "), 1, 3, &Buffer_int().append(1).append (0).append(5));

	1484 DUMP_R(TestRepeat, r2, 50);

	1485

	1486 Repeat r3(new Literal("aaa5-5 "), 5, 5);

	1487 DUMP_R(TestRepeat, r3, 50);

	1488

	1489 return FALSE;

	1490 }

	1491

	1492 static UBool TestVariable(){

	1493 SymbolTable tab;

	1494 Pick * value = new Literal("string1");

	1495 Variable var1(&tab, "x", value);

	1496

	1497 Variable var2(&tab, "y");

	1498 // tab.put(var2, value); // TOFIX: point alias/recursion problem

	1499 Pick * value2 = new Literal("string2");

	1500 tab.put(var2, value2);

	1501

	1502 Pick * value3 = new Literal("string3");

	1503 Variable var3(&tab, "z");

	1504 tab.put("z", value3);

	1505

	1506 UBool pass;

	1507 pass = strcmp(var1.next(), value->next()) == 0;

	1508 pass = pass && strcmp(var2.next(), value2->next()) == 0;

	1509 pass = pass && strcmp(var3.next(), value3->next()) == 0;

	1510 return pass;

	1511 }

	1512

	1513 static UBool TestSymbolTable(){

	1514 Literal * n1 = new Literal("string1");

	1515 Literal * n2 = new Literal("string2");

	1516 SymbolTable t;

	1517 t.put("abc", n1);

	1518 t.put("$aaa", n2);

	1519 // t.put("alias", n1); // TOFIX: point alias/recursion problem

	1520 t.put("bbb");

	1521

	1522 UBool pass;

	1523 pass = t.find(NULL) == SymbolTable::EMPTY;

	1524 pass = pass && t.find("ccc") == SymbolTable::NO_VAR;

	1525 pass = pass && t.find("bbb") == SymbolTable::NO_REF;

	1526 pass = pass && t.find("abc") == SymbolTable::HAS_REF;

	1527 pass = pass && t.find("$aaa") == SymbolTable::HAS_REF;

	1528

	1529 t.reset();

	1530 pass = pass && t.find("abc") == SymbolTable::NO_VAR;

	1531 return pass;

	1532 }

	1533

	1534

	1535 static UBool TestScanner(void){

	1536 //const char str1[] = "$root = $command{0,5} $reset $mostRules{1,20};";

	1537 //const char str1_r[][20] = {"$root", "=", "$command", "{", "0", ",", "5", " }",

	1538 // "$reset", "$mostRules", "{", "1", ",", "20", "}", ";"};

	1539

	1540 const char str2[] = "$p2 =(\\\\ $s $string $s)? 25%;";

	1541 const char str2_r[][20] = {"$p2", "=", "(", "\\", "$s", "$string", "$s", ")" , "?", "25", "%", ";"};

	1542

	1543 const char *str = str2;

	1544 const char (*str_r)[20] = str2_r;

	1545 int tokenNum = sizeof(str2_r)/sizeof(char[20]);

	1546

	1547 Scanner t(str);

	1548 UBool pass = TRUE;

	1549 t.getNextToken();

	1550 int i = 0;

	1551 while (pass){

	1552 if (t.tokenType == STREAM_END){

	1553 pass = pass? i == tokenNum : FALSE;

	1554 break;//while

	1555 } else if (t.tokenType == ERROR){

	1556 pass = FALSE;

	1557 break;//while

	1558 } else {

	1559 pass = strcmp( &(t.token[0]), str_r[i++]) == 0;

	1560 t.getNextToken();

	1561 }

	1562 }

	1563

	1564 //const char ts[] = "$commandList = '['"

	1565 //" ( alternate ' ' $alternateOptions"

	1566 //" \| backwards ' 2'"

	1567 //" \| normalization ' ' $onoff "

	1568 //" \| caseLevel ' ' $onoff "

	1569 //" \| hiraganaQ ' ' $onoff"

	1570 //" \| caseFirst ' ' $caseFirstOptions"

	1571 //" \| strength ' ' $strengthOptions"

	1572 //" ) ']';" ;

	1573

	1574 //Scanner t2(ts);

	1575 //pass = TRUE;

	1576 //do {

	1577 // t2.getNextToken();

	1578 // if (t2.tokenType == ERROR){

	1579 // DumpScanner(t2);

	1580 // return FALSE;

	1581 // }

	1582 //}while (t.tokenType != STREAM_END);

	1583

	1584 return pass;

	1585 }

	1586

	1587 class TestParserT {

	1588 public:

	1589 UBool operator () (const char *const str, const int exp_error_offset = -1, const UBool dump = TRUE){

	1590 Parser par(str);

	1591 if (par.rules()){

	1592 if ( exp_error_offset == -1){

	1593 return TRUE;

	1594 }else {

	1595 DumpScanner(par.s,dump);

	1596 return FALSE;

	1597 }

	1598 }else {

	1599 return DumpScanner(par.s, dump) == exp_error_offset;

	1600 }

	1601 }

	1602 };

	1603

	1604 UBool TestParser(){

	1605 TestParserT test;

	1606

	1607 UBool pass = TRUE;

	1608 pass = pass && test ("$s = ' ' ? 50%;");

	1609 pass = pass && test("$x = ($var {1,2}) 3%;"); // legal

	1610 pass = pass && test("$x = $var {1,2} 3% \| b 4%;"); // legal

	1611 pass = pass && test("$x = $var {1,2} 3%;"); // legal

	1612 pass = pass && test("$m = $c ? 2% 4% \| $r 5% \| $n 25%;"); // legal

	1613 pass = pass && test("$a = b ? 2% \| c 5%;"); // legal

	1614 pass = pass && test("$x = A B 5% C 10% \| D;", 8, FALSE); // illegal 5%

	1615 pass = pass && test("$x = aa 45% \| bb 5% cc;", 19, FALSE);// illegal cc

	1616 pass = pass && test("$x = (b 5%) (c 6%);"); // legal

	1617 pass = pass && test("$x = (b 5%) c 6%;", 13, FALSE); // illegal 6%

	1618 pass = pass && test("$x = b 5% (c 6%);", 9, FALSE); // illegal (c 6%)

	1619 pass = pass && test("$x = b 5% c 6%;", 9, FALSE); // illegal c 6%

	1620 pass = pass && test("$x = b 5%;"); // legal

	1621 pass = pass && test("$x = aa 45% \| bb 5% cc;", 19, FALSE);// illegal cc

	1622 pass = pass && test("$x = a \| b \| c 4% \| d 5%;"); // legal

	1623 pass = pass && test("$s = ' ' ? 50% abc;"); // legal

	1624 pass = pass && test("$s = a \| c d \| e f;"); // legal

	1625 pass = pass && test( "$z = q 0% \| p 1% \| r 100%;"); // legal How to check parsed tree??

	1626

	1627 pass = pass && test("$s = ' ' ? 50%;");

	1628 pass = pass && test("$relationList = '<' \| '<<' \| ';' \| '<<<' \| ',' \| '=';" );

	1629 pass = pass && test("$p1 = ($string $s '\|' $s)? 25%;");

	1630 pass = pass && test("$p2 = (\\\\ $s $string $s)? 25%;");

	1631 pass = pass && test("$rel2 = $p1 $string $s $p2;");

	1632 pass = pass && test("$relation = $relationList $s ($rel1 \| $rel2) $crlf;");

	1633 pass = pass && test("$command = $commandList $crlf;");

	1634 pass = pass && test("$reset = '&' $s ($beforeList $s)? 10% ($positionList 10 0% \| $string 10%) $crlf;");

	1635 pass = pass && test("$mostRules = $command 1% \| $reset 5% \| $relation 25%;") ;

	1636 pass = pass && test("$root = $command{0,5} $reset $mostRules{1,20};");

	1637

	1638 const char collationBNF[] =

	1639 "$s = ' '? 50%;"

	1640 "$crlf = '\r\n';"

	1641

	1642 "$alternateOptions = non'-'ignorable \| shifted;"

	1643 "$onoff = on \| off;"

	1644 "$caseFirstOptions = off \| upper \| lower;"

	1645 "$strengthOptions = '1' \| '2' \| '3' \| '4' \| 'I';"

	1646 "$commandList = '['"

	1647 " ( alternate ' ' $alternateOptions"

	1648 " \| backwards ' 2'"

	1649 " \| normalization ' ' $onoff "

	1650 " \| caseLevel ' ' $onoff "

	1651 " \| hiraganaQ ' ' $onoff"

	1652 " \| caseFirst ' ' $caseFirstOptions"

	1653 " \| strength ' ' $strengthOptions"

	1654 " ) ']';"

	1655 "$command = $commandList $crlf;"

	1656

	1657 "$ignorableTypes = (tertiary \| secondary \| primary) ' ' ignorable;"

	1658 "$allTypes = variable \| regular \| implicit \| trailing \| $ignorableTypes;"

	1659 "$positionList = '[' (first \| last) ' ' $allTypes ']';"

	1660

	1661 "$beforeList = '[before ' ('1' \| '2' \| '3') ']';"

	1662

	1663 "$relationList = ("

	1664 " '<'"

	1665 " \| '<<'"

	1666 " \| ';'"

	1667 " \| '<<<'"

	1668 " \| ','"

	1669 " \| '='"

	1670 ");"

	1671 "$string = $magic;"

	1672 "$rel1 = '[variable top]' $s;"

	1673 "$p1 = ($string $s '\|' $s)? 25%;"

	1674 "$p2 = (\\\\ $s $string $s)? 25%;"

	1675 "$rel2 = $p1 $string $s $p2;"

	1676 "$relation = $relationList $s ($rel1 \| $rel2) $crlf;"

	1677

	1678 "$reset = '&' $s ($beforeList $s)? 10% ($positionList 1% \| $string 10%) $crl f;"

	1679 "$mostRules = $command 1% \| $reset 5% \| $relation 25%;"

	1680 "$root = $command{0,5} $reset $mostRules{1,20};"

	1681 ;

	1682

	1683 pass = pass && test(collationBNF);

	1684

	1685

	1686 return pass;

	1687 }

	1688

	1689 static UBool TestMorph(){

	1690 srand((unsigned)time( NULL ));

	1691

	1692 Alternation * alt = new Alternation();

	1693

	1694 (*alt)

	1695 .append(new Literal("a")).append(new Literal("b")).append(new Literal("c"))

	1696 .append(new Literal("d")).append(new Literal("e")).append(new Literal("f"))

	1697 .append(new Literal("g")).append(new Literal("h")).append(new Literal("i"))

	1698 .append(new Literal("j")).append(new Literal("k")).append(new Literal("l"))

	1699 .append(new Literal("m")).append(new Literal("n")).append(new Literal("o"))

	1700 ;

	1701

	1702 Repeat * rep = new Repeat( alt ,5,5 );

	1703 Morph m( *rep);

	1704

	1705 // DUMP_R(TestMorph,(*rep),20);

	1706 DUMP_R(TestMorph,m,100);

	1707

	1708 return FALSE;

	1709 }

	1710

	1711 #endif

	1712

	1713 static UBool TestLanguageGenerator(){

	1714 //LanguageGenerator g;

	1715 //const char *const s = "$s = p 0% \| q 1%;";

	1716 //g.parseBNF(s, "$s");

	1717 UBool pass;

	1718 //= strcmp("q", g.next()) == 0;

	1719

	1720 const char *const def =

	1721 //"$a = $b;"

	1722 //"$b = $c;"

	1723 //"$c = $t;"

	1724 //"$t = abc $z{1,2};"

	1725 //"$k = a \| b \| c \| d \| e \| f \| g ;"

	1726 //"$z = q 0% \| p 1% \| r 1%;"

	1727 "$x = a ? 0%;"

	1728 ; // end of string

	1729 // const char * s = "abczz";

	1730 //

	1731 //

	1732 LanguageGenerator g;

	1733 pass = g.parseBNF(def, "$x",TRUE);

	1734 //// LanguageGenerator g(collationBNF, "$root", "$magic", new MagicNode());

	1735 //

	1736 if (pass != LanguageGenerator::OK) return FALSE;

	1737

	1738 DUMP_R(TestLanguageGenerator, g, 20);

	1739 return pass;

	1740

	1741 ////UBool pass = strcmp(s,r) == 0;

	1742

	1743 //if (pass){

	1744 // printf("TestRandomLanguageGenerator passed.\n");

	1745 //} else {

	1746 // printf("TestRandomLanguageGenerator FAILED!!!\n");

	1747 //}

	1748 //return pass;

	1749 }

	1750

	1751 void TestWbnf(void){

	1752 srand((unsigned)time( NULL ));

	1753

	1754 //CALL(TestLiteral);

	1755 //CALL(TestSequence);

	1756 //CALL(TestSymbolTable);

	1757 //CALL(TestVariable);

	1758

	1759 //TestRepeat();

	1760 //TestAlternation();

	1761 //TestMorph();

	1762

	1763 //TestQuote();

	1764 //TestBuffer();

	1765 //TestWeightedRand();

	1766

	1767 //CALL(TestScanner);

	1768 //CALL(TestParser);

	1769 CALL(TestLanguageGenerator);

	1770 }

	1771

OLD	NEW

« no previous file with comments | « icu46/source/test/intltest/wbnf.h ('k') | icu46/source/test/intltest/windttst.h » ('j') | no next file with comments »