source/test/intltest/wbnf.cpp - Issue 845603002: Update ICU to 54.1 step 1

Side by Side Diff: source/test/intltest/wbnf.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unusued directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /*

2 ******************************************************************************

3 * Copyright (C) 2005-2007, International Business Machines Corporation and *

4 * others. All Rights Reserved. *

5 ******************************************************************************

6 */

7

8 #include <stdio.h>

9 #include <string.h>

10 #include <stdlib.h>

11 #include <time.h>

12

13 #include "wbnf.h"

14

15 // Most of this code is meant to test the test code. It's a self test.

16 // Normally this isn't run.

17 #define TEST_WBNF_TEST 0

18

19 ///////////////////////////////////////////////////////////

20 //

21 // Constants and the most basic helper classes

22 //

23

24 static const char DIGIT_CHAR[] = "0123456789";

25 static const char WHITE_SPACE[] = {'\t', ' ', '\r', '\n', 0};

26 static const char ALPHABET[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV WXYZ";

27 static const char SPECIAL[] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{\|}~";

28

29 static inline UBool isInList(const char c /in/, const char list[] /in/){

30 const char * p = list;

31 for (;p != 0 && p != c; p++);

32 return *p?TRUE:FALSE;

33 }

34 static inline UBool isDigit(char c) {return isInList(c, DIGIT_CHAR);}

35 static inline UBool isWhiteSpace(char c) {return isInList(c, WHITE_SPACE);}

36 static inline UBool isAlphabet(char c) {return isInList(c, ALPHABET);}

37 static inline UBool isSpecialAsciiChar(char c) {return isInList(c,SPECIAL);}

38

39

40

41 ///////////////////////////////////////////////////////////

42 //

43 // Helper classes

44 //

45

46 class Buffer_byte{

47 // Utility class, can be treated as an auto expanded array. no boundary check.

48

49 typedef char byte;

50 byte * start;

51 byte * current;

52 int buffer_size; // size unit is byte

53 public:

54 inline int content_size(){return current - start;} // size unit is byte

55

56 private:

57 inline void expand(int add_size = 100){ // size unit is byte

58 int new_size = buffer_size + add_size;

59

60 int cs_snap = content_size();

61 start = (byte *) realloc(start, new_size); // may change the value of start

62 current = start + cs_snap;

63

64 memset(current, 0, add_size);

65 buffer_size = new_size;

66 }

67

68 inline void expand_to(int size){

69 int r = size - buffer_size;

70 if (r > 0) {

71 expand(r); // simply expand, no block alignment

72 }

73 }

74 Buffer_byte(const Buffer_byte &);

75 Buffer_byte & operator = (const Buffer_byte &);

76 public:

77 Buffer_byte():start(NULL),current(start),buffer_size(0){

78 expand();

79 }

80 ~Buffer_byte(){

81 free(start);

82 }

83

84 inline void reset(){

85 start != NULL ? memset(start, 0, buffer_size) : 0;

86 current = start;

87 }

88

89 // Using memory copy method to append a C array to buffer,

90 inline void append(const void * c, int size){ // size unit is byte

91 expand_to(content_size() + size) ;

92 memcpy(current, c, size);

93 current = current + size;

94 }

95

96 byte * buffer(){

97 return start;

98 }

99 };

100

101 /*

102 The class(es) try to work as bulid-in array, so it overloads these two operato rs

103 operator type *();

104 type & operator[];

105 The first is used to auto type convert, the latter is used to select member.

106

107 A small trick is the class does not overload the address-of operator. This

108 behavior is different from bulid-in array, but it give us the opportunity

109 to get the address of the class itself.

110 */

111 //template<typename type>

112 // class BUFFER{

113 // typedef BUFFER name;

114 #define BUFFER(type, name)\

115 class name {\

116 private:\

117 Buffer_byte buf;\

118 public:\

119 name & reset() {buf.reset(); return *this;}\

120 name & append(type c) {buf.append(&c, sizeof(type)); return *this;}\

121 name & append_array(const type * p, int size) {buf.append(p, sizeof(type )size); return this;}\

122 type & operator [] (int i) { return ((type *) buf.buffer())[i];}\

123 operator type (){return (type ) buf.buffer();} \

124 int content_size(){return buf.content_size() / sizeof(type);}\

125 }

126

127

128 class Pick{

129 /* The Pick is the basic language generator element*/

130 public:

131 // generate a string accroding the syntax

132 // Return a null-terminated c-string. The buffer is owned by callee.

133 virtual const char* next() = 0;

134 virtual ~Pick(){};

135 };

136

137 //typedef BUFFER<char> Buffer_char;

138 //typedef BUFFER<int> Buffer_int;

139 //typedef BUFFER<Pick *> Buffer_pPick;

140 BUFFER(char, Buffer_char);

141 BUFFER(int, Buffer_int);

142 BUFFER(Pick *, Buffer_pPick);

143

144 class SymbolTable{

145 /* Helper class.

146 * It's a mapping table between 'variable name' and its 'active Pick object'

147 */

148 private:

149 Buffer_char name_buffer; // var names storage space

150

151 Buffer_int names; // points to name (offset in name_buffer)

152 Buffer_pPick refs; // points to Pick

153

154 int get_index(const char *const var_name){

155 int len = names.content_size();

156 for (int i=0; i< len; i++){

157 if (strcmp(var_name, name_buffer + names[i]) == 0){

158 return i;

159 }

160 }

161 return -1;

162 }

163

164 public:

165 enum RESULT {EMPTY, NO_VAR, NO_REF, HAS_REF};

166

167 RESULT find(const char const var_name /[in] c-string/, Pick * ref = NUL L /[out] Pick */){

168 if (!var_name) return EMPTY; // NULL name

169

170 int i = get_index(var_name);

171 if (i == -1){

172 return NO_VAR; // new name

173 }

174 if (!refs[i]){ // exist name, no ref

175 return NO_REF;

176 } else {

177 if (ref) {

178 *ref = refs[i];

179 }

180 return HAS_REF; // exist name, has ref

181 }

182 }

183

184 void put(const char const var_name, Pick const var_ref = NULL){

185 int i = get_index(var_name);

186 switch(find(var_name)){

187 case EMPTY: // NULL name

188 break;

189 case NO_VAR: // new name

190 int offset;

191 offset = name_buffer.content_size();

192 name_buffer.append_array(var_name, strlen(var_name) + 1);

193 names.append(offset);

194 refs.append(var_ref);

195 break;

196 case NO_REF: // exist name, no ref

197 refs[i] = var_ref; // link definition with variable

198 break;

199 case HAS_REF: // exist name, has ref

200 if (var_ref){

201 refs[i] = var_ref;

202 }

203 break;

204 default:

205 ; // ASSERT(FALSE);

206 }

207 return;

208 }

209

210 UBool is_complete(){

211 int n = names.content_size();

212 for (int i=0; i<n; ++i){

213 if (refs[i] == NULL){

214 return FALSE;

215 }

216 }

217 return TRUE;

218 }

219

220 void reset(){

221 names.reset();

222 name_buffer.reset();

223

224 // release memory here

225 int s = refs.content_size();

226 for (int i=0; i < s; i++){

227 delete refs[i]; // TOFIX: point alias/recursion problem

228 }

229 refs.reset();

230 }

231

232 ~SymbolTable(){

233 reset();

234 }

235 };

236

237

238 /*

239 // Document of class Escaper

240 //

241 // ATTENTION:

242 // From http://icu-project.org/userguide/Collate_Customization.html.

243 // We get the precedence of escape/quote operations

244 //

245 // (highest) 1. backslash \

246 // 2. two single quotes ''

247 // 3. quoting ' '

248 //

249 // ICU Collation should accept following as the same string.

250 //

251 // 1) 'ab'c _

252 // 2) a\bc \

253 // 3) a'b'\c \|- They are equal.

254 // 4) abc _/

255 //

256 // From "two single quotes", we have following deductions

257 // D1. empty quoting is illgal. (obviously)

258 // D2. no contact operation between two quotings

259 // '.''.' is not .. it is .'.

260 // D3. "two single quotes" cannot contact two quoting simultaneously

261 // '..''''.' is not ..'. it is ..''.

262 // NOTICE:

263 // "two single quotes" can contact before one quoting

264 // '''.' is '.

265 // "two single quotes" can literally contact after one quoting

266 // But, from syntax, it's one quoting including a "two single quotes"

267 // '.''' is .'

268 // D4. "two single quotes" cannot solely be included in quoting

269 // '''' is not ' it is ''

270 // NOTICE: These are legal

271 // '.''.' is .'.

272 // '.''' is .'

273 //

274 // dicision

275 // /\

276 // /__\

277 // output buffer input buffer

278 //

279 // To make our dicision (within an atom operation) without caring input and outp ut buffer,

280 // following calling pattern (within an atom operation) shall be avoided

281 //

282 // P1 open_quoting() then close_quoting() (direct violation) D1

283 // P2 close_quoting() then open_quoting() (direct violation) D2

284 // P3 empty open_quoting() (indirect violation) D1, D4

285 // P4 empty close_quoting() (indirect violation) D2, D3

286 // P5 open_quoting() then two single quotes (indirect violation) D4

287 // P6 close_quoting() then two single quotes (indirect violation) D3

288 //

289 // two single quotes escaping will not open_ or close_ quoting()

290 // The choice will not lose some quoing forms.

291 //

292 // For open_quoting(),

293 // we may get this form quoting ''' P5

294 // It may raise a bug ''''x

295 // If we expect

296 // '''.' let the next char open the quoting

297 // '.''.' the quoting is already opened by preceding char

298 //

299 // For close_quoting()

300 // we will get this form quoting '.''' P6

301 // It may raise a bug '.''''.'

302 // If we expect

303 // '.'''\. let the next char close the quoting

304 // '.''''.' the expectation is wrong! using '.'\''.' instead

305 //

306 // It's a hard work to re-adjust generation opportunity for various escaping for m.

307 // We just simply ignore it.

308 */

309 class Escaper{

310 public:

311 enum CHOICE {YES, NO, RAND};

312 enum ESCAPE_FORM {BSLASH_ONLY, QUOTE_ONLY, QUOTE_AND_BSLAH, RAND_ESC};

313 private:

314 class Bool{ // A wrapper class for CHOICE, to auto adapter UBool class

315 private:

316 const CHOICE tag;

317 public:

318 Bool(CHOICE flag=RAND):tag(flag){}

319 operator UBool() { // conversion operator

320 return tag == RAND ? rand()%2 : tag == YES;

321 //if (tag == RAND){

322 // return rand()%2 == 1;

323 //} else {

324 // return tag == YES ? TRUE : FALSE;

325 //}

326 }

327 };

328 public:

329 Escaper(CHOICE escapeLiteral = RAND,

330 CHOICE twoQuotesEscape = RAND,

331 ESCAPE_FORM escapeForm = RAND_ESC):

332 escape_form(escapeForm),

333 escape_literal(escapeLiteral),

334 two_quotes_escape(twoQuotesEscape),

335 is_quoting(FALSE){}

336 private:

337 Buffer_char str;

338 ESCAPE_FORM escape_form;

339 Bool escape_literal;

340 Bool two_quotes_escape;

341 UBool quote_escape;

342 UBool bslash_escape;

343 UBool is_quoting;

344

345 void set_options(){

346 ESCAPE_FORM t = escape_form == RAND_ESC ? (ESCAPE_FORM) (rand()%3) : esc ape_form;

347 switch (t){

348 case BSLASH_ONLY :

349 bslash_escape = TRUE; quote_escape = FALSE; break;

350 case QUOTE_ONLY:

351 bslash_escape = FALSE;quote_escape = TRUE; break;

352 case QUOTE_AND_BSLAH:

353 bslash_escape = TRUE; quote_escape = TRUE; break;

354 default:

355 ;// error

356 }

357 }

358

359 void reset(){

360 str.reset();

361 is_quoting = FALSE;

362 }

363

364 inline void open_quoting(){

365 if(is_quoting){

366 // do nothing

367 } else {

368 str.append('\'');

369 is_quoting = TRUE;

370 }

371 }

372 inline void close_quoting(){

373 if(is_quoting){

374 str.append('\'');

375 is_quoting = FALSE;

376 } else {

377 // do nothing

378 }

379 }

380

381 // str [in] null-terminated c-string

382 void append(const char * strToAppend){

383 for(;*strToAppend != 0; strToAppend++){

384 append(*strToAppend);

385 }

386 }

387

388 inline void append(const char c){

389 set_options();

390

391 if (c == '\\'){

392 quote_escape ? open_quoting() : close_quoting();

393 //bslash_escape always true here

394 str.append('\\');

395 str.append('\\');

396 } else if (c == '\''){

397 if (two_quotes_escape){ // quoted using two single quotes

398 // See documents in anonymous.design

399 str.append('\'');

400 str.append('\'');

401 } else{

402 quote_escape ? open_quoting() : close_quoting();

403 //bslash_escape always true here

404 str.append('\\');

405 str.append('\'');

406 }

407 } else if (isSpecialAsciiChar(c) \|\| isWhiteSpace(c)){

408 quote_escape ? open_quoting() : close_quoting();

409 if (bslash_escape) str.append('\\');

410 str.append(c);

411 } else { //if (isAlphabet(c) \|\| isDigit(c) \|\| TRUE){ // treat others as literal

412 if (escape_literal){

413 quote_escape ? open_quoting() : close_quoting();

414 if (bslash_escape) str.append('\\');

415 str.append(c);

416 } else {

417 close_quoting();

418 str.append(c);

419 }

420 }

421 }

422

423 public:

424 // Return a null-terminate c-string. The buffer is owned by callee.

425 char * operator()(const char * literal /c-string/){

426 str.reset();

427 for(;*literal != 0; literal++){

428 append(*literal);

429 }

430 close_quoting(); // P4 exception, to close whole quoting

431 return str;

432 }

433 };

434

435 class WeightedRand{

436 // Return a random number in [0, size)

437 // Every number has different chance (aka weight) to be selected.

438 private:

439 Buffer_int weights;

440 double total;

441 WeightedRand(const WeightedRand &);

442 WeightedRand & operator = (const WeightedRand &);

443 public:

444 WeightedRand(Buffer_int * weight_list = NULL, int size = 0){

445 if ( weight_list == NULL){

446 for (int i=0; i<size; ++i) weights.append(DEFAULT_WEIGHT);

447 } else {

448 int s = weight_list->content_size();

449 if (s < size){

450 weights.append_array( (*weight_list),s);

451 for (int i=s; i<size; ++i) weights.append(DEFAULT_WEIGHT);

452 } else { // s >= size

453 weights.append_array( (*weight_list),size);

454 }

455 }

456 total = 0;

457 int c = weights.content_size();

458 for (int i=0; i<c; ++i){

459 total += weights[i];

460 }

461 }

462

463 void append(int weight){

464 weights.append(weight);

465 total += weight;

466 }

467

468 // Give a random number with the consideration of weight.

469 // Every random number is associated with a weight.

470 // It identifies the chance to be selected,

471 // larger weight has more chance to be selected.

472 //

473 //

474 // ______________________ every slot has equal chance

475 //

476 // [____][_][___][______] each item has different slots, hence different chance

477 //

478 //

479 // The algorithms to generate the number is illustrated by preceding figure .

480 // First, a slot is selected by rand(). Then we translate the slot to corre sponding item.

481 //

482 int next(){

483 // get a random in [0,1]

484 double reference_mark = (double)rand() / (double)RAND_MAX;

485

486 // get the slot's index, 0 <= mark <= total;

487 double mark = total * reference_mark;

488

489 // translate the slot to corresponding item

490 int i=0;

491 for (;;){

492 mark -= weights[i]; // 0 <= mark <= total

493 if (mark <= 0)

494 break;

495 i++;

496 }

497 return i;

498 }

499 };

500

501 ///////////////////////////////////////////////////////////

502 //

503 // The parser result nodes

504 //

505

506 class Literal : public Pick {

507 public:

508 virtual const char* next(){

509 return str;

510 }

511 Literal(const char * s /c-string/){

512 str.append_array(s, strlen(s) + 1);

513 }

514 private:

515 Buffer_char str; //null-terminated c-string

516 };

517

518 class Variable : public Pick {

519 public:

520 Variable(SymbolTable * symbols, const char * varName, Pick * varRef = NULL){

521 this->var_name.append_array(varName, strlen(varName) + 1);

522 if ((symbol_table = symbols)){

523 symbol_table->put(varName, varRef);

524 }

525 }

526

527 operator const char *(){

528 return var_name;

529 }

530

531 virtual const char* next(){

532 if (symbol_table){

533 Pick * var_ref = NULL;

534 symbol_table->find(var_name, &var_ref);

535 if (var_ref) {

536 return var_ref->next();

537 }

538 }

539 return ""; // dumb string

540 }

541 private:

542 Buffer_char var_name;

543 SymbolTable * symbol_table;

544 };

545

546 class Quote : public Pick{

547 public:

548 Quote(Pick & base):item(base),e(Escaper::NO, Escaper::NO, Escaper::BSLASH_ON LY){

549 }

550 virtual const char* next(){

551 return e(item.next());

552 }

553 private:

554 Pick & item;

555 Buffer_char str;

556 Escaper e;

557 };

558

559

560 class Morph : public Pick{

561 /*

562 The difference between morph and an arbitrary random string is that

563 a morph changes slowly. When we build collation rules, for example,

564 it is a much better test if the strings we use are all in the same

565 'neighborhood'; they share many common characters.

566 */

567 public:

568 Morph(Pick & base):item(base){}

569

570 virtual const char* next(){

571 current.reset();

572 const char * s = item.next();

573 current.append_array(s, strlen(s) + 1);

574 if (last.content_size() == 0) {

575 str.reset();

576 last.reset();

577 str.append_array(current, current.content_size());

578 last.append_array(current, current.content_size());

579 } else {

580 morph();

581 }

582 return str;

583 }

584 private:

585 Pick & item;

586 Buffer_char str;

587 Buffer_char last;

588 Buffer_char current;

589

590 char * p_last;

591 char * p_curr;

592

593 void copy_curr(){

594 if (*p_curr) {

595 str.append(*p_curr);

596 p_curr++;

597 }

598 }

599

600 void copy_last(){

601 if (*p_last) {

602 str.append(*p_last);

603 p_last++;

604 }

605 }

606

607 // copy 0, 1, or 2 character(s) to str

608 void copy(){

609 static WeightedRand wr(& Buffer_int().append(DEFAULT_WEIGHT * 10), 5);

610

611 switch (wr.next()){

612 case 0: // copy last -- has 10 times chance than others

613 copy_last();

614 break;

615 case 1: // copy both

616 copy_curr();

617 copy_last();

618 break;

619 case 2: // copy both

620 copy_last();

621 copy_curr();

622 break;

623 case 3:

624 copy_curr();

625 break;

626 case 4: // copy nothing

627 break;

628 default:

629 // ASSERT(FALSE);

630 ;

631 }

632 }

633

634 void morph(void){

635 int min = strlen(last);

636 int max = strlen(current);

637 if (min > max){

638 int temp = min;

639 min = max;

640 max = temp;

641 }

642

643 int len = min + rand()%(max - min + 1); // min + [0, diff]

644 p_curr = current;

645 p_last = last;

646 str.reset();

647

648 for (; str.content_size()<len && p_curr && p_last;){

649 copy(); // copy 0, 1, or 2 character(s) to str

650 }

651

652 if (str.content_size() == len) {

653 str.append(0);

654 final();

655 return;

656 }

657

658 if (str.content_size() > len) { // if the last copy copied two character s

659 str[len]=0;

660 final();

661 return;

662 }

663

664 // str.content_size() < len

665 if (*p_last) {

666 for (; str.content_size() < len; copy_last());

667 } else if (*p_curr){

668 for (; str.content_size() < len; copy_curr());

669 }

670

671 int last_len = last.content_size();

672 for (;str.content_size() < len;){

673 str.append(last[rand()%last_len]);

674 }

675 str.append(0);

676 final();

677 }

678

679 void final(){

680 last.reset();

681 last.append_array(current, current.content_size());

682 }

683 };

684

685 class Sequence : public Pick {

686 public:

687 virtual const char* next(){

688 str.reset();

689 int s = items.content_size();

690 for(int i=0; i < s; i++){

691 const char * t = items[i]->next();

692 str.append_array(t, strlen(t));

693 }

694 str.append(0); // terminal null

695 return str;

696 }

697

698 void append (Pick * node){

699 items.append(node);

700 }

701

702 virtual ~Sequence(){

703 int s = items.content_size();

704 for(int i=0; i < s; i++){

705 //How can assure the item is got from heap?

706 //Let's assume it.

707 delete items[i]; // TOFIX: point alias/recursion problem

708 items[i] = NULL;

709 }

710 }

711 private:

712 Buffer_pPick items;

713 Buffer_char str; //null-terminated c-string

714 };

715

716 class Repeat : public Pick {

717 private:

718 Pick * item;

719 Buffer_char str;

720 WeightedRand wr;

721 int min;

722 int max;

723 int select_a_count(){

724 return min + wr.next();

725 }

726 public:

727 virtual const char* next(){

728 str.reset();

729 int c = select_a_count();

730 for(int i=0; i< c; i++){

731 const char * t = item->next();

732 str.append_array(t, strlen(t));

733 }

734 str.append(0);

735 return str;

736 }

737

738 Repeat(Pick * base, int minCount =0, int maxCount = 1, Buffer_int * weights = NULL):

739 wr(weights, maxCount-minCount +1) {

740 this->item = base;

741 this->min = minCount;

742 this->max = maxCount;

743 }

744 virtual ~Repeat(){

745 delete item; // TOFIX: point alias/recursion problem

746 item = NULL;

747 }

748 };

749

750

751 class Alternation : public Pick {

752 public:

753 virtual const char* next(){

754 str.reset();

755 int i = wr.next();

756 const char * t = items[i]->next();

757 str.append_array(t, strlen(t) + 1);

758 return str;

759 }

760 virtual ~Alternation(){

761 int s = items.content_size();

762 for(int i=0; i < s; i++){

763 delete items[i]; // TOFIX: point alias/recursion problem

764 items[i] = NULL;

765 }

766 }

767

768 Alternation & append (Pick * node, int weight = DEFAULT_WEIGHT){

769 items.append(node);

770 wr.append(weight);

771 return *this;

772 }

773 private:

774 Buffer_pPick items;

775 Buffer_char str; // null-terminated c-string

776 WeightedRand wr;

777 };

778

779 ///////////////////////////////////////////////////////////

780 //

781 // The parser

782 //

783

784 enum TokenType {STRING, VAR, NUMBER, STREAM_END, ERROR, QUESTION, STAR, PLUS, LB RACE, RBRACE, LPAR, RPAR, SEMI, EQ, COMMA, BAR, AT, WAVE, PERCENT};

785

786 class Scanner{

787 friend int DumpScanner(Scanner & s, UBool dumb);

788 private:

789 const char * source;

790 const char * working;

791 const char * history; // for debug

792 enum StateType {START, IN_NUM, IN_VAR_FIRST, IN_VAR, IN_QUOTE, IN_QUOTE_BSLA SH, IN_BSLASH, IN_STRING, DONE};

793 StateType state;

794 void terminated(TokenType t){

795 working--; // return the peeked character

796 tokenType = t;

797 token.append(0); // close buffer

798 state = DONE;

799 }

800 public:

801 // the buffer of "source" is owned by caller

802 Scanner(const char src/[in] c-string*/ = NULL):source(src){

803 working = src;

804 history = working;

805 state = DONE;

806 tokenType = ERROR;

807 }

808

809 //void setSource(const char const src /[in] c-string*/){

810 // (&const_cast<const char >(source)) = src;

811 //}

812

813 Buffer_char token;

814 TokenType tokenType;

815

816 TokenType getNextToken(){

817 token.reset();

818 state = START;

819 history = working; // for debug

820 while (state != DONE){

821 char c = *working++;

822 if (c == 0 && state != START){//avoid buffer overflow. for IN_QUOE, IN_ESCAPE

823 terminated(ERROR);

824 break; // while

825 }

826 switch(state){

827 case START:

828 tokenType = ERROR;

829 switch(c){

830 case '?' : tokenType = QUESTION; break;

831 case '*' : tokenType = STAR; break;

832 case '+' : tokenType = PLUS; break;

833 case '{' : tokenType = LBRACE; break;

834 case '}' : tokenType = RBRACE; break;

835 case '(' : tokenType = LPAR; break;

836 case ')' : tokenType = RPAR; break;

837 case ';' : tokenType = SEMI; break;

838 case '=' : tokenType = EQ; break;

839 case ',' : tokenType = COMMA; break;

840 case '\|' : tokenType = BAR; break;

841 case '@' : tokenType = AT; break;

842 case '~' : tokenType = WAVE; break;

843 case '%' : tokenType = PERCENT; break;

844 case 0 : tokenType = STREAM_END; working-- /avoid bu ffer overflow/; break;

845 }

846 if (tokenType != ERROR){

847 token.append(c);

848 token.append(0);

849 state = DONE;

850 break; // START

851 }

852 switch(c){

853 case '$' : state = IN_VAR_FIRST; token.append(c); break ;

854 case '\'' : state = IN_QUOTE; break;

855 case '\\' : state = IN_BSLASH; break;

856 default:

857 if (isWhiteSpace(c)){ // state = START; //do no thing

858 } else if (isDigit(c)){ state = IN_NUM; token .append(c);

859 } else if (isAlphabet(c)){ state = IN_STRING; token .append(c);

860 } else {terminated(ERROR);}

861 }

862 break;//START

863 case IN_NUM:

864 if (isDigit(c)){

865 token.append(c);

866 } else {

867 terminated(NUMBER);

868 }

869 break;//IN_NUM

870 case IN_VAR_FIRST:

871 if (isAlphabet(c)){

872 token.append(c);

873 state = IN_VAR;

874 } else {

875 terminated(ERROR);

876 }

877 break; // IN_VAR_FISRT

878 case IN_VAR:

879 if (isAlphabet(c) \|\| isDigit(c)){

880 token.append(c);

881 } else {

882 terminated(VAR);

883 }

884 break;//IN_VAR

885 case IN_STRING:

886 // About the scanner's behavior for STRING, AT, and ESCAPE:

887 // All of them can be contacted with each other.

888 // This means the scanner will eat up as much as possible st rings

889 // (STRING, AT, and ESCAPE) at one time, with no regard of their

890 // combining sequence.

891 //

892 if (c == '\''){

893 state = IN_QUOTE; // the first time we see single quote

894 } else if (c =='\\'){ // back slash character

895 state = IN_BSLASH;

896 } else if (isAlphabet(c) \|\| isDigit(c)){

897 token.append(c);

898 } else{

899 terminated(STRING);

900 }

901 break;//IN_STRING

902 case IN_QUOTE:

903 if (c == '\''){ // the second time we see single quote

904 state = IN_STRING; // see document in IN_STRING

905 } else if ( c== '\\') { // backslah escape in quote

906 state = IN_QUOTE_BSLASH;

907 } else {

908 token.append(c); // eat up everything, includes back sl ash

909 }

910 break;//IN_QUOTE

911 case IN_QUOTE_BSLASH:

912 case IN_BSLASH:

913 switch (c){

914 case 'n' : token.append('\n'); break;

915 case 'r' : token.append('\r'); break;

916 case 't' : token.append('\t'); break;

917 case '\'' : token.append('\''); break;

918 case '\\' : token.append('\\'); break;

919 default: token.append(c); // unknown escaping, treat it as literal

920 }

921 if (state == IN_BSLASH){

922 state = IN_STRING; // see document in IN_STRING

923 } else { // state == IN_QUOTE_BSLASH

924 state = IN_QUOTE;

925 }

926 break;//IN_BSLASH

927 case DONE: /* should never happen */

928 default:

929 working--;

930 tokenType = ERROR;

931 state = DONE;

932 break;

933 }//switch(state)

934 }//while (state != DONE)

935

936 return tokenType;

937 }

938 };//class Scanner

939

940 class Parser{

941 friend UBool TestParser();

942 friend class TestParserT;

943 friend class LanguageGenerator_impl;

944 private:

945 Scanner s;

946 TokenType & token;

947 int min_max; // for the evil infinite

948

949 UBool match(TokenType expected){

950 if (token == expected) {

951 token = s.getNextToken();

952 return TRUE;

953 } else {

954 //s.dumpCurrentPoint();

955 return FALSE;

956 }

957 }

958

959 UBool weight(int & value){

960 if (token == NUMBER){

961 int temp = atoi(s.token);

962 match(NUMBER);

963 if (match(PERCENT)){

964 value = temp;

965 return TRUE;

966 }

967 }

968 return FALSE;

969 }

970

971 UBool repeat (Pick* &node /in,out/){

972 if (node == NULL) return FALSE;

973

974 int count = -2;

975 int min = -2;

976 int max = -2;

977 UBool question = FALSE;

978 switch (token){

979 case QUESTION:

980 match(QUESTION);

981 min = 0;

982 max = 1;

983 count = 2;

984 question = TRUE;

985 break;

986 case STAR:

987 match(STAR);

988 min = 0;

989 max = -1;

990 count = -1;

991 break;

992 case PLUS:

993 match(PLUS);

994 min = 1;

995 max = -1;

996 count = -1;

997 break;

998 case LBRACE:

999 match(LBRACE);

1000 if (token != NUMBER){

1001 return FALSE;

1002 }else {

1003 min = atoi(s.token);

1004 match(NUMBER);

1005 if (token == RBRACE){

1006 match(RBRACE);

1007 max = min;

1008 count = 1;

1009 } else if (token == COMMA) {

1010 match(COMMA);

1011 if (token == RBRACE){

1012 match(RBRACE);

1013 max = -1;

1014 count = -1;

1015 } else if (token == NUMBER) {

1016 max = atoi(s.token);

1017 match(NUMBER);

1018 count = max - min + 1;

1019 if (!match(RBRACE)) {

1020 return FALSE;

1021 }

1022 } else {

1023 return FALSE;

1024 }

1025 } else {

1026 return FALSE;

1027 }

1028 }

1029 break;

1030 default:

1031 return FALSE;

1032 }

1033

1034 if (count == -2 \|\| min == -2 \|\| max == -2){

1035 //ASSERT(FALSE);

1036 return FALSE;

1037 }

1038

1039 // eat up following weights

1040 Buffer_int weights;

1041 int w;

1042 while (weight(w)){

1043 weights.append(w);

1044 }

1045

1046 // for the evil infinite

1047 min_max = min_max > min ? min_max : min;

1048 min_max = min_max > max ? min_max : max;

1049 if (min_max > PSEUDO_INFINIT){

1050 return FALSE; // PSEUDO_INFINIT is less than the real maximum

1051 }

1052 if (max == -1){ // the evil infinite

1053 max = PSEUDO_INFINIT;

1054 }

1055 // for the strange question mark

1056 if (question && weights.content_size() > 0){

1057 Buffer_int w2;

1058 w2.append(DEFAULT_WEIGHT - weights[0]).append(weights[0]);

1059 node = new Repeat(node,min,max,&w2);

1060 return TRUE;

1061 }

1062 node = new Repeat(node,min,max,&weights);

1063 return TRUE;

1064 }

1065

1066 UBool core(Pick* &node /out/){

1067 if (node != NULL) return FALSE; //assert node == NULL

1068

1069 switch(token){

1070 case LPAR:

1071 match(LPAR);

1072 if(defination(node) && match(RPAR)){

1073 return TRUE;

1074 }

1075 return FALSE;

1076 case VAR:

1077 node = new Variable(&symbols, s.token);

1078 match(VAR);

1079 return TRUE;

1080 case STRING:

1081 node = new Literal(s.token);

1082 match(STRING);

1083 return TRUE;

1084 default:

1085 return FALSE;

1086 }

1087 }

1088 UBool modified(Pick* &node /out/){

1089 if (node != NULL) return FALSE; //assert node == NULL

1090

1091 if (!core(node)) {

1092 return FALSE;

1093 }

1094

1095 for (;;){

1096 switch(token){

1097 case WAVE:

1098 match(WAVE);

1099 node = new Morph(*node);

1100 break;

1101 case AT:

1102 match(AT);

1103 node = new Quote(*node);

1104 break;

1105 case QUESTION:

1106 case STAR:

1107 case PLUS:

1108 case LBRACE:

1109 if (!repeat(node)) return FALSE;

1110 break;

1111 case SEMI: // rule definiation closed

1112 case RPAR: // within parenthesis (core closed)

1113 case BAR: // in alternation

1114 case NUMBER: // in alternation, with weight

1115 case LPAR: // in sequence

1116 case VAR: // in sequence

1117 case STRING: // in sequence

1118 return TRUE;

1119 default:

1120 return FALSE;

1121 }

1122 }

1123 }

1124

1125

1126 UBool sequence_list(Pick* &node /in,out/){

1127 if (node == NULL) return FALSE; // assert node != NULL

1128

1129 Sequence* seq = new Sequence();

1130 Pick * n = node;

1131

1132 while (token == VAR \|\| token == STRING \|\| token == LPAR){

1133 seq->append(n);

1134 n = NULL;

1135 if (modified(n)){

1136 // go on

1137 } else {

1138 goto FAIL;

1139 }

1140 }

1141

1142 if (token == SEMI \|\| token == RPAR \|\| token == BAR){

1143 seq->append(n);

1144 node = seq;

1145 return TRUE;

1146 }

1147 FAIL:

1148 delete seq;

1149 return FALSE;

1150

1151 }

1152

1153 UBool sequence(Pick* &node /out/){

1154 if (node != NULL) return FALSE; //assert node == NULL

1155

1156 if (!modified(node)) {

1157 return FALSE;

1158 }

1159

1160 if (token == VAR \|\| token == STRING \|\| token == LPAR){

1161 return sequence_list(node);

1162 } else {

1163 return TRUE; // just a modified

1164 }

1165 }

1166

1167 UBool alternation_list(Pick* &node /in,out/){

1168 if (node == NULL) return FALSE; // assert node != NULL

1169

1170 Alternation * alt = new Alternation();

1171 Pick * n = node;

1172 int w = DEFAULT_WEIGHT;

1173

1174 while (token == NUMBER \|\| token == BAR){

1175 if(token == NUMBER) {

1176 if (weight(w)){

1177 if (token == BAR){

1178 // the middle item, go on

1179 } else {

1180 // the last item or encounter error

1181 break; //while

1182 }

1183 } else {

1184 goto FAIL;

1185 }

1186 } // else token == BAR

1187 match(BAR);

1188 alt->append(n,w);

1189

1190 n = NULL;

1191 w = DEFAULT_WEIGHT;

1192 if (sequence(n)){

1193 // go on

1194 } else {

1195 goto FAIL;

1196 }

1197 }

1198

1199 if (token == SEMI \|\| token == RPAR) {

1200 alt->append(n,w);

1201 node = alt;

1202 return TRUE;

1203 }

1204 FAIL:

1205 delete alt;

1206 return FALSE;

1207 }

1208

1209 UBool alternation(Pick* &node /out/){

1210 if (node != NULL) return FALSE; //assert node == NULL

1211

1212 // 'sequence' has higher precedence than 'alternation'

1213 if (!sequence(node)){

1214 return FALSE;

1215 }

1216

1217 if (token == BAR \|\| token == NUMBER){ // find a real alternation1, creat e it.

1218 return alternation_list(node);

1219 } else {

1220 return TRUE; // just a sequence_old

1221 }

1222 }

1223

1224

1225 UBool defination(Pick* &node /out/){

1226 if (node != NULL) return FALSE; //assert node == NULL

1227 return alternation(node);

1228 }

1229

1230 UBool rule(){

1231 if (token == VAR){

1232 Buffer_char name;

1233 name.append_array(s.token, strlen(s.token) + 1);

1234 match(VAR);

1235

1236 if (match(EQ)){

1237 Pick * t = NULL;

1238 if(defination(t)){

1239 symbols.put(name, t);

1240 return match(SEMI);

1241 }

1242 }

1243 }

1244 return FALSE;

1245 }

1246 public:

1247 UBool rules(){

1248 symbols.reset();

1249 token = s.getNextToken();

1250 while (rule()){

1251 }

1252 if (token == STREAM_END){

1253 return TRUE;

1254 } else {

1255 //s.dumpCurrentPoint();

1256 return FALSE;

1257 }

1258 }

1259

1260 public:

1261 SymbolTable symbols;

1262

1263 Parser(const char *const source):s(source), token(s.tokenType){

1264 min_max = -2;

1265 }

1266 UBool parse(){

1267 return rules();

1268 }

1269

1270 }; // class Parser

1271

1272

1273 ///////////////////////////////////////////////////////////

1274 //

1275 //

1276 //

1277

1278 int DumpScanner(Scanner & s, UBool dump = TRUE){

1279 int len = strlen(s.source);

1280 int error_start_offset = s.history - s.source;

1281 if (dump){

1282 printf("\n=================== DumpScanner ================\n");

1283 fwrite(s.source, len, 1, stdout);

1284 printf("\n-----parsed-------------------------------------\n");

1285 fwrite(s.source, s.history - s.source, 1, stdout);

1286 printf("\n-----current------------------------------------\n");

1287 fwrite(s.history, s.working - s.history, 1, stdout);

1288 printf("\n-----unparsed-----------------------------------\n");

1289 fwrite(s.working, (s.source + len - s.working), 1, stdout);

1290 printf("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");

1291 }

1292 return error_start_offset;

1293 }

1294

1295 class LanguageGenerator_impl{

1296 public:

1297 LanguageGenerator_impl(const char const bnf_definition, const char const t op_node)

1298 :par(bnf_definition), top_node_name(top_node){

1299 srand((unsigned)time( NULL ));

1300 }

1301

1302 LanguageGenerator::PARSE_RESULT parseBNF(UBool debug = TRUE){

1303 if (par.parse()){

1304 if (par.symbols.find(top_node_name, &top_node_ref) == SymbolTable::H AS_REF) {

1305 if (par.symbols.is_complete()) {

1306 return LanguageGenerator::OK;

1307 } else {

1308 if (debug) printf("The bnf definition is incomplete.\n");

1309 return LanguageGenerator::INCOMPLETE;

1310 }

1311 } else {

1312 if (debug) printf("No top node is found.\n");

1313 return LanguageGenerator::NO_TOP_NODE;

1314 }

1315 } else {

1316 if(debug) {

1317 printf("The bnf definition is wrong\n");

1318 DumpScanner(par.s, TRUE);

1319 }

1320 return LanguageGenerator::BNF_DEF_WRONG;

1321 }

1322 }

1323 const char * next(){

1324 return top_node_ref->next();

1325 }

1326

1327 private:

1328 Parser par;

1329 const char *const top_node_name;

1330 Pick * top_node_ref;

1331 };

1332

1333 LanguageGenerator::LanguageGenerator():lang_gen(NULL){

1334 }

1335

1336 LanguageGenerator::~LanguageGenerator(){

1337 delete lang_gen;

1338 }

1339

1340 LanguageGenerator::PARSE_RESULT LanguageGenerator::parseBNF(const char const bn f_definition /in/, const char const top_node/in/, UBool debug){

1341 if (lang_gen){

1342 delete lang_gen;

1343 }

1344 lang_gen = new LanguageGenerator_impl(bnf_definition, top_node);

1345 PARSE_RESULT r = lang_gen->parseBNF(debug);

1346 if (r != OK){

1347 delete lang_gen;

1348 lang_gen = NULL;

1349 return r;

1350 } else {

1351 return r;

1352 }

1353 }

1354 const char *LanguageGenerator::next(){ // Return a null-terminated c-string. The buffer is owned by callee.

1355 if (lang_gen){

1356 return lang_gen->next();

1357 }else {

1358 return "";

1359 }

1360 }

1361

1362 ///////////////////////////////////////////////////////////

1363 //

1364 // The test code for WBNF

1365 //

1366

1367 #define CALL(fun) \

1368 if (fun()){ \

1369 printf("Pass: " #fun "\n");\

1370 } else { \

1371 printf("FAILED: !!! " #fun " !!!\n"); \

1372 }

1373

1374 #define DUMP_R(fun, var, times) \

1375 {printf("\n========= " #fun " =============\n"); \

1376 for (int i=0; i<times; i++) { \

1377 const char * t = var.next();\

1378 fwrite(t,strlen(t),1,stdout); \

1379 printf("\n"); \

1380 } \

1381 printf("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");}

1382

1383

1384

1385 #if TEST_WBNF_TEST

1386 static UBool TestQuote(){

1387 const char *const str = "This ' A !,z\| qq [] .new\tline";

1388 //const char *const str_r = "This \\' A '!,'z'\|' qq '[]' '.'new\tline";

1389 ////

1390 //// :( we must quote our string to following C syntax

1391 //// cannot type the literal here, it makes our code rather human unread able

1392 //// very very unconformable!

1393 ////

1394 ///*

1395 //*/

1396

1397 //const char *const s1 = "ab'c";

1398 //const char (* s1_r1) [] = { "ab''c", // ab''c

1399 // "ab\\'c", // ab\'c

1400 // };//

1401 ///*

1402 // . '.' \.

1403 // .. \.\. '.'\. '.'\. '..' // '.''.' wrong

1404 //*/

1405

1406 //const char *const s2 = "a..'.b"; // a..'.b

1407 //const char (*s2_r) [] = { "a'..''.'b" // a'..''.'b

1408 // ,"a'..\\'.'b" // a'..\'.'b

1409 // ,"a'..'\\''.'b" // a'..'\''.'b

1410 // };//

1411

1412 //const char *const s3 = "a..\\.b"; // a..\.b

1413 //const char (*s3_r) [] = { "a'..\\\\.'b" // a'..\\.'b

1414 // ,"a'..'\\\\'.'b" // a'..'\\'.'b

1415 // };//

1416

1417 // // no catact operation, no choice, must be com pact

1418

1419 srand((unsigned)time( NULL ));

1420

1421 //Escaper l(Escaper::NO, Escaper::NO, Escaper::RAND_ESC);

1422 Pick *p = new Literal(str);

1423 Quote q(*p);

1424

1425 DUMP_R(TestQuote, (*p), 1);

1426 DUMP_R(TestQuote, q, 20);

1427 return FALSE;

1428 }

1429 static UBool TestLiteral(){

1430 const char * s = "test string99.";

1431 Literal n(s);

1432 const char * r = n.next();

1433 return strcmp(s,r) == 0;

1434 }

1435

1436 static UBool TestSequence(){

1437 Sequence seq;

1438 seq.append(new Literal("abc "));

1439 seq.append(new Literal(", s"));

1440

1441 return strcmp(seq.next(), "abc , s") == 0;

1442 }

1443 static UBool TestAlternation(){

1444 srand((unsigned)time( NULL ));

1445 Alternation alt;

1446 alt.append(new Literal("aaa_10%"),10);

1447 alt.append(new Literal("bbb_0%"),0);

1448 alt.append(new Literal("ccc_10%"),10);

1449 alt.append(new Literal("ddddddd_50%"),50);

1450

1451 DUMP_R(TestAlternation, alt, 50);

1452

1453 return FALSE;

1454 }

1455

1456 static UBool TestBuffer(){

1457 Buffer_int t;

1458 t.append(1).append(0).append(5);

1459 int s = t.content_size();

1460 for (int i=0; i<s; ++i){

1461 printf("%d\n", t[i]);

1462 }

1463 return FALSE;

1464 }

1465

1466 static UBool TestWeightedRand(){

1467 srand((unsigned)time( NULL ));

1468 Buffer_int t;

1469 t.append(1).append(0).append(5);

1470 WeightedRand wr(&Buffer_int().append(10).append(0).append(50),4);

1471 // WeightedRand wr(&t,3);

1472 for (int i=0; i< 50; ++i){

1473 printf("%d\n", wr.next());

1474 }

1475 return FALSE;

1476 }

1477

1478 static UBool TestRepeat(){

1479 srand((unsigned)time( NULL ));

1480 Repeat rep(new Literal("aaa1-5 "), 1, 5);

1481 DUMP_R(TestRepeat, rep, 50);

1482

1483 Repeat r2(new Literal("b{1,3}1%0%5% "), 1, 3, &Buffer_int().append(1).append (0).append(5));

1484 DUMP_R(TestRepeat, r2, 50);

1485

1486 Repeat r3(new Literal("aaa5-5 "), 5, 5);

1487 DUMP_R(TestRepeat, r3, 50);

1488

1489 return FALSE;

1490 }

1491

1492 static UBool TestVariable(){

1493 SymbolTable tab;

1494 Pick * value = new Literal("string1");

1495 Variable var1(&tab, "x", value);

1496

1497 Variable var2(&tab, "y");

1498 // tab.put(var2, value); // TOFIX: point alias/recursion problem

1499 Pick * value2 = new Literal("string2");

1500 tab.put(var2, value2);

1501

1502 Pick * value3 = new Literal("string3");

1503 Variable var3(&tab, "z");

1504 tab.put("z", value3);

1505

1506 UBool pass;

1507 pass = strcmp(var1.next(), value->next()) == 0;

1508 pass = pass && strcmp(var2.next(), value2->next()) == 0;

1509 pass = pass && strcmp(var3.next(), value3->next()) == 0;

1510 return pass;

1511 }

1512

1513 static UBool TestSymbolTable(){

1514 Literal * n1 = new Literal("string1");

1515 Literal * n2 = new Literal("string2");

1516 SymbolTable t;

1517 t.put("abc", n1);

1518 t.put("$aaa", n2);

1519 // t.put("alias", n1); // TOFIX: point alias/recursion problem

1520 t.put("bbb");

1521

1522 UBool pass;

1523 pass = t.find(NULL) == SymbolTable::EMPTY;

1524 pass = pass && t.find("ccc") == SymbolTable::NO_VAR;

1525 pass = pass && t.find("bbb") == SymbolTable::NO_REF;

1526 pass = pass && t.find("abc") == SymbolTable::HAS_REF;

1527 pass = pass && t.find("$aaa") == SymbolTable::HAS_REF;

1528

1529 t.reset();

1530 pass = pass && t.find("abc") == SymbolTable::NO_VAR;

1531 return pass;

1532 }

1533

1534

1535 static UBool TestScanner(void){

1536 //const char str1[] = "$root = $command{0,5} $reset $mostRules{1,20};";

1537 //const char str1_r[][20] = {"$root", "=", "$command", "{", "0", ",", "5", " }",

1538 // "$reset", "$mostRules", "{", "1", ",", "20", "}", ";"};

1539

1540 const char str2[] = "$p2 =(\\\\ $s $string $s)? 25%;";

1541 const char str2_r[][20] = {"$p2", "=", "(", "\\", "$s", "$string", "$s", ")" , "?", "25", "%", ";"};

1542

1543 const char *str = str2;

1544 const char (*str_r)[20] = str2_r;

1545 int tokenNum = sizeof(str2_r)/sizeof(char[20]);

1546

1547 Scanner t(str);

1548 UBool pass = TRUE;

1549 t.getNextToken();

1550 int i = 0;

1551 while (pass){

1552 if (t.tokenType == STREAM_END){

1553 pass = pass? i == tokenNum : FALSE;

1554 break;//while

1555 } else if (t.tokenType == ERROR){

1556 pass = FALSE;

1557 break;//while

1558 } else {

1559 pass = strcmp( &(t.token[0]), str_r[i++]) == 0;

1560 t.getNextToken();

1561 }

1562 }

1563

1564 //const char ts[] = "$commandList = '['"

1565 //" ( alternate ' ' $alternateOptions"

1566 //" \| backwards ' 2'"

1567 //" \| normalization ' ' $onoff "

1568 //" \| caseLevel ' ' $onoff "

1569 //" \| hiraganaQ ' ' $onoff"

1570 //" \| caseFirst ' ' $caseFirstOptions"

1571 //" \| strength ' ' $strengthOptions"

1572 //" ) ']';" ;

1573

1574 //Scanner t2(ts);

1575 //pass = TRUE;

1576 //do {

1577 // t2.getNextToken();

1578 // if (t2.tokenType == ERROR){

1579 // DumpScanner(t2);

1580 // return FALSE;

1581 // }

1582 //}while (t.tokenType != STREAM_END);

1583

1584 return pass;

1585 }

1586

1587 class TestParserT {

1588 public:

1589 UBool operator () (const char *const str, const int exp_error_offset = -1, const UBool dump = TRUE){

1590 Parser par(str);

1591 if (par.rules()){

1592 if ( exp_error_offset == -1){

1593 return TRUE;

1594 }else {

1595 DumpScanner(par.s,dump);

1596 return FALSE;

1597 }

1598 }else {

1599 return DumpScanner(par.s, dump) == exp_error_offset;

1600 }

1601 }

1602 };

1603

1604 UBool TestParser(){

1605 TestParserT test;

1606

1607 UBool pass = TRUE;

1608 pass = pass && test ("$s = ' ' ? 50%;");

1609 pass = pass && test("$x = ($var {1,2}) 3%;"); // legal

1610 pass = pass && test("$x = $var {1,2} 3% \| b 4%;"); // legal

1611 pass = pass && test("$x = $var {1,2} 3%;"); // legal

1612 pass = pass && test("$m = $c ? 2% 4% \| $r 5% \| $n 25%;"); // legal

1613 pass = pass && test("$a = b ? 2% \| c 5%;"); // legal

1614 pass = pass && test("$x = A B 5% C 10% \| D;", 8, FALSE); // illegal 5%

1615 pass = pass && test("$x = aa 45% \| bb 5% cc;", 19, FALSE);// illegal cc

1616 pass = pass && test("$x = (b 5%) (c 6%);"); // legal

1617 pass = pass && test("$x = (b 5%) c 6%;", 13, FALSE); // illegal 6%

1618 pass = pass && test("$x = b 5% (c 6%);", 9, FALSE); // illegal (c 6%)

1619 pass = pass && test("$x = b 5% c 6%;", 9, FALSE); // illegal c 6%

1620 pass = pass && test("$x = b 5%;"); // legal

1621 pass = pass && test("$x = aa 45% \| bb 5% cc;", 19, FALSE);// illegal cc

1622 pass = pass && test("$x = a \| b \| c 4% \| d 5%;"); // legal

1623 pass = pass && test("$s = ' ' ? 50% abc;"); // legal

1624 pass = pass && test("$s = a \| c d \| e f;"); // legal

1625 pass = pass && test( "$z = q 0% \| p 1% \| r 100%;"); // legal How to check parsed tree??

1626

1627 pass = pass && test("$s = ' ' ? 50%;");

1628 pass = pass && test("$relationList = '<' \| '<<' \| ';' \| '<<<' \| ',' \| '=';" );

1629 pass = pass && test("$p1 = ($string $s '\|' $s)? 25%;");

1630 pass = pass && test("$p2 = (\\\\ $s $string $s)? 25%;");

1631 pass = pass && test("$rel2 = $p1 $string $s $p2;");

1632 pass = pass && test("$relation = $relationList $s ($rel1 \| $rel2) $crlf;");

1633 pass = pass && test("$command = $commandList $crlf;");

1634 pass = pass && test("$reset = '&' $s ($beforeList $s)? 10% ($positionList 10 0% \| $string 10%) $crlf;");

1635 pass = pass && test("$mostRules = $command 1% \| $reset 5% \| $relation 25%;") ;

1636 pass = pass && test("$root = $command{0,5} $reset $mostRules{1,20};");

1637

1638 const char collationBNF[] =

1639 "$s = ' '? 50%;"

1640 "$crlf = '\r\n';"

1641

1642 "$alternateOptions = non'-'ignorable \| shifted;"

1643 "$onoff = on \| off;"

1644 "$caseFirstOptions = off \| upper \| lower;"

1645 "$strengthOptions = '1' \| '2' \| '3' \| '4' \| 'I';"

1646 "$commandList = '['"

1647 " ( alternate ' ' $alternateOptions"

1648 " \| backwards ' 2'"

1649 " \| normalization ' ' $onoff "

1650 " \| caseLevel ' ' $onoff "

1651 " \| hiraganaQ ' ' $onoff"

1652 " \| caseFirst ' ' $caseFirstOptions"

1653 " \| strength ' ' $strengthOptions"

1654 " ) ']';"

1655 "$command = $commandList $crlf;"

1656

1657 "$ignorableTypes = (tertiary \| secondary \| primary) ' ' ignorable;"

1658 "$allTypes = variable \| regular \| implicit \| trailing \| $ignorableTypes;"

1659 "$positionList = '[' (first \| last) ' ' $allTypes ']';"

1660

1661 "$beforeList = '[before ' ('1' \| '2' \| '3') ']';"

1662

1663 "$relationList = ("

1664 " '<'"

1665 " \| '<<'"

1666 " \| ';'"

1667 " \| '<<<'"

1668 " \| ','"

1669 " \| '='"

1670 ");"

1671 "$string = $magic;"

1672 "$rel1 = '[variable top]' $s;"

1673 "$p1 = ($string $s '\|' $s)? 25%;"

1674 "$p2 = (\\\\ $s $string $s)? 25%;"

1675 "$rel2 = $p1 $string $s $p2;"

1676 "$relation = $relationList $s ($rel1 \| $rel2) $crlf;"

1677

1678 "$reset = '&' $s ($beforeList $s)? 10% ($positionList 1% \| $string 10%) $crl f;"

1679 "$mostRules = $command 1% \| $reset 5% \| $relation 25%;"

1680 "$root = $command{0,5} $reset $mostRules{1,20};"

1681 ;

1682

1683 pass = pass && test(collationBNF);

1684

1685

1686 return pass;

1687 }

1688

1689 static UBool TestMorph(){

1690 srand((unsigned)time( NULL ));

1691

1692 Alternation * alt = new Alternation();

1693

1694 (*alt)

1695 .append(new Literal("a")).append(new Literal("b")).append(new Literal("c"))

1696 .append(new Literal("d")).append(new Literal("e")).append(new Literal("f"))

1697 .append(new Literal("g")).append(new Literal("h")).append(new Literal("i"))

1698 .append(new Literal("j")).append(new Literal("k")).append(new Literal("l"))

1699 .append(new Literal("m")).append(new Literal("n")).append(new Literal("o"))

1700 ;

1701

1702 Repeat * rep = new Repeat( alt ,5,5 );

1703 Morph m( *rep);

1704

1705 // DUMP_R(TestMorph,(*rep),20);

1706 DUMP_R(TestMorph,m,100);

1707

1708 return FALSE;

1709 }

1710

1711 #endif

1712

1713 static UBool TestLanguageGenerator(){

1714 //LanguageGenerator g;

1715 //const char *const s = "$s = p 0% \| q 1%;";

1716 //g.parseBNF(s, "$s");

1717 UBool pass;

1718 //= strcmp("q", g.next()) == 0;

1719

1720 const char *const def =

1721 //"$a = $b;"

1722 //"$b = $c;"

1723 //"$c = $t;"

1724 //"$t = abc $z{1,2};"

1725 //"$k = a \| b \| c \| d \| e \| f \| g ;"

1726 //"$z = q 0% \| p 1% \| r 1%;"

1727 "$x = a ? 0%;"

1728 ; // end of string

1729 // const char * s = "abczz";

1730 //

1731 //

1732 LanguageGenerator g;

1733 pass = g.parseBNF(def, "$x",TRUE);

1734 //// LanguageGenerator g(collationBNF, "$root", "$magic", new MagicNode());

1735 //

1736 if (pass != LanguageGenerator::OK) return FALSE;

1737

1738 DUMP_R(TestLanguageGenerator, g, 20);

1739 return pass;

1740

1741 ////UBool pass = strcmp(s,r) == 0;

1742

1743 //if (pass){

1744 // printf("TestRandomLanguageGenerator passed.\n");

1745 //} else {

1746 // printf("TestRandomLanguageGenerator FAILED!!!\n");

1747 //}

1748 //return pass;

1749 }

1750

1751 void TestWbnf(void){

1752 srand((unsigned)time( NULL ));

1753

1754 //CALL(TestLiteral);

1755 //CALL(TestSequence);

1756 //CALL(TestSymbolTable);

1757 //CALL(TestVariable);

1758

1759 //TestRepeat();

1760 //TestAlternation();

1761 //TestMorph();

1762

1763 //TestQuote();

1764 //TestBuffer();

1765 //TestWeightedRand();

1766

1767 //CALL(TestScanner);

1768 //CALL(TestParser);

1769 CALL(TestLanguageGenerator);

1770 }

1771

OLD	NEW

« no previous file with comments | « source/test/intltest/wbnf.h ('k') | source/test/iotest/Makefile.in » ('j') | no next file with comments »