icu46/source/test/intltest/wbnf.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/test/intltest/wbnf.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/test/intltest/wbnf.cpp

===================================================================

--- icu46/source/test/intltest/wbnf.cpp (revision 0)

+++ icu46/source/test/intltest/wbnf.cpp (revision 0)

@@ -0,0 +1,1771 @@

+/*

+ ******************************************************************************

+ */

+#include <stdio.h>

+#include <string.h>

+#include <stdlib.h>

+#include <time.h>

+#include "wbnf.h"

+// Most of this code is meant to test the test code. It's a self test.

+// Normally this isn't run.

+#define TEST_WBNF_TEST 0

+///////////////////////////////////////////////////////////

+//

+// Constants and the most basic helper classes

+//

+static const char DIGIT_CHAR[] = "0123456789";

+static const char WHITE_SPACE[] = {'\t', ' ', '\r', '\n', 0};

+static const char ALPHABET[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

+static const char SPECIAL[] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";

+static inline UBool isInList(const char c /*in*/, const char list[] /*in*/){ // TRUE when c occurs in the 0-terminated list

+ const char * p = list;

+ for (;*p != 0 && *p != c; p++); // empty body: advance p to the first match or to the terminator

+ return *p?TRUE:FALSE; // p rests on the terminator when nothing matched, so c == 0 always yields FALSE

+static inline UBool isDigit(char c) {return isInList(c, DIGIT_CHAR);} // c is one of "0123456789"

+static inline UBool isWhiteSpace(char c) {return isInList(c, WHITE_SPACE);} // c is tab, space, CR or LF

+static inline UBool isAlphabet(char c) {return isInList(c, ALPHABET);} // c is an ASCII letter of either case

+static inline UBool isSpecialAsciiChar(char c) {return isInList(c,SPECIAL);} // c is one of the punctuation characters in SPECIAL

+///////////////////////////////////////////////////////////

+//

+// Helper classes

+//

+class Buffer_byte{ // growable byte buffer; the BUFFER(type, name) classes store their elements in one of these

+// Utility class, can be treated as an auto expanded array. no boundary check.

+ typedef char byte;

+ byte * start; // beginning of the allocated storage (obtained through realloc)

+ byte * current; // write cursor: one past the last appended byte

+ int buffer_size; // size unit is byte

+public:

+ inline int content_size(){return current - start;} // size unit is byte; number of bytes appended since the last reset

+private:

+ inline void expand(int add_size = 100){ // size unit is byte

+ int new_size = buffer_size + add_size;

+ int cs_snap = content_size(); // remember the used length because realloc may move the block

+ start = (byte *) realloc(start, new_size); // may change the value of start. NOTE(review): the result is not checked for NULL

+ current = start + cs_snap;

+ memset(current, 0, add_size); // zeroes add_size bytes beginning at the write cursor (not at the old end of the buffer)

+ buffer_size = new_size;

+ }

+ inline void expand_to(int size){ // grow only when size exceeds the current capacity

+ int r = size - buffer_size;

+ if (r > 0) {

+ expand(r); // simply expand, no block alignment

+ }

+ Buffer_byte(const Buffer_byte &); // copy constructor is only declared here, in the private section

+ Buffer_byte & operator = (const Buffer_byte &); // copy assignment likewise only declared

+public:

+ Buffer_byte():start(NULL),current(start),buffer_size(0){ // starts empty, then reserves the default 100 bytes

+ expand();

+ }

+ ~Buffer_byte(){

+ free(start);

+ }

+ inline void reset(){ // clear the content but keep the capacity

+ start != NULL ? memset(start, 0, buffer_size) : 0; // zero the whole capacity, not just the used part

+ current = start;

+ }

+ // Using memory copy method to append a C array to buffer,

+ inline void append(const void * c, int size){ // size unit is byte

+ expand_to(content_size() + size) ; // grows to exactly the required size when the capacity is too small

+ memcpy(current, c, size);

+ current = current + size;

+ }

+ byte * buffer(){ // raw storage; the pointer can change whenever append() has to grow the buffer

+ return start;

+ }

+};

+/*

+ The class(es) try to work like a built-in array, so they overload these two operators

+ operator type *();

+ type & operator[];

+ The first is used for automatic type conversion, the latter is used to select a member.

+ A small trick is that the class does not overload the address-of operator. This

+ behavior is different from a built-in array, but it gives us the opportunity

+ to get the address of the class itself.

+*/

+//template<typename type>

+// class BUFFER{

+// typedef BUFFER name;

+#define BUFFER(type, name) /* declares class `name`: an array of `type` stored in a Buffer_byte */\

+ class name {\

+ private:\

+ Buffer_byte buf;\

+ public:\

+ name & reset() {buf.reset(); return *this;}\

+ name & append(type c) /* copies the bytes of one element; returns *this so calls can be chained */ {buf.append(&c, sizeof(type)); return *this;}\

+ name & append_array(const type * p, int size) /* size counts elements, not bytes */ {buf.append(p, sizeof(type)*size); return *this;}\

+ type & operator [] (int i) /* no bounds check */ { return ((type *) buf.buffer())[i];}\

+ operator type *(){return (type *) buf.buffer();} \

+ int content_size() /* number of elements appended */ {return buf.content_size() / sizeof(type);}\

+ }

+class Pick{ // abstract base class of every generator node in this file

+/* The Pick is the basic language generator element*/

+public:

+ // generate a string according to the syntax

+ // Return a null-terminated c-string. The buffer is owned by callee.

+ virtual const char* next() = 0;

+ virtual ~Pick(){}; // virtual: nodes are deleted through Pick pointers (e.g. by SymbolTable::reset)

+};

+//typedef BUFFER<char> Buffer_char;

+//typedef BUFFER<int> Buffer_int;

+//typedef BUFFER<Pick *> Buffer_pPick;

+BUFFER(char, Buffer_char); // array of char; used throughout the file to hold c-strings

+BUFFER(int, Buffer_int); // array of int; used for weights and name offsets

+BUFFER(Pick *, Buffer_pPick); // array of Pick pointers

+class SymbolTable{ // maps each variable name to the Pick that defines it

+/* Helper class.

+* It's a mapping table between 'variable name' and its 'active Pick object'

+*/

+private:

+ Buffer_char name_buffer; // var names storage space

+ Buffer_int names; // points to name (offset in name_buffer)

+ Buffer_pPick refs; // points to Pick

+ int get_index(const char *const var_name){ // linear search; returns the slot of var_name, or -1 when it is unknown

+ int len = names.content_size();

+ for (int i=0; i< len; i++){

+ if (strcmp(var_name, name_buffer + names[i]) == 0){ // var_name is passed to strcmp unchecked, so it must not be NULL

+ return i;

+ }

+ return -1;

+ }

+public:

+ enum RESULT {EMPTY, NO_VAR, NO_REF, HAS_REF};

+ RESULT find(const char *const var_name /*[in] c-string*/, Pick * * ref = NULL /*[out] Pick* */){ // *ref is written only when HAS_REF is returned

+ if (!var_name) return EMPTY; // NULL name

+ int i = get_index(var_name);

+ if (i == -1){

+ return NO_VAR; // new name

+ }

+ if (!refs[i]){ // exist name, no ref

+ return NO_REF;

+ } else {

+ if (ref) {

+ *ref = refs[i];

+ }

+ return HAS_REF; // exist name, has ref

+ }

+ void put(const char *const var_name, Pick *const var_ref = NULL){ // register a name and/or attach its definition

+ int i = get_index(var_name); // NOTE(review): runs before find() rejects a NULL name, so a NULL var_name reaches strcmp when the table is not empty

+ switch(find(var_name)){

+ case EMPTY: // NULL name

+ break;

+ case NO_VAR: // new name

+ int offset;

+ offset = name_buffer.content_size(); // the new name starts at the current end of name_buffer

+ name_buffer.append_array(var_name, strlen(var_name) + 1); // +1 copies the terminating 0 as well

+ names.append(offset);

+ refs.append(var_ref);

+ break;

+ case NO_REF: // exist name, no ref

+ refs[i] = var_ref; // link definition with variable

+ break;

+ case HAS_REF: // exist name, has ref

+ if (var_ref){

+ refs[i] = var_ref; // replaces the stored pointer; the previous Pick is not deleted here

+ }

+ break;

+ default:

+ ; // ASSERT(FALSE);

+ }

+ return;

+ }

+ UBool is_complete(){ // TRUE when every registered name has a definition attached

+ int n = names.content_size();

+ for (int i=0; i<n; ++i){

+ if (refs[i] == NULL){

+ return FALSE;

+ }

+ return TRUE;

+ }

+ void reset(){ // forget all names and delete every stored Pick

+ names.reset();

+ name_buffer.reset();

+ // release memory here

+ int s = refs.content_size();

+ for (int i=0; i < s; i++){

+ delete refs[i]; // TOFIX: point alias/recursion problem

+ }

+ refs.reset();

+ }

+ ~SymbolTable(){

+ reset();

+ }

+};

+/*

+// Document of class Escaper

+//

+// ATTENTION:

+// From http://icu-project.org/userguide/Collate_Customization.html.

+// We get the precedence of escape/quote operations

+//

+// (highest) 1. backslash \

+// 2. two single quotes ''

+// 3. quoting ' '

+//

+// ICU Collation should accept following as the same string.

+//

+// 1) 'ab'c _

+// 2) a\bc \

+// 3) a'b'\c |- They are equal.

+// 4) abc _/

+//

+// From "two single quotes", we have following deductions

+// D1. empty quoting is illegal. (obviously)

+// D2. no contact operation between two quotings

+// '.''.' is not .. it is .'.

+// D3. "two single quotes" cannot contact two quoting simultaneously

+// '..''''.' is not ..'. it is ..''.

+// NOTICE:

+// "two single quotes" can contact before one quoting

+// '''.' is '.

+// "two single quotes" can literally contact after one quoting

+// But, from syntax, it's one quoting including a "two single quotes"

+// '.''' is .'

+// D4. "two single quotes" cannot solely be included in quoting

+// '''' is not ' it is ''

+// NOTICE: These are legal

+// '.''.' is .'.

+// '.''' is .'

+//

+// decision

+// /\

+// /__\

+// output buffer input buffer

+//

+// To make our decision (within an atomic operation) without caring about the input and output buffers,

+// the following calling patterns (within an atomic operation) shall be avoided

+//

+// P1 open_quoting() then close_quoting() (direct violation) D1

+// P2 close_quoting() then open_quoting() (direct violation) D2

+// P3 empty open_quoting() (indirect violation) D1, D4

+// P4 empty close_quoting() (indirect violation) D2, D3

+// P5 open_quoting() then two single quotes (indirect violation) D4

+// P6 close_quoting() then two single quotes (indirect violation) D3

+//

+// two single quotes escaping will not open_ or close_ quoting()

+// The choice will not lose some quoting forms.

+//

+// For open_quoting(),

+// we may get this form quoting ''' P5

+// It may raise a bug ''''x

+// If we expect

+// '''.' let the next char open the quoting

+// '.''.' the quoting is already opened by preceding char

+//

+// For close_quoting()

+// we will get this form quoting '.''' P6

+// It may raise a bug '.''''.'

+// If we expect

+// '.'''\. let the next char close the quoting

+// '.''''.' the expectation is wrong! using '.'\''.' instead

+//

+// It is hard work to re-adjust the generation opportunity for the various escaping forms.

+// We simply ignore it.

+*/

+class Escaper{ // function object: operator() rewrites a literal using backslash and/or single-quote escaping

+public:

+ enum CHOICE {YES, NO, RAND};

+ enum ESCAPE_FORM {BSLASH_ONLY, QUOTE_ONLY, QUOTE_AND_BSLAH, RAND_ESC};

+private:

+ class Bool{ // A wrapper class for CHOICE, to auto adapter UBool class

+ private:

+ const CHOICE tag;

+ public:

+ Bool(CHOICE flag=RAND):tag(flag){}

+ operator UBool() { // conversion operator

+ return tag == RAND ? rand()%2 : tag == YES; // a RAND tag is re-rolled on every conversion

+ //if (tag == RAND){

+ // return rand()%2 == 1;

+ //} else {

+ // return tag == YES ? TRUE : FALSE;

+ //}

+ }

+ };

+public:

+ Escaper(CHOICE escapeLiteral = RAND,

+ CHOICE twoQuotesEscape = RAND,

+ ESCAPE_FORM escapeForm = RAND_ESC):

+ escape_form(escapeForm),

+ escape_literal(escapeLiteral),

+ two_quotes_escape(twoQuotesEscape),

+ is_quoting(FALSE){}

+private:

+ Buffer_char str; // output of the current operator() call

+ ESCAPE_FORM escape_form;

+ Bool escape_literal; // whether ordinary characters get escaped too

+ Bool two_quotes_escape; // whether a single quote is written using the two-single-quotes form

+ UBool quote_escape; // assigned by set_options(), not by the constructor

+ UBool bslash_escape; // assigned by set_options(), not by the constructor

+ UBool is_quoting; // TRUE while an opening quote has been written and not yet closed

+ void set_options(){ // choose the escape form and derive quote_escape / bslash_escape from it

+ ESCAPE_FORM t = escape_form == RAND_ESC ? (ESCAPE_FORM) (rand()%3) : escape_form; // rand()%3 picks one of the first three enumerators

+ switch (t){

+ case BSLASH_ONLY :

+ bslash_escape = TRUE; quote_escape = FALSE; break;

+ case QUOTE_ONLY:

+ bslash_escape = FALSE;quote_escape = TRUE; break;

+ case QUOTE_AND_BSLAH:

+ bslash_escape = TRUE; quote_escape = TRUE; break;

+ default:

+ ;// error

+ }

+ void reset(){

+ str.reset();

+ is_quoting = FALSE;

+ }

+ inline void open_quoting(){ // write an opening quote unless one is already open

+ if(is_quoting){

+ // do nothing

+ } else {

+ str.append('\'');

+ is_quoting = TRUE;

+ }

+ inline void close_quoting(){ // write a closing quote only if one is open

+ if(is_quoting){

+ str.append('\'');

+ is_quoting = FALSE;

+ } else {

+ // do nothing

+ }

+ // str [in] null-terminated c-string

+ void append(const char * strToAppend){

+ for(;*strToAppend != 0; strToAppend++){

+ append(*strToAppend);

+ }

+ inline void append(const char c){ // escape one character and add it to str

+ set_options(); // called for every character, so a RAND_ESC form is chosen again each time

+ if (c == '\\'){

+ quote_escape ? open_quoting() : close_quoting();

+ //bslash_escape always true here

+ str.append('\\');

+ } else if (c == '\''){

+ if (two_quotes_escape){ // quoted using two single quotes

+ // See documents in anonymous.design

+ str.append('\'');

+ } else{

+ quote_escape ? open_quoting() : close_quoting();

+ //bslash_escape always true here

+ str.append('\\');

+ str.append('\'');

+ }

+ } else if (isSpecialAsciiChar(c) || isWhiteSpace(c)){

+ quote_escape ? open_quoting() : close_quoting();

+ if (bslash_escape) str.append('\\');

+ str.append(c);

+ } else { //if (isAlphabet(c) || isDigit(c) || TRUE){ // treat others as literal

+ if (escape_literal){

+ quote_escape ? open_quoting() : close_quoting();

+ if (bslash_escape) str.append('\\');

+ str.append(c);

+ } else {

+ close_quoting();

+ str.append(c);

+ }

+public:

+ // Return a null-terminate c-string. The buffer is owned by callee.

+ char * operator()(const char * literal /*c-string*/){

+ str.reset(); // is_quoting is not reset here; the close_quoting() below leaves it FALSE after each call

+ for(;*literal != 0; literal++){

+ append(*literal);

+ }

+ close_quoting(); // P4 exception, to close whole quoting

+ return str; // NOTE(review): no explicit 0 is appended; termination relies on zero-filled spare capacity in the buffer - confirm for long outputs

+ }

+};

+class WeightedRand{ // weighted random index generator

+// Return a random number in [0, size)

+// Every number has different chance (aka weight) to be selected.

+private:

+ Buffer_int weights; // weights[i] is the weight of index i

+ double total; // sum of all weights

+ WeightedRand(const WeightedRand &); // copy constructor is only declared here, in the private section

+ WeightedRand & operator = (const WeightedRand &); // copy assignment likewise only declared

+public:

+ WeightedRand(Buffer_int * weight_list = NULL, int size = 0){ // takes at most size weights from weight_list and pads with DEFAULT_WEIGHT (not defined in this file view)

+ if ( weight_list == NULL){

+ for (int i=0; i<size; ++i) weights.append(DEFAULT_WEIGHT);

+ } else {

+ int s = weight_list->content_size();

+ if (s < size){

+ weights.append_array( (*weight_list),s);

+ for (int i=s; i<size; ++i) weights.append(DEFAULT_WEIGHT); // missing entries get the default weight

+ } else { // s >= size

+ weights.append_array( (*weight_list),size); // extra entries in weight_list are ignored

+ }

+ total = 0;

+ int c = weights.content_size();

+ for (int i=0; i<c; ++i){

+ total += weights[i];

+ }

+ void append(int weight){ // add one more index with the given weight

+ weights.append(weight);

+ total += weight;

+ }

+ // Give a random number with the consideration of weight.

+ // Every random number is associated with a weight.

+ // It identifies the chance to be selected,

+ // larger weight has more chance to be selected.

+ //

+ // ______________________ every slot has equal chance

+ //

+ // [____][_][___][______] each item has different slots, hence different chance

+ //

+ // The algorithms to generate the number is illustrated by preceding figure.

+ // First, a slot is selected by rand(). Then we translate the slot to corresponding item.

+ //

+ int next(){

+ // get a random in [0,1]

+ double reference_mark = (double)rand() / (double)RAND_MAX;

+ // get the slot's index, 0 <= mark <= total;

+ double mark = total * reference_mark;

+ // translate the slot to corresponding item

+ int i=0;

+ for (;;){ // NOTE(review): i has no upper bound; with no weights stored this reads past the stored elements - confirm callers never do that

+ mark -= weights[i]; // 0 <= mark <= total

+ if (mark <= 0)

+ break;

+ i++;

+ }

+ return i;

+ }

+};

+///////////////////////////////////////////////////////////

+//

+// The parser result nodes

+//

+class Literal : public Pick { // node that always yields the same fixed string

+public:

+ virtual const char* next(){

+ return str; // pointer into this object's own buffer

+ }

+ Literal(const char * s /*c-string*/){

+ str.append_array(s, strlen(s) + 1); // private copy of s, terminating 0 included

+ }

+private:

+ Buffer_char str; //null-terminated c-string

+};

+class Variable : public Pick { // node that forwards to whatever Pick the symbol table holds for its name

+public:

+ Variable(SymbolTable * symbols, const char * varName, Pick * varRef = NULL){

+ this->var_name.append_array(varName, strlen(varName) + 1); // private copy of the name, terminating 0 included

+ if ((symbol_table = symbols)){ // assignment on purpose: remember the table, then test it for NULL

+ symbol_table->put(varName, varRef); // registers the name in the table

+ }

+ operator const char *(){ // converts the node to its variable name

+ return var_name;

+ }

+ virtual const char* next(){ // the definition is looked up on every call

+ if (symbol_table){

+ Pick * var_ref = NULL;

+ symbol_table->find(var_name, &var_ref);

+ if (var_ref) {

+ return var_ref->next();

+ }

+ return ""; // dumb string

+ }

+private:

+ Buffer_char var_name;

+ SymbolTable * symbol_table; // stored as a plain pointer; nothing in this class deletes it

+};

+class Quote : public Pick{ // node that passes the wrapped node's output through an Escaper

+public:

+ Quote(Pick & base):item(base),e(Escaper::NO, Escaper::NO, Escaper::BSLASH_ONLY){ // fixed options: escapeLiteral NO, twoQuotesEscape NO, backslash form only

+ }

+ virtual const char* next(){

+ return e(item.next()); // the returned pointer refers to the Escaper's internal buffer

+ }

+private:

+ Pick & item; // held by reference; nothing in this class deletes it

+ Buffer_char str; // NOTE(review): not referenced anywhere in this class as shown

+ Escaper e;

+};

+class Morph : public Pick{ // node that blends each new string of the wrapped node with the previous one

+/*

+The difference between morph and an arbitrary random string is that

+a morph changes slowly. When we build collation rules, for example,

+it is a much better test if the strings we use are all in the same

+'neighborhood'; they share many common characters.

+*/

+public:

+ Morph(Pick & base):item(base){}

+ virtual const char* next(){

+ current.reset();

+ const char * s = item.next();

+ current.append_array(s, strlen(s) + 1); // keep a copy including the terminating 0

+ if (last.content_size() == 0) { // first call: nothing to blend with, return the string unchanged

+ str.reset();

+ last.reset();

+ str.append_array(current, current.content_size());

+ last.append_array(current, current.content_size());

+ } else {

+ morph();

+ }

+ return str;

+ }

+private:

+ Pick & item; // held by reference; nothing in this class deletes it

+ Buffer_char str; // the string returned by next()

+ Buffer_char last; // the wrapped node's string from the previous call

+ Buffer_char current; // the wrapped node's string from this call

+ char * p_last; // read cursor into last, set by morph()

+ char * p_curr; // read cursor into current, set by morph()

+ void copy_curr(){ // move one character from current to str, unless current is exhausted

+ if (*p_curr) {

+ str.append(*p_curr);

+ p_curr++;

+ }

+ void copy_last(){ // move one character from last to str, unless last is exhausted

+ if (*p_last) {

+ str.append(*p_last);

+ p_last++;

+ }

+ // copy 0, 1, or 2 character(s) to str

+ void copy(){

+ static WeightedRand wr(& Buffer_int().append(DEFAULT_WEIGHT * 10), 5); // five outcomes; outcome 0 weighs DEFAULT_WEIGHT * 10, the other four get DEFAULT_WEIGHT

+ switch (wr.next()){

+ case 0: // copy last -- has 10 times chance than others

+ copy_last();

+ break;

+ case 1: // copy both

+ copy_curr();

+ copy_last();

+ break;

+ case 2: // copy both

+ copy_last();

+ copy_curr();

+ break;

+ case 3:

+ copy_curr();

+ break;

+ case 4: // copy nothing

+ break;

+ default:

+ // ASSERT(FALSE);

+ ;

+ }

+ void morph(void){ // build str from a random interleaving of last and current

+ int min = strlen(last);

+ int max = strlen(current);

+ if (min > max){ // swap so that min <= max

+ int temp = min;

+ min = max;

+ max = temp;

+ }

+ int len = min + rand()%(max - min + 1); // min + [0, diff]

+ p_curr = current;

+ p_last = last;

+ str.reset();

+ for (; str.content_size()<len && *p_curr && *p_last;){ // interleave until long enough or until either input runs out

+ copy(); // copy 0, 1, or 2 character(s) to str

+ }

+ if (str.content_size() == len) {

+ str.append(0);

+ final();

+ return;

+ }

+ if (str.content_size() > len) { // if the last copy copied two characters

+ str[len]=0; // cut the string back to len by overwriting the extra character

+ final();

+ return;

+ }

+ // str.content_size() < len

+ if (*p_last) {

+ for (; str.content_size() < len; copy_last()); // only last has characters left: take the rest from it

+ } else if (*p_curr){

+ for (; str.content_size() < len; copy_curr()); // only current has characters left: take the rest from it

+ }

+ int last_len = last.content_size(); // NOTE(review): this count includes the stored terminating 0, so the pick below can yield a 0 byte - confirm

+ for (;str.content_size() < len;){

+ str.append(last[rand()%last_len]);

+ }

+ str.append(0);

+ final();

+ }

+ void final(){ // remember this call's input as the next call's `last`

+ last.reset();

+ last.append_array(current, current.content_size());

+ }

+};

+class Sequence : public Pick { // node that concatenates the output of its children in order

+public:

+ virtual const char* next(){

+ str.reset();

+ int s = items.content_size();

+ for(int i=0; i < s; i++){

+ const char * t = items[i]->next();

+ str.append_array(t, strlen(t)); // without the terminating 0, so the pieces join up

+ }

+ str.append(0); // terminal null

+ return str;

+ }

+ void append (Pick * node){ // the destructor deletes every appended node

+ items.append(node);

+ }

+ virtual ~Sequence(){

+ int s = items.content_size();

+ for(int i=0; i < s; i++){

+ //How can assure the item is got from heap?

+ //Let's assume it.

+ delete items[i]; // TOFIX: point alias/recursion problem

+ items[i] = NULL;

+ }

+private:

+ Buffer_pPick items;

+ Buffer_char str; //null-terminated c-string

+};

+class Repeat : public Pick { // node that repeats its child a weighted-random number of times

+private:

+ Pick * item; // deleted by the destructor

+ Buffer_char str;

+ WeightedRand wr; // index k of wr corresponds to a repeat count of min + k

+ int min;

+ int max; // stored by the constructor; not read anywhere else in this class

+ int select_a_count(){

+ return min + wr.next();

+ }

+public:

+ virtual const char* next(){

+ str.reset();

+ int c = select_a_count();

+ for(int i=0; i< c; i++){

+ const char * t = item->next(); // the child is asked again on every repetition

+ str.append_array(t, strlen(t));

+ }

+ str.append(0);

+ return str;

+ }

+ Repeat(Pick * base, int minCount =0, int maxCount = 1, Buffer_int * weights = NULL):

+ wr(weights, maxCount-minCount +1) { // one weight per possible count in [minCount, maxCount]

+ this->item = base;

+ this->min = minCount;

+ this->max = maxCount;

+ }

+ virtual ~Repeat(){

+ delete item; // TOFIX: point alias/recursion problem

+ item = NULL;

+ }

+};

+class Alternation : public Pick { // node that yields the output of one weighted-randomly chosen child

+public:

+ virtual const char* next(){

+ str.reset();

+ int i = wr.next(); // index of the chosen child. NOTE(review): there is no guard here for an empty item list

+ const char * t = items[i]->next();

+ str.append_array(t, strlen(t) + 1); // copy including the terminating 0

+ return str;

+ }

+ virtual ~Alternation(){

+ int s = items.content_size();

+ for(int i=0; i < s; i++){

+ delete items[i]; // TOFIX: point alias/recursion problem

+ items[i] = NULL;

+ }

+ Alternation & append (Pick * node, int weight = DEFAULT_WEIGHT){ // items and wr grow together, so index i of wr belongs to items[i]

+ items.append(node);

+ wr.append(weight);

+ return *this;

+ }

+private:

+ Buffer_pPick items;

+ Buffer_char str; // null-terminated c-string

+ WeightedRand wr;

+};

+///////////////////////////////////////////////////////////

+//

+// The parser

+//

+enum TokenType {STRING, VAR, NUMBER, STREAM_END, ERROR, QUESTION, STAR, PLUS, LBRACE, RBRACE, LPAR, RPAR, SEMI, EQ, COMMA, BAR, AT, WAVE, PERCENT}; // token kinds reported by Scanner::getNextToken()

+class Scanner{ // hand-written state machine that splits the rule text into tokens

+friend int DumpScanner(Scanner & s, UBool dumb);

+private:

+ const char * source; // start of the text being scanned

+ const char * working; // next character to read

+ const char * history; // for debug

+ enum StateType {START, IN_NUM, IN_VAR_FIRST, IN_VAR, IN_QUOTE, IN_QUOTE_BSLASH, IN_BSLASH, IN_STRING, DONE};

+ StateType state;

+ void terminated(TokenType t){ // finish the current token as type t without consuming the character just read

+ working--; // return the peeked character

+ tokenType = t;

+ token.append(0); // close buffer

+ state = DONE;

+ }

+public:

+ // the buffer of "source" is owned by caller

+ Scanner(const char *src/*[in] c-string*/ = NULL):source(src){

+ working = src;

+ history = working;

+ state = DONE;

+ tokenType = ERROR;

+ }

+ //void setSource(const char *const src /*[in] c-string*/){

+ // *(&const_cast<const char *>(source)) = src;

+ //}

+ Buffer_char token; // text of the most recent token

+ TokenType tokenType; // type of the most recent token

+ TokenType getNextToken(){ // scans one token into token/tokenType and also returns its type

+ token.reset();

+ state = START;

+ history = working; // for debug

+ while (state != DONE){

+ char c = *working++;

+ if (c == 0 && state != START){//avoid buffer overflow. for IN_QUOE, IN_ESCAPE

+ terminated(ERROR); // end of input in the middle of a token is reported as ERROR

+ break; // while

+ }

+ switch(state){

+ case START:

+ tokenType = ERROR; // ERROR doubles as "not a single-character token" for the check below

+ switch(c){

+ case '?' : tokenType = QUESTION; break;

+ case '*' : tokenType = STAR; break;

+ case '+' : tokenType = PLUS; break;

+ case '{' : tokenType = LBRACE; break;

+ case '}' : tokenType = RBRACE; break;

+ case '(' : tokenType = LPAR; break;

+ case ')' : tokenType = RPAR; break;

+ case ';' : tokenType = SEMI; break;

+ case '=' : tokenType = EQ; break;

+ case ',' : tokenType = COMMA; break;

+ case '|' : tokenType = BAR; break;

+ case '@' : tokenType = AT; break;

+ case '~' : tokenType = WAVE; break;

+ case '%' : tokenType = PERCENT; break;

+ case 0 : tokenType = STREAM_END; working-- /*avoid buffer overflow*/; break; // stay on the terminator so later calls report STREAM_END again

+ }

+ if (tokenType != ERROR){ // a single-character token was recognised

+ token.append(c);

+ token.append(0);

+ state = DONE;

+ break; // START

+ }

+ switch(c){

+ case '$' : state = IN_VAR_FIRST; token.append(c); break;

+ case '\'' : state = IN_QUOTE; break;

+ case '\\' : state = IN_BSLASH; break;

+ default:

+ if (isWhiteSpace(c)){ // state = START; //do nothing

+ } else if (isDigit(c)){ state = IN_NUM; token.append(c);

+ } else if (isAlphabet(c)){ state = IN_STRING; token.append(c);

+ } else {terminated(ERROR);}

+ }

+ break;//START

+ case IN_NUM:

+ if (isDigit(c)){

+ token.append(c);

+ } else {

+ terminated(NUMBER);

+ }

+ break;//IN_NUM

+ case IN_VAR_FIRST: // the character right after '$' must be a letter

+ if (isAlphabet(c)){

+ token.append(c);

+ state = IN_VAR;

+ } else {

+ terminated(ERROR);

+ }

+ break; // IN_VAR_FIRST

+ case IN_VAR:

+ if (isAlphabet(c) || isDigit(c)){

+ token.append(c);

+ } else {

+ terminated(VAR);

+ }

+ break;//IN_VAR

+ case IN_STRING:

+ // About the scanner's behavior for STRING, AT, and ESCAPE:

+ // All of them can be contacted with each other.

+ // This means the scanner will eat up as much as possible strings

+ // (STRING, AT, and ESCAPE) at one time, with no regard of their

+ // combining sequence.

+ //

+ if (c == '\''){

+ state = IN_QUOTE; // the first time we see single quote

+ } else if (c =='\\'){ // back slash character

+ state = IN_BSLASH;

+ } else if (isAlphabet(c) || isDigit(c)){

+ token.append(c);

+ } else{

+ terminated(STRING);

+ }

+ break;//IN_STRING

+ case IN_QUOTE:

+ if (c == '\''){ // the second time we see single quote

+ state = IN_STRING; // see document in IN_STRING

+ } else if ( c== '\\') { // backslash escape in quote

+ state = IN_QUOTE_BSLASH;

+ } else {

+ token.append(c); // eat up everything else; a backslash never reaches here, the branch above takes it

+ }

+ break;//IN_QUOTE

+ case IN_QUOTE_BSLASH: // shares the escape decoding below with IN_BSLASH

+ case IN_BSLASH:

+ switch (c){

+ case 'n' : token.append('\n'); break;

+ case 'r' : token.append('\r'); break;

+ case 't' : token.append('\t'); break;

+ case '\'' : token.append('\''); break;

+ case '\\' : token.append('\\'); break;

+ default: token.append(c); // unknown escaping, treat it as literal

+ }

+ if (state == IN_BSLASH){

+ state = IN_STRING; // see document in IN_STRING

+ } else { // state == IN_QUOTE_BSLASH

+ state = IN_QUOTE;

+ }

+ break;//IN_BSLASH

+ case DONE: /* should never happen */

+ default:

+ working--;

+ tokenType = ERROR;

+ state = DONE;

+ break;

+ }//switch(state)

+ }//while (state != DONE)

+ return tokenType;

+ }

+};//class Scanner

+class Parser{ // recursive-descent parser that turns the rule text into a tree of Pick nodes

+friend UBool TestParser();

+friend class TestParserT;

+friend class LanguageGenerator_impl;

+private:

+ Scanner s;

+ TokenType & token; // bound to s.tokenType by the constructor, so it always shows the scanner's latest token type

+ int min_max; // for the evil infinite

+ UBool match(TokenType expected){ // consume the current token if it is of the expected type

+ if (token == expected) {

+ token = s.getNextToken();

+ return TRUE;

+ } else {

+ //s.dumpCurrentPoint();

+ return FALSE;

+ }

+ UBool weight(int & value){ // parses NUMBER PERCENT; value is written only on success

+ if (token == NUMBER){

+ int temp = atoi(s.token); // read the digits before match() advances the scanner

+ match(NUMBER);

+ if (match(PERCENT)){ // NOTE(review): when this fails the NUMBER has already been consumed

+ value = temp;

+ return TRUE;

+ }

+ return FALSE;

+ }

+ UBool repeat (Pick* &node /*in,out*/){ // parses ?, *, + or {n[,[m]]} plus optional weights, and wraps node in a Repeat

+ if (node == NULL) return FALSE;

+ int count = -2; // -2 means "not set" for count, min and max; -1 means unbounded

+ int min = -2;

+ int max = -2;

+ UBool question = FALSE;

+ switch (token){

+ case QUESTION:

+ match(QUESTION);

+ min = 0;

+ max = 1;

+ count = 2;

+ question = TRUE;

+ break;

+ case STAR:

+ match(STAR);

+ min = 0;

+ max = -1;

+ count = -1;

+ break;

+ case PLUS:

+ match(PLUS);

+ min = 1;

+ max = -1;

+ count = -1;

+ break;

+ case LBRACE:

+ match(LBRACE);

+ if (token != NUMBER){

+ return FALSE;

+ }else {

+ min = atoi(s.token);

+ match(NUMBER);

+ if (token == RBRACE){ // form {n}: exactly n times

+ match(RBRACE);

+ max = min;

+ count = 1;

+ } else if (token == COMMA) {

+ match(COMMA);

+ if (token == RBRACE){ // form {n,}: no upper bound

+ match(RBRACE);

+ max = -1;

+ count = -1;

+ } else if (token == NUMBER) { // form {n,m}

+ max = atoi(s.token);

+ match(NUMBER);

+ count = max - min + 1;

+ if (!match(RBRACE)) {

+ return FALSE;

+ }

+ } else {

+ return FALSE;

+ }

+ } else {

+ return FALSE;

+ }

+ break;

+ default:

+ return FALSE;

+ }

+ if (count == -2 || min == -2 || max == -2){

+ //ASSERT(FALSE);

+ return FALSE;

+ }

+ // eat up following weights

+ Buffer_int weights;

+ int w;

+ while (weight(w)){

+ weights.append(w);

+ }

+ // for the evil infinite

+ min_max = min_max > min ? min_max : min; // running maximum of every bound seen so far

+ min_max = min_max > max ? min_max : max;

+ if (min_max > PSEUDO_INFINIT){

+ return FALSE; // PSEUDO_INFINIT is less than the real maximum

+ }

+ if (max == -1){ // the evil infinite

+ max = PSEUDO_INFINIT;

+ }

+ // for the strange question mark

+ if (question && weights.content_size() > 0){

+ Buffer_int w2;

+ w2.append(DEFAULT_WEIGHT - weights[0]).append(weights[0]); // weights[0] goes to "one occurrence", the remainder to "zero occurrences"

+ node = new Repeat(node,min,max,&w2);

+ return TRUE;

+ }

+ node = new Repeat(node,min,max,&weights);

+ return TRUE;

+ }

+ UBool core(Pick* &node /*out*/){ // parses a parenthesised definition, a variable or a literal string

+ if (node != NULL) return FALSE; //assert node == NULL

+ switch(token){

+ case LPAR:

+ match(LPAR);

+ if(defination(node) && match(RPAR)){

+ return TRUE;

+ }

+ return FALSE;

+ case VAR:

+ node = new Variable(&symbols, s.token); // built before match() so s.token still holds the variable name

+ match(VAR);

+ return TRUE;

+ case STRING:

+ node = new Literal(s.token);

+ match(STRING);

+ return TRUE;

+ default:

+ return FALSE;

+ }

+ UBool modified(Pick* &node /*out*/){ // parses a core followed by any number of ~, @ and repeat modifiers

+ if (node != NULL) return FALSE; //assert node == NULL

+ if (!core(node)) {

+ return FALSE;

+ }

+ for (;;){

+ switch(token){

+ case WAVE:

+ match(WAVE);

+ node = new Morph(*node); // NOTE(review): Morph keeps only a reference to the wrapped node and shows no destructor that deletes it

+ break;

+ case AT:

+ match(AT);

+ node = new Quote(*node); // NOTE(review): Quote keeps only a reference to the wrapped node and shows no destructor that deletes it

+ break;

+ case QUESTION:

+ case STAR:

+ case PLUS:

+ case LBRACE:

+ if (!repeat(node)) return FALSE;

+ break;

+ case SEMI: // rule definition closed

+ case RPAR: // within parenthesis (core closed)

+ case BAR: // in alternation

+ case NUMBER: // in alternation, with weight

+ case LPAR: // in sequence

+ case VAR: // in sequence

+ case STRING: // in sequence

+ return TRUE;

+ default:

+ return FALSE;

+ }

+ UBool sequence_list(Pick* &node /*in,out*/){ // collects node and the following modified items into one Sequence

+ if (node == NULL) return FALSE; // assert node != NULL

+ Sequence* seq = new Sequence();

+ Pick * n = node;

+ while (token == VAR || token == STRING || token == LPAR){

+ seq->append(n); // the item parsed in the previous round (or the incoming node) joins the sequence

+ n = NULL;

+ if (modified(n)){

+ // go on

+ } else {

+ goto FAIL;

+ }

+ if (token == SEMI || token == RPAR || token == BAR){

+ seq->append(n); // the last parsed item is still pending, add it before returning

+ node = seq;

+ return TRUE;

+ }

+FAIL:

+ delete seq; // also deletes every item already appended to seq

+ return FALSE;

+ }

+ UBool sequence(Pick* &node /*out*/){ // parses one modified item, or a whole sequence of them

+ if (node != NULL) return FALSE; //assert node == NULL

+ if (!modified(node)) {

+ return FALSE;

+ }

+ if (token == VAR || token == STRING || token == LPAR){

+ return sequence_list(node);

+ } else {

+ return TRUE; // just a modified

+ }

+ UBool alternation_list(Pick* &node /*in,out*/){ // collects node and the following '|' branches into one Alternation

+ if (node == NULL) return FALSE; // assert node != NULL

+ Alternation * alt = new Alternation();

+ Pick * n = node;

+ int w = DEFAULT_WEIGHT; // weight of the pending branch n

+ while (token == NUMBER || token == BAR){

+ if(token == NUMBER) {

+ if (weight(w)){

+ if (token == BAR){

+ // the middle item, go on

+ } else {

+ // the last item or encounter error

+ break; //while

+ }

+ } else {

+ goto FAIL;

+ }

+ } // else token == BAR

+ match(BAR);

+ alt->append(n,w);

+ n = NULL;

+ w = DEFAULT_WEIGHT;

+ if (sequence(n)){

+ // go on

+ } else {

+ goto FAIL;

+ }

+ if (token == SEMI || token == RPAR) {

+ alt->append(n,w); // the last branch is still pending, add it before returning

+ node = alt;

+ return TRUE;

+ }

+FAIL:

+ delete alt; // also deletes every branch already appended to alt

+ return FALSE;

+ }

+ UBool alternation(Pick* &node /*out*/){ // parses one sequence, or a whole alternation of them

+ if (node != NULL) return FALSE; //assert node == NULL

+ // 'sequence' has higher precedence than 'alternation'

+ if (!sequence(node)){

+ return FALSE;

+ }

+ if (token == BAR || token == NUMBER){ // find a real alternation1, create it.

+ return alternation_list(node);

+ } else {

+ return TRUE; // just a sequence_old

+ }

+ UBool defination(Pick* &node /*out*/){ // right-hand side of a rule; currently the same as alternation

+ if (node != NULL) return FALSE; //assert node == NULL

+ return alternation(node);

+ }

+ UBool rule(){ // parses VAR = defination ; and stores the result in symbols

+ if (token == VAR){

+ Buffer_char name;

+ name.append_array(s.token, strlen(s.token) + 1); // copy the name: s.token is overwritten by the next match()

+ match(VAR);

+ if (match(EQ)){

+ Pick * t = NULL;

+ if(defination(t)){

+ symbols.put(name, t);

+ return match(SEMI);

+ }

+ return FALSE;

+ }

+public:

+ UBool rules(){ // parses rules until one fails; succeeds only if the whole input was consumed

+ symbols.reset();

+ token = s.getNextToken();

+ while (rule()){

+ }

+ if (token == STREAM_END){

+ return TRUE;

+ } else {

+ //s.dumpCurrentPoint();

+ return FALSE;

+ }

+public:

+ SymbolTable symbols; // filled by rule() and by the Variable nodes created in core()

+ Parser(const char *const source):s(source), token(s.tokenType){

+ min_max = -2;

+ }

+ UBool parse(){

+ return rules();

+ }

+}; // class Parser

+///////////////////////////////////////////////////////////

+//

+int DumpScanner(Scanner & s, UBool dump = TRUE){ // optionally prints the scanner position; returns the offset where the current token started

+ int len = strlen(s.source);

+ int error_start_offset = s.history - s.source; // history marks where the most recent getNextToken() call began

+ if (dump){

+ printf("\n=================== DumpScanner ================\n");

+ fwrite(s.source, len, 1, stdout);

+ printf("\n-----parsed-------------------------------------\n");

+ fwrite(s.source, s.history - s.source, 1, stdout);

+ printf("\n-----current------------------------------------\n");

+ fwrite(s.history, s.working - s.history, 1, stdout);

+ printf("\n-----unparsed-----------------------------------\n");

+ fwrite(s.working, (s.source + len - s.working), 1, stdout);

+ printf("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");

+ }

+ return error_start_offset;

+class LanguageGenerator_impl{ // owns the Parser and remembers the top node

+public:

+ LanguageGenerator_impl(const char *const bnf_definition, const char *const top_node)

+ :par(bnf_definition), top_node_name(top_node){ // both pointers are stored as given, not copied

+ srand((unsigned)time( NULL )); // reseeds rand() from the clock on every construction

+ }

+ LanguageGenerator::PARSE_RESULT parseBNF(UBool debug = TRUE){ // parse, then require a defined top node and no undefined variables

+ if (par.parse()){

+ if (par.symbols.find(top_node_name, &top_node_ref) == SymbolTable::HAS_REF) {

+ if (par.symbols.is_complete()) {

+ return LanguageGenerator::OK;

+ } else {

+ if (debug) printf("The bnf definition is incomplete.\n");

+ return LanguageGenerator::INCOMPLETE;

+ }

+ } else {

+ if (debug) printf("No top node is found.\n");

+ return LanguageGenerator::NO_TOP_NODE;

+ }

+ } else {

+ if(debug) {

+ printf("The bnf definition is wrong\n");

+ DumpScanner(par.s, TRUE);

+ }

+ return LanguageGenerator::BNF_DEF_WRONG;

+ }

+ const char * next(){ // NOTE(review): top_node_ref is only assigned inside parseBNF(); calling this before that assignment dereferences an uninitialised pointer

+ return top_node_ref->next();

+ }

+private:

+ Parser par;

+ const char *const top_node_name;

+ Pick * top_node_ref; // not initialised by the constructor

+};

+LanguageGenerator::LanguageGenerator():lang_gen(NULL){

+LanguageGenerator::~LanguageGenerator(){

+ delete lang_gen;

+LanguageGenerator::PARSE_RESULT LanguageGenerator::parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug){

+ if (lang_gen){

+ delete lang_gen;

+ }

+ lang_gen = new LanguageGenerator_impl(bnf_definition, top_node);

+ PARSE_RESULT r = lang_gen->parseBNF(debug);

+ if (r != OK){

+ delete lang_gen;

+ lang_gen = NULL;

+ return r;

+ } else {

+ return r;

+ }

+const char *LanguageGenerator::next(){ // Return a null-terminated c-string. The buffer is owned by callee.

+ if (lang_gen){

+ return lang_gen->next();

+ }else {

+ return "";

+ }

+///////////////////////////////////////////////////////////

+//

+// The test code for WBNF

+//

// Runs the test function `fun` (called with no arguments) and prints
// "Pass: <name>" or "FAILED: !!! <name> !!!" depending on its result.
// Wrapped in do { } while (0) so that `CALL(x);` is a single statement and
// can be used safely inside an unbraced if/else.
#define CALL(fun) \
    do { \
        if (fun()){ \
            printf("Pass: " #fun "\n"); \
        } else { \
            printf("FAILED: !!! " #fun " !!!\n"); \
        } \
    } while (0)

// Prints a banner naming `fun`, then calls (var).next() `times` times and
// writes each returned C string on its own line, followed by a closing rule.
// `var` and `times` are parenthesised so arbitrary expressions can be passed,
// and the body is a do { } while (0) so `DUMP_R(...);` is one statement.
// Both arguments are re-evaluated on every iteration.
#define DUMP_R(fun, var, times) \
    do { \
        printf("\n========= " #fun " =============\n"); \
        for (int dump_r_i = 0; dump_r_i < (times); dump_r_i++) { \
            const char * dump_r_text = (var).next(); \
            fwrite(dump_r_text, strlen(dump_r_text), 1, stdout); \
            printf("\n"); \
        } \
        printf("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"); \
    } while (0)

+#if TEST_WBNF_TEST

+static UBool TestQuote(){

+ const char *const str = "This ' A !,z| qq [] .new\tline";

+ //const char *const str_r = "This \\' A '!,'z'|' qq '[]' '.'new\tline";

+ ////

+ //// :( we must quote our string to following C syntax

+ //// cannot type the literal here, it makes our code rather human unreadable

+ //// very very unconformable!

+ ////

+ ///*

+ //*/

+ //const char *const s1 = "ab'c";

+ //const char (* s1_r1) [] = { "ab''c", // ab''c

+ // "ab\\'c", // ab\'c

+ // };//

+ ///*

+ // . '.' \.

+ // .. \.\. '.'\. '.'\. '..' // '.''.' wrong

+ //*/

+ //const char *const s2 = "a..'.b"; // a..'.b

+ //const char (*s2_r) [] = { "a'..''.'b" // a'..''.'b

+ // ,"a'..\\'.'b" // a'..\'.'b

+ // ,"a'..'\\''.'b" // a'..'\''.'b

+ // };//

+ //const char *const s3 = "a..\\.b"; // a..\.b

+ //const char (*s3_r) [] = { "a'..\\\\.'b" // a'..\\.'b

+ // ,"a'..'\\\\'.'b" // a'..'\\'.'b

+ // };//

+ // // no catact operation, no choice, must be compact

+ srand((unsigned)time( NULL ));

+ //Escaper l(Escaper::NO, Escaper::NO, Escaper::RAND_ESC);

+ Pick *p = new Literal(str);

+ Quote q(*p);

+ DUMP_R(TestQuote, (*p), 1);

+ DUMP_R(TestQuote, q, 20);

+ return FALSE;

+static UBool TestLiteral(){

+ const char * s = "test string99.";

+ Literal n(s);

+ const char * r = n.next();

+ return strcmp(s,r) == 0;

+static UBool TestSequence(){

+ Sequence seq;

+ seq.append(new Literal("abc "));

+ seq.append(new Literal(", s"));

+ return strcmp(seq.next(), "abc , s") == 0;

+static UBool TestAlternation(){

+ srand((unsigned)time( NULL ));

+ Alternation alt;

+ alt.append(new Literal("aaa_10%"),10);

+ alt.append(new Literal("bbb_0%"),0);

+ alt.append(new Literal("ccc_10%"),10);

+ alt.append(new Literal("ddddddd_50%"),50);

+ DUMP_R(TestAlternation, alt, 50);

+ return FALSE;

+static UBool TestBuffer(){

+ Buffer_int t;

+ t.append(1).append(0).append(5);

+ int s = t.content_size();

+ for (int i=0; i<s; ++i){

+ printf("%d\n", t[i]);

+ }

+ return FALSE;

+static UBool TestWeightedRand(){

+ srand((unsigned)time( NULL ));

+ Buffer_int t;

+ t.append(1).append(0).append(5);

+ WeightedRand wr(&Buffer_int().append(10).append(0).append(50),4);

+// WeightedRand wr(&t,3);

+ for (int i=0; i< 50; ++i){

+ printf("%d\n", wr.next());

+ }

+ return FALSE;

+static UBool TestRepeat(){

+ srand((unsigned)time( NULL ));

+ Repeat rep(new Literal("aaa1-5 "), 1, 5);

+ DUMP_R(TestRepeat, rep, 50);

+ Repeat r2(new Literal("b{1,3}1%0%5% "), 1, 3, &Buffer_int().append(1).append(0).append(5));

+ DUMP_R(TestRepeat, r2, 50);

+ Repeat r3(new Literal("aaa5-5 "), 5, 5);

+ DUMP_R(TestRepeat, r3, 50);

+ return FALSE;

+static UBool TestVariable(){

+ SymbolTable tab;

+ Pick * value = new Literal("string1");

+ Variable var1(&tab, "x", value);

+ Variable var2(&tab, "y");

+// tab.put(var2, value); // TOFIX: point alias/recursion problem

+ Pick * value2 = new Literal("string2");

+ tab.put(var2, value2);

+ Pick * value3 = new Literal("string3");

+ Variable var3(&tab, "z");

+ tab.put("z", value3);

+ UBool pass;

+ pass = strcmp(var1.next(), value->next()) == 0;

+ pass = pass && strcmp(var2.next(), value2->next()) == 0;

+ pass = pass && strcmp(var3.next(), value3->next()) == 0;

+ return pass;

+static UBool TestSymbolTable(){

+ Literal * n1 = new Literal("string1");

+ Literal * n2 = new Literal("string2");

+ SymbolTable t;

+ t.put("abc", n1);

+ t.put("$aaa", n2);

+// t.put("alias", n1); // TOFIX: point alias/recursion problem

+ t.put("bbb");

+ UBool pass;

+ pass = t.find(NULL) == SymbolTable::EMPTY;

+ pass = pass && t.find("ccc") == SymbolTable::NO_VAR;

+ pass = pass && t.find("bbb") == SymbolTable::NO_REF;

+ pass = pass && t.find("abc") == SymbolTable::HAS_REF;

+ pass = pass && t.find("$aaa") == SymbolTable::HAS_REF;

+ t.reset();

+ pass = pass && t.find("abc") == SymbolTable::NO_VAR;

+ return pass;

+static UBool TestScanner(void){

+ //const char str1[] = "$root = $command{0,5} $reset $mostRules{1,20};";

+ //const char str1_r[][20] = {"$root", "=", "$command", "{", "0", ",", "5", "}",

+ // "$reset", "$mostRules", "{", "1", ",", "20", "}", ";"};

+ const char str2[] = "$p2 =(\\\\ $s $string $s)? 25%;";

+ const char str2_r[][20] = {"$p2", "=", "(", "\\", "$s", "$string", "$s", ")", "?", "25", "%", ";"};

+ const char *str = str2;

+ const char (*str_r)[20] = str2_r;

+ int tokenNum = sizeof(str2_r)/sizeof(char[20]);

+ Scanner t(str);

+ UBool pass = TRUE;

+ t.getNextToken();

+ int i = 0;

+ while (pass){

+ if (t.tokenType == STREAM_END){

+ pass = pass? i == tokenNum : FALSE;

+ break;//while

+ } else if (t.tokenType == ERROR){

+ pass = FALSE;

+ break;//while

+ } else {

+ pass = strcmp( &(t.token[0]), str_r[i++]) == 0;

+ t.getNextToken();

+ }

+ //const char ts[] = "$commandList = '['"

+ //" ( alternate ' ' $alternateOptions"

+ //" | backwards ' 2'"

+ //" | normalization ' ' $onoff "

+ //" | caseLevel ' ' $onoff "

+ //" | hiraganaQ ' ' $onoff"

+ //" | caseFirst ' ' $caseFirstOptions"

+ //" | strength ' ' $strengthOptions"

+ //" ) ']';" ;

+ //Scanner t2(ts);

+ //pass = TRUE;

+ //do {

+ // t2.getNextToken();

+ // if (t2.tokenType == ERROR){

+ // DumpScanner(t2);

+ // return FALSE;

+ // }

+ //}while (t.tokenType != STREAM_END);

+ return pass;

+class TestParserT {

+public:

+UBool operator () (const char *const str, const int exp_error_offset = -1, const UBool dump = TRUE){

+ Parser par(str);

+ if (par.rules()){

+ if ( exp_error_offset == -1){

+ return TRUE;

+ }else {

+ DumpScanner(par.s,dump);

+ return FALSE;

+ }

+ }else {

+ return DumpScanner(par.s, dump) == exp_error_offset;

+ }

+};

+UBool TestParser(){

+ TestParserT test;

+ UBool pass = TRUE;

+ pass = pass && test ("$s = ' ' ? 50%;");

+ pass = pass && test("$x = ($var {1,2}) 3%;"); // legal

+ pass = pass && test("$x = $var {1,2} 3% | b 4%;"); // legal

+ pass = pass && test("$x = $var {1,2} 3%;"); // legal

+ pass = pass && test("$m = $c ? 2% 4% | $r 5% | $n 25%;"); // legal

+ pass = pass && test("$a = b ? 2% | c 5%;"); // legal

+ pass = pass && test("$x = A B 5% C 10% | D;", 8, FALSE); // illegal 5%

+ pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc

+ pass = pass && test("$x = (b 5%) (c 6%);"); // legal

+ pass = pass && test("$x = (b 5%) c 6%;", 13, FALSE); // illegal 6%

+ pass = pass && test("$x = b 5% (c 6%);", 9, FALSE); // illegal (c 6%)

+ pass = pass && test("$x = b 5% c 6%;", 9, FALSE); // illegal c 6%

+ pass = pass && test("$x = b 5%;"); // legal

+ pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc

+ pass = pass && test("$x = a | b | c 4% | d 5%;"); // legal

+ pass = pass && test("$s = ' ' ? 50% abc;"); // legal

+ pass = pass && test("$s = a | c d | e f;"); // legal

+ pass = pass && test( "$z = q 0% | p 1% | r 100%;"); // legal How to check parsed tree??

+ pass = pass && test("$s = ' ' ? 50%;");

+ pass = pass && test("$relationList = '<' | '<<' | ';' | '<<<' | ',' | '=';");

+ pass = pass && test("$p1 = ($string $s '|' $s)? 25%;");

+ pass = pass && test("$p2 = (\\\\ $s $string $s)? 25%;");

+ pass = pass && test("$rel2 = $p1 $string $s $p2;");

+ pass = pass && test("$relation = $relationList $s ($rel1 | $rel2) $crlf;");

+ pass = pass && test("$command = $commandList $crlf;");

+ pass = pass && test("$reset = '&' $s ($beforeList $s)? 10% ($positionList 100% | $string 10%) $crlf;");

+ pass = pass && test("$mostRules = $command 1% | $reset 5% | $relation 25%;");

+ pass = pass && test("$root = $command{0,5} $reset $mostRules{1,20};");

+ const char collationBNF[] =

+ "$s = ' '? 50%;"

+ "$crlf = '\r\n';"

+ "$alternateOptions = non'-'ignorable | shifted;"

+ "$onoff = on | off;"

+ "$caseFirstOptions = off | upper | lower;"

+ "$strengthOptions = '1' | '2' | '3' | '4' | 'I';"

+ "$commandList = '['"

+ " ( alternate ' ' $alternateOptions"

+ " | backwards ' 2'"

+ " | normalization ' ' $onoff "

+ " | caseLevel ' ' $onoff "

+ " | hiraganaQ ' ' $onoff"

+ " | caseFirst ' ' $caseFirstOptions"

+ " | strength ' ' $strengthOptions"

+ " ) ']';"

+ "$command = $commandList $crlf;"

+ "$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;"

+ "$allTypes = variable | regular | implicit | trailing | $ignorableTypes;"

+ "$positionList = '[' (first | last) ' ' $allTypes ']';"

+ "$beforeList = '[before ' ('1' | '2' | '3') ']';"

+ "$relationList = ("

+ " '<'"

+ " | '<<'"

+ " | ';'"

+ " | '<<<'"

+ " | ','"

+ " | '='"

+ ");"

+ "$string = $magic;"

+ "$rel1 = '[variable top]' $s;"

+ "$p1 = ($string $s '|' $s)? 25%;"

+ "$p2 = (\\\\ $s $string $s)? 25%;"

+ "$rel2 = $p1 $string $s $p2;"

+ "$relation = $relationList $s ($rel1 | $rel2) $crlf;"

+ "$reset = '&' $s ($beforeList $s)? 10% ($positionList 1% | $string 10%) $crlf;"

+ "$mostRules = $command 1% | $reset 5% | $relation 25%;"

+ "$root = $command{0,5} $reset $mostRules{1,20};"

+ ;

+ pass = pass && test(collationBNF);

+ return pass;

+static UBool TestMorph(){

+ srand((unsigned)time( NULL ));

+ Alternation * alt = new Alternation();

+ (*alt)

+ .append(new Literal("a")).append(new Literal("b")).append(new Literal("c"))

+ .append(new Literal("d")).append(new Literal("e")).append(new Literal("f"))

+ .append(new Literal("g")).append(new Literal("h")).append(new Literal("i"))

+ .append(new Literal("j")).append(new Literal("k")).append(new Literal("l"))

+ .append(new Literal("m")).append(new Literal("n")).append(new Literal("o"))

+ ;

+ Repeat * rep = new Repeat( alt ,5,5 );

+ Morph m( *rep);

+// DUMP_R(TestMorph,(*rep),20);

+ DUMP_R(TestMorph,m,100);

+ return FALSE;

+#endif

+static UBool TestLanguageGenerator(){

+ //LanguageGenerator g;

+ //const char *const s = "$s = p 0% | q 1%;";

+ //g.parseBNF(s, "$s");

+ UBool pass;

+ //= strcmp("q", g.next()) == 0;

+ const char *const def =

+ //"$a = $b;"

+ //"$b = $c;"

+ //"$c = $t;"

+ //"$t = abc $z{1,2};"

+ //"$k = a | b | c | d | e | f | g ;"

+ //"$z = q 0% | p 1% | r 1%;"

+ "$x = a ? 0%;"

+ ; // end of string

+// const char * s = "abczz";

+//

+ LanguageGenerator g;

+ pass = g.parseBNF(def, "$x",TRUE);

+//// LanguageGenerator g(collationBNF, "$root", "$magic", new MagicNode());

+//

+ if (pass != LanguageGenerator::OK) return FALSE;

+ DUMP_R(TestLanguageGenerator, g, 20);

+ return pass;

+ ////UBool pass = strcmp(s,r) == 0;

+ //if (pass){

+ // printf("TestRandomLanguageGenerator passed.\n");

+ //} else {

+ // printf("TestRandomLanguageGenerator FAILED!!!\n");

+ //}

+ //return pass;

+void TestWbnf(void){

+ srand((unsigned)time( NULL ));

+ //CALL(TestLiteral);

+ //CALL(TestSequence);

+ //CALL(TestSymbolTable);

+ //CALL(TestVariable);

+ //TestRepeat();

+ //TestAlternation();

+ //TestMorph();

+ //TestQuote();

+ //TestBuffer();

+ //TestWeightedRand();

+ //CALL(TestScanner);

+ //CALL(TestParser);

+ CALL(TestLanguageGenerator);

Property changes on: icu46/source/test/intltest/wbnf.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/test/intltest/wbnf.h ('k') | icu46/source/test/intltest/windttst.h » ('j') | no next file with comments »