Index: source/test/intltest/wbnf.cpp |
diff --git a/source/test/intltest/wbnf.cpp b/source/test/intltest/wbnf.cpp |
deleted file mode 100644 |
index 7d08de7081b57f7af8c6b0d3bf4e669bc4301dca..0000000000000000000000000000000000000000 |
--- a/source/test/intltest/wbnf.cpp |
+++ /dev/null |
@@ -1,1771 +0,0 @@ |
-/* |
- ****************************************************************************** |
- * Copyright (C) 2005-2007, International Business Machines Corporation and * |
- * others. All Rights Reserved. * |
- ****************************************************************************** |
- */ |
- |
-#include <stdio.h> |
-#include <string.h> |
-#include <stdlib.h> |
-#include <time.h> |
- |
-#include "wbnf.h" |
- |
-// Most of this code is meant to test the test code. It's a self test. |
-// Normally this isn't run. |
-#define TEST_WBNF_TEST 0 |
- |
-/////////////////////////////////////////////////////////// |
-// |
-// Constants and the most basic helper classes |
-// |
- |
-static const char DIGIT_CHAR[] = "0123456789"; |
-static const char WHITE_SPACE[] = {'\t', ' ', '\r', '\n', 0}; |
-static const char ALPHABET[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
-static const char SPECIAL[] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; |
- |
-static inline UBool isInList(const char c /*in*/, const char list[] /*in*/){ |
- const char * p = list; |
- for (;*p != 0 && *p != c; p++); |
- return *p?TRUE:FALSE; |
-} |
-static inline UBool isDigit(char c) {return isInList(c, DIGIT_CHAR);} |
-static inline UBool isWhiteSpace(char c) {return isInList(c, WHITE_SPACE);} |
-static inline UBool isAlphabet(char c) {return isInList(c, ALPHABET);} |
-static inline UBool isSpecialAsciiChar(char c) {return isInList(c,SPECIAL);} |
- |
- |
- |
-/////////////////////////////////////////////////////////// |
-// |
-// Helper classes |
-// |
- |
-class Buffer_byte{ |
-// Utility class, can be treated as an auto expanded array. no boundary check. |
- |
- typedef char byte; |
- byte * start; |
- byte * current; |
- int buffer_size; // size unit is byte |
-public: |
- inline int content_size(){return current - start;} // size unit is byte |
- |
-private: |
- inline void expand(int add_size = 100){ // size unit is byte |
- int new_size = buffer_size + add_size; |
- |
- int cs_snap = content_size(); |
- start = (byte *) realloc(start, new_size); // may change the value of start |
- current = start + cs_snap; |
- |
- memset(current, 0, add_size); |
- buffer_size = new_size; |
- } |
- |
- inline void expand_to(int size){ |
- int r = size - buffer_size; |
- if (r > 0) { |
- expand(r); // simply expand, no block alignment |
- } |
- } |
- Buffer_byte(const Buffer_byte &); |
- Buffer_byte & operator = (const Buffer_byte &); |
-public: |
- Buffer_byte():start(NULL),current(start),buffer_size(0){ |
- expand(); |
- } |
- ~Buffer_byte(){ |
- free(start); |
- } |
- |
- inline void reset(){ |
- start != NULL ? memset(start, 0, buffer_size) : 0; |
- current = start; |
- } |
- |
- // Using memory copy method to append a C array to buffer, |
- inline void append(const void * c, int size){ // size unit is byte |
- expand_to(content_size() + size) ; |
- memcpy(current, c, size); |
- current = current + size; |
- } |
- |
- byte * buffer(){ |
- return start; |
- } |
-}; |
- |
-/* |
- The class(es) try to work as bulid-in array, so it overloads these two operators |
- operator type *(); |
- type & operator[]; |
- The first is used to auto type convert, the latter is used to select member. |
- |
- A small trick is the class does not overload the address-of operator. This |
- behavior is different from bulid-in array, but it give us the opportunity |
- to get the address of the class itself. |
-*/ |
-//template<typename type> |
-// class BUFFER{ |
-// typedef BUFFER name; |
-#define BUFFER(type, name)\ |
- class name {\ |
- private:\ |
- Buffer_byte buf;\ |
- public:\ |
- name & reset() {buf.reset(); return *this;}\ |
- name & append(type c) {buf.append(&c, sizeof(type)); return *this;}\ |
- name & append_array(const type * p, int size) {buf.append(p, sizeof(type)*size); return *this;}\ |
- type & operator [] (int i) { return ((type *) buf.buffer())[i];}\ |
- operator type *(){return (type *) buf.buffer();} \ |
- int content_size(){return buf.content_size() / sizeof(type);}\ |
- } |
- |
- |
-class Pick{ |
-/* The Pick is the basic language generator element*/ |
-public: |
- // generate a string accroding the syntax |
- // Return a null-terminated c-string. The buffer is owned by callee. |
- virtual const char* next() = 0; |
- virtual ~Pick(){}; |
-}; |
- |
-//typedef BUFFER<char> Buffer_char; |
-//typedef BUFFER<int> Buffer_int; |
-//typedef BUFFER<Pick *> Buffer_pPick; |
-BUFFER(char, Buffer_char); |
-BUFFER(int, Buffer_int); |
-BUFFER(Pick *, Buffer_pPick); |
- |
-class SymbolTable{ |
-/* Helper class. |
-* It's a mapping table between 'variable name' and its 'active Pick object' |
-*/ |
-private: |
- Buffer_char name_buffer; // var names storage space |
- |
- Buffer_int names; // points to name (offset in name_buffer) |
- Buffer_pPick refs; // points to Pick |
- |
- int get_index(const char *const var_name){ |
- int len = names.content_size(); |
- for (int i=0; i< len; i++){ |
- if (strcmp(var_name, name_buffer + names[i]) == 0){ |
- return i; |
- } |
- } |
- return -1; |
- } |
- |
-public: |
- enum RESULT {EMPTY, NO_VAR, NO_REF, HAS_REF}; |
- |
- RESULT find(const char *const var_name /*[in] c-string*/, Pick * * ref = NULL /*[out] Pick* */){ |
- if (!var_name) return EMPTY; // NULL name |
- |
- int i = get_index(var_name); |
- if (i == -1){ |
- return NO_VAR; // new name |
- } |
- if (!refs[i]){ // exist name, no ref |
- return NO_REF; |
- } else { |
- if (ref) { |
- *ref = refs[i]; |
- } |
- return HAS_REF; // exist name, has ref |
- } |
- } |
- |
- void put(const char *const var_name, Pick *const var_ref = NULL){ |
- int i = get_index(var_name); |
- switch(find(var_name)){ |
- case EMPTY: // NULL name |
- break; |
- case NO_VAR: // new name |
- int offset; |
- offset = name_buffer.content_size(); |
- name_buffer.append_array(var_name, strlen(var_name) + 1); |
- names.append(offset); |
- refs.append(var_ref); |
- break; |
- case NO_REF: // exist name, no ref |
- refs[i] = var_ref; // link definition with variable |
- break; |
- case HAS_REF: // exist name, has ref |
- if (var_ref){ |
- refs[i] = var_ref; |
- } |
- break; |
- default: |
- ; // ASSERT(FALSE); |
- } |
- return; |
- } |
- |
- UBool is_complete(){ |
- int n = names.content_size(); |
- for (int i=0; i<n; ++i){ |
- if (refs[i] == NULL){ |
- return FALSE; |
- } |
- } |
- return TRUE; |
- } |
- |
- void reset(){ |
- names.reset(); |
- name_buffer.reset(); |
- |
- // release memory here |
- int s = refs.content_size(); |
- for (int i=0; i < s; i++){ |
- delete refs[i]; // TOFIX: point alias/recursion problem |
- } |
- refs.reset(); |
- } |
- |
- ~SymbolTable(){ |
- reset(); |
- } |
-}; |
- |
- |
-/* |
-// Document of class Escaper |
-// |
-// ATTENTION: |
-// From http://icu-project.org/userguide/Collate_Customization.html. |
-// We get the precedence of escape/quote operations |
-// |
-// (highest) 1. backslash \ |
-// 2. two single quotes '' |
-// 3. quoting ' ' |
-// |
-// ICU Collation should accept following as the same string. |
-// |
-// 1) 'ab'c _ |
-// 2) a\bc \ |
-// 3) a'b'\c |- They are equal. |
-// 4) abc _/ |
-// |
-// From "two single quotes", we have following deductions |
-// D1. empty quoting is illgal. (obviously) |
-// D2. no contact operation between two quotings |
-// '.''.' is not .. it is .'. |
-// D3. "two single quotes" cannot contact two quoting simultaneously |
-// '..''''.' is not ..'. it is ..''. |
-// NOTICE: |
-// "two single quotes" can contact before one quoting |
-// '''.' is '. |
-// "two single quotes" can literally contact after one quoting |
-// But, from syntax, it's one quoting including a "two single quotes" |
-// '.''' is .' |
-// D4. "two single quotes" cannot solely be included in quoting |
-// '''' is not ' it is '' |
-// NOTICE: These are legal |
-// '.''.' is .'. |
-// '.''' is .' |
-// |
-// dicision |
-// /\ |
-// /__\ |
-// output buffer input buffer |
-// |
-// To make our dicision (within an atom operation) without caring input and output buffer, |
-// following calling pattern (within an atom operation) shall be avoided |
-// |
-// P1 open_quoting() then close_quoting() (direct violation) D1 |
-// P2 close_quoting() then open_quoting() (direct violation) D2 |
-// P3 empty open_quoting() (indirect violation) D1, D4 |
-// P4 empty close_quoting() (indirect violation) D2, D3 |
-// P5 open_quoting() then two single quotes (indirect violation) D4 |
-// P6 close_quoting() then two single quotes (indirect violation) D3 |
-// |
-// two single quotes escaping will not open_ or close_ quoting() |
-// The choice will not lose some quoing forms. |
-// |
-// For open_quoting(), |
-// we may get this form quoting ''' P5 |
-// It may raise a bug ''''x |
-// If we expect |
-// '''.' let the next char open the quoting |
-// '.''.' the quoting is already opened by preceding char |
-// |
-// For close_quoting() |
-// we will get this form quoting '.''' P6 |
-// It may raise a bug '.''''.' |
-// If we expect |
-// '.'''\. let the next char close the quoting |
-// '.''''.' the expectation is wrong! using '.'\''.' instead |
-// |
-// It's a hard work to re-adjust generation opportunity for various escaping form. |
-// We just simply ignore it. |
-*/ |
-class Escaper{ |
-public: |
- enum CHOICE {YES, NO, RAND}; |
- enum ESCAPE_FORM {BSLASH_ONLY, QUOTE_ONLY, QUOTE_AND_BSLAH, RAND_ESC}; |
-private: |
- class Bool{ // A wrapper class for CHOICE, to auto adapter UBool class |
- private: |
- const CHOICE tag; |
- public: |
- Bool(CHOICE flag=RAND):tag(flag){} |
- operator UBool() { // conversion operator |
- return tag == RAND ? rand()%2 : tag == YES; |
- //if (tag == RAND){ |
- // return rand()%2 == 1; |
- //} else { |
- // return tag == YES ? TRUE : FALSE; |
- //} |
- } |
- }; |
-public: |
- Escaper(CHOICE escapeLiteral = RAND, |
- CHOICE twoQuotesEscape = RAND, |
- ESCAPE_FORM escapeForm = RAND_ESC): |
- escape_form(escapeForm), |
- escape_literal(escapeLiteral), |
- two_quotes_escape(twoQuotesEscape), |
- is_quoting(FALSE){} |
-private: |
- Buffer_char str; |
- ESCAPE_FORM escape_form; |
- Bool escape_literal; |
- Bool two_quotes_escape; |
- UBool quote_escape; |
- UBool bslash_escape; |
- UBool is_quoting; |
- |
- void set_options(){ |
- ESCAPE_FORM t = escape_form == RAND_ESC ? (ESCAPE_FORM) (rand()%3) : escape_form; |
- switch (t){ |
- case BSLASH_ONLY : |
- bslash_escape = TRUE; quote_escape = FALSE; break; |
- case QUOTE_ONLY: |
- bslash_escape = FALSE;quote_escape = TRUE; break; |
- case QUOTE_AND_BSLAH: |
- bslash_escape = TRUE; quote_escape = TRUE; break; |
- default: |
- ;// error |
- } |
- } |
- |
- void reset(){ |
- str.reset(); |
- is_quoting = FALSE; |
- } |
- |
- inline void open_quoting(){ |
- if(is_quoting){ |
- // do nothing |
- } else { |
- str.append('\''); |
- is_quoting = TRUE; |
- } |
- } |
- inline void close_quoting(){ |
- if(is_quoting){ |
- str.append('\''); |
- is_quoting = FALSE; |
- } else { |
- // do nothing |
- } |
- } |
- |
- // str [in] null-terminated c-string |
- void append(const char * strToAppend){ |
- for(;*strToAppend != 0; strToAppend++){ |
- append(*strToAppend); |
- } |
- } |
- |
- inline void append(const char c){ |
- set_options(); |
- |
- if (c == '\\'){ |
- quote_escape ? open_quoting() : close_quoting(); |
- //bslash_escape always true here |
- str.append('\\'); |
- str.append('\\'); |
- } else if (c == '\''){ |
- if (two_quotes_escape){ // quoted using two single quotes |
- // See documents in anonymous.design |
- str.append('\''); |
- str.append('\''); |
- } else{ |
- quote_escape ? open_quoting() : close_quoting(); |
- //bslash_escape always true here |
- str.append('\\'); |
- str.append('\''); |
- } |
- } else if (isSpecialAsciiChar(c) || isWhiteSpace(c)){ |
- quote_escape ? open_quoting() : close_quoting(); |
- if (bslash_escape) str.append('\\'); |
- str.append(c); |
- } else { //if (isAlphabet(c) || isDigit(c) || TRUE){ // treat others as literal |
- if (escape_literal){ |
- quote_escape ? open_quoting() : close_quoting(); |
- if (bslash_escape) str.append('\\'); |
- str.append(c); |
- } else { |
- close_quoting(); |
- str.append(c); |
- } |
- } |
- } |
- |
-public: |
- // Return a null-terminate c-string. The buffer is owned by callee. |
- char * operator()(const char * literal /*c-string*/){ |
- str.reset(); |
- for(;*literal != 0; literal++){ |
- append(*literal); |
- } |
- close_quoting(); // P4 exception, to close whole quoting |
- return str; |
- } |
-}; |
- |
-class WeightedRand{ |
-// Return a random number in [0, size) |
-// Every number has different chance (aka weight) to be selected. |
-private: |
- Buffer_int weights; |
- double total; |
- WeightedRand(const WeightedRand &); |
- WeightedRand & operator = (const WeightedRand &); |
-public: |
- WeightedRand(Buffer_int * weight_list = NULL, int size = 0){ |
- if ( weight_list == NULL){ |
- for (int i=0; i<size; ++i) weights.append(DEFAULT_WEIGHT); |
- } else { |
- int s = weight_list->content_size(); |
- if (s < size){ |
- weights.append_array( (*weight_list),s); |
- for (int i=s; i<size; ++i) weights.append(DEFAULT_WEIGHT); |
- } else { // s >= size |
- weights.append_array( (*weight_list),size); |
- } |
- } |
- total = 0; |
- int c = weights.content_size(); |
- for (int i=0; i<c; ++i){ |
- total += weights[i]; |
- } |
- } |
- |
- void append(int weight){ |
- weights.append(weight); |
- total += weight; |
- } |
- |
- // Give a random number with the consideration of weight. |
- // Every random number is associated with a weight. |
- // It identifies the chance to be selected, |
- // larger weight has more chance to be selected. |
- // |
- // |
- // ______________________ every slot has equal chance |
- // |
- // [____][_][___][______] each item has different slots, hence different chance |
- // |
- // |
- // The algorithms to generate the number is illustrated by preceding figure. |
- // First, a slot is selected by rand(). Then we translate the slot to corresponding item. |
- // |
- int next(){ |
- // get a random in [0,1] |
- double reference_mark = (double)rand() / (double)RAND_MAX; |
- |
- // get the slot's index, 0 <= mark <= total; |
- double mark = total * reference_mark; |
- |
- // translate the slot to corresponding item |
- int i=0; |
- for (;;){ |
- mark -= weights[i]; // 0 <= mark <= total |
- if (mark <= 0) |
- break; |
- i++; |
- } |
- return i; |
- } |
-}; |
- |
-/////////////////////////////////////////////////////////// |
-// |
-// The parser result nodes |
-// |
- |
-class Literal : public Pick { |
-public: |
- virtual const char* next(){ |
- return str; |
- } |
- Literal(const char * s /*c-string*/){ |
- str.append_array(s, strlen(s) + 1); |
- } |
-private: |
- Buffer_char str; //null-terminated c-string |
-}; |
- |
-class Variable : public Pick { |
-public: |
- Variable(SymbolTable * symbols, const char * varName, Pick * varRef = NULL){ |
- this->var_name.append_array(varName, strlen(varName) + 1); |
- if ((symbol_table = symbols)){ |
- symbol_table->put(varName, varRef); |
- } |
- } |
- |
- operator const char *(){ |
- return var_name; |
- } |
- |
- virtual const char* next(){ |
- if (symbol_table){ |
- Pick * var_ref = NULL; |
- symbol_table->find(var_name, &var_ref); |
- if (var_ref) { |
- return var_ref->next(); |
- } |
- } |
- return ""; // dumb string |
- } |
-private: |
- Buffer_char var_name; |
- SymbolTable * symbol_table; |
-}; |
- |
-class Quote : public Pick{ |
-public: |
- Quote(Pick & base):item(base),e(Escaper::NO, Escaper::NO, Escaper::BSLASH_ONLY){ |
- } |
- virtual const char* next(){ |
- return e(item.next()); |
- } |
-private: |
- Pick & item; |
- Buffer_char str; |
- Escaper e; |
-}; |
- |
- |
-class Morph : public Pick{ |
-/* |
-The difference between morph and an arbitrary random string is that |
-a morph changes slowly. When we build collation rules, for example, |
-it is a much better test if the strings we use are all in the same |
-'neighborhood'; they share many common characters. |
-*/ |
-public: |
- Morph(Pick & base):item(base){} |
- |
- virtual const char* next(){ |
- current.reset(); |
- const char * s = item.next(); |
- current.append_array(s, strlen(s) + 1); |
- if (last.content_size() == 0) { |
- str.reset(); |
- last.reset(); |
- str.append_array(current, current.content_size()); |
- last.append_array(current, current.content_size()); |
- } else { |
- morph(); |
- } |
- return str; |
- } |
-private: |
- Pick & item; |
- Buffer_char str; |
- Buffer_char last; |
- Buffer_char current; |
- |
- char * p_last; |
- char * p_curr; |
- |
- void copy_curr(){ |
- if (*p_curr) { |
- str.append(*p_curr); |
- p_curr++; |
- } |
- } |
- |
- void copy_last(){ |
- if (*p_last) { |
- str.append(*p_last); |
- p_last++; |
- } |
- } |
- |
- // copy 0, 1, or 2 character(s) to str |
- void copy(){ |
- static WeightedRand wr(& Buffer_int().append(DEFAULT_WEIGHT * 10), 5); |
- |
- switch (wr.next()){ |
- case 0: // copy last -- has 10 times chance than others |
- copy_last(); |
- break; |
- case 1: // copy both |
- copy_curr(); |
- copy_last(); |
- break; |
- case 2: // copy both |
- copy_last(); |
- copy_curr(); |
- break; |
- case 3: |
- copy_curr(); |
- break; |
- case 4: // copy nothing |
- break; |
- default: |
- // ASSERT(FALSE); |
- ; |
- } |
- } |
- |
- void morph(void){ |
- int min = strlen(last); |
- int max = strlen(current); |
- if (min > max){ |
- int temp = min; |
- min = max; |
- max = temp; |
- } |
- |
- int len = min + rand()%(max - min + 1); // min + [0, diff] |
- p_curr = current; |
- p_last = last; |
- str.reset(); |
- |
- for (; str.content_size()<len && *p_curr && *p_last;){ |
- copy(); // copy 0, 1, or 2 character(s) to str |
- } |
- |
- if (str.content_size() == len) { |
- str.append(0); |
- final(); |
- return; |
- } |
- |
- if (str.content_size() > len) { // if the last copy copied two characters |
- str[len]=0; |
- final(); |
- return; |
- } |
- |
- // str.content_size() < len |
- if (*p_last) { |
- for (; str.content_size() < len; copy_last()); |
- } else if (*p_curr){ |
- for (; str.content_size() < len; copy_curr()); |
- } |
- |
- int last_len = last.content_size(); |
- for (;str.content_size() < len;){ |
- str.append(last[rand()%last_len]); |
- } |
- str.append(0); |
- final(); |
- } |
- |
- void final(){ |
- last.reset(); |
- last.append_array(current, current.content_size()); |
- } |
-}; |
- |
-class Sequence : public Pick { |
-public: |
- virtual const char* next(){ |
- str.reset(); |
- int s = items.content_size(); |
- for(int i=0; i < s; i++){ |
- const char * t = items[i]->next(); |
- str.append_array(t, strlen(t)); |
- } |
- str.append(0); // terminal null |
- return str; |
- } |
- |
- void append (Pick * node){ |
- items.append(node); |
- } |
- |
- virtual ~Sequence(){ |
- int s = items.content_size(); |
- for(int i=0; i < s; i++){ |
- //How can assure the item is got from heap? |
- //Let's assume it. |
- delete items[i]; // TOFIX: point alias/recursion problem |
- items[i] = NULL; |
- } |
- } |
-private: |
- Buffer_pPick items; |
- Buffer_char str; //null-terminated c-string |
-}; |
- |
-class Repeat : public Pick { |
-private: |
- Pick * item; |
- Buffer_char str; |
- WeightedRand wr; |
- int min; |
- int max; |
- int select_a_count(){ |
- return min + wr.next(); |
- } |
-public: |
- virtual const char* next(){ |
- str.reset(); |
- int c = select_a_count(); |
- for(int i=0; i< c; i++){ |
- const char * t = item->next(); |
- str.append_array(t, strlen(t)); |
- } |
- str.append(0); |
- return str; |
- } |
- |
- Repeat(Pick * base, int minCount =0, int maxCount = 1, Buffer_int * weights = NULL): |
- wr(weights, maxCount-minCount +1) { |
- this->item = base; |
- this->min = minCount; |
- this->max = maxCount; |
- } |
- virtual ~Repeat(){ |
- delete item; // TOFIX: point alias/recursion problem |
- item = NULL; |
- } |
-}; |
- |
- |
-class Alternation : public Pick { |
-public: |
- virtual const char* next(){ |
- str.reset(); |
- int i = wr.next(); |
- const char * t = items[i]->next(); |
- str.append_array(t, strlen(t) + 1); |
- return str; |
- } |
- virtual ~Alternation(){ |
- int s = items.content_size(); |
- for(int i=0; i < s; i++){ |
- delete items[i]; // TOFIX: point alias/recursion problem |
- items[i] = NULL; |
- } |
- } |
- |
- Alternation & append (Pick * node, int weight = DEFAULT_WEIGHT){ |
- items.append(node); |
- wr.append(weight); |
- return *this; |
- } |
-private: |
- Buffer_pPick items; |
- Buffer_char str; // null-terminated c-string |
- WeightedRand wr; |
-}; |
- |
-/////////////////////////////////////////////////////////// |
-// |
-// The parser |
-// |
- |
-enum TokenType {STRING, VAR, NUMBER, STREAM_END, ERROR, QUESTION, STAR, PLUS, LBRACE, RBRACE, LPAR, RPAR, SEMI, EQ, COMMA, BAR, AT, WAVE, PERCENT}; |
- |
-class Scanner{ |
-friend int DumpScanner(Scanner & s, UBool dumb); |
-private: |
- const char * source; |
- const char * working; |
- const char * history; // for debug |
- enum StateType {START, IN_NUM, IN_VAR_FIRST, IN_VAR, IN_QUOTE, IN_QUOTE_BSLASH, IN_BSLASH, IN_STRING, DONE}; |
- StateType state; |
- void terminated(TokenType t){ |
- working--; // return the peeked character |
- tokenType = t; |
- token.append(0); // close buffer |
- state = DONE; |
- } |
-public: |
- // the buffer of "source" is owned by caller |
- Scanner(const char *src/*[in] c-string*/ = NULL):source(src){ |
- working = src; |
- history = working; |
- state = DONE; |
- tokenType = ERROR; |
- } |
- |
- //void setSource(const char *const src /*[in] c-string*/){ |
- // *(&const_cast<const char *>(source)) = src; |
- //} |
- |
- Buffer_char token; |
- TokenType tokenType; |
- |
- TokenType getNextToken(){ |
- token.reset(); |
- state = START; |
- history = working; // for debug |
- while (state != DONE){ |
- char c = *working++; |
- if (c == 0 && state != START){//avoid buffer overflow. for IN_QUOE, IN_ESCAPE |
- terminated(ERROR); |
- break; // while |
- } |
- switch(state){ |
- case START: |
- tokenType = ERROR; |
- switch(c){ |
- case '?' : tokenType = QUESTION; break; |
- case '*' : tokenType = STAR; break; |
- case '+' : tokenType = PLUS; break; |
- case '{' : tokenType = LBRACE; break; |
- case '}' : tokenType = RBRACE; break; |
- case '(' : tokenType = LPAR; break; |
- case ')' : tokenType = RPAR; break; |
- case ';' : tokenType = SEMI; break; |
- case '=' : tokenType = EQ; break; |
- case ',' : tokenType = COMMA; break; |
- case '|' : tokenType = BAR; break; |
- case '@' : tokenType = AT; break; |
- case '~' : tokenType = WAVE; break; |
- case '%' : tokenType = PERCENT; break; |
- case 0 : tokenType = STREAM_END; working-- /*avoid buffer overflow*/; break; |
- } |
- if (tokenType != ERROR){ |
- token.append(c); |
- token.append(0); |
- state = DONE; |
- break; // START |
- } |
- switch(c){ |
- case '$' : state = IN_VAR_FIRST; token.append(c); break; |
- case '\'' : state = IN_QUOTE; break; |
- case '\\' : state = IN_BSLASH; break; |
- default: |
- if (isWhiteSpace(c)){ // state = START; //do nothing |
- } else if (isDigit(c)){ state = IN_NUM; token.append(c); |
- } else if (isAlphabet(c)){ state = IN_STRING; token.append(c); |
- } else {terminated(ERROR);} |
- } |
- break;//START |
- case IN_NUM: |
- if (isDigit(c)){ |
- token.append(c); |
- } else { |
- terminated(NUMBER); |
- } |
- break;//IN_NUM |
- case IN_VAR_FIRST: |
- if (isAlphabet(c)){ |
- token.append(c); |
- state = IN_VAR; |
- } else { |
- terminated(ERROR); |
- } |
- break; // IN_VAR_FISRT |
- case IN_VAR: |
- if (isAlphabet(c) || isDigit(c)){ |
- token.append(c); |
- } else { |
- terminated(VAR); |
- } |
- break;//IN_VAR |
- case IN_STRING: |
- // About the scanner's behavior for STRING, AT, and ESCAPE: |
- // All of them can be contacted with each other. |
- // This means the scanner will eat up as much as possible strings |
- // (STRING, AT, and ESCAPE) at one time, with no regard of their |
- // combining sequence. |
- // |
- if (c == '\''){ |
- state = IN_QUOTE; // the first time we see single quote |
- } else if (c =='\\'){ // back slash character |
- state = IN_BSLASH; |
- } else if (isAlphabet(c) || isDigit(c)){ |
- token.append(c); |
- } else{ |
- terminated(STRING); |
- } |
- break;//IN_STRING |
- case IN_QUOTE: |
- if (c == '\''){ // the second time we see single quote |
- state = IN_STRING; // see document in IN_STRING |
- } else if ( c== '\\') { // backslah escape in quote |
- state = IN_QUOTE_BSLASH; |
- } else { |
- token.append(c); // eat up everything, includes back slash |
- } |
- break;//IN_QUOTE |
- case IN_QUOTE_BSLASH: |
- case IN_BSLASH: |
- switch (c){ |
- case 'n' : token.append('\n'); break; |
- case 'r' : token.append('\r'); break; |
- case 't' : token.append('\t'); break; |
- case '\'' : token.append('\''); break; |
- case '\\' : token.append('\\'); break; |
- default: token.append(c); // unknown escaping, treat it as literal |
- } |
- if (state == IN_BSLASH){ |
- state = IN_STRING; // see document in IN_STRING |
- } else { // state == IN_QUOTE_BSLASH |
- state = IN_QUOTE; |
- } |
- break;//IN_BSLASH |
- case DONE: /* should never happen */ |
- default: |
- working--; |
- tokenType = ERROR; |
- state = DONE; |
- break; |
- }//switch(state) |
- }//while (state != DONE) |
- |
- return tokenType; |
- } |
-};//class Scanner |
- |
-class Parser{ |
-friend UBool TestParser(); |
-friend class TestParserT; |
-friend class LanguageGenerator_impl; |
-private: |
- Scanner s; |
- TokenType & token; |
- int min_max; // for the evil infinite |
- |
- UBool match(TokenType expected){ |
- if (token == expected) { |
- token = s.getNextToken(); |
- return TRUE; |
- } else { |
- //s.dumpCurrentPoint(); |
- return FALSE; |
- } |
- } |
- |
- UBool weight(int & value){ |
- if (token == NUMBER){ |
- int temp = atoi(s.token); |
- match(NUMBER); |
- if (match(PERCENT)){ |
- value = temp; |
- return TRUE; |
- } |
- } |
- return FALSE; |
- } |
- |
- UBool repeat (Pick* &node /*in,out*/){ |
- if (node == NULL) return FALSE; |
- |
- int count = -2; |
- int min = -2; |
- int max = -2; |
- UBool question = FALSE; |
- switch (token){ |
- case QUESTION: |
- match(QUESTION); |
- min = 0; |
- max = 1; |
- count = 2; |
- question = TRUE; |
- break; |
- case STAR: |
- match(STAR); |
- min = 0; |
- max = -1; |
- count = -1; |
- break; |
- case PLUS: |
- match(PLUS); |
- min = 1; |
- max = -1; |
- count = -1; |
- break; |
- case LBRACE: |
- match(LBRACE); |
- if (token != NUMBER){ |
- return FALSE; |
- }else { |
- min = atoi(s.token); |
- match(NUMBER); |
- if (token == RBRACE){ |
- match(RBRACE); |
- max = min; |
- count = 1; |
- } else if (token == COMMA) { |
- match(COMMA); |
- if (token == RBRACE){ |
- match(RBRACE); |
- max = -1; |
- count = -1; |
- } else if (token == NUMBER) { |
- max = atoi(s.token); |
- match(NUMBER); |
- count = max - min + 1; |
- if (!match(RBRACE)) { |
- return FALSE; |
- } |
- } else { |
- return FALSE; |
- } |
- } else { |
- return FALSE; |
- } |
- } |
- break; |
- default: |
- return FALSE; |
- } |
- |
- if (count == -2 || min == -2 || max == -2){ |
- //ASSERT(FALSE); |
- return FALSE; |
- } |
- |
- // eat up following weights |
- Buffer_int weights; |
- int w; |
- while (weight(w)){ |
- weights.append(w); |
- } |
- |
- // for the evil infinite |
- min_max = min_max > min ? min_max : min; |
- min_max = min_max > max ? min_max : max; |
- if (min_max > PSEUDO_INFINIT){ |
- return FALSE; // PSEUDO_INFINIT is less than the real maximum |
- } |
- if (max == -1){ // the evil infinite |
- max = PSEUDO_INFINIT; |
- } |
- // for the strange question mark |
- if (question && weights.content_size() > 0){ |
- Buffer_int w2; |
- w2.append(DEFAULT_WEIGHT - weights[0]).append(weights[0]); |
- node = new Repeat(node,min,max,&w2); |
- return TRUE; |
- } |
- node = new Repeat(node,min,max,&weights); |
- return TRUE; |
- } |
- |
- UBool core(Pick* &node /*out*/){ |
- if (node != NULL) return FALSE; //assert node == NULL |
- |
- switch(token){ |
- case LPAR: |
- match(LPAR); |
- if(defination(node) && match(RPAR)){ |
- return TRUE; |
- } |
- return FALSE; |
- case VAR: |
- node = new Variable(&symbols, s.token); |
- match(VAR); |
- return TRUE; |
- case STRING: |
- node = new Literal(s.token); |
- match(STRING); |
- return TRUE; |
- default: |
- return FALSE; |
- } |
- } |
- UBool modified(Pick* &node /*out*/){ |
- if (node != NULL) return FALSE; //assert node == NULL |
- |
- if (!core(node)) { |
- return FALSE; |
- } |
- |
- for (;;){ |
- switch(token){ |
- case WAVE: |
- match(WAVE); |
- node = new Morph(*node); |
- break; |
- case AT: |
- match(AT); |
- node = new Quote(*node); |
- break; |
- case QUESTION: |
- case STAR: |
- case PLUS: |
- case LBRACE: |
- if (!repeat(node)) return FALSE; |
- break; |
- case SEMI: // rule definiation closed |
- case RPAR: // within parenthesis (core closed) |
- case BAR: // in alternation |
- case NUMBER: // in alternation, with weight |
- case LPAR: // in sequence |
- case VAR: // in sequence |
- case STRING: // in sequence |
- return TRUE; |
- default: |
- return FALSE; |
- } |
- } |
- } |
- |
- |
- UBool sequence_list(Pick* &node /*in,out*/){ |
- if (node == NULL) return FALSE; // assert node != NULL |
- |
- Sequence* seq = new Sequence(); |
- Pick * n = node; |
- |
- while (token == VAR || token == STRING || token == LPAR){ |
- seq->append(n); |
- n = NULL; |
- if (modified(n)){ |
- // go on |
- } else { |
- goto FAIL; |
- } |
- } |
- |
- if (token == SEMI || token == RPAR || token == BAR){ |
- seq->append(n); |
- node = seq; |
- return TRUE; |
- } |
-FAIL: |
- delete seq; |
- return FALSE; |
- |
- } |
- |
- UBool sequence(Pick* &node /*out*/){ |
- if (node != NULL) return FALSE; //assert node == NULL |
- |
- if (!modified(node)) { |
- return FALSE; |
- } |
- |
- if (token == VAR || token == STRING || token == LPAR){ |
- return sequence_list(node); |
- } else { |
- return TRUE; // just a modified |
- } |
- } |
- |
- UBool alternation_list(Pick* &node /*in,out*/){ |
- if (node == NULL) return FALSE; // assert node != NULL |
- |
- Alternation * alt = new Alternation(); |
- Pick * n = node; |
- int w = DEFAULT_WEIGHT; |
- |
- while (token == NUMBER || token == BAR){ |
- if(token == NUMBER) { |
- if (weight(w)){ |
- if (token == BAR){ |
- // the middle item, go on |
- } else { |
- // the last item or encounter error |
- break; //while |
- } |
- } else { |
- goto FAIL; |
- } |
- } // else token == BAR |
- match(BAR); |
- alt->append(n,w); |
- |
- n = NULL; |
- w = DEFAULT_WEIGHT; |
- if (sequence(n)){ |
- // go on |
- } else { |
- goto FAIL; |
- } |
- } |
- |
- if (token == SEMI || token == RPAR) { |
- alt->append(n,w); |
- node = alt; |
- return TRUE; |
- } |
-FAIL: |
- delete alt; |
- return FALSE; |
- } |
- |
- UBool alternation(Pick* &node /*out*/){ |
- if (node != NULL) return FALSE; //assert node == NULL |
- |
- // 'sequence' has higher precedence than 'alternation' |
- if (!sequence(node)){ |
- return FALSE; |
- } |
- |
- if (token == BAR || token == NUMBER){ // find a real alternation1, create it. |
- return alternation_list(node); |
- } else { |
- return TRUE; // just a sequence_old |
- } |
- } |
- |
- |
- UBool defination(Pick* &node /*out*/){ |
- if (node != NULL) return FALSE; //assert node == NULL |
- return alternation(node); |
- } |
- |
- UBool rule(){ |
- if (token == VAR){ |
- Buffer_char name; |
- name.append_array(s.token, strlen(s.token) + 1); |
- match(VAR); |
- |
- if (match(EQ)){ |
- Pick * t = NULL; |
- if(defination(t)){ |
- symbols.put(name, t); |
- return match(SEMI); |
- } |
- } |
- } |
- return FALSE; |
- } |
-public: |
- UBool rules(){ |
- symbols.reset(); |
- token = s.getNextToken(); |
- while (rule()){ |
- } |
- if (token == STREAM_END){ |
- return TRUE; |
- } else { |
- //s.dumpCurrentPoint(); |
- return FALSE; |
- } |
- } |
- |
-public: |
- SymbolTable symbols; |
- |
- Parser(const char *const source):s(source), token(s.tokenType){ |
- min_max = -2; |
- } |
- UBool parse(){ |
- return rules(); |
- } |
- |
-}; // class Parser |
- |
- |
-/////////////////////////////////////////////////////////// |
-// |
-// |
-// |
- |
-int DumpScanner(Scanner & s, UBool dump = TRUE){ |
- int len = strlen(s.source); |
- int error_start_offset = s.history - s.source; |
- if (dump){ |
- printf("\n=================== DumpScanner ================\n"); |
- fwrite(s.source, len, 1, stdout); |
- printf("\n-----parsed-------------------------------------\n"); |
- fwrite(s.source, s.history - s.source, 1, stdout); |
- printf("\n-----current------------------------------------\n"); |
- fwrite(s.history, s.working - s.history, 1, stdout); |
- printf("\n-----unparsed-----------------------------------\n"); |
- fwrite(s.working, (s.source + len - s.working), 1, stdout); |
- printf("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"); |
- } |
- return error_start_offset; |
-} |
- |
-class LanguageGenerator_impl{ |
-public: |
- LanguageGenerator_impl(const char *const bnf_definition, const char *const top_node) |
- :par(bnf_definition), top_node_name(top_node){ |
- srand((unsigned)time( NULL )); |
- } |
- |
- LanguageGenerator::PARSE_RESULT parseBNF(UBool debug = TRUE){ |
- if (par.parse()){ |
- if (par.symbols.find(top_node_name, &top_node_ref) == SymbolTable::HAS_REF) { |
- if (par.symbols.is_complete()) { |
- return LanguageGenerator::OK; |
- } else { |
- if (debug) printf("The bnf definition is incomplete.\n"); |
- return LanguageGenerator::INCOMPLETE; |
- } |
- } else { |
- if (debug) printf("No top node is found.\n"); |
- return LanguageGenerator::NO_TOP_NODE; |
- } |
- } else { |
- if(debug) { |
- printf("The bnf definition is wrong\n"); |
- DumpScanner(par.s, TRUE); |
- } |
- return LanguageGenerator::BNF_DEF_WRONG; |
- } |
- } |
- const char * next(){ |
- return top_node_ref->next(); |
- } |
- |
-private: |
- Parser par; |
- const char *const top_node_name; |
- Pick * top_node_ref; |
-}; |
- |
-LanguageGenerator::LanguageGenerator():lang_gen(NULL){ |
-} |
- |
-LanguageGenerator::~LanguageGenerator(){ |
- delete lang_gen; |
-} |
- |
-LanguageGenerator::PARSE_RESULT LanguageGenerator::parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug){ |
- if (lang_gen){ |
- delete lang_gen; |
- } |
- lang_gen = new LanguageGenerator_impl(bnf_definition, top_node); |
- PARSE_RESULT r = lang_gen->parseBNF(debug); |
- if (r != OK){ |
- delete lang_gen; |
- lang_gen = NULL; |
- return r; |
- } else { |
- return r; |
- } |
-} |
-const char *LanguageGenerator::next(){ // Return a null-terminated c-string. The buffer is owned by callee. |
- if (lang_gen){ |
- return lang_gen->next(); |
- }else { |
- return ""; |
- } |
-} |
- |
-/////////////////////////////////////////////////////////// |
-// |
-// The test code for WBNF |
-// |
- |
-#define CALL(fun) \ |
- if (fun()){ \ |
- printf("Pass: " #fun "\n");\ |
- } else { \ |
- printf("FAILED: !!! " #fun " !!!\n"); \ |
- } |
- |
-#define DUMP_R(fun, var, times) \ |
- {printf("\n========= " #fun " =============\n"); \ |
- for (int i=0; i<times; i++) { \ |
- const char * t = var.next();\ |
- fwrite(t,strlen(t),1,stdout); \ |
- printf("\n"); \ |
- } \ |
- printf("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");} |
- |
- |
- |
-#if TEST_WBNF_TEST |
-static UBool TestQuote(){ |
- const char *const str = "This ' A !,z| qq [] .new\tline"; |
- //const char *const str_r = "This \\' A '!,'z'|' qq '[]' '.'new\tline"; |
- //// |
- //// :( we must quote our string to following C syntax |
- //// cannot type the literal here, it makes our code rather human unreadable |
- //// very very unconformable! |
- //// |
- ///* |
- //*/ |
- |
- //const char *const s1 = "ab'c"; |
- //const char (* s1_r1) [] = { "ab''c", // ab''c |
- // "ab\\'c", // ab\'c |
- // };// |
- ///* |
- // . '.' \. |
- // .. \.\. '.'\. '.'\. '..' // '.''.' wrong |
- //*/ |
- |
- //const char *const s2 = "a..'.b"; // a..'.b |
- //const char (*s2_r) [] = { "a'..''.'b" // a'..''.'b |
- // ,"a'..\\'.'b" // a'..\'.'b |
- // ,"a'..'\\''.'b" // a'..'\''.'b |
- // };// |
- |
- //const char *const s3 = "a..\\.b"; // a..\.b |
- //const char (*s3_r) [] = { "a'..\\\\.'b" // a'..\\.'b |
- // ,"a'..'\\\\'.'b" // a'..'\\'.'b |
- // };// |
- |
- // // no catact operation, no choice, must be compact |
- |
- srand((unsigned)time( NULL )); |
- |
- //Escaper l(Escaper::NO, Escaper::NO, Escaper::RAND_ESC); |
- Pick *p = new Literal(str); |
- Quote q(*p); |
- |
- DUMP_R(TestQuote, (*p), 1); |
- DUMP_R(TestQuote, q, 20); |
- return FALSE; |
-} |
-static UBool TestLiteral(){ |
- const char * s = "test string99."; |
- Literal n(s); |
- const char * r = n.next(); |
- return strcmp(s,r) == 0; |
-} |
- |
-static UBool TestSequence(){ |
- Sequence seq; |
- seq.append(new Literal("abc ")); |
- seq.append(new Literal(", s")); |
- |
- return strcmp(seq.next(), "abc , s") == 0; |
-} |
-static UBool TestAlternation(){ |
- srand((unsigned)time( NULL )); |
- Alternation alt; |
- alt.append(new Literal("aaa_10%"),10); |
- alt.append(new Literal("bbb_0%"),0); |
- alt.append(new Literal("ccc_10%"),10); |
- alt.append(new Literal("ddddddd_50%"),50); |
- |
- DUMP_R(TestAlternation, alt, 50); |
- |
- return FALSE; |
-} |
- |
-static UBool TestBuffer(){ |
- Buffer_int t; |
- t.append(1).append(0).append(5); |
- int s = t.content_size(); |
- for (int i=0; i<s; ++i){ |
- printf("%d\n", t[i]); |
- } |
- return FALSE; |
-} |
- |
-static UBool TestWeightedRand(){ |
- srand((unsigned)time( NULL )); |
- Buffer_int t; |
- t.append(1).append(0).append(5); |
- WeightedRand wr(&Buffer_int().append(10).append(0).append(50),4); |
-// WeightedRand wr(&t,3); |
- for (int i=0; i< 50; ++i){ |
- printf("%d\n", wr.next()); |
- } |
- return FALSE; |
-} |
- |
-static UBool TestRepeat(){ |
- srand((unsigned)time( NULL )); |
- Repeat rep(new Literal("aaa1-5 "), 1, 5); |
- DUMP_R(TestRepeat, rep, 50); |
- |
- Repeat r2(new Literal("b{1,3}1%0%5% "), 1, 3, &Buffer_int().append(1).append(0).append(5)); |
- DUMP_R(TestRepeat, r2, 50); |
- |
- Repeat r3(new Literal("aaa5-5 "), 5, 5); |
- DUMP_R(TestRepeat, r3, 50); |
- |
- return FALSE; |
-} |
- |
-static UBool TestVariable(){ |
- SymbolTable tab; |
- Pick * value = new Literal("string1"); |
- Variable var1(&tab, "x", value); |
- |
- Variable var2(&tab, "y"); |
-// tab.put(var2, value); // TOFIX: point alias/recursion problem |
- Pick * value2 = new Literal("string2"); |
- tab.put(var2, value2); |
- |
- Pick * value3 = new Literal("string3"); |
- Variable var3(&tab, "z"); |
- tab.put("z", value3); |
- |
- UBool pass; |
- pass = strcmp(var1.next(), value->next()) == 0; |
- pass = pass && strcmp(var2.next(), value2->next()) == 0; |
- pass = pass && strcmp(var3.next(), value3->next()) == 0; |
- return pass; |
-} |
- |
-static UBool TestSymbolTable(){ |
- Literal * n1 = new Literal("string1"); |
- Literal * n2 = new Literal("string2"); |
- SymbolTable t; |
- t.put("abc", n1); |
- t.put("$aaa", n2); |
-// t.put("alias", n1); // TOFIX: point alias/recursion problem |
- t.put("bbb"); |
- |
- UBool pass; |
- pass = t.find(NULL) == SymbolTable::EMPTY; |
- pass = pass && t.find("ccc") == SymbolTable::NO_VAR; |
- pass = pass && t.find("bbb") == SymbolTable::NO_REF; |
- pass = pass && t.find("abc") == SymbolTable::HAS_REF; |
- pass = pass && t.find("$aaa") == SymbolTable::HAS_REF; |
- |
- t.reset(); |
- pass = pass && t.find("abc") == SymbolTable::NO_VAR; |
- return pass; |
-} |
- |
- |
-static UBool TestScanner(void){ |
- //const char str1[] = "$root = $command{0,5} $reset $mostRules{1,20};"; |
- //const char str1_r[][20] = {"$root", "=", "$command", "{", "0", ",", "5", "}", |
- // "$reset", "$mostRules", "{", "1", ",", "20", "}", ";"}; |
- |
- const char str2[] = "$p2 =(\\\\ $s $string $s)? 25%;"; |
- const char str2_r[][20] = {"$p2", "=", "(", "\\", "$s", "$string", "$s", ")", "?", "25", "%", ";"}; |
- |
- const char *str = str2; |
- const char (*str_r)[20] = str2_r; |
- int tokenNum = sizeof(str2_r)/sizeof(char[20]); |
- |
- Scanner t(str); |
- UBool pass = TRUE; |
- t.getNextToken(); |
- int i = 0; |
- while (pass){ |
- if (t.tokenType == STREAM_END){ |
- pass = pass? i == tokenNum : FALSE; |
- break;//while |
- } else if (t.tokenType == ERROR){ |
- pass = FALSE; |
- break;//while |
- } else { |
- pass = strcmp( &(t.token[0]), str_r[i++]) == 0; |
- t.getNextToken(); |
- } |
- } |
- |
- //const char ts[] = "$commandList = '['" |
- //" ( alternate ' ' $alternateOptions" |
- //" | backwards ' 2'" |
- //" | normalization ' ' $onoff " |
- //" | caseLevel ' ' $onoff " |
- //" | hiraganaQ ' ' $onoff" |
- //" | caseFirst ' ' $caseFirstOptions" |
- //" | strength ' ' $strengthOptions" |
- //" ) ']';" ; |
- |
- //Scanner t2(ts); |
- //pass = TRUE; |
- //do { |
- // t2.getNextToken(); |
- // if (t2.tokenType == ERROR){ |
- // DumpScanner(t2); |
- // return FALSE; |
- // } |
- //}while (t.tokenType != STREAM_END); |
- |
- return pass; |
-} |
- |
-class TestParserT { |
-public: |
-UBool operator () (const char *const str, const int exp_error_offset = -1, const UBool dump = TRUE){ |
- Parser par(str); |
- if (par.rules()){ |
- if ( exp_error_offset == -1){ |
- return TRUE; |
- }else { |
- DumpScanner(par.s,dump); |
- return FALSE; |
- } |
- }else { |
- return DumpScanner(par.s, dump) == exp_error_offset; |
- } |
-} |
-}; |
- |
-UBool TestParser(){ |
- TestParserT test; |
- |
- UBool pass = TRUE; |
- pass = pass && test ("$s = ' ' ? 50%;"); |
- pass = pass && test("$x = ($var {1,2}) 3%;"); // legal |
- pass = pass && test("$x = $var {1,2} 3% | b 4%;"); // legal |
- pass = pass && test("$x = $var {1,2} 3%;"); // legal |
- pass = pass && test("$m = $c ? 2% 4% | $r 5% | $n 25%;"); // legal |
- pass = pass && test("$a = b ? 2% | c 5%;"); // legal |
- pass = pass && test("$x = A B 5% C 10% | D;", 8, FALSE); // illegal 5% |
- pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc |
- pass = pass && test("$x = (b 5%) (c 6%);"); // legal |
- pass = pass && test("$x = (b 5%) c 6%;", 13, FALSE); // illegal 6% |
- pass = pass && test("$x = b 5% (c 6%);", 9, FALSE); // illegal (c 6%) |
- pass = pass && test("$x = b 5% c 6%;", 9, FALSE); // illegal c 6% |
- pass = pass && test("$x = b 5%;"); // legal |
- pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc |
- pass = pass && test("$x = a | b | c 4% | d 5%;"); // legal |
- pass = pass && test("$s = ' ' ? 50% abc;"); // legal |
- pass = pass && test("$s = a | c d | e f;"); // legal |
- pass = pass && test( "$z = q 0% | p 1% | r 100%;"); // legal How to check parsed tree?? |
- |
- pass = pass && test("$s = ' ' ? 50%;"); |
- pass = pass && test("$relationList = '<' | '<<' | ';' | '<<<' | ',' | '=';"); |
- pass = pass && test("$p1 = ($string $s '|' $s)? 25%;"); |
- pass = pass && test("$p2 = (\\\\ $s $string $s)? 25%;"); |
- pass = pass && test("$rel2 = $p1 $string $s $p2;"); |
- pass = pass && test("$relation = $relationList $s ($rel1 | $rel2) $crlf;"); |
- pass = pass && test("$command = $commandList $crlf;"); |
- pass = pass && test("$reset = '&' $s ($beforeList $s)? 10% ($positionList 100% | $string 10%) $crlf;"); |
- pass = pass && test("$mostRules = $command 1% | $reset 5% | $relation 25%;"); |
- pass = pass && test("$root = $command{0,5} $reset $mostRules{1,20};"); |
- |
- const char collationBNF[] = |
- "$s = ' '? 50%;" |
- "$crlf = '\r\n';" |
- |
- "$alternateOptions = non'-'ignorable | shifted;" |
- "$onoff = on | off;" |
- "$caseFirstOptions = off | upper | lower;" |
- "$strengthOptions = '1' | '2' | '3' | '4' | 'I';" |
- "$commandList = '['" |
- " ( alternate ' ' $alternateOptions" |
- " | backwards ' 2'" |
- " | normalization ' ' $onoff " |
- " | caseLevel ' ' $onoff " |
- " | hiraganaQ ' ' $onoff" |
- " | caseFirst ' ' $caseFirstOptions" |
- " | strength ' ' $strengthOptions" |
- " ) ']';" |
- "$command = $commandList $crlf;" |
- |
- "$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;" |
- "$allTypes = variable | regular | implicit | trailing | $ignorableTypes;" |
- "$positionList = '[' (first | last) ' ' $allTypes ']';" |
- |
- "$beforeList = '[before ' ('1' | '2' | '3') ']';" |
- |
- "$relationList = (" |
- " '<'" |
- " | '<<'" |
- " | ';'" |
- " | '<<<'" |
- " | ','" |
- " | '='" |
- ");" |
- "$string = $magic;" |
- "$rel1 = '[variable top]' $s;" |
- "$p1 = ($string $s '|' $s)? 25%;" |
- "$p2 = (\\\\ $s $string $s)? 25%;" |
- "$rel2 = $p1 $string $s $p2;" |
- "$relation = $relationList $s ($rel1 | $rel2) $crlf;" |
- |
- "$reset = '&' $s ($beforeList $s)? 10% ($positionList 1% | $string 10%) $crlf;" |
- "$mostRules = $command 1% | $reset 5% | $relation 25%;" |
- "$root = $command{0,5} $reset $mostRules{1,20};" |
- ; |
- |
- pass = pass && test(collationBNF); |
- |
- |
- return pass; |
-} |
- |
-static UBool TestMorph(){ |
- srand((unsigned)time( NULL )); |
- |
- Alternation * alt = new Alternation(); |
- |
- (*alt) |
- .append(new Literal("a")).append(new Literal("b")).append(new Literal("c")) |
- .append(new Literal("d")).append(new Literal("e")).append(new Literal("f")) |
- .append(new Literal("g")).append(new Literal("h")).append(new Literal("i")) |
- .append(new Literal("j")).append(new Literal("k")).append(new Literal("l")) |
- .append(new Literal("m")).append(new Literal("n")).append(new Literal("o")) |
- ; |
- |
- Repeat * rep = new Repeat( alt ,5,5 ); |
- Morph m( *rep); |
- |
-// DUMP_R(TestMorph,(*rep),20); |
- DUMP_R(TestMorph,m,100); |
- |
- return FALSE; |
-} |
- |
-#endif |
- |
-static UBool TestLanguageGenerator(){ |
- //LanguageGenerator g; |
- //const char *const s = "$s = p 0% | q 1%;"; |
- //g.parseBNF(s, "$s"); |
- UBool pass; |
- //= strcmp("q", g.next()) == 0; |
- |
- const char *const def = |
- //"$a = $b;" |
- //"$b = $c;" |
- //"$c = $t;" |
- //"$t = abc $z{1,2};" |
- //"$k = a | b | c | d | e | f | g ;" |
- //"$z = q 0% | p 1% | r 1%;" |
- "$x = a ? 0%;" |
- ; // end of string |
-// const char * s = "abczz"; |
-// |
-// |
- LanguageGenerator g; |
- pass = g.parseBNF(def, "$x",TRUE); |
-//// LanguageGenerator g(collationBNF, "$root", "$magic", new MagicNode()); |
-// |
- if (pass != LanguageGenerator::OK) return FALSE; |
- |
- DUMP_R(TestLanguageGenerator, g, 20); |
- return pass; |
- |
- ////UBool pass = strcmp(s,r) == 0; |
- |
- //if (pass){ |
- // printf("TestRandomLanguageGenerator passed.\n"); |
- //} else { |
- // printf("TestRandomLanguageGenerator FAILED!!!\n"); |
- //} |
- //return pass; |
-} |
- |
-void TestWbnf(void){ |
- srand((unsigned)time( NULL )); |
- |
- //CALL(TestLiteral); |
- //CALL(TestSequence); |
- //CALL(TestSymbolTable); |
- //CALL(TestVariable); |
- |
- //TestRepeat(); |
- //TestAlternation(); |
- //TestMorph(); |
- |
- //TestQuote(); |
- //TestBuffer(); |
- //TestWeightedRand(); |
- |
- //CALL(TestScanner); |
- //CALL(TestParser); |
- CALL(TestLanguageGenerator); |
-} |
- |