| Index: icu46/source/test/intltest/wbnf.cpp
|
| ===================================================================
|
| --- icu46/source/test/intltest/wbnf.cpp (revision 0)
|
| +++ icu46/source/test/intltest/wbnf.cpp (revision 0)
|
| @@ -0,0 +1,1771 @@
|
| +/*
|
| + ******************************************************************************
|
| + * Copyright (C) 2005-2007, International Business Machines Corporation and *
|
| + * others. All Rights Reserved. *
|
| + ******************************************************************************
|
| + */
|
| +
|
| +#include <stdio.h>
|
| +#include <string.h>
|
| +#include <stdlib.h>
|
| +#include <time.h>
|
| +
|
| +#include "wbnf.h"
|
| +
|
| +// Most of this code is meant to test the test code. It's a self test.
|
| +// Normally this isn't run.
|
| +#define TEST_WBNF_TEST 0
|
| +
|
| +///////////////////////////////////////////////////////////
|
| +//
|
| +// Constants and the most basic helper classes
|
| +//
|
| +
|
| +static const char DIGIT_CHAR[] = "0123456789";
|
| +static const char WHITE_SPACE[] = {'\t', ' ', '\r', '\n', 0};
|
| +static const char ALPHABET[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
| +static const char SPECIAL[] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
|
| +
|
| +static inline UBool isInList(const char c /*in*/, const char list[] /*in*/){
|
| + const char * p = list;
|
| + for (;*p != 0 && *p != c; p++);
|
| + return *p?TRUE:FALSE;
|
| +}
|
| +static inline UBool isDigit(char c) {return isInList(c, DIGIT_CHAR);}
|
| +static inline UBool isWhiteSpace(char c) {return isInList(c, WHITE_SPACE);}
|
| +static inline UBool isAlphabet(char c) {return isInList(c, ALPHABET);}
|
| +static inline UBool isSpecialAsciiChar(char c) {return isInList(c,SPECIAL);}
|
| +
|
| +
|
| +
|
| +///////////////////////////////////////////////////////////
|
| +//
|
| +// Helper classes
|
| +//
|
| +
|
| +class Buffer_byte{
|
| +// Utility class, can be treated as an auto expanded array. no boundary check.
|
| +
|
| + typedef char byte;
|
| + byte * start;
|
| + byte * current;
|
| + int buffer_size; // size unit is byte
|
| +public:
|
| + inline int content_size(){return current - start;} // size unit is byte
|
| +
|
| +private:
|
| + inline void expand(int add_size = 100){ // size unit is byte
|
| + int new_size = buffer_size + add_size;
|
| +
|
| + int cs_snap = content_size();
|
| + start = (byte *) realloc(start, new_size); // may change the value of start
|
| + current = start + cs_snap;
|
| +
|
| + memset(current, 0, add_size);
|
| + buffer_size = new_size;
|
| + }
|
| +
|
| + inline void expand_to(int size){
|
| + int r = size - buffer_size;
|
| + if (r > 0) {
|
| + expand(r); // simply expand, no block alignment
|
| + }
|
| + }
|
| + Buffer_byte(const Buffer_byte &);
|
| + Buffer_byte & operator = (const Buffer_byte &);
|
| +public:
|
| + Buffer_byte():start(NULL),current(start),buffer_size(0){
|
| + expand();
|
| + }
|
| + ~Buffer_byte(){
|
| + free(start);
|
| + }
|
| +
|
| + inline void reset(){
|
| + start != NULL ? memset(start, 0, buffer_size) : 0;
|
| + current = start;
|
| + }
|
| +
|
| + // Using memory copy method to append a C array to buffer,
|
| + inline void append(const void * c, int size){ // size unit is byte
|
| + expand_to(content_size() + size) ;
|
| + memcpy(current, c, size);
|
| + current = current + size;
|
| + }
|
| +
|
| + byte * buffer(){
|
| + return start;
|
| + }
|
| +};
|
| +
|
| +/*
|
| + The class(es) try to work as bulid-in array, so it overloads these two operators
|
| + operator type *();
|
| + type & operator[];
|
| + The first is used to auto type convert, the latter is used to select member.
|
| +
|
| + A small trick is the class does not overload the address-of operator. This
|
| + behavior is different from bulid-in array, but it give us the opportunity
|
| + to get the address of the class itself.
|
| +*/
|
| +//template<typename type>
|
| +// class BUFFER{
|
| +// typedef BUFFER name;
|
| +#define BUFFER(type, name)\
|
| + class name {\
|
| + private:\
|
| + Buffer_byte buf;\
|
| + public:\
|
| + name & reset() {buf.reset(); return *this;}\
|
| + name & append(type c) {buf.append(&c, sizeof(type)); return *this;}\
|
| + name & append_array(const type * p, int size) {buf.append(p, sizeof(type)*size); return *this;}\
|
| + type & operator [] (int i) { return ((type *) buf.buffer())[i];}\
|
| + operator type *(){return (type *) buf.buffer();} \
|
| + int content_size(){return buf.content_size() / sizeof(type);}\
|
| + }
|
| +
|
| +
|
| +class Pick{
|
| +/* The Pick is the basic language generator element*/
|
| +public:
|
| + // generate a string accroding the syntax
|
| + // Return a null-terminated c-string. The buffer is owned by callee.
|
| + virtual const char* next() = 0;
|
| + virtual ~Pick(){};
|
| +};
|
| +
|
| +//typedef BUFFER<char> Buffer_char;
|
| +//typedef BUFFER<int> Buffer_int;
|
| +//typedef BUFFER<Pick *> Buffer_pPick;
|
| +BUFFER(char, Buffer_char);
|
| +BUFFER(int, Buffer_int);
|
| +BUFFER(Pick *, Buffer_pPick);
|
| +
|
| +class SymbolTable{
|
| +/* Helper class.
|
| +* It's a mapping table between 'variable name' and its 'active Pick object'
|
| +*/
|
| +private:
|
| + Buffer_char name_buffer; // var names storage space
|
| +
|
| + Buffer_int names; // points to name (offset in name_buffer)
|
| + Buffer_pPick refs; // points to Pick
|
| +
|
| + int get_index(const char *const var_name){
|
| + int len = names.content_size();
|
| + for (int i=0; i< len; i++){
|
| + if (strcmp(var_name, name_buffer + names[i]) == 0){
|
| + return i;
|
| + }
|
| + }
|
| + return -1;
|
| + }
|
| +
|
| +public:
|
| + enum RESULT {EMPTY, NO_VAR, NO_REF, HAS_REF};
|
| +
|
| + RESULT find(const char *const var_name /*[in] c-string*/, Pick * * ref = NULL /*[out] Pick* */){
|
| + if (!var_name) return EMPTY; // NULL name
|
| +
|
| + int i = get_index(var_name);
|
| + if (i == -1){
|
| + return NO_VAR; // new name
|
| + }
|
| + if (!refs[i]){ // exist name, no ref
|
| + return NO_REF;
|
| + } else {
|
| + if (ref) {
|
| + *ref = refs[i];
|
| + }
|
| + return HAS_REF; // exist name, has ref
|
| + }
|
| + }
|
| +
|
| + void put(const char *const var_name, Pick *const var_ref = NULL){
|
| + int i = get_index(var_name);
|
| + switch(find(var_name)){
|
| + case EMPTY: // NULL name
|
| + break;
|
| + case NO_VAR: // new name
|
| + int offset;
|
| + offset = name_buffer.content_size();
|
| + name_buffer.append_array(var_name, strlen(var_name) + 1);
|
| + names.append(offset);
|
| + refs.append(var_ref);
|
| + break;
|
| + case NO_REF: // exist name, no ref
|
| + refs[i] = var_ref; // link definition with variable
|
| + break;
|
| + case HAS_REF: // exist name, has ref
|
| + if (var_ref){
|
| + refs[i] = var_ref;
|
| + }
|
| + break;
|
| + default:
|
| + ; // ASSERT(FALSE);
|
| + }
|
| + return;
|
| + }
|
| +
|
| + UBool is_complete(){
|
| + int n = names.content_size();
|
| + for (int i=0; i<n; ++i){
|
| + if (refs[i] == NULL){
|
| + return FALSE;
|
| + }
|
| + }
|
| + return TRUE;
|
| + }
|
| +
|
| + void reset(){
|
| + names.reset();
|
| + name_buffer.reset();
|
| +
|
| + // release memory here
|
| + int s = refs.content_size();
|
| + for (int i=0; i < s; i++){
|
| + delete refs[i]; // TOFIX: point alias/recursion problem
|
| + }
|
| + refs.reset();
|
| + }
|
| +
|
| + ~SymbolTable(){
|
| + reset();
|
| + }
|
| +};
|
| +
|
| +
|
| +/*
|
| +// Document of class Escaper
|
| +//
|
| +// ATTENTION:
|
| +// From http://icu-project.org/userguide/Collate_Customization.html.
|
| +// We get the precedence of escape/quote operations
|
| +//
|
| +// (highest) 1. backslash \
|
| +// 2. two single quotes ''
|
| +// 3. quoting ' '
|
| +//
|
| +// ICU Collation should accept following as the same string.
|
| +//
|
| +// 1) 'ab'c _
|
| +// 2) a\bc \
|
| +// 3) a'b'\c |- They are equal.
|
| +// 4) abc _/
|
| +//
|
| +// From "two single quotes", we have following deductions
|
| +// D1. empty quoting is illgal. (obviously)
|
| +// D2. no contact operation between two quotings
|
| +// '.''.' is not .. it is .'.
|
| +// D3. "two single quotes" cannot contact two quoting simultaneously
|
| +// '..''''.' is not ..'. it is ..''.
|
| +// NOTICE:
|
| +// "two single quotes" can contact before one quoting
|
| +// '''.' is '.
|
| +// "two single quotes" can literally contact after one quoting
|
| +// But, from syntax, it's one quoting including a "two single quotes"
|
| +// '.''' is .'
|
| +// D4. "two single quotes" cannot solely be included in quoting
|
| +// '''' is not ' it is ''
|
| +// NOTICE: These are legal
|
| +// '.''.' is .'.
|
| +// '.''' is .'
|
| +//
|
| +// dicision
|
| +// /\
|
| +// /__\
|
| +// output buffer input buffer
|
| +//
|
| +// To make our dicision (within an atom operation) without caring input and output buffer,
|
| +// following calling pattern (within an atom operation) shall be avoided
|
| +//
|
| +// P1 open_quoting() then close_quoting() (direct violation) D1
|
| +// P2 close_quoting() then open_quoting() (direct violation) D2
|
| +// P3 empty open_quoting() (indirect violation) D1, D4
|
| +// P4 empty close_quoting() (indirect violation) D2, D3
|
| +// P5 open_quoting() then two single quotes (indirect violation) D4
|
| +// P6 close_quoting() then two single quotes (indirect violation) D3
|
| +//
|
| +// two single quotes escaping will not open_ or close_ quoting()
|
| +// The choice will not lose some quoing forms.
|
| +//
|
| +// For open_quoting(),
|
| +// we may get this form quoting ''' P5
|
| +// It may raise a bug ''''x
|
| +// If we expect
|
| +// '''.' let the next char open the quoting
|
| +// '.''.' the quoting is already opened by preceding char
|
| +//
|
| +// For close_quoting()
|
| +// we will get this form quoting '.''' P6
|
| +// It may raise a bug '.''''.'
|
| +// If we expect
|
| +// '.'''\. let the next char close the quoting
|
| +// '.''''.' the expectation is wrong! using '.'\''.' instead
|
| +//
|
| +// It's a hard work to re-adjust generation opportunity for various escaping form.
|
| +// We just simply ignore it.
|
| +*/
|
| +class Escaper{
|
| +public:
|
| + enum CHOICE {YES, NO, RAND};
|
| + enum ESCAPE_FORM {BSLASH_ONLY, QUOTE_ONLY, QUOTE_AND_BSLAH, RAND_ESC};
|
| +private:
|
| + class Bool{ // A wrapper class for CHOICE, to auto adapter UBool class
|
| + private:
|
| + const CHOICE tag;
|
| + public:
|
| + Bool(CHOICE flag=RAND):tag(flag){}
|
| + operator UBool() { // conversion operator
|
| + return tag == RAND ? rand()%2 : tag == YES;
|
| + //if (tag == RAND){
|
| + // return rand()%2 == 1;
|
| + //} else {
|
| + // return tag == YES ? TRUE : FALSE;
|
| + //}
|
| + }
|
| + };
|
| +public:
|
| + Escaper(CHOICE escapeLiteral = RAND,
|
| + CHOICE twoQuotesEscape = RAND,
|
| + ESCAPE_FORM escapeForm = RAND_ESC):
|
| + escape_form(escapeForm),
|
| + escape_literal(escapeLiteral),
|
| + two_quotes_escape(twoQuotesEscape),
|
| + is_quoting(FALSE){}
|
| +private:
|
| + Buffer_char str;
|
| + ESCAPE_FORM escape_form;
|
| + Bool escape_literal;
|
| + Bool two_quotes_escape;
|
| + UBool quote_escape;
|
| + UBool bslash_escape;
|
| + UBool is_quoting;
|
| +
|
| + void set_options(){
|
| + ESCAPE_FORM t = escape_form == RAND_ESC ? (ESCAPE_FORM) (rand()%3) : escape_form;
|
| + switch (t){
|
| + case BSLASH_ONLY :
|
| + bslash_escape = TRUE; quote_escape = FALSE; break;
|
| + case QUOTE_ONLY:
|
| + bslash_escape = FALSE;quote_escape = TRUE; break;
|
| + case QUOTE_AND_BSLAH:
|
| + bslash_escape = TRUE; quote_escape = TRUE; break;
|
| + default:
|
| + ;// error
|
| + }
|
| + }
|
| +
|
| + void reset(){
|
| + str.reset();
|
| + is_quoting = FALSE;
|
| + }
|
| +
|
| + inline void open_quoting(){
|
| + if(is_quoting){
|
| + // do nothing
|
| + } else {
|
| + str.append('\'');
|
| + is_quoting = TRUE;
|
| + }
|
| + }
|
| + inline void close_quoting(){
|
| + if(is_quoting){
|
| + str.append('\'');
|
| + is_quoting = FALSE;
|
| + } else {
|
| + // do nothing
|
| + }
|
| + }
|
| +
|
| + // str [in] null-terminated c-string
|
| + void append(const char * strToAppend){
|
| + for(;*strToAppend != 0; strToAppend++){
|
| + append(*strToAppend);
|
| + }
|
| + }
|
| +
|
| + inline void append(const char c){
|
| + set_options();
|
| +
|
| + if (c == '\\'){
|
| + quote_escape ? open_quoting() : close_quoting();
|
| + //bslash_escape always true here
|
| + str.append('\\');
|
| + str.append('\\');
|
| + } else if (c == '\''){
|
| + if (two_quotes_escape){ // quoted using two single quotes
|
| + // See documents in anonymous.design
|
| + str.append('\'');
|
| + str.append('\'');
|
| + } else{
|
| + quote_escape ? open_quoting() : close_quoting();
|
| + //bslash_escape always true here
|
| + str.append('\\');
|
| + str.append('\'');
|
| + }
|
| + } else if (isSpecialAsciiChar(c) || isWhiteSpace(c)){
|
| + quote_escape ? open_quoting() : close_quoting();
|
| + if (bslash_escape) str.append('\\');
|
| + str.append(c);
|
| + } else { //if (isAlphabet(c) || isDigit(c) || TRUE){ // treat others as literal
|
| + if (escape_literal){
|
| + quote_escape ? open_quoting() : close_quoting();
|
| + if (bslash_escape) str.append('\\');
|
| + str.append(c);
|
| + } else {
|
| + close_quoting();
|
| + str.append(c);
|
| + }
|
| + }
|
| + }
|
| +
|
| +public:
|
| + // Return a null-terminate c-string. The buffer is owned by callee.
|
| + char * operator()(const char * literal /*c-string*/){
|
| + str.reset();
|
| + for(;*literal != 0; literal++){
|
| + append(*literal);
|
| + }
|
| + close_quoting(); // P4 exception, to close whole quoting
|
| + return str;
|
| + }
|
| +};
|
| +
|
| +class WeightedRand{
|
| +// Return a random number in [0, size)
|
| +// Every number has different chance (aka weight) to be selected.
|
| +private:
|
| + Buffer_int weights;
|
| + double total;
|
| + WeightedRand(const WeightedRand &);
|
| + WeightedRand & operator = (const WeightedRand &);
|
| +public:
|
| + WeightedRand(Buffer_int * weight_list = NULL, int size = 0){
|
| + if ( weight_list == NULL){
|
| + for (int i=0; i<size; ++i) weights.append(DEFAULT_WEIGHT);
|
| + } else {
|
| + int s = weight_list->content_size();
|
| + if (s < size){
|
| + weights.append_array( (*weight_list),s);
|
| + for (int i=s; i<size; ++i) weights.append(DEFAULT_WEIGHT);
|
| + } else { // s >= size
|
| + weights.append_array( (*weight_list),size);
|
| + }
|
| + }
|
| + total = 0;
|
| + int c = weights.content_size();
|
| + for (int i=0; i<c; ++i){
|
| + total += weights[i];
|
| + }
|
| + }
|
| +
|
| + void append(int weight){
|
| + weights.append(weight);
|
| + total += weight;
|
| + }
|
| +
|
| + // Give a random number with the consideration of weight.
|
| + // Every random number is associated with a weight.
|
| + // It identifies the chance to be selected,
|
| + // larger weight has more chance to be selected.
|
| + //
|
| + //
|
| + // ______________________ every slot has equal chance
|
| + //
|
| + // [____][_][___][______] each item has different slots, hence different chance
|
| + //
|
| + //
|
| + // The algorithms to generate the number is illustrated by preceding figure.
|
| + // First, a slot is selected by rand(). Then we translate the slot to corresponding item.
|
| + //
|
| + int next(){
|
| + // get a random in [0,1]
|
| + double reference_mark = (double)rand() / (double)RAND_MAX;
|
| +
|
| + // get the slot's index, 0 <= mark <= total;
|
| + double mark = total * reference_mark;
|
| +
|
| + // translate the slot to corresponding item
|
| + int i=0;
|
| + for (;;){
|
| + mark -= weights[i]; // 0 <= mark <= total
|
| + if (mark <= 0)
|
| + break;
|
| + i++;
|
| + }
|
| + return i;
|
| + }
|
| +};
|
| +
|
| +///////////////////////////////////////////////////////////
|
| +//
|
| +// The parser result nodes
|
| +//
|
| +
|
| +class Literal : public Pick {
|
| +public:
|
| + virtual const char* next(){
|
| + return str;
|
| + }
|
| + Literal(const char * s /*c-string*/){
|
| + str.append_array(s, strlen(s) + 1);
|
| + }
|
| +private:
|
| + Buffer_char str; //null-terminated c-string
|
| +};
|
| +
|
| +class Variable : public Pick {
|
| +public:
|
| + Variable(SymbolTable * symbols, const char * varName, Pick * varRef = NULL){
|
| + this->var_name.append_array(varName, strlen(varName) + 1);
|
| + if ((symbol_table = symbols)){
|
| + symbol_table->put(varName, varRef);
|
| + }
|
| + }
|
| +
|
| + operator const char *(){
|
| + return var_name;
|
| + }
|
| +
|
| + virtual const char* next(){
|
| + if (symbol_table){
|
| + Pick * var_ref = NULL;
|
| + symbol_table->find(var_name, &var_ref);
|
| + if (var_ref) {
|
| + return var_ref->next();
|
| + }
|
| + }
|
| + return ""; // dumb string
|
| + }
|
| +private:
|
| + Buffer_char var_name;
|
| + SymbolTable * symbol_table;
|
| +};
|
| +
|
| +class Quote : public Pick{
|
| +public:
|
| + Quote(Pick & base):item(base),e(Escaper::NO, Escaper::NO, Escaper::BSLASH_ONLY){
|
| + }
|
| + virtual const char* next(){
|
| + return e(item.next());
|
| + }
|
| +private:
|
| + Pick & item;
|
| + Buffer_char str;
|
| + Escaper e;
|
| +};
|
| +
|
| +
|
| +class Morph : public Pick{
|
| +/*
|
| +The difference between morph and an arbitrary random string is that
|
| +a morph changes slowly. When we build collation rules, for example,
|
| +it is a much better test if the strings we use are all in the same
|
| +'neighborhood'; they share many common characters.
|
| +*/
|
| +public:
|
| + Morph(Pick & base):item(base){}
|
| +
|
| + virtual const char* next(){
|
| + current.reset();
|
| + const char * s = item.next();
|
| + current.append_array(s, strlen(s) + 1);
|
| + if (last.content_size() == 0) {
|
| + str.reset();
|
| + last.reset();
|
| + str.append_array(current, current.content_size());
|
| + last.append_array(current, current.content_size());
|
| + } else {
|
| + morph();
|
| + }
|
| + return str;
|
| + }
|
| +private:
|
| + Pick & item;
|
| + Buffer_char str;
|
| + Buffer_char last;
|
| + Buffer_char current;
|
| +
|
| + char * p_last;
|
| + char * p_curr;
|
| +
|
| + void copy_curr(){
|
| + if (*p_curr) {
|
| + str.append(*p_curr);
|
| + p_curr++;
|
| + }
|
| + }
|
| +
|
| + void copy_last(){
|
| + if (*p_last) {
|
| + str.append(*p_last);
|
| + p_last++;
|
| + }
|
| + }
|
| +
|
| + // copy 0, 1, or 2 character(s) to str
|
| + void copy(){
|
| + static WeightedRand wr(& Buffer_int().append(DEFAULT_WEIGHT * 10), 5);
|
| +
|
| + switch (wr.next()){
|
| + case 0: // copy last -- has 10 times chance than others
|
| + copy_last();
|
| + break;
|
| + case 1: // copy both
|
| + copy_curr();
|
| + copy_last();
|
| + break;
|
| + case 2: // copy both
|
| + copy_last();
|
| + copy_curr();
|
| + break;
|
| + case 3:
|
| + copy_curr();
|
| + break;
|
| + case 4: // copy nothing
|
| + break;
|
| + default:
|
| + // ASSERT(FALSE);
|
| + ;
|
| + }
|
| + }
|
| +
|
| + void morph(void){
|
| + int min = strlen(last);
|
| + int max = strlen(current);
|
| + if (min > max){
|
| + int temp = min;
|
| + min = max;
|
| + max = temp;
|
| + }
|
| +
|
| + int len = min + rand()%(max - min + 1); // min + [0, diff]
|
| + p_curr = current;
|
| + p_last = last;
|
| + str.reset();
|
| +
|
| + for (; str.content_size()<len && *p_curr && *p_last;){
|
| + copy(); // copy 0, 1, or 2 character(s) to str
|
| + }
|
| +
|
| + if (str.content_size() == len) {
|
| + str.append(0);
|
| + final();
|
| + return;
|
| + }
|
| +
|
| + if (str.content_size() > len) { // if the last copy copied two characters
|
| + str[len]=0;
|
| + final();
|
| + return;
|
| + }
|
| +
|
| + // str.content_size() < len
|
| + if (*p_last) {
|
| + for (; str.content_size() < len; copy_last());
|
| + } else if (*p_curr){
|
| + for (; str.content_size() < len; copy_curr());
|
| + }
|
| +
|
| + int last_len = last.content_size();
|
| + for (;str.content_size() < len;){
|
| + str.append(last[rand()%last_len]);
|
| + }
|
| + str.append(0);
|
| + final();
|
| + }
|
| +
|
| + void final(){
|
| + last.reset();
|
| + last.append_array(current, current.content_size());
|
| + }
|
| +};
|
| +
|
| +class Sequence : public Pick {
|
| +public:
|
| + virtual const char* next(){
|
| + str.reset();
|
| + int s = items.content_size();
|
| + for(int i=0; i < s; i++){
|
| + const char * t = items[i]->next();
|
| + str.append_array(t, strlen(t));
|
| + }
|
| + str.append(0); // terminal null
|
| + return str;
|
| + }
|
| +
|
| + void append (Pick * node){
|
| + items.append(node);
|
| + }
|
| +
|
| + virtual ~Sequence(){
|
| + int s = items.content_size();
|
| + for(int i=0; i < s; i++){
|
| + //How can assure the item is got from heap?
|
| + //Let's assume it.
|
| + delete items[i]; // TOFIX: point alias/recursion problem
|
| + items[i] = NULL;
|
| + }
|
| + }
|
| +private:
|
| + Buffer_pPick items;
|
| + Buffer_char str; //null-terminated c-string
|
| +};
|
| +
|
| +class Repeat : public Pick {
|
| +private:
|
| + Pick * item;
|
| + Buffer_char str;
|
| + WeightedRand wr;
|
| + int min;
|
| + int max;
|
| + int select_a_count(){
|
| + return min + wr.next();
|
| + }
|
| +public:
|
| + virtual const char* next(){
|
| + str.reset();
|
| + int c = select_a_count();
|
| + for(int i=0; i< c; i++){
|
| + const char * t = item->next();
|
| + str.append_array(t, strlen(t));
|
| + }
|
| + str.append(0);
|
| + return str;
|
| + }
|
| +
|
| + Repeat(Pick * base, int minCount =0, int maxCount = 1, Buffer_int * weights = NULL):
|
| + wr(weights, maxCount-minCount +1) {
|
| + this->item = base;
|
| + this->min = minCount;
|
| + this->max = maxCount;
|
| + }
|
| + virtual ~Repeat(){
|
| + delete item; // TOFIX: point alias/recursion problem
|
| + item = NULL;
|
| + }
|
| +};
|
| +
|
| +
|
| +class Alternation : public Pick {
|
| +public:
|
| + virtual const char* next(){
|
| + str.reset();
|
| + int i = wr.next();
|
| + const char * t = items[i]->next();
|
| + str.append_array(t, strlen(t) + 1);
|
| + return str;
|
| + }
|
| + virtual ~Alternation(){
|
| + int s = items.content_size();
|
| + for(int i=0; i < s; i++){
|
| + delete items[i]; // TOFIX: point alias/recursion problem
|
| + items[i] = NULL;
|
| + }
|
| + }
|
| +
|
| + Alternation & append (Pick * node, int weight = DEFAULT_WEIGHT){
|
| + items.append(node);
|
| + wr.append(weight);
|
| + return *this;
|
| + }
|
| +private:
|
| + Buffer_pPick items;
|
| + Buffer_char str; // null-terminated c-string
|
| + WeightedRand wr;
|
| +};
|
| +
|
| +///////////////////////////////////////////////////////////
|
| +//
|
| +// The parser
|
| +//
|
| +
|
| +enum TokenType {STRING, VAR, NUMBER, STREAM_END, ERROR, QUESTION, STAR, PLUS, LBRACE, RBRACE, LPAR, RPAR, SEMI, EQ, COMMA, BAR, AT, WAVE, PERCENT};
|
| +
|
| +class Scanner{
|
| +friend int DumpScanner(Scanner & s, UBool dumb);
|
| +private:
|
| + const char * source;
|
| + const char * working;
|
| + const char * history; // for debug
|
| + enum StateType {START, IN_NUM, IN_VAR_FIRST, IN_VAR, IN_QUOTE, IN_QUOTE_BSLASH, IN_BSLASH, IN_STRING, DONE};
|
| + StateType state;
|
| + void terminated(TokenType t){
|
| + working--; // return the peeked character
|
| + tokenType = t;
|
| + token.append(0); // close buffer
|
| + state = DONE;
|
| + }
|
| +public:
|
| + // the buffer of "source" is owned by caller
|
| + Scanner(const char *src/*[in] c-string*/ = NULL):source(src){
|
| + working = src;
|
| + history = working;
|
| + state = DONE;
|
| + tokenType = ERROR;
|
| + }
|
| +
|
| + //void setSource(const char *const src /*[in] c-string*/){
|
| + // *(&const_cast<const char *>(source)) = src;
|
| + //}
|
| +
|
| + Buffer_char token;
|
| + TokenType tokenType;
|
| +
|
| + TokenType getNextToken(){
|
| + token.reset();
|
| + state = START;
|
| + history = working; // for debug
|
| + while (state != DONE){
|
| + char c = *working++;
|
| + if (c == 0 && state != START){//avoid buffer overflow. for IN_QUOE, IN_ESCAPE
|
| + terminated(ERROR);
|
| + break; // while
|
| + }
|
| + switch(state){
|
| + case START:
|
| + tokenType = ERROR;
|
| + switch(c){
|
| + case '?' : tokenType = QUESTION; break;
|
| + case '*' : tokenType = STAR; break;
|
| + case '+' : tokenType = PLUS; break;
|
| + case '{' : tokenType = LBRACE; break;
|
| + case '}' : tokenType = RBRACE; break;
|
| + case '(' : tokenType = LPAR; break;
|
| + case ')' : tokenType = RPAR; break;
|
| + case ';' : tokenType = SEMI; break;
|
| + case '=' : tokenType = EQ; break;
|
| + case ',' : tokenType = COMMA; break;
|
| + case '|' : tokenType = BAR; break;
|
| + case '@' : tokenType = AT; break;
|
| + case '~' : tokenType = WAVE; break;
|
| + case '%' : tokenType = PERCENT; break;
|
| + case 0 : tokenType = STREAM_END; working-- /*avoid buffer overflow*/; break;
|
| + }
|
| + if (tokenType != ERROR){
|
| + token.append(c);
|
| + token.append(0);
|
| + state = DONE;
|
| + break; // START
|
| + }
|
| + switch(c){
|
| + case '$' : state = IN_VAR_FIRST; token.append(c); break;
|
| + case '\'' : state = IN_QUOTE; break;
|
| + case '\\' : state = IN_BSLASH; break;
|
| + default:
|
| + if (isWhiteSpace(c)){ // state = START; //do nothing
|
| + } else if (isDigit(c)){ state = IN_NUM; token.append(c);
|
| + } else if (isAlphabet(c)){ state = IN_STRING; token.append(c);
|
| + } else {terminated(ERROR);}
|
| + }
|
| + break;//START
|
| + case IN_NUM:
|
| + if (isDigit(c)){
|
| + token.append(c);
|
| + } else {
|
| + terminated(NUMBER);
|
| + }
|
| + break;//IN_NUM
|
| + case IN_VAR_FIRST:
|
| + if (isAlphabet(c)){
|
| + token.append(c);
|
| + state = IN_VAR;
|
| + } else {
|
| + terminated(ERROR);
|
| + }
|
| + break; // IN_VAR_FISRT
|
| + case IN_VAR:
|
| + if (isAlphabet(c) || isDigit(c)){
|
| + token.append(c);
|
| + } else {
|
| + terminated(VAR);
|
| + }
|
| + break;//IN_VAR
|
| + case IN_STRING:
|
| + // About the scanner's behavior for STRING, AT, and ESCAPE:
|
| + // All of them can be contacted with each other.
|
| + // This means the scanner will eat up as much as possible strings
|
| + // (STRING, AT, and ESCAPE) at one time, with no regard of their
|
| + // combining sequence.
|
| + //
|
| + if (c == '\''){
|
| + state = IN_QUOTE; // the first time we see single quote
|
| + } else if (c =='\\'){ // back slash character
|
| + state = IN_BSLASH;
|
| + } else if (isAlphabet(c) || isDigit(c)){
|
| + token.append(c);
|
| + } else{
|
| + terminated(STRING);
|
| + }
|
| + break;//IN_STRING
|
| + case IN_QUOTE:
|
| + if (c == '\''){ // the second time we see single quote
|
| + state = IN_STRING; // see document in IN_STRING
|
| + } else if ( c== '\\') { // backslah escape in quote
|
| + state = IN_QUOTE_BSLASH;
|
| + } else {
|
| + token.append(c); // eat up everything, includes back slash
|
| + }
|
| + break;//IN_QUOTE
|
| + case IN_QUOTE_BSLASH:
|
| + case IN_BSLASH:
|
| + switch (c){
|
| + case 'n' : token.append('\n'); break;
|
| + case 'r' : token.append('\r'); break;
|
| + case 't' : token.append('\t'); break;
|
| + case '\'' : token.append('\''); break;
|
| + case '\\' : token.append('\\'); break;
|
| + default: token.append(c); // unknown escaping, treat it as literal
|
| + }
|
| + if (state == IN_BSLASH){
|
| + state = IN_STRING; // see document in IN_STRING
|
| + } else { // state == IN_QUOTE_BSLASH
|
| + state = IN_QUOTE;
|
| + }
|
| + break;//IN_BSLASH
|
| + case DONE: /* should never happen */
|
| + default:
|
| + working--;
|
| + tokenType = ERROR;
|
| + state = DONE;
|
| + break;
|
| + }//switch(state)
|
| + }//while (state != DONE)
|
| +
|
| + return tokenType;
|
| + }
|
| +};//class Scanner
|
| +
|
| +class Parser{
|
| +friend UBool TestParser();
|
| +friend class TestParserT;
|
| +friend class LanguageGenerator_impl;
|
| +private:
|
| + Scanner s;
|
| + TokenType & token;
|
| + int min_max; // for the evil infinite
|
| +
|
| + UBool match(TokenType expected){
|
| + if (token == expected) {
|
| + token = s.getNextToken();
|
| + return TRUE;
|
| + } else {
|
| + //s.dumpCurrentPoint();
|
| + return FALSE;
|
| + }
|
| + }
|
| +
|
| + UBool weight(int & value){
|
| + if (token == NUMBER){
|
| + int temp = atoi(s.token);
|
| + match(NUMBER);
|
| + if (match(PERCENT)){
|
| + value = temp;
|
| + return TRUE;
|
| + }
|
| + }
|
| + return FALSE;
|
| + }
|
| +
|
| + UBool repeat (Pick* &node /*in,out*/){
|
| + if (node == NULL) return FALSE;
|
| +
|
| + int count = -2;
|
| + int min = -2;
|
| + int max = -2;
|
| + UBool question = FALSE;
|
| + switch (token){
|
| + case QUESTION:
|
| + match(QUESTION);
|
| + min = 0;
|
| + max = 1;
|
| + count = 2;
|
| + question = TRUE;
|
| + break;
|
| + case STAR:
|
| + match(STAR);
|
| + min = 0;
|
| + max = -1;
|
| + count = -1;
|
| + break;
|
| + case PLUS:
|
| + match(PLUS);
|
| + min = 1;
|
| + max = -1;
|
| + count = -1;
|
| + break;
|
| + case LBRACE:
|
| + match(LBRACE);
|
| + if (token != NUMBER){
|
| + return FALSE;
|
| + }else {
|
| + min = atoi(s.token);
|
| + match(NUMBER);
|
| + if (token == RBRACE){
|
| + match(RBRACE);
|
| + max = min;
|
| + count = 1;
|
| + } else if (token == COMMA) {
|
| + match(COMMA);
|
| + if (token == RBRACE){
|
| + match(RBRACE);
|
| + max = -1;
|
| + count = -1;
|
| + } else if (token == NUMBER) {
|
| + max = atoi(s.token);
|
| + match(NUMBER);
|
| + count = max - min + 1;
|
| + if (!match(RBRACE)) {
|
| + return FALSE;
|
| + }
|
| + } else {
|
| + return FALSE;
|
| + }
|
| + } else {
|
| + return FALSE;
|
| + }
|
| + }
|
| + break;
|
| + default:
|
| + return FALSE;
|
| + }
|
| +
|
| + if (count == -2 || min == -2 || max == -2){
|
| + //ASSERT(FALSE);
|
| + return FALSE;
|
| + }
|
| +
|
| + // eat up following weights
|
| + Buffer_int weights;
|
| + int w;
|
| + while (weight(w)){
|
| + weights.append(w);
|
| + }
|
| +
|
| + // for the evil infinite
|
| + min_max = min_max > min ? min_max : min;
|
| + min_max = min_max > max ? min_max : max;
|
| + if (min_max > PSEUDO_INFINIT){
|
| + return FALSE; // PSEUDO_INFINIT is less than the real maximum
|
| + }
|
| + if (max == -1){ // the evil infinite
|
| + max = PSEUDO_INFINIT;
|
| + }
|
| + // for the strange question mark
|
| + if (question && weights.content_size() > 0){
|
| + Buffer_int w2;
|
| + w2.append(DEFAULT_WEIGHT - weights[0]).append(weights[0]);
|
| + node = new Repeat(node,min,max,&w2);
|
| + return TRUE;
|
| + }
|
| + node = new Repeat(node,min,max,&weights);
|
| + return TRUE;
|
| + }
|
| +
|
| + UBool core(Pick* &node /*out*/){
|
| + if (node != NULL) return FALSE; //assert node == NULL
|
| +
|
| + switch(token){
|
| + case LPAR:
|
| + match(LPAR);
|
| + if(defination(node) && match(RPAR)){
|
| + return TRUE;
|
| + }
|
| + return FALSE;
|
| + case VAR:
|
| + node = new Variable(&symbols, s.token);
|
| + match(VAR);
|
| + return TRUE;
|
| + case STRING:
|
| + node = new Literal(s.token);
|
| + match(STRING);
|
| + return TRUE;
|
| + default:
|
| + return FALSE;
|
| + }
|
| + }
|
| + UBool modified(Pick* &node /*out*/){
|
| + if (node != NULL) return FALSE; //assert node == NULL
|
| +
|
| + if (!core(node)) {
|
| + return FALSE;
|
| + }
|
| +
|
| + for (;;){
|
| + switch(token){
|
| + case WAVE:
|
| + match(WAVE);
|
| + node = new Morph(*node);
|
| + break;
|
| + case AT:
|
| + match(AT);
|
| + node = new Quote(*node);
|
| + break;
|
| + case QUESTION:
|
| + case STAR:
|
| + case PLUS:
|
| + case LBRACE:
|
| + if (!repeat(node)) return FALSE;
|
| + break;
|
| + case SEMI: // rule definiation closed
|
| + case RPAR: // within parenthesis (core closed)
|
| + case BAR: // in alternation
|
| + case NUMBER: // in alternation, with weight
|
| + case LPAR: // in sequence
|
| + case VAR: // in sequence
|
| + case STRING: // in sequence
|
| + return TRUE;
|
| + default:
|
| + return FALSE;
|
| + }
|
| + }
|
| + }
|
| +
|
| +
|
| + UBool sequence_list(Pick* &node /*in,out*/){
|
| + if (node == NULL) return FALSE; // assert node != NULL
|
| +
|
| + Sequence* seq = new Sequence();
|
| + Pick * n = node;
|
| +
|
| + while (token == VAR || token == STRING || token == LPAR){
|
| + seq->append(n);
|
| + n = NULL;
|
| + if (modified(n)){
|
| + // go on
|
| + } else {
|
| + goto FAIL;
|
| + }
|
| + }
|
| +
|
| + if (token == SEMI || token == RPAR || token == BAR){
|
| + seq->append(n);
|
| + node = seq;
|
| + return TRUE;
|
| + }
|
| +FAIL:
|
| + delete seq;
|
| + return FALSE;
|
| +
|
| + }
|
| +
|
| + UBool sequence(Pick* &node /*out*/){
|
| + if (node != NULL) return FALSE; //assert node == NULL
|
| +
|
| + if (!modified(node)) {
|
| + return FALSE;
|
| + }
|
| +
|
| + if (token == VAR || token == STRING || token == LPAR){
|
| + return sequence_list(node);
|
| + } else {
|
| + return TRUE; // just a modified
|
| + }
|
| + }
|
| +
|
| + UBool alternation_list(Pick* &node /*in,out*/){
|
| + if (node == NULL) return FALSE; // assert node != NULL
|
| +
|
| + Alternation * alt = new Alternation();
|
| + Pick * n = node;
|
| + int w = DEFAULT_WEIGHT;
|
| +
|
| + while (token == NUMBER || token == BAR){
|
| + if(token == NUMBER) {
|
| + if (weight(w)){
|
| + if (token == BAR){
|
| + // the middle item, go on
|
| + } else {
|
| + // the last item or encounter error
|
| + break; //while
|
| + }
|
| + } else {
|
| + goto FAIL;
|
| + }
|
| + } // else token == BAR
|
| + match(BAR);
|
| + alt->append(n,w);
|
| +
|
| + n = NULL;
|
| + w = DEFAULT_WEIGHT;
|
| + if (sequence(n)){
|
| + // go on
|
| + } else {
|
| + goto FAIL;
|
| + }
|
| + }
|
| +
|
| + if (token == SEMI || token == RPAR) {
|
| + alt->append(n,w);
|
| + node = alt;
|
| + return TRUE;
|
| + }
|
| +FAIL:
|
| + delete alt;
|
| + return FALSE;
|
| + }
|
| +
|
| + UBool alternation(Pick* &node /*out*/){
|
| + if (node != NULL) return FALSE; //assert node == NULL
|
| +
|
| + // 'sequence' has higher precedence than 'alternation'
|
| + if (!sequence(node)){
|
| + return FALSE;
|
| + }
|
| +
|
| + if (token == BAR || token == NUMBER){ // find a real alternation1, create it.
|
| + return alternation_list(node);
|
| + } else {
|
| + return TRUE; // just a sequence_old
|
| + }
|
| + }
|
| +
|
| +
|
| + UBool defination(Pick* &node /*out*/){
|
| + if (node != NULL) return FALSE; //assert node == NULL
|
| + return alternation(node);
|
| + }
|
| +
|
| + UBool rule(){
|
| + if (token == VAR){
|
| + Buffer_char name;
|
| + name.append_array(s.token, strlen(s.token) + 1);
|
| + match(VAR);
|
| +
|
| + if (match(EQ)){
|
| + Pick * t = NULL;
|
| + if(defination(t)){
|
| + symbols.put(name, t);
|
| + return match(SEMI);
|
| + }
|
| + }
|
| + }
|
| + return FALSE;
|
| + }
|
| +public:
|
| + UBool rules(){
|
| + symbols.reset();
|
| + token = s.getNextToken();
|
| + while (rule()){
|
| + }
|
| + if (token == STREAM_END){
|
| + return TRUE;
|
| + } else {
|
| + //s.dumpCurrentPoint();
|
| + return FALSE;
|
| + }
|
| + }
|
| +
|
| +public:
|
| + SymbolTable symbols;
|
| +
|
| + Parser(const char *const source):s(source), token(s.tokenType){
|
| + min_max = -2;
|
| + }
|
| + UBool parse(){
|
| + return rules();
|
| + }
|
| +
|
| +}; // class Parser
|
| +
|
| +
|
| +///////////////////////////////////////////////////////////
|
| +//
|
| +//
|
| +//
|
| +
|
| +int DumpScanner(Scanner & s, UBool dump = TRUE){
|
| + int len = strlen(s.source);
|
| + int error_start_offset = s.history - s.source;
|
| + if (dump){
|
| + printf("\n=================== DumpScanner ================\n");
|
| + fwrite(s.source, len, 1, stdout);
|
| + printf("\n-----parsed-------------------------------------\n");
|
| + fwrite(s.source, s.history - s.source, 1, stdout);
|
| + printf("\n-----current------------------------------------\n");
|
| + fwrite(s.history, s.working - s.history, 1, stdout);
|
| + printf("\n-----unparsed-----------------------------------\n");
|
| + fwrite(s.working, (s.source + len - s.working), 1, stdout);
|
| + printf("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");
|
| + }
|
| + return error_start_offset;
|
| +}
|
| +
|
| +class LanguageGenerator_impl{
|
| +public:
|
| + LanguageGenerator_impl(const char *const bnf_definition, const char *const top_node)
|
| + :par(bnf_definition), top_node_name(top_node){
|
| + srand((unsigned)time( NULL ));
|
| + }
|
| +
|
| + LanguageGenerator::PARSE_RESULT parseBNF(UBool debug = TRUE){
|
| + if (par.parse()){
|
| + if (par.symbols.find(top_node_name, &top_node_ref) == SymbolTable::HAS_REF) {
|
| + if (par.symbols.is_complete()) {
|
| + return LanguageGenerator::OK;
|
| + } else {
|
| + if (debug) printf("The bnf definition is incomplete.\n");
|
| + return LanguageGenerator::INCOMPLETE;
|
| + }
|
| + } else {
|
| + if (debug) printf("No top node is found.\n");
|
| + return LanguageGenerator::NO_TOP_NODE;
|
| + }
|
| + } else {
|
| + if(debug) {
|
| + printf("The bnf definition is wrong\n");
|
| + DumpScanner(par.s, TRUE);
|
| + }
|
| + return LanguageGenerator::BNF_DEF_WRONG;
|
| + }
|
| + }
|
| + const char * next(){
|
| + return top_node_ref->next();
|
| + }
|
| +
|
| +private:
|
| + Parser par;
|
| + const char *const top_node_name;
|
| + Pick * top_node_ref;
|
| +};
|
| +
|
| +LanguageGenerator::LanguageGenerator():lang_gen(NULL){
|
| +}
|
| +
|
| +LanguageGenerator::~LanguageGenerator(){
|
| + delete lang_gen;
|
| +}
|
| +
|
| +LanguageGenerator::PARSE_RESULT LanguageGenerator::parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug){
|
| + if (lang_gen){
|
| + delete lang_gen;
|
| + }
|
| + lang_gen = new LanguageGenerator_impl(bnf_definition, top_node);
|
| + PARSE_RESULT r = lang_gen->parseBNF(debug);
|
| + if (r != OK){
|
| + delete lang_gen;
|
| + lang_gen = NULL;
|
| + return r;
|
| + } else {
|
| + return r;
|
| + }
|
| +}
|
| +const char *LanguageGenerator::next(){ // Return a null-terminated c-string. The buffer is owned by callee.
|
| + if (lang_gen){
|
| + return lang_gen->next();
|
| + }else {
|
| + return "";
|
| + }
|
| +}
|
| +
|
| +///////////////////////////////////////////////////////////
|
| +//
|
| +// The test code for WBNF
|
| +//
|
| +
|
| +#define CALL(fun) \
|
| + if (fun()){ \
|
| + printf("Pass: " #fun "\n");\
|
| + } else { \
|
| + printf("FAILED: !!! " #fun " !!!\n"); \
|
| + }
|
| +
|
| +#define DUMP_R(fun, var, times) \
|
| + {printf("\n========= " #fun " =============\n"); \
|
| + for (int i=0; i<times; i++) { \
|
| + const char * t = var.next();\
|
| + fwrite(t,strlen(t),1,stdout); \
|
| + printf("\n"); \
|
| + } \
|
| + printf("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");}
|
| +
|
| +
|
| +
|
| +#if TEST_WBNF_TEST
|
| +static UBool TestQuote(){
|
| + const char *const str = "This ' A !,z| qq [] .new\tline";
|
| + //const char *const str_r = "This \\' A '!,'z'|' qq '[]' '.'new\tline";
|
| + ////
|
| + //// :( we must quote our string to following C syntax
|
| + //// cannot type the literal here, it makes our code rather human unreadable
|
| + //// very very unconformable!
|
| + ////
|
| + ///*
|
| + //*/
|
| +
|
| + //const char *const s1 = "ab'c";
|
| + //const char (* s1_r1) [] = { "ab''c", // ab''c
|
| + // "ab\\'c", // ab\'c
|
| + // };//
|
| + ///*
|
| + // . '.' \.
|
| + // .. \.\. '.'\. '.'\. '..' // '.''.' wrong
|
| + //*/
|
| +
|
| + //const char *const s2 = "a..'.b"; // a..'.b
|
| + //const char (*s2_r) [] = { "a'..''.'b" // a'..''.'b
|
| + // ,"a'..\\'.'b" // a'..\'.'b
|
| + // ,"a'..'\\''.'b" // a'..'\''.'b
|
| + // };//
|
| +
|
| + //const char *const s3 = "a..\\.b"; // a..\.b
|
| + //const char (*s3_r) [] = { "a'..\\\\.'b" // a'..\\.'b
|
| + // ,"a'..'\\\\'.'b" // a'..'\\'.'b
|
| + // };//
|
| +
|
| + // // no catact operation, no choice, must be compact
|
| +
|
| + srand((unsigned)time( NULL ));
|
| +
|
| + //Escaper l(Escaper::NO, Escaper::NO, Escaper::RAND_ESC);
|
| + Pick *p = new Literal(str);
|
| + Quote q(*p);
|
| +
|
| + DUMP_R(TestQuote, (*p), 1);
|
| + DUMP_R(TestQuote, q, 20);
|
| + return FALSE;
|
| +}
|
| +static UBool TestLiteral(){
|
| + const char * s = "test string99.";
|
| + Literal n(s);
|
| + const char * r = n.next();
|
| + return strcmp(s,r) == 0;
|
| +}
|
| +
|
| +static UBool TestSequence(){
|
| + Sequence seq;
|
| + seq.append(new Literal("abc "));
|
| + seq.append(new Literal(", s"));
|
| +
|
| + return strcmp(seq.next(), "abc , s") == 0;
|
| +}
|
| +static UBool TestAlternation(){
|
| + srand((unsigned)time( NULL ));
|
| + Alternation alt;
|
| + alt.append(new Literal("aaa_10%"),10);
|
| + alt.append(new Literal("bbb_0%"),0);
|
| + alt.append(new Literal("ccc_10%"),10);
|
| + alt.append(new Literal("ddddddd_50%"),50);
|
| +
|
| + DUMP_R(TestAlternation, alt, 50);
|
| +
|
| + return FALSE;
|
| +}
|
| +
|
| +static UBool TestBuffer(){
|
| + Buffer_int t;
|
| + t.append(1).append(0).append(5);
|
| + int s = t.content_size();
|
| + for (int i=0; i<s; ++i){
|
| + printf("%d\n", t[i]);
|
| + }
|
| + return FALSE;
|
| +}
|
| +
|
| +static UBool TestWeightedRand(){
|
| + srand((unsigned)time( NULL ));
|
| + Buffer_int t;
|
| + t.append(1).append(0).append(5);
|
| + WeightedRand wr(&Buffer_int().append(10).append(0).append(50),4);
|
| +// WeightedRand wr(&t,3);
|
| + for (int i=0; i< 50; ++i){
|
| + printf("%d\n", wr.next());
|
| + }
|
| + return FALSE;
|
| +}
|
| +
|
| +static UBool TestRepeat(){
|
| + srand((unsigned)time( NULL ));
|
| + Repeat rep(new Literal("aaa1-5 "), 1, 5);
|
| + DUMP_R(TestRepeat, rep, 50);
|
| +
|
| + Repeat r2(new Literal("b{1,3}1%0%5% "), 1, 3, &Buffer_int().append(1).append(0).append(5));
|
| + DUMP_R(TestRepeat, r2, 50);
|
| +
|
| + Repeat r3(new Literal("aaa5-5 "), 5, 5);
|
| + DUMP_R(TestRepeat, r3, 50);
|
| +
|
| + return FALSE;
|
| +}
|
| +
|
| +static UBool TestVariable(){
|
| + SymbolTable tab;
|
| + Pick * value = new Literal("string1");
|
| + Variable var1(&tab, "x", value);
|
| +
|
| + Variable var2(&tab, "y");
|
| +// tab.put(var2, value); // TOFIX: point alias/recursion problem
|
| + Pick * value2 = new Literal("string2");
|
| + tab.put(var2, value2);
|
| +
|
| + Pick * value3 = new Literal("string3");
|
| + Variable var3(&tab, "z");
|
| + tab.put("z", value3);
|
| +
|
| + UBool pass;
|
| + pass = strcmp(var1.next(), value->next()) == 0;
|
| + pass = pass && strcmp(var2.next(), value2->next()) == 0;
|
| + pass = pass && strcmp(var3.next(), value3->next()) == 0;
|
| + return pass;
|
| +}
|
| +
|
| +static UBool TestSymbolTable(){
|
| + Literal * n1 = new Literal("string1");
|
| + Literal * n2 = new Literal("string2");
|
| + SymbolTable t;
|
| + t.put("abc", n1);
|
| + t.put("$aaa", n2);
|
| +// t.put("alias", n1); // TOFIX: point alias/recursion problem
|
| + t.put("bbb");
|
| +
|
| + UBool pass;
|
| + pass = t.find(NULL) == SymbolTable::EMPTY;
|
| + pass = pass && t.find("ccc") == SymbolTable::NO_VAR;
|
| + pass = pass && t.find("bbb") == SymbolTable::NO_REF;
|
| + pass = pass && t.find("abc") == SymbolTable::HAS_REF;
|
| + pass = pass && t.find("$aaa") == SymbolTable::HAS_REF;
|
| +
|
| + t.reset();
|
| + pass = pass && t.find("abc") == SymbolTable::NO_VAR;
|
| + return pass;
|
| +}
|
| +
|
| +
|
| +static UBool TestScanner(void){
|
| + //const char str1[] = "$root = $command{0,5} $reset $mostRules{1,20};";
|
| + //const char str1_r[][20] = {"$root", "=", "$command", "{", "0", ",", "5", "}",
|
| + // "$reset", "$mostRules", "{", "1", ",", "20", "}", ";"};
|
| +
|
| + const char str2[] = "$p2 =(\\\\ $s $string $s)? 25%;";
|
| + const char str2_r[][20] = {"$p2", "=", "(", "\\", "$s", "$string", "$s", ")", "?", "25", "%", ";"};
|
| +
|
| + const char *str = str2;
|
| + const char (*str_r)[20] = str2_r;
|
| + int tokenNum = sizeof(str2_r)/sizeof(char[20]);
|
| +
|
| + Scanner t(str);
|
| + UBool pass = TRUE;
|
| + t.getNextToken();
|
| + int i = 0;
|
| + while (pass){
|
| + if (t.tokenType == STREAM_END){
|
| + pass = pass? i == tokenNum : FALSE;
|
| + break;//while
|
| + } else if (t.tokenType == ERROR){
|
| + pass = FALSE;
|
| + break;//while
|
| + } else {
|
| + pass = strcmp( &(t.token[0]), str_r[i++]) == 0;
|
| + t.getNextToken();
|
| + }
|
| + }
|
| +
|
| + //const char ts[] = "$commandList = '['"
|
| + //" ( alternate ' ' $alternateOptions"
|
| + //" | backwards ' 2'"
|
| + //" | normalization ' ' $onoff "
|
| + //" | caseLevel ' ' $onoff "
|
| + //" | hiraganaQ ' ' $onoff"
|
| + //" | caseFirst ' ' $caseFirstOptions"
|
| + //" | strength ' ' $strengthOptions"
|
| + //" ) ']';" ;
|
| +
|
| + //Scanner t2(ts);
|
| + //pass = TRUE;
|
| + //do {
|
| + // t2.getNextToken();
|
| + // if (t2.tokenType == ERROR){
|
| + // DumpScanner(t2);
|
| + // return FALSE;
|
| + // }
|
| + //}while (t.tokenType != STREAM_END);
|
| +
|
| + return pass;
|
| +}
|
| +
|
| +class TestParserT {
|
| +public:
|
| +UBool operator () (const char *const str, const int exp_error_offset = -1, const UBool dump = TRUE){
|
| + Parser par(str);
|
| + if (par.rules()){
|
| + if ( exp_error_offset == -1){
|
| + return TRUE;
|
| + }else {
|
| + DumpScanner(par.s,dump);
|
| + return FALSE;
|
| + }
|
| + }else {
|
| + return DumpScanner(par.s, dump) == exp_error_offset;
|
| + }
|
| +}
|
| +};
|
| +
|
| +UBool TestParser(){
|
| + TestParserT test;
|
| +
|
| + UBool pass = TRUE;
|
| + pass = pass && test ("$s = ' ' ? 50%;");
|
| + pass = pass && test("$x = ($var {1,2}) 3%;"); // legal
|
| + pass = pass && test("$x = $var {1,2} 3% | b 4%;"); // legal
|
| + pass = pass && test("$x = $var {1,2} 3%;"); // legal
|
| + pass = pass && test("$m = $c ? 2% 4% | $r 5% | $n 25%;"); // legal
|
| + pass = pass && test("$a = b ? 2% | c 5%;"); // legal
|
| + pass = pass && test("$x = A B 5% C 10% | D;", 8, FALSE); // illegal 5%
|
| + pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc
|
| + pass = pass && test("$x = (b 5%) (c 6%);"); // legal
|
| + pass = pass && test("$x = (b 5%) c 6%;", 13, FALSE); // illegal 6%
|
| + pass = pass && test("$x = b 5% (c 6%);", 9, FALSE); // illegal (c 6%)
|
| + pass = pass && test("$x = b 5% c 6%;", 9, FALSE); // illegal c 6%
|
| + pass = pass && test("$x = b 5%;"); // legal
|
| + pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc
|
| + pass = pass && test("$x = a | b | c 4% | d 5%;"); // legal
|
| + pass = pass && test("$s = ' ' ? 50% abc;"); // legal
|
| + pass = pass && test("$s = a | c d | e f;"); // legal
|
| + pass = pass && test( "$z = q 0% | p 1% | r 100%;"); // legal How to check parsed tree??
|
| +
|
| + pass = pass && test("$s = ' ' ? 50%;");
|
| + pass = pass && test("$relationList = '<' | '<<' | ';' | '<<<' | ',' | '=';");
|
| + pass = pass && test("$p1 = ($string $s '|' $s)? 25%;");
|
| + pass = pass && test("$p2 = (\\\\ $s $string $s)? 25%;");
|
| + pass = pass && test("$rel2 = $p1 $string $s $p2;");
|
| + pass = pass && test("$relation = $relationList $s ($rel1 | $rel2) $crlf;");
|
| + pass = pass && test("$command = $commandList $crlf;");
|
| + pass = pass && test("$reset = '&' $s ($beforeList $s)? 10% ($positionList 100% | $string 10%) $crlf;");
|
| + pass = pass && test("$mostRules = $command 1% | $reset 5% | $relation 25%;");
|
| + pass = pass && test("$root = $command{0,5} $reset $mostRules{1,20};");
|
| +
|
| + const char collationBNF[] =
|
| + "$s = ' '? 50%;"
|
| + "$crlf = '\r\n';"
|
| +
|
| + "$alternateOptions = non'-'ignorable | shifted;"
|
| + "$onoff = on | off;"
|
| + "$caseFirstOptions = off | upper | lower;"
|
| + "$strengthOptions = '1' | '2' | '3' | '4' | 'I';"
|
| + "$commandList = '['"
|
| + " ( alternate ' ' $alternateOptions"
|
| + " | backwards ' 2'"
|
| + " | normalization ' ' $onoff "
|
| + " | caseLevel ' ' $onoff "
|
| + " | hiraganaQ ' ' $onoff"
|
| + " | caseFirst ' ' $caseFirstOptions"
|
| + " | strength ' ' $strengthOptions"
|
| + " ) ']';"
|
| + "$command = $commandList $crlf;"
|
| +
|
| + "$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;"
|
| + "$allTypes = variable | regular | implicit | trailing | $ignorableTypes;"
|
| + "$positionList = '[' (first | last) ' ' $allTypes ']';"
|
| +
|
| + "$beforeList = '[before ' ('1' | '2' | '3') ']';"
|
| +
|
| + "$relationList = ("
|
| + " '<'"
|
| + " | '<<'"
|
| + " | ';'"
|
| + " | '<<<'"
|
| + " | ','"
|
| + " | '='"
|
| + ");"
|
| + "$string = $magic;"
|
| + "$rel1 = '[variable top]' $s;"
|
| + "$p1 = ($string $s '|' $s)? 25%;"
|
| + "$p2 = (\\\\ $s $string $s)? 25%;"
|
| + "$rel2 = $p1 $string $s $p2;"
|
| + "$relation = $relationList $s ($rel1 | $rel2) $crlf;"
|
| +
|
| + "$reset = '&' $s ($beforeList $s)? 10% ($positionList 1% | $string 10%) $crlf;"
|
| + "$mostRules = $command 1% | $reset 5% | $relation 25%;"
|
| + "$root = $command{0,5} $reset $mostRules{1,20};"
|
| + ;
|
| +
|
| + pass = pass && test(collationBNF);
|
| +
|
| +
|
| + return pass;
|
| +}
|
| +
|
| +static UBool TestMorph(){
|
| + srand((unsigned)time( NULL ));
|
| +
|
| + Alternation * alt = new Alternation();
|
| +
|
| + (*alt)
|
| + .append(new Literal("a")).append(new Literal("b")).append(new Literal("c"))
|
| + .append(new Literal("d")).append(new Literal("e")).append(new Literal("f"))
|
| + .append(new Literal("g")).append(new Literal("h")).append(new Literal("i"))
|
| + .append(new Literal("j")).append(new Literal("k")).append(new Literal("l"))
|
| + .append(new Literal("m")).append(new Literal("n")).append(new Literal("o"))
|
| + ;
|
| +
|
| + Repeat * rep = new Repeat( alt ,5,5 );
|
| + Morph m( *rep);
|
| +
|
| +// DUMP_R(TestMorph,(*rep),20);
|
| + DUMP_R(TestMorph,m,100);
|
| +
|
| + return FALSE;
|
| +}
|
| +
|
| +#endif
|
| +
|
| +static UBool TestLanguageGenerator(){
|
| + //LanguageGenerator g;
|
| + //const char *const s = "$s = p 0% | q 1%;";
|
| + //g.parseBNF(s, "$s");
|
| + UBool pass;
|
| + //= strcmp("q", g.next()) == 0;
|
| +
|
| + const char *const def =
|
| + //"$a = $b;"
|
| + //"$b = $c;"
|
| + //"$c = $t;"
|
| + //"$t = abc $z{1,2};"
|
| + //"$k = a | b | c | d | e | f | g ;"
|
| + //"$z = q 0% | p 1% | r 1%;"
|
| + "$x = a ? 0%;"
|
| + ; // end of string
|
| +// const char * s = "abczz";
|
| +//
|
| +//
|
| + LanguageGenerator g;
|
| + pass = g.parseBNF(def, "$x",TRUE);
|
| +//// LanguageGenerator g(collationBNF, "$root", "$magic", new MagicNode());
|
| +//
|
| + if (pass != LanguageGenerator::OK) return FALSE;
|
| +
|
| + DUMP_R(TestLanguageGenerator, g, 20);
|
| + return pass;
|
| +
|
| + ////UBool pass = strcmp(s,r) == 0;
|
| +
|
| + //if (pass){
|
| + // printf("TestRandomLanguageGenerator passed.\n");
|
| + //} else {
|
| + // printf("TestRandomLanguageGenerator FAILED!!!\n");
|
| + //}
|
| + //return pass;
|
| +}
|
| +
|
| +void TestWbnf(void){
|
| + srand((unsigned)time( NULL ));
|
| +
|
| + //CALL(TestLiteral);
|
| + //CALL(TestSequence);
|
| + //CALL(TestSymbolTable);
|
| + //CALL(TestVariable);
|
| +
|
| + //TestRepeat();
|
| + //TestAlternation();
|
| + //TestMorph();
|
| +
|
| + //TestQuote();
|
| + //TestBuffer();
|
| + //TestWeightedRand();
|
| +
|
| + //CALL(TestScanner);
|
| + //CALL(TestParser);
|
| + CALL(TestLanguageGenerator);
|
| +}
|
| +
|
|
|
| Property changes on: icu46/source/test/intltest/wbnf.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|