| Index: source/test/intltest/wbnf.h
|
| diff --git a/source/test/intltest/wbnf.h b/source/test/intltest/wbnf.h
|
| deleted file mode 100644
|
| index 7b76f8f2daa111e6450246da6d3aa8629b865b91..0000000000000000000000000000000000000000
|
| --- a/source/test/intltest/wbnf.h
|
| +++ /dev/null
|
| @@ -1,157 +0,0 @@
|
| -/*
|
| - ******************************************************************************
|
| - * Copyright (C) 2005, International Business Machines Corporation and *
|
| - * others. All Rights Reserved. *
|
| - ******************************************************************************
|
| - */
|
| -/*
|
| - WBNF, Weighted BNF, is an extend BNF. The most difference between WBNF
|
| - and standard BNF is the WBNF accepts weight for its alternation items.
|
| - The weight specifies the opportunity it will be selected.
|
| -
|
| - The purpose of WBNF is to help generate a random string from a given grammar
|
| - which can be described with standard BNF. The introduction of 'weight'
|
| - is to guide the generator to give the specific parts different chances to be
|
| - generated.
|
| -
|
| - Usually, the user gives LanguageGenerator the grammar description in WBNF,
|
| - then LanguageGenerator will generate a random string on every next() call.
|
| - The return code of parseBNF() can help user to determine the error,
|
| - either in the grammar description or in the WBNF parser itself.
|
| -
|
| -
|
| - The grammar of WBNF itself can be described in standard BNF,
|
| -
|
| - escaping = _single character with a leading back slash, either inside or outside quoting_
|
| - quoting = _quoted with a pair of single quotation marks_
|
| - string = string alphabet | string digit | string quoting | string escaping |
|
| - alphabet | quoting | escaping
|
| - alphabet =
|
| - digit =
|
| - integer = integer digit | digit
|
| - weight = integer %
|
| - weight-list = weight-list weight | weight
|
| - var = var alphabet | var digit | $ alphabet
|
| -
|
| - var-defs = var-defs var-def | var-def
|
| - var-def = var '=' definition;
|
| -
|
| - alternation = alternation '|' alt-item | alt-item
|
| - alt-item = sequence | sequence weight
|
| -
|
| - sequence = sequence modified | modified
|
| -
|
| - modified = core | morph | quote | repeat
|
| - morph = modified ~
|
| - quote = modified @
|
| - repeat = modified quantifier | modified quantifier weight-list
|
| - quantifier = ? | * | + | { integer , integer} | {integer, } | {integer}
|
| -
|
| - core = var | string | '(' definition ')'
|
| -
|
| - definition = core | modified | sequence | alternation
|
| - definition = alternation
|
| -
|
| - Remarks:
|
| - o Following characters are literals in preceding definition
|
| - but are syntax symbols in WBNF
|
| -
|
| - % $ ~ @ ? * + { } ,
|
| -
|
| - o Following character are syntax symbols in preceding definition
|
| - (sapce) contact operation, or separators to increase readability
|
| - = definition
|
| - | selection operation
|
| - ( ) precedence select
|
| - ' ' override special-character to plain character
|
| -
|
| - o the definition of 'escaping' and 'quoting' are preceding definition text
|
| - o infinite is actually a predefine value PSEUDO_INFINIT defined in this file
|
| - o if weight is not presented in "alt-item' and 'repeat',
|
| - a default weight DEFAULT_WEIGHT defined in this file is used
|
| -
|
| - o * == {0, }
|
| - + == {1, }
|
| - ? == {0, 1}
|
| -
|
| - o the weight-list for repeat assigns the weights for repeat itmes one by one
|
| -
|
| - demo{1,3} 30% 40% 100% == (demo)30% | (demodemo)40% | (demodemodemo)100%
|
| -
|
| - To find more explain of the weight-list, please see the LIMITATION of the grammar
|
| -
|
| - o but the weight-list for question mark has different meaning
|
| -
|
| - demo ? 30% != demo{0,1} 30% 100%
|
| - demo ? 30% == demo{0,1} 70% 30%
|
| -
|
| - the 70% is calculated from (DEFAULT_WEIGHT - weight)
|
| -
|
| -
|
| - Known LIMITATION of the grammar
|
| - For 'repeat', the parser will eat up as much as possible weights at one time,
|
| - discard superfluous weights if it is too much,
|
| - fill insufficient weights with default weight if it is too less.
|
| - This behavior means following definitions are equal
|
| -
|
| - demo{1,3} 30% 40% 100%
|
| - demo{1,3} 30% 40% 100% 50%
|
| - demo{1,3} 30% 40%
|
| -
|
| - This behavior will cause a little confusion when defining an alternation
|
| -
|
| - demo{1,3} 30% 40% 100% 50% | show 20%
|
| -
|
| - is interpreted as
|
| -
|
| - (demo{1,3} 30% 40% 100%) 100% | show 20%
|
| -
|
| - not
|
| -
|
| - (demo{1,3} 30% 40% 100%) 50% | show 20%
|
| -
|
| - to get an expected definition, please use parentheses.
|
| -
|
| - Known LIMITATION of current implement
|
| - Due to the well known point alias problem, current Parser will be effectively
|
| - crashed if the definition looks like
|
| -
|
| - $a = demo;
|
| - $b = $a;
|
| - $c = $a;
|
| - or
|
| - $a = demo;
|
| - $b = $a $a;
|
| - or
|
| - $a = demo;
|
| - $b = $b $a;
|
| -
|
| - The crash will occur at delete operation in destructor or other memory release code.
|
| - Several plans are on hard to fix the problem. Use a smart point with reference count,
|
| - or use a central memory management solution. But now, it works well with collation
|
| - monkey test, which is the only user for WBNF.
|
| -*/
|
| -
|
| -#ifndef _WBNF
|
| -#define _WBNF
|
| -
|
| -#include "unicode/utypes.h"
|
| -
|
| -const int DEFAULT_WEIGHT = 100;
|
| -const int PSEUDO_INFINIT = 200;
|
| -
|
| -class LanguageGenerator_impl;
|
| -
|
| -class LanguageGenerator{
|
| - LanguageGenerator_impl * lang_gen;
|
| -public:
|
| - enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE};
|
| - LanguageGenerator();
|
| - ~LanguageGenerator();
|
| - PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug=FALSE);
|
| - const char *next(); /* Return a null-terminated c-string. The buffer is owned by callee. */
|
| -};
|
| -
|
| -void TestWbnf(void);
|
| -
|
| -#endif /* _WBNF */
|
|
|