| Index: icu46/source/test/intltest/wbnf.h
|
| ===================================================================
|
| --- icu46/source/test/intltest/wbnf.h (revision 0)
|
| +++ icu46/source/test/intltest/wbnf.h (revision 0)
|
| @@ -0,0 +1,157 @@
|
| +/*
|
| + ******************************************************************************
|
| + * Copyright (C) 2005, International Business Machines Corporation and *
|
| + * others. All Rights Reserved. *
|
| + ******************************************************************************
|
| + */
|
| +/*
|
| + WBNF, Weighted BNF, is an extended BNF. The main difference between WBNF
|
| + and standard BNF is that WBNF accepts weights for its alternation items.
|
| + A weight specifies the likelihood that its item will be selected.
|
| +
|
| + The purpose of WBNF is to help generate a random string from a given grammar
|
| + which can be described with standard BNF. The introduction of 'weight'
|
| + is to guide the generator to give the specific parts different chances to be
|
| + generated.
|
| +
|
| + Usually, the user gives LanguageGenerator the grammar description in WBNF,
|
| + then LanguageGenerator will generate a random string on every next() call.
|
| + The return code of parseBNF() can help user to determine the error,
|
| + either in the grammar description or in the WBNF parser itself.
|
| +
|
| +
|
| + The grammar of WBNF itself can be described in standard BNF,
|
| +
|
| + escaping = _single character with a leading back slash, either inside or outside quoting_
|
| + quoting = _quoted with a pair of single quotation marks_
|
| + string = string alphabet | string digit | string quoting | string escaping |
|
| + alphabet | quoting | escaping
|
| + alphabet =
|
| + digit =
|
| + integer = integer digit | digit
|
| + weight = integer %
|
| + weight-list = weight-list weight | weight
|
| + var = var alphabet | var digit | $ alphabet
|
| +
|
| + var-defs = var-defs var-def | var-def
|
| + var-def = var '=' definition;
|
| +
|
| + alternation = alternation '|' alt-item | alt-item
|
| + alt-item = sequence | sequence weight
|
| +
|
| + sequence = sequence modified | modified
|
| +
|
| + modified = core | morph | quote | repeat
|
| + morph = modified ~
|
| + quote = modified @
|
| + repeat = modified quantifier | modified quantifier weight-list
|
| + quantifier = ? | * | + | { integer , integer} | {integer, } | {integer}
|
| +
|
| + core = var | string | '(' definition ')'
|
| +
|
| + definition = core | modified | sequence | alternation
|
| + definition = alternation
|
| +
|
| + Remarks:
|
| + o Following characters are literals in preceding definition
|
| + but are syntax symbols in WBNF
|
| +
|
| + % $ ~ @ ? * + { } ,
|
| +
|
| + o Following characters are syntax symbols in preceding definition
|
| + (space) concatenation operation, or separators to increase readability
|
| + = definition
|
| + | selection operation
|
| + ( ) precedence select
|
| + ' ' override special-character to plain character
|
| +
|
| + o the definition of 'escaping' and 'quoting' are preceding definition text
|
| + o infinity is actually a predefined value, PSEUDO_INFINIT, defined in this file
|
| + o if a weight is not present in 'alt-item' or 'repeat',
|
| + the default weight DEFAULT_WEIGHT defined in this file is used
|
| +
|
| + o * == {0, }
|
| + + == {1, }
|
| + ? == {0, 1}
|
| +
|
| + o the weight-list for repeat assigns the weights for repeat items one by one
|
| +
|
| + demo{1,3} 30% 40% 100% == (demo)30% | (demodemo)40% | (demodemodemo)100%
|
| +
|
| + For more explanation of the weight-list, please see the LIMITATION of the grammar
|
| +
|
| + o but the weight-list for question mark has different meaning
|
| +
|
| + demo ? 30% != demo{0,1} 30% 100%
|
| + demo ? 30% == demo{0,1} 70% 30%
|
| +
|
| + the 70% is calculated from (DEFAULT_WEIGHT - weight)
|
| +
|
| +
|
| + Known LIMITATION of the grammar
|
| + For 'repeat', the parser will consume as many weights as possible at one time,
|
| + discarding superfluous weights if there are too many, and
|
| + filling in missing weights with the default weight if there are too few.
|
| + This behavior means the following definitions are equivalent
|
| +
|
| + demo{1,3} 30% 40% 100%
|
| + demo{1,3} 30% 40% 100% 50%
|
| + demo{1,3} 30% 40%
|
| +
|
| + This behavior will cause a little confusion when defining an alternation
|
| +
|
| + demo{1,3} 30% 40% 100% 50% | show 20%
|
| +
|
| + is interpreted as
|
| +
|
| + (demo{1,3} 30% 40% 100%) 100% | show 20%
|
| +
|
| + not
|
| +
|
| + (demo{1,3} 30% 40% 100%) 50% | show 20%
|
| +
|
| + to get an expected definition, please use parentheses.
|
| +
|
| + Known LIMITATION of the current implementation
|
| + Due to the well-known pointer aliasing problem, the current parser will effectively
|
| + crash if the definition looks like
|
| +
|
| + $a = demo;
|
| + $b = $a;
|
| + $c = $a;
|
| + or
|
| + $a = demo;
|
| + $b = $a $a;
|
| + or
|
| + $a = demo;
|
| + $b = $b $a;
|
| +
|
| + The crash will occur at a delete operation in a destructor or other memory release code.
|
| + Several plans are on hand to fix the problem: use a smart pointer with a reference count,
|
| + or use a central memory management solution. But for now, it works well with the collation
|
| + monkey test, which is the only user of WBNF.
|
| +*/
|
| +
|
| +#ifndef _WBNF
|
| +#define _WBNF
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +const int DEFAULT_WEIGHT = 100; /* Weight used when an alt-item or repeat gives no explicit weight. */
|
| +const int PSEUDO_INFINIT = 200; /* Finite stand-in for "infinite" in the grammar; presumably the open upper bound of {n, }, * and + -- confirm in the implementation. */
|
| +
|
| +class LanguageGenerator_impl;
|
| +
|
| +class LanguageGenerator{ /* Generates random strings from a grammar described in WBNF. */
|
| + LanguageGenerator_impl * lang_gen; /* Pointer to the implementation class, which is only forward-declared in this header. */
|
| +public:
|
| + enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE}; /* Result codes returned by parseBNF(). */
|
| + LanguageGenerator();
|
| + ~LanguageGenerator();
|
| + PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug=FALSE); /* Parse the WBNF text in bnf_definition. NOTE(review): top_node presumably names the start variable and debug presumably enables diagnostics -- confirm in the implementation. */
|
| + const char *next(); /* Generate a random string. Return a null-terminated c-string. The buffer is owned by callee. */
|
| +};
|
| +
|
| +void TestWbnf(void); /* NOTE(review): presumably the self-test entry point for the WBNF code; defined outside this header -- confirm. */
|
| +
|
| +#endif /* _WBNF */
|
|
|
| Property changes on: icu46/source/test/intltest/wbnf.h
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|