OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ****************************************************************************** |
| 3 * Copyright (C) 2005, International Business Machines Corporation and * |
| 4 * others. All Rights Reserved. * |
| 5 ****************************************************************************** |
| 6 */ |
| 7 /* |
| 8 WBNF, Weighted BNF, is an extended BNF. The main difference between WBNF |
| 9 and standard BNF is that WBNF accepts weights for its alternation items. |
| 10 The weight specifies the chance that the item will be selected. |
| 11 |
| 12 The purpose of WBNF is to help generate a random string from a given grammar |
| 13 which can be described with standard BNF. The introduction of 'weight' |
| 14 is to guide the generator to give the specific parts different chances to be |
| 15 generated. |
| 16 |
| 17 Usually, the user gives LanguageGenerator the grammar description in WBNF, |
| 18 then LanguageGenerator will generate a random string on every next() call. |
| 19 The return code of parseBNF() can help the user determine where an error is, |
| 20 either in the grammar description or in the WBNF parser itself. |
| 21 |
| 22 |
| 23 The grammar of WBNF itself can be described in standard BNF, |
| 24 |
| 25 escaping = _single character with a leading backslash, either inside or outside quoting_ |
| 26 quoting = _quoted with a pair of single quotation marks_ |
| 27 string = string alphabet | string digit | string quoting | string escaping | |
| 28 alphabet | quoting | escaping |
| 29 alphabet = |
| 30 digit = |
| 31 integer = integer digit | digit |
| 32 weight = integer % |
| 33 weight-list = weight-list weight | weight |
| 34 var = var alphabet | var digit | $ alphabet |
| 35 |
| 36 var-defs = var-defs var-def | var-def |
| 37 var-def = var '=' definition; |
| 38 |
| 39 alternation = alternation '|' alt-item | alt-item |
| 40 alt-item = sequence | sequence weight |
| 41 |
| 42 sequence = sequence modified | modified |
| 43 |
| 44 modified = core | morph | quote | repeat |
| 45 morph = modified ~ |
| 46 quote = modified @ |
| 47 repeat = modified quantifier | modified quantifier weight-list |
| 48 quantifier = ? | * | + | { integer , integer} | {integer, } | {integer} |
| 49 |
| 50 core = var | string | '(' definition ')' |
| 51 |
| 52 definition = core | modified | sequence | alternation |
| 53 definition = alternation |
| 54 |
| 55 Remarks: |
| 56 o The following characters are literals in the preceding definition |
| 57 but are syntax symbols in WBNF |
| 58 |
| 59 % $ ~ @ ? * + { } , |
| 60 |
| 61 o The following characters are syntax symbols in the preceding definition |
| 62 (space) concatenation operation, or separator to increase readability |
| 63 = definition |
| 64 | selection operation |
| 65 ( ) precedence select |
| 66 ' ' override special-character to plain character |
| 67 |
| 68 o the definitions of 'escaping' and 'quoting' are given as descriptive text in the preceding definition |
| 69 o infinity is actually a predefined value, PSEUDO_INFINIT, defined in this file |
| 70 o if a weight is not present in 'alt-item' or 'repeat', |
| 71 the default weight DEFAULT_WEIGHT defined in this file is used |
| 72 |
| 73 o * == {0, } |
| 74 + == {1, } |
| 75 ? == {0, 1} |
| 76 |
| 77 o the weight-list for repeat assigns the weights to the repeat items one by one |
| 78 |
| 79 demo{1,3} 30% 40% 100% == (demo)30% | (demodemo)40% | (demodemodemo)100% |
| 80 |
| 81 For more explanation of the weight-list, please see the LIMITATION of the grammar |
| 82 |
| 83 o but the weight-list for question mark has different meaning |
| 84 |
| 85 demo ? 30% != demo{0,1} 30% 100% |
| 86 demo ? 30% == demo{0,1} 70% 30% |
| 87 |
| 88 the 70% is calculated from (DEFAULT_WEIGHT - weight) |
| 89 |
| 90 |
| 91 Known LIMITATION of the grammar |
| 92 For 'repeat', the parser will consume as many weights as possible at one time, |
| 93 discarding superfluous weights if there are too many, and |
| 94 filling in missing weights with the default weight if there are too few. |
| 95 This behavior means the following definitions are equivalent |
| 96 |
| 97 demo{1,3} 30% 40% 100% |
| 98 demo{1,3} 30% 40% 100% 50% |
| 99 demo{1,3} 30% 40% |
| 100 |
| 101 This behavior can cause some confusion when defining an alternation |
| 102 |
| 103 demo{1,3} 30% 40% 100% 50% | show 20% |
| 104 |
| 105 is interpreted as |
| 106 |
| 107 (demo{1,3} 30% 40% 100%) 100% | show 20% |
| 108 |
| 109 not |
| 110 |
| 111 (demo{1,3} 30% 40% 100%) 50% | show 20% |
| 112 |
| 113 To get the intended definition, please use parentheses. |
| 114 |
| 115 Known LIMITATION of the current implementation |
| 116 Due to the well-known pointer aliasing problem, the current parser will |
| 117 crash if the definition looks like |
| 118 |
| 119 $a = demo; |
| 120 $b = $a; |
| 121 $c = $a; |
| 122 or |
| 123 $a = demo; |
| 124 $b = $a $a; |
| 125 or |
| 126 $a = demo; |
| 127 $b = $b $a; |
| 128 |
| 129 The crash will occur at a delete operation in a destructor or in other memory release code. |
| 130 Several plans are on hand to fix the problem: use a smart pointer with a reference count, |
| 131 or use a central memory management solution. For now, it works well with the collation |
| 132 monkey test, which is the only user of WBNF. |
| 133 */ |
| 134 |
| 135 #ifndef _WBNF |
| 136 #define _WBNF |
| 137 |
| 138 #include "unicode/utypes.h" |
| 139 |
| 140 const int DEFAULT_WEIGHT = 100; /* weight used when an 'alt-item' or 'repeat' does not specify one (see the file header) */ |
| 141 const int PSEUDO_INFINIT = 200; /* finite value standing in for 'infinite' in the grammar (see the file header) */ |
| 142 |
| 143 class LanguageGenerator_impl; |
| 144 |
| 145 class LanguageGenerator{ /* Generates random strings from a grammar described in WBNF (see the file header). */ |
| 146 LanguageGenerator_impl * lang_gen; /* pointer to the implementation class, which is only forward-declared in this header */ |
| 147 public: |
| 148 enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE}; /* result codes returned by parseBNF(); their individual meanings are not documented in this header */ |
| 149 LanguageGenerator(); |
| 150 ~LanguageGenerator(); |
| 151 PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *c
onst top_node/*in*/, UBool debug=FALSE); /* Takes the grammar description in WBNF (bnf_definition). NOTE(review): top_node presumably names the start variable - confirm. debug defaults to FALSE. */ |
| 152 const char *next(); /* Returns a null-terminated C string; per the file header, a random string is generated
on every call. The buffer is owned by the callee. */ |
| 153 }; |
| 154 |
| 155 void TestWbnf(void); /* declared only; defined elsewhere. NOTE(review): presumably the WBNF test entry point - confirm */ |
| 156 |
| 157 #endif /* _WBNF */ |
OLD | NEW |