OLD | NEW |
| (Empty) |
1 /* | |
2 ****************************************************************************** | |
3 * Copyright (C) 2005, International Business Machines Corporation and * | |
4 * others. All Rights Reserved. * | |
5 ****************************************************************************** | |
6 */ | |
7 /* | |
8 WBNF, Weighted BNF, is an extended BNF. The main difference between WBNF | |
9 and standard BNF is that WBNF accepts weights for its alternation items. | |
10 A weight specifies the likelihood that its item will be selected. | |
11 | |
12 The purpose of WBNF is to help generate a random string from a given grammar | |
13 which can be described with standard BNF. The introduction of 'weight' | |
14 is to guide the generator to give the specific parts different chances to be | |
15 generated. | |
16 | |
17 Usually, the user gives LanguageGenerator the grammar description in WBNF, | |
18 then LanguageGenerator will generate a random string on every next() call. | |
19 The return code of parseBNF() can help user to determine the error, | |
20 either in the grammar description or in the WBNF parser itself. | |
21 | |
22 | |
23 The grammar of WBNF itself can be described in standard BNF, | |
24 | |
25 escaping = _single character with a leading back slash, either inside
or outside quoting_ | |
26 quoting = _quoted with a pair of single quotation marks_ | |
27 string = string alphabet | string digit | string quoting | string e
scaping | | |
28 alphabet | quoting | escaping | |
29 alphabet = | |
30 digit = | |
31 integer = integer digit | digit | |
32 weight = integer % | |
33 weight-list = weight-list weight | weight | |
34 var = var alphabet | var digit | $ alphabet | |
35 | |
36 var-defs = var-defs var-def | var-def | |
37 var-def = var '=' definition; | |
38 | |
39 alternation = alternation '|' alt-item | alt-item | |
40 alt-item = sequence | sequence weight | |
41 | |
42 sequence = sequence modified | modified | |
43 | |
44 modified = core | morph | quote | repeat | |
45 morph = modified ~ | |
46 quote = modified @ | |
47 repeat = modified quantifier | modified quantifier weight-list | |
48 quantifier = ? | * | + | { integer , integer} | {integer, } | {integer} | |
49 | |
50 core = var | string | '(' definition ')' | |
51 | |
52 definition = core | modified | sequence | alternation | |
53 definition = alternation | |
54 | |
55 Remarks: | |
56 o Following characters are literals in preceding definition | |
57 but are syntax symbols in WBNF | |
58 | |
59 % $ ~ @ ? * + { } , | |
60 | |
61 o Following characters are syntax symbols in preceding definition | |
62 (space) concatenation operation, or separators to increase readability | |
63 = definition | |
64 | selection operation | |
65 ( ) precedence select | |
66 ' ' override special-character to plain character | |
67 | |
68 o the definitions of 'escaping' and 'quoting' above are descriptive text, not BNF | |
69 o infinite is actually a predefined value PSEUDO_INFINIT defined in this file
| |
70 o if a weight is not present in 'alt-item' or 'repeat', | |
71 a default weight DEFAULT_WEIGHT defined in this file is used | |
72 | |
73 o * == {0, } | |
74 + == {1, } | |
75 ? == {0, 1} | |
76 | |
77 o the weight-list for repeat assigns the weights for repeat items one by one | |
78 | |
79 demo{1,3} 30% 40% 100% == (demo)30% | (demodemo)40% | (demodemodemo)100% | |
80 | |
81 For more explanation of the weight-list, please see the LIMITATION of the
grammar | |
82 | |
83 o but the weight-list for question mark has different meaning | |
84 | |
85 demo ? 30% != demo{0,1} 30% 100% | |
86 demo ? 30% == demo{0,1} 70% 30% | |
87 | |
88 the 70% is calculated from (DEFAULT_WEIGHT - weight) | |
89 | |
90 | |
91 Known LIMITATION of the grammar | |
92 For 'repeat', the parser consumes as many weights as possible at one time, | |
93 discarding superfluous weights if there are too many and | |
94 filling in missing weights with the default weight if there are too few. | |
95 This behavior means the following definitions are equivalent | |
96 | |
97 demo{1,3} 30% 40% 100% | |
98 demo{1,3} 30% 40% 100% 50% | |
99 demo{1,3} 30% 40% | |
100 | |
101 This behavior will cause a little confusion when defining an alternation | |
102 | |
103 demo{1,3} 30% 40% 100% 50% | show 20% | |
104 | |
105 is interpreted as | |
106 | |
107 (demo{1,3} 30% 40% 100%) 100% | show 20% | |
108 | |
109 not | |
110 | |
111 (demo{1,3} 30% 40% 100%) 50% | show 20% | |
112 | |
113 to get an expected definition, please use parentheses. | |
114 | |
115 Known LIMITATION of the current implementation | |
116 Due to the well-known pointer aliasing problem, the current parser will | |
117 crash if the definition looks like | |
118 | |
119 $a = demo; | |
120 $b = $a; | |
121 $c = $a; | |
122 or | |
123 $a = demo; | |
124 $b = $a $a; | |
125 or | |
126 $a = demo; | |
127 $b = $b $a; | |
128 | |
129 The crash occurs at a delete operation in a destructor or other memory-release | |
130 code. Several plans are at hand to fix the problem: use a smart pointer with | |
131 reference counting, or use a central memory management solution. For now, it | |
132 works well with the collation monkey test, which is the only user of WBNF. | |
133 */ | |
134 | |
135 #ifndef _WBNF | |
136 #define _WBNF | |
137 | |
138 #include "unicode/utypes.h" | |
139 | |
/* Weight used when an 'alt-item' or 'repeat' carries no explicit weight.
   The header comment also uses it as the base for '?':
   the complementary weight is (DEFAULT_WEIGHT - weight). */
140 const int DEFAULT_WEIGHT = 100; | |
/* Stand-in value for "infinite" in the grammar description above.
   NOTE(review): presumably the upper bound for open-ended quantifiers
   such as '*', '+' and '{n, }' -- confirm against the parser. */
141 const int PSEUDO_INFINIT = 200; | |
142 | |
/* Implementation type, only forward-declared in this header. */
143 class LanguageGenerator_impl; | |
144 | |
/*
 * Public entry point for WBNF: the caller hands over a grammar description
 * with parseBNF(), then obtains a random string on every next() call.
 *
 * NOTE(review): the class stores a raw pointer and declares a destructor but
 * no copy constructor or copy assignment. If the destructor releases lang_gen
 * (not visible in this header), copying an instance would release it twice --
 * confirm, and consider disabling copying.
 */
145 class LanguageGenerator{ | |
/* Pointer to the implementation object (type is opaque here). */
146 LanguageGenerator_impl * lang_gen; | |
147 public: | |
/* Return codes of parseBNF(). Per the header comment they let the caller
   tell an error in the grammar description from one in the parser itself;
   the meaning of each value is not spelled out in this header. */
148 enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE}; | |
149 LanguageGenerator(); | |
150 ~LanguageGenerator(); | |
/* Parses the WBNF text in bnf_definition. top_node is an input string;
   presumably it names the variable generation starts from -- confirm.
   debug defaults to FALSE; its effect is not visible here. */
151 PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *c
onst top_node/*in*/, UBool debug=FALSE); | |
152 const char *next(); /* Return a null-terminated c-string. The buffer is owne
d by callee. */ | |
153 }; | |
154 | |
/* Declared here, defined elsewhere.
   NOTE(review): presumably a self-test entry point for the WBNF code --
   confirm against the implementation file. */
155 void TestWbnf(void); | |
156 | |
157 #endif /* _WBNF */ | |
OLD | NEW |