OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright (C) 2013 Google Inc. All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are | |
6 * met: | |
7 * | |
8 * * Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * * Redistributions in binary form must reproduce the above | |
11 * copyright notice, this list of conditions and the following disclaimer | |
12 * in the documentation and/or other materials provided with the | |
13 * distribution. | |
14 * * Neither the name of Google Inc. nor the names of its | |
15 * contributors may be used to endorse or promote products derived from | |
16 * this software without specific prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 #ifndef BidiTestHarness_h | |
32 #define BidiTestHarness_h | |
33 | |
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <istream>
#include <map>
#include <string>
#include <vector>
37 | |
38 // FIXME: We don't have any business owning this code. We should try to | |
39 // upstream this to unicode.org if possible (for other implementations to use). | |
40 // Unicode.org provides a reference implementation, including parser: | |
41 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c | |
42 // But it, like the other implementations I've found, is rather tied to | |
43 // the algorithms it is testing. This file seeks to only implement the parser bits. | |
44 | |
45 // Other C/C++ implementations of this parser: | |
46 // https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c | |
47 // http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp | |
48 // Both of those are too tied to their respective projects to be of use to Blink. | |
49 | |
50 // There are non-C implementations to parse BidiTest.txt as well, including: | |
51 // https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb | |
52 | |
53 namespace bidi_test { | |
54 | |
// Paragraph directions a test case can be run with. Each enumerator occupies
// its own bit so that several directions can be OR-ed together into one mask.
enum ParagraphDirection {
    DirectionAutoLTR = 1 << 0,
    DirectionLTR = 1 << 1,
    DirectionRTL = 1 << 2
};

// Mask with every ParagraphDirection bit set; the largest valid mask value.
const int kMaxParagraphDirection = DirectionRTL | DirectionLTR | DirectionAutoLTR;
61 | |
62 // For error printing: | |
63 std::string nameFromParagraphDirection(ParagraphDirection paragraphDirection) | |
64 { | |
65 switch (paragraphDirection) { | |
66 case bidi_test::DirectionAutoLTR: | |
67 return "Auto-LTR"; | |
68 case bidi_test::DirectionLTR: | |
69 return "LTR"; | |
70 case bidi_test::DirectionRTL: | |
71 return "RTL"; | |
72 } | |
73 } | |
74 | |
// Parses a bidi test stream and hands the parsed test cases to a Runner.
//
// The harness only keeps a reference to the runner passed to the constructor,
// so that runner has to stay alive for as long as the harness is used.
template<typename Runner>
class Harness {
public:
    Harness(Runner& runner) : m_runner(runner) { }

    // Consumes |bidiTestFile|; defined out of line, after the class.
    void parse(std::istream& bidiTestFile);

private:
    Runner& m_runner;
};
87 | |
88 // This trim() example comes from: | |
89 // http://stackoverflow.com/questions/216823/whats-the-best-way-to-trim-stdstring | |
90 // We could use boost::trim, but no other part of Blink uses boost yet. | |
91 | |
// Strips leading whitespace from |s| in place and returns a reference to |s|.
inline std::string& ltrim(std::string& s)
{
    // The predicate takes unsigned char on purpose: passing a negative plain
    // char to std::isspace() is undefined behavior.
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char c) {
        return !std::isspace(c);
    }));
    return s;
}
97 | |
// Strips trailing whitespace from |s| in place and returns a reference to |s|.
inline std::string& rtrim(std::string& s)
{
    // Search backwards for the last non-space character; base() converts the
    // reverse iterator into the forward position just past that character.
    // The predicate takes unsigned char because passing a negative plain char
    // to std::isspace() is undefined behavior.
    s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char c) {
        return !std::isspace(c);
    }).base(), s.end());
    return s;
}
103 | |
104 inline std::string& trim(std::string& s) | |
105 { | |
106 return ltrim(rtrim(s)); | |
107 } | |
108 | |
// Splits |str| on runs of spaces and tabs and stores the non-empty tokens in
// |strings|, replacing whatever it held before.
//
// Always returns true; the bool result only exists so this function has the
// same shape as the other parse* helpers.
static bool parseStringList(const std::string& str, std::vector<std::string>& strings)
{
    const std::string separators(" \t");
    strings.clear();

    std::string::size_type tokenStart = str.find_first_not_of(separators);
    while (tokenStart != std::string::npos) {
        const std::string::size_type tokenEnd = str.find_first_of(separators, tokenStart);
        // The final token has no separator after it; take the rest of the string.
        const std::string::size_type tokenLength =
            tokenEnd == std::string::npos ? std::string::npos : tokenEnd - tokenStart;
        strings.push_back(str.substr(tokenStart, tokenLength));
        // Searching from npos finds nothing, which ends the loop.
        tokenStart = str.find_first_not_of(separators, tokenEnd);
    }
    return true;
}
123 | |
124 static bool parseIntList(std::string str, std::vector<int>& ints) | |
125 { | |
126 ints.clear(); | |
127 std::vector<std::string> strings; | |
128 if (!parseStringList(str, strings)) | |
129 return false; | |
130 for (int x = 0; x < strings.size(); x++) { | |
131 int i = atoi(strings[x].c_str()); | |
132 ints.push_back(i); | |
133 } | |
134 return true; | |
135 } | |
136 | |
137 static bool parseLevelRule(std::string line, std::vector<int>& levels) | |
Jeffrey Yasskin
2013/10/29 20:03:22
Generally pass strings as const std::string& to av
eseidel
2013/10/29 20:49:44
Done.
| |
138 { | |
139 levels.clear(); | |
140 std::vector<std::string> strings; | |
141 if (!parseStringList(line, strings)) | |
142 return false; | |
143 for (int x = 0; x < strings.size(); x++) { | |
144 const std::string& levelString = strings[x]; | |
145 int i; | |
146 if (levelString == "x") | |
147 i = -1; | |
148 else | |
149 i = atoi(levelString.c_str()); | |
150 levels.push_back(i); | |
151 } | |
152 return true;} | |
153 | |
154 static bool parseReorderRule(std::string line, std::vector<int>& reorder) | |
155 { | |
156 return parseIntList(line, reorder); | |
157 } | |
158 | |
159 static bool parseTestString(std::string line, std::basic_string<UChar>& testStri ng) | |
160 { | |
161 testString.clear(); | |
162 static std::map<std::string, UChar> charClassExamples; | |
163 if (charClassExamples.empty()) { | |
Jeffrey Yasskin
2013/10/29 20:03:22
This isn't thread-safe, which you should comment.
eseidel
2013/10/29 20:49:44
Done.
| |
164 charClassExamples.insert({"L", 0x6c}); // 'l' for L | |
165 charClassExamples.insert({"R", 0x05D0}); // HEBREW ALEF | |
166 charClassExamples.insert({"EN", 0x33}); // '3' for EN | |
167 charClassExamples.insert({"ES", 0x2d}); // '-' for ES | |
168 charClassExamples.insert({"ET", 0x25}); // '%' for ET | |
169 charClassExamples.insert({"AN", 0x0660}); // arabic 0 | |
170 charClassExamples.insert({"CS", 0x2c}); // ',' for CS | |
171 charClassExamples.insert({"B", 0x0A}); // <control-000A> | |
172 charClassExamples.insert({"S", 0x09}); // <control-0009> | |
173 charClassExamples.insert({"WS", 0x20}); // ' ' for WS | |
174 charClassExamples.insert({"ON", 0x3d}); // '=' for ON | |
175 charClassExamples.insert({"NSM", 0x05BF}); // HEBREW POINT RAFE | |
176 charClassExamples.insert({"AL", 0x0608}); // ARABIC RAY | |
177 charClassExamples.insert({"BN", 0x00AD}); // SOFT HYPHEN | |
178 charClassExamples.insert({"LRE", 0x202A}); | |
179 charClassExamples.insert({"RLE", 0x202B}); | |
180 charClassExamples.insert({"PDF", 0x202C}); | |
181 charClassExamples.insert({"LRO", 0x202D}); | |
182 charClassExamples.insert({"RLO", 0x202E}); | |
183 charClassExamples.insert({"LRI", 0x2066}); | |
184 charClassExamples.insert({"RLI", 0x2067}); | |
185 charClassExamples.insert({"FSI", 0x2068}); | |
186 charClassExamples.insert({"PDI", 0x2069}); | |
187 } | |
188 | |
189 std::vector<std::string> charClasses; | |
190 parseStringList(line, charClasses); | |
191 for (int i = 0; i < charClasses.size(); i++) { | |
192 testString.push_back(charClassExamples.find(charClasses[i])->second); | |
193 } | |
194 return true; | |
195 } | |
196 | |
197 static bool parseParagraphDirectionMask(std::string line, int& modeMask) | |
198 { | |
199 | |
200 modeMask = atoi(line.c_str()); | |
Jeffrey Yasskin
2013/10/29 20:03:22
One cool thing you can do in C++ is use bitset<3>
eseidel
2013/10/29 20:49:44
Although these are both awesome suggestions, I'm g
| |
201 return modeMask >= 1 && modeMask <= kMaxParagraphDirection; | |
202 } | |
203 | |
// Reports a line that could not be parsed: prints |lineNumber| and the text of
// |line| to stdout.
static void parseError(const std::string& line, size_t lineNumber)
{
    // Use printf to avoid the expense of std::cout.
    // %zu is the conversion for size_t; %d expects an int and mismatches here.
    std::printf("Parse error, line %zu : %s\n", lineNumber, line.c_str());
}
209 | |
210 template<class Runner> | |
211 void Harness<Runner>::parse(std::istream& bidiTestFile) | |
212 { | |
213 std::string levelsPrefix("@Levels"); | |
Jeffrey Yasskin
2013/10/29 20:03:22
Make these const if you're not planning to change
eseidel
2013/10/29 20:49:44
Made them "static const".
| |
214 std::string reorderPrefix("@Reorder"); | |
215 | |
216 // FIXME: UChar is an ICU type and cheating a bit to use here. | |
217 // uint16_t might be more portable. | |
218 std::basic_string<UChar> testString; | |
219 std::vector<int> levels; | |
220 std::vector<int> reorder; | |
221 int paragraphDirectionMask; | |
222 | |
223 std::string originalLine; | |
224 std::string line; | |
225 size_t lineNumber = 0; | |
226 while (std::getline(bidiTestFile, originalLine)) { | |
227 lineNumber++; | |
228 line = originalLine; | |
Jeffrey Yasskin
2013/10/29 20:03:22
I'd usually declare variables where they're initia
eseidel
2013/10/29 20:49:44
That makes an extra copy of line though, no?
Jeffrey Yasskin
2013/10/29 23:16:03
It shouldn't make any more copies than you have in
| |
229 size_t commentStart = originalLine.find_first_of('#'); | |
230 if (commentStart != std::string::npos) | |
231 line = line.substr(0, commentStart); | |
232 line = trim(line); | |
233 if (line.empty()) | |
234 continue; | |
235 if (line[0] == '@') { | |
236 if (!line.find(levelsPrefix)) { | |
237 if (!parseLevelRule(line.substr(levelsPrefix.length() + 1), leve ls)) | |
238 parseError(originalLine, lineNumber); | |
239 continue; | |
240 } | |
241 if (!line.find(reorderPrefix)) { | |
242 if (!parseReorderRule(line.substr(reorderPrefix.length() + 1), r eorder)) | |
243 parseError(originalLine, lineNumber); | |
244 continue; | |
245 } | |
246 } else { | |
247 // Assume it's a data line. | |
248 size_t seperatorIndex = line.find_first_of(';'); | |
249 if (seperatorIndex == std::string::npos) { | |
250 parseError(originalLine, lineNumber); | |
251 continue; | |
252 } | |
253 if (!parseTestString(line.substr(0, seperatorIndex), testString)) { | |
Jeffrey Yasskin
2013/10/29 20:03:22
sp: seperator
eseidel
2013/10/29 20:49:44
I'm considering taking 3rd grade again as a corres
| |
254 parseError(originalLine, lineNumber); | |
255 continue; | |
256 } | |
257 if (!parseParagraphDirectionMask(line.substr(seperatorIndex + 1), pa ragraphDirectionMask)) { | |
258 parseError(originalLine, lineNumber); | |
259 continue; | |
260 } | |
261 | |
262 if (paragraphDirectionMask & DirectionAutoLTR) | |
Jeffrey Yasskin
2013/10/29 20:03:22
With a bitset, this becomes paragraphDirectionMask
| |
263 m_runner.runTest(testString, reorder, levels, DirectionAutoLTR, originalLine, lineNumber); | |
Jeffrey Yasskin
2013/10/29 20:03:22
It doesn't look like |levels| has a value coming i
eseidel
2013/10/29 20:49:44
It's confusing. "levels" and "order" are both "gl
Jeffrey Yasskin
2013/10/29 23:16:03
Oh, right, I just missed the assignment above beca
| |
264 if (paragraphDirectionMask & DirectionLTR) | |
265 m_runner.runTest(testString, reorder, levels, DirectionLTR, orig inalLine, lineNumber); | |
266 if (paragraphDirectionMask & DirectionRTL) | |
267 m_runner.runTest(testString, reorder, levels, DirectionRTL, orig inalLine, lineNumber); | |
268 } | |
269 } | |
270 } | |
271 | |
272 } // namespace bidi_test | |
273 | |
274 #endif // BidiTestHarness_h | |
OLD | NEW |