Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * Copyright (C) 2013 Google Inc. All rights reserved. | |
| 3 * | |
| 4 * Redistribution and use in source and binary forms, with or without | |
| 5 * modification, are permitted provided that the following conditions are | |
| 6 * met: | |
| 7 * | |
| 8 * * Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * * Redistributions in binary form must reproduce the above | |
| 11 * copyright notice, this list of conditions and the following disclaimer | |
| 12 * in the documentation and/or other materials provided with the | |
| 13 * distribution. | |
| 14 * * Neither the name of Google Inc. nor the names of its | |
| 15 * contributors may be used to endorse or promote products derived from | |
| 16 * this software without specific prior written permission. | |
| 17 * | |
| 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 */ | |
| 30 | |
| 31 #ifndef BidiTestHarness_h | |
| 32 #define BidiTestHarness_h | |
| 33 | |
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <map>
#include <stdio.h>
#include <string>
#include <vector>
| 38 | |
| 39 // FIXME: We don't have any business owning this code. We should try to | |
| 40 // upstream this to unicode.org if possible (for other implementations to use). | |
| 41 // Unicode.org provides a reference implementation, including a parser: | |
| 42 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c | |
| 43 // But it, like the other implementations I've found, is rather tied to | |
| 44 // the algorithms it is testing. This file seeks to only implement the parser bits. | |
| 45 | |
| 46 // Other C/C++ implementations of this parser: | |
| 47 // https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c | |
| 48 // http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp | |
| 49 // Both of those are too tied to their respective projects to be of use to Blink. | |
| 50 | |
| 51 // There are non-C implementations to parse BidiTest.txt as well, including: | |
| 52 // https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb | |
| 53 | |
| 54 namespace bidi_test { | |
| 55 | |
// Paragraph directions a test case can be run in. Each enumerator is a
// distinct bit so that several directions can be OR-ed together into one mask
// and tested individually with '&'.
enum ParagraphDirection {
    DirectionAutoLTR = 1,
    DirectionLTR = 2,
    DirectionRTL = 4,
};

// Mask with every ParagraphDirection bit set; this is also the largest value a
// valid direction mask can take.
const int kMaxParagraphDirection = DirectionAutoLTR | DirectionLTR | DirectionRTL;
| 62 | |
| 63 // For error printing: | |
| 64 std::string nameFromParagraphDirection(ParagraphDirection paragraphDirection) | |
| 65 { | |
| 66 switch (paragraphDirection) { | |
| 67 case bidi_test::DirectionAutoLTR: | |
| 68 return "Auto-LTR"; | |
| 69 case bidi_test::DirectionLTR: | |
| 70 return "LTR"; | |
| 71 case bidi_test::DirectionRTL: | |
| 72 return "RTL"; | |
| 73 } | |
| 74 } | |
| 75 | |
// Parses a bidi test stream and hands every test case it finds to a
// caller-supplied runner.
//
// Runner must provide a runTest() member that parse() can call as
//   runTest(testString, reorder, levels, paragraphDirection, originalLine, lineNumber)
template<class Runner>
class Harness {
public:
    // Stores a reference to |runner|, so the runner must outlive this Harness.
    // explicit: a Runner should never silently convert into a Harness.
    explicit Harness(Runner& runner)
        : m_runner(runner)
    {
    }

    // Reads |bidiTestFile| line by line until the stream is exhausted.
    void parse(std::istream& bidiTestFile);

private:
    Runner& m_runner;
};
| 88 | |
| 89 // This trim() example comes from: | |
| 90 // http://stackoverflow.com/questions/216823/whats-the-best-way-to-trim-stdstring | |
| 91 // We could use boost::trim, but no other part of Blink uses boost yet. | |
| 92 | |
// Removes leading whitespace (as classified by std::isspace) from |s| in place.
inline void ltrim(std::string& s)
{
    // std::isspace has undefined behavior for negative arguments other than
    // EOF, so every char is converted through unsigned char first. A lambda
    // replaces std::not1(std::ptr_fun(...)): std::ptr_fun no longer exists in
    // C++17.
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](char c) {
        return !std::isspace(static_cast<unsigned char>(c));
    }));
}
| 97 | |
// Removes trailing whitespace (as classified by std::isspace) from |s| in place.
inline void rtrim(std::string& s)
{
    // Search backwards for the last non-space character; base() converts the
    // reverse iterator into the forward position just past that character.
    // The unsigned char conversion keeps std::isspace well-defined for chars
    // with the high bit set, and the lambda replaces std::ptr_fun, which no
    // longer exists in C++17.
    s.erase(std::find_if(s.rbegin(), s.rend(), [](char c) {
        return !std::isspace(static_cast<unsigned char>(c));
    }).base(), s.end());
}
| 102 | |
| 103 inline void trim(std::string& s) | |
| 104 { | |
| 105 rtrim(s); | |
| 106 ltrim(s); | |
| 107 } | |
| 108 | |
// Splits |str| into tokens separated by runs of spaces and/or tabs.
// Leading and trailing separators are ignored, so no empty tokens are
// produced; an empty or all-separator input yields an empty vector.
static std::vector<std::string> parseStringList(const std::string& str)
{
    const std::string separators(" \t");
    std::vector<std::string> tokens;

    // Invariant: tokenStart is the index of the first character of the next
    // token, or npos once only separators (or nothing) remain.
    size_t tokenStart = str.find_first_not_of(separators);
    while (tokenStart != std::string::npos) {
        // tokenEnd is npos for the final token; substr() then takes the rest
        // of the string and the following search also returns npos.
        const size_t tokenEnd = str.find_first_of(separators, tokenStart);
        tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
        tokenStart = str.find_first_not_of(separators, tokenEnd);
    }
    return tokens;
}
| 123 | |
| 124 static std::vector<int> parseIntList(const std::string& str) | |
| 125 { | |
| 126 std::vector<int> ints; | |
| 127 std::vector<std::string> strings = parseStringList(str); | |
| 128 for (int x = 0; x < strings.size(); x++) { | |
| 129 int i = atoi(strings[x].c_str()); | |
| 130 ints.push_back(i); | |
| 131 } | |
| 132 return ints; | |
| 133 } | |
| 134 | |
| 135 static std::vector<int> parseLevels(const std::string& line) | |
| 136 { | |
| 137 std::vector<int> levels; | |
| 138 std::vector<std::string> strings = parseStringList(line); | |
| 139 for (int x = 0; x < strings.size(); x++) { | |
| 140 const std::string& levelString = strings[x]; | |
| 141 int i; | |
| 142 if (levelString == "x") | |
| 143 i = -1; | |
| 144 else | |
| 145 i = atoi(levelString.c_str()); | |
| 146 levels.push_back(i); | |
| 147 } | |
| 148 return levels; | |
| 149 } | |
| 150 | |
| 151 static std::basic_string<UChar> parseTestString(const std::string& line) | |
| 152 { | |
| 153 std::basic_string<UChar> testString; | |
| 154 // This static is not thread-safe, but currently that's not an issue. | |
|
Jeffrey Yasskin
2013/10/29 23:16:03
Oh, I meant you should comment the function as not
eseidel
2013/10/30 23:04:11
Done.
| |
| 155 static std::map<std::string, UChar> charClassExamples; | |
| 156 if (charClassExamples.empty()) { | |
| 157 charClassExamples.insert({"L", 0x6c}); // 'l' for L | |
| 158 charClassExamples.insert({"R", 0x05D0}); // HEBREW ALEF | |
| 159 charClassExamples.insert({"EN", 0x33}); // '3' for EN | |
| 160 charClassExamples.insert({"ES", 0x2d}); // '-' for ES | |
| 161 charClassExamples.insert({"ET", 0x25}); // '%' for ET | |
| 162 charClassExamples.insert({"AN", 0x0660}); // arabic 0 | |
| 163 charClassExamples.insert({"CS", 0x2c}); // ',' for CS | |
| 164 charClassExamples.insert({"B", 0x0A}); // <control-000A> | |
| 165 charClassExamples.insert({"S", 0x09}); // <control-0009> | |
| 166 charClassExamples.insert({"WS", 0x20}); // ' ' for WS | |
| 167 charClassExamples.insert({"ON", 0x3d}); // '=' for ON | |
| 168 charClassExamples.insert({"NSM", 0x05BF}); // HEBREW POINT RAFE | |
| 169 charClassExamples.insert({"AL", 0x0608}); // ARABIC RAY | |
| 170 charClassExamples.insert({"BN", 0x00AD}); // SOFT HYPHEN | |
| 171 charClassExamples.insert({"LRE", 0x202A}); | |
| 172 charClassExamples.insert({"RLE", 0x202B}); | |
| 173 charClassExamples.insert({"PDF", 0x202C}); | |
| 174 charClassExamples.insert({"LRO", 0x202D}); | |
| 175 charClassExamples.insert({"RLO", 0x202E}); | |
| 176 charClassExamples.insert({"LRI", 0x2066}); | |
| 177 charClassExamples.insert({"RLI", 0x2067}); | |
| 178 charClassExamples.insert({"FSI", 0x2068}); | |
| 179 charClassExamples.insert({"PDI", 0x2069}); | |
| 180 } | |
| 181 | |
| 182 std::vector<std::string> charClasses = parseStringList(line); | |
| 183 for (int i = 0; i < charClasses.size(); i++) { | |
| 184 // FIXME: If the lookup failed we could return false for a parse error. | |
| 185 testString.push_back(charClassExamples.find(charClasses[i])->second); | |
| 186 } | |
| 187 return testString; | |
| 188 } | |
| 189 | |
| 190 static bool parseParagraphDirectionMask(const std::string& line, int& modeMask) | |
| 191 { | |
| 192 modeMask = atoi(line.c_str()); | |
| 193 return modeMask >= 1 && modeMask <= kMaxParagraphDirection; | |
| 194 } | |
| 195 | |
// Prints a parse-error message for |line|, found at |lineNumber|, to stdout.
static void parseError(const std::string& line, size_t lineNumber)
{
    // Use printf to avoid the expense of std::cout.
    // %zu is the conversion for size_t; passing a size_t to %d is undefined
    // behavior and misprints wherever size_t is wider than int.
    printf("Parse error, line %zu : %s\n", lineNumber, line.c_str());
}
| 201 | |
| 202 template<class Runner> | |
| 203 void Harness<Runner>::parse(std::istream& bidiTestFile) | |
| 204 { | |
| 205 static const std::string levelsPrefix("@Levels"); | |
| 206 static const std::string reorderPrefix("@Reorder"); | |
| 207 | |
| 208 // FIXME: UChar is an ICU type and cheating a bit to use here. | |
| 209 // uint16_t might be more portable. | |
| 210 std::basic_string<UChar> testString; | |
| 211 std::vector<int> levels; | |
| 212 std::vector<int> reorder; | |
| 213 int paragraphDirectionMask; | |
| 214 | |
| 215 std::string originalLine; | |
| 216 std::string line; | |
| 217 size_t lineNumber = 0; | |
| 218 while (std::getline(bidiTestFile, originalLine)) { | |
| 219 lineNumber++; | |
| 220 line = originalLine; | |
| 221 size_t commentStart = originalLine.find_first_of('#'); | |
| 222 if (commentStart != std::string::npos) | |
| 223 line = line.substr(0, commentStart); | |
| 224 trim(line); | |
| 225 if (line.empty()) | |
| 226 continue; | |
| 227 if (line[0] == '@') { | |
| 228 if (!line.find(levelsPrefix)) { | |
| 229 levels = parseLevels(line.substr(levelsPrefix.length() + 1)); | |
|
Jeffrey Yasskin
2013/10/29 23:16:03
Note that line.substr makes a copy of the data. Yo
eseidel
2013/10/30 23:04:11
Performance only sorta matters for this code. Ide
| |
| 230 continue; | |
| 231 } | |
| 232 if (!line.find(reorderPrefix)) { | |
| 233 reorder = parseIntList(line.substr(reorderPrefix.length() + 1)); | |
| 234 continue; | |
| 235 } | |
| 236 } else { | |
| 237 // Assume it's a data line. | |
| 238 size_t seperatorIndex = line.find_first_of(';'); | |
| 239 if (seperatorIndex == std::string::npos) { | |
| 240 parseError(originalLine, lineNumber); | |
| 241 continue; | |
| 242 } | |
| 243 testString = parseTestString(line.substr(0, seperatorIndex)); | |
| 244 if (!parseParagraphDirectionMask(line.substr(seperatorIndex + 1), pa ragraphDirectionMask)) { | |
| 245 parseError(originalLine, lineNumber); | |
| 246 continue; | |
| 247 } | |
| 248 | |
| 249 if (paragraphDirectionMask & DirectionAutoLTR) | |
| 250 m_runner.runTest(testString, reorder, levels, DirectionAutoLTR, originalLine, lineNumber); | |
| 251 if (paragraphDirectionMask & DirectionLTR) | |
| 252 m_runner.runTest(testString, reorder, levels, DirectionLTR, orig inalLine, lineNumber); | |
| 253 if (paragraphDirectionMask & DirectionRTL) | |
| 254 m_runner.runTest(testString, reorder, levels, DirectionRTL, orig inalLine, lineNumber); | |
| 255 } | |
| 256 } | |
| 257 } | |
| 258 | |
| 259 } // namespace bidi_test | |
| 260 | |
| 261 #endif // BidiTestHarness_h | |
| OLD | NEW |