OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2013 Google Inc. All rights reserved. |
| 3 * |
| 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are |
| 6 * met: |
| 7 * |
| 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above |
| 11 * copyright notice, this list of conditions and the following disclaimer |
| 12 * in the documentation and/or other materials provided with the |
| 13 * distribution. |
| 14 * * Neither the name of Google Inc. nor the names of its |
| 15 * contributors may be used to endorse or promote products derived from |
| 16 * this software without specific prior written permission. |
| 17 * |
| 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 */ |
| 30 |
| 31 #ifndef BidiTestHarness_h |
| 32 #define BidiTestHarness_h |
| 33 |
#include <istream>
#include <map>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
| 39 |
| 40 // FIXME: We don't have any business owning this code. We should try to |
| 41 // upstream this to unicode.org if possible (for other implementations to use). |
| 42 // Unicode.org provides a reference implementation, including parser: |
| 43 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c |
| 44 // But it, like the other implementations I've found, is rather tied to |
| 45 // the algorithms it is testing. This file seeks to only implement the parser bi
ts. |
| 46 |
| 47 // Other C/C++ implementations of this parser: |
| 48 // https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c |
| 49 // http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidico
nf.cpp |
| 50 // Both of those are too tied to their respective projects to be of use to Blink. |
| 51 |
| 52 // There are non-C implementations to parse BidiTest.txt as well, including: |
| 53 // https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb |
| 54 |
| 55 // NOTE: None of this file is currently written to be thread-safe. |
| 56 |
| 57 namespace bidi_test { |
| 58 |
// Paragraph directions a test case can be run with. Each enumerator occupies
// its own bit so that several directions can be combined into a single
// integer mask (see kMaxParagraphDirection).
enum ParagraphDirection {
    DirectionAutoLTR = 1 << 0,
    DirectionLTR = 1 << 1,
    DirectionRTL = 1 << 2,
};

// Largest valid direction mask: every ParagraphDirection bit set.
const int kMaxParagraphDirection = DirectionRTL | DirectionLTR | DirectionAutoLTR;
| 65 |
| 66 // For error printing: |
| 67 std::string nameFromParagraphDirection(ParagraphDirection paragraphDirection) |
| 68 { |
| 69 switch (paragraphDirection) { |
| 70 case bidi_test::DirectionAutoLTR: |
| 71 return "Auto-LTR"; |
| 72 case bidi_test::DirectionLTR: |
| 73 return "LTR"; |
| 74 case bidi_test::DirectionRTL: |
| 75 return "RTL"; |
| 76 } |
| 77 // This should never be reached. |
| 78 return ""; |
| 79 } |
| 80 |
// Parses a bidi conformance test stream and hands each test case to a Runner.
//
// Runner is supplied by the caller; parse() invokes
// runner.runTest(testString, reorder, levels, direction, originalLine, lineNumber)
// once per requested paragraph direction for every data line.
//
// The harness only stores a reference to the runner, so the runner must
// outlive the harness.
template<class Runner>
class Harness {
public:
    // explicit: a Runner should never be silently converted into a Harness.
    explicit Harness(Runner& runner)
        : m_runner(runner)
    {
    }

    // Reads |bidiTestFile| line by line until the stream is exhausted.
    void parse(std::istream& bidiTestFile);

private:
    Runner& m_runner;
};
| 93 |
| 94 // We could use boost::trim, but no other part of Blink uses boost yet. |
// Removes leading spaces and tabs from |s| in place.
inline void ltrim(std::string& s)
{
    static const std::string whitespace(" \t");
    const std::string::size_type firstKept = s.find_first_not_of(whitespace);
    // If |s| holds nothing but whitespace, firstKept is npos and the erase
    // below empties the whole string.
    s.erase(0, firstKept);
}
| 100 |
// Removes trailing spaces and tabs from |s| in place.
inline void rtrim(std::string& s)
{
    static const std::string whitespace(" \t");
    const std::string::size_type lastKept = s.find_last_not_of(whitespace);
    if (lastKept != std::string::npos) {
        // Cut everything after the last non-whitespace character. When that
        // character is already the final one this erases nothing.
        s.erase(lastKept + 1);
        return;
    }
    // Nothing but whitespace (or already empty).
    s.clear();
}
| 114 |
| 115 inline void trim(std::string& s) |
| 116 { |
| 117 rtrim(s); |
| 118 ltrim(s); |
| 119 } |
| 120 |
// Splits |str| into tokens separated by runs of spaces and/or tabs.
// Leading and trailing separators are ignored, so no token is ever empty.
static std::vector<std::string> parseStringList(const std::string& str)
{
    static const std::string separators(" \t");
    std::vector<std::string> tokens;

    std::string::size_type tokenStart = str.find_first_not_of(separators);
    while (tokenStart != std::string::npos) {
        const std::string::size_type tokenEnd = str.find_first_of(separators, tokenStart);
        if (tokenEnd == std::string::npos) {
            // The final token runs to the end of the string.
            tokens.push_back(str.substr(tokenStart));
            break;
        }
        tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
        // Skip the separator run to reach the next token, if there is one.
        tokenStart = str.find_first_not_of(separators, tokenEnd);
    }
    return tokens;
}
| 135 |
| 136 static std::vector<int> parseIntList(const std::string& str) |
| 137 { |
| 138 std::vector<int> ints; |
| 139 std::vector<std::string> strings = parseStringList(str); |
| 140 for (size_t x = 0; x < strings.size(); x++) { |
| 141 int i = atoi(strings[x].c_str()); |
| 142 ints.push_back(i); |
| 143 } |
| 144 return ints; |
| 145 } |
| 146 |
| 147 static std::vector<int> parseLevels(const std::string& line) |
| 148 { |
| 149 std::vector<int> levels; |
| 150 std::vector<std::string> strings = parseStringList(line); |
| 151 for (size_t x = 0; x < strings.size(); x++) { |
| 152 const std::string& levelString = strings[x]; |
| 153 int i; |
| 154 if (levelString == "x") |
| 155 i = -1; |
| 156 else |
| 157 i = atoi(levelString.c_str()); |
| 158 levels.push_back(i); |
| 159 } |
| 160 return levels; |
| 161 } |
| 162 |
| 163 // This is not thread-safe as written. |
| 164 static std::basic_string<UChar> parseTestString(const std::string& line) |
| 165 { |
| 166 std::basic_string<UChar> testString; |
| 167 static std::map<std::string, UChar> charClassExamples; |
| 168 if (charClassExamples.empty()) { |
| 169 // FIXME: Explicit make_pair is ugly, but required for C++98 compat. |
| 170 charClassExamples.insert(std::make_pair("L", 0x6c)); // 'l' for L |
| 171 charClassExamples.insert(std::make_pair("R", 0x05D0)); // HEBREW ALEF |
| 172 charClassExamples.insert(std::make_pair("EN", 0x33)); // '3' for EN |
| 173 charClassExamples.insert(std::make_pair("ES", 0x2d)); // '-' for ES |
| 174 charClassExamples.insert(std::make_pair("ET", 0x25)); // '%' for ET |
| 175 charClassExamples.insert(std::make_pair("AN", 0x0660)); // arabic 0 |
| 176 charClassExamples.insert(std::make_pair("CS", 0x2c)); // ',' for CS |
| 177 charClassExamples.insert(std::make_pair("B", 0x0A)); // <control-000A> |
| 178 charClassExamples.insert(std::make_pair("S", 0x09)); // <control-0009> |
| 179 charClassExamples.insert(std::make_pair("WS", 0x20)); // ' ' for WS |
| 180 charClassExamples.insert(std::make_pair("ON", 0x3d)); // '=' for ON |
| 181 charClassExamples.insert(std::make_pair("NSM", 0x05BF)); // HEBREW POINT
RAFE |
| 182 charClassExamples.insert(std::make_pair("AL", 0x0608)); // ARABIC RAY |
| 183 charClassExamples.insert(std::make_pair("BN", 0x00AD)); // SOFT HYPHEN |
| 184 charClassExamples.insert(std::make_pair("LRE", 0x202A)); |
| 185 charClassExamples.insert(std::make_pair("RLE", 0x202B)); |
| 186 charClassExamples.insert(std::make_pair("PDF", 0x202C)); |
| 187 charClassExamples.insert(std::make_pair("LRO", 0x202D)); |
| 188 charClassExamples.insert(std::make_pair("RLO", 0x202E)); |
| 189 charClassExamples.insert(std::make_pair("LRI", 0x2066)); |
| 190 charClassExamples.insert(std::make_pair("RLI", 0x2067)); |
| 191 charClassExamples.insert(std::make_pair("FSI", 0x2068)); |
| 192 charClassExamples.insert(std::make_pair("PDI", 0x2069)); |
| 193 } |
| 194 |
| 195 std::vector<std::string> charClasses = parseStringList(line); |
| 196 for (size_t i = 0; i < charClasses.size(); i++) { |
| 197 // FIXME: If the lookup failed we could return false for a parse error. |
| 198 testString.push_back(charClassExamples.find(charClasses[i])->second); |
| 199 } |
| 200 return testString; |
| 201 } |
| 202 |
| 203 static bool parseParagraphDirectionMask(const std::string& line, int& modeMask) |
| 204 { |
| 205 modeMask = atoi(line.c_str()); |
| 206 return modeMask >= 1 && modeMask <= kMaxParagraphDirection; |
| 207 } |
| 208 |
// Reports a line that could not be parsed, together with its 1-based line
// number, on standard output.
static void parseError(const std::string& line, size_t lineNumber)
{
    // C stdio is used here to avoid the expense of std::cout.
    const char* const offendingText = line.c_str();
    fprintf(stdout, "Parse error, line %zu : %s\n", lineNumber, offendingText);
}
| 214 |
| 215 template<class Runner> |
| 216 void Harness<Runner>::parse(std::istream& bidiTestFile) |
| 217 { |
| 218 static const std::string levelsPrefix("@Levels"); |
| 219 static const std::string reorderPrefix("@Reorder"); |
| 220 |
| 221 // FIXME: UChar is an ICU type and cheating a bit to use here. |
| 222 // uint16_t might be more portable. |
| 223 std::basic_string<UChar> testString; |
| 224 std::vector<int> levels; |
| 225 std::vector<int> reorder; |
| 226 int paragraphDirectionMask; |
| 227 |
| 228 std::string line; |
| 229 size_t lineNumber = 0; |
| 230 while (std::getline(bidiTestFile, line)) { |
| 231 lineNumber++; |
| 232 const std::string originalLine = line; |
| 233 size_t commentStart = line.find_first_of('#'); |
| 234 if (commentStart != std::string::npos) |
| 235 line = line.substr(0, commentStart); |
| 236 trim(line); |
| 237 if (line.empty()) |
| 238 continue; |
| 239 if (line[0] == '@') { |
| 240 if (!line.find(levelsPrefix)) { |
| 241 levels = parseLevels(line.substr(levelsPrefix.length() + 1)); |
| 242 continue; |
| 243 } |
| 244 if (!line.find(reorderPrefix)) { |
| 245 reorder = parseIntList(line.substr(reorderPrefix.length() + 1)); |
| 246 continue; |
| 247 } |
| 248 } else { |
| 249 // Assume it's a data line. |
| 250 size_t seperatorIndex = line.find_first_of(';'); |
| 251 if (seperatorIndex == std::string::npos) { |
| 252 parseError(originalLine, lineNumber); |
| 253 continue; |
| 254 } |
| 255 testString = parseTestString(line.substr(0, seperatorIndex)); |
| 256 if (!parseParagraphDirectionMask(line.substr(seperatorIndex + 1), pa
ragraphDirectionMask)) { |
| 257 parseError(originalLine, lineNumber); |
| 258 continue; |
| 259 } |
| 260 |
| 261 if (paragraphDirectionMask & DirectionAutoLTR) |
| 262 m_runner.runTest(testString, reorder, levels, DirectionAutoLTR,
originalLine, lineNumber); |
| 263 if (paragraphDirectionMask & DirectionLTR) |
| 264 m_runner.runTest(testString, reorder, levels, DirectionLTR, orig
inalLine, lineNumber); |
| 265 if (paragraphDirectionMask & DirectionRTL) |
| 266 m_runner.runTest(testString, reorder, levels, DirectionRTL, orig
inalLine, lineNumber); |
| 267 } |
| 268 } |
| 269 } |
| 270 |
| 271 } // namespace bidi_test |
| 272 |
| 273 #endif // BidiTestHarness_h |
OLD | NEW |