Chromium Code Reviews| Index: Source/platform/text/BidiTestHarness.h |
| diff --git a/Source/platform/text/BidiTestHarness.h b/Source/platform/text/BidiTestHarness.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..cc2a191f68ab9811f68ff495a81fbee18e1ad6d4 |
| --- /dev/null |
| +++ b/Source/platform/text/BidiTestHarness.h |
| @@ -0,0 +1,261 @@ |
| +/* |
| + * Copyright (C) 2013 Google Inc. All rights reserved. |
| + * |
| + * Redistribution and use in source and binary forms, with or without |
| + * modification, are permitted provided that the following conditions are |
| + * met: |
| + * |
| + * * Redistributions of source code must retain the above copyright |
| + * notice, this list of conditions and the following disclaimer. |
| + * * Redistributions in binary form must reproduce the above |
| + * copyright notice, this list of conditions and the following disclaimer |
| + * in the documentation and/or other materials provided with the |
| + * distribution. |
| + * * Neither the name of Google Inc. nor the names of its |
| + * contributors may be used to endorse or promote products derived from |
| + * this software without specific prior written permission. |
| + * |
| + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| + */ |
| + |
| +#ifndef BidiTestHarness_h |
| +#define BidiTestHarness_h |
| + |
| +#include <algorithm> |
| +#include <cctype> |
| +#include <fstream> |
| +#include <map> |
| +#include <stdio.h> |
| +#include <stdlib.h> |
| +#include <string> |
| +#include <vector> |
| + |
| +// FIXME: We don't have any business owning this code. We should try to |
| +// upstream this to unicode.org if possible (for other implementations to use). |
| +// Unicode.org provides a reference implementation, including parser: |
| +// http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c |
| +// But it, like the other implementations I've found, is rather tied to |
| +// the algorithms it is testing. This file seeks to only implement the parser bits. |
| + |
| +// Other C/C++ implementations of this parser: |
| +// https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c |
| +// http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp |
| +// Both of those are too tied to their respective projects to be of use to Blink. |
| + |
| +// There are non-C implementations to parse BidiTest.txt as well, including: |
| +// https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb |
| + |
| +namespace bidi_test { |
| + |
// Paragraph directions a test case may be run under. The values are distinct
// bits so that several directions can be combined into a single mask.
enum ParagraphDirection {
    DirectionAutoLTR = 1,
    DirectionLTR = 2,
    DirectionRTL = 4,
};

// Mask with every ParagraphDirection bit set; the largest valid direction mask.
const int kMaxParagraphDirection = DirectionAutoLTR | DirectionLTR | DirectionRTL;

// Returns a human-readable name for |paragraphDirection|, for error printing.
// Values that are not a single ParagraphDirection enumerator yield "Unknown".
// Declared inline because this definition lives in a header and may be seen by
// more than one translation unit.
inline std::string nameFromParagraphDirection(ParagraphDirection paragraphDirection)
{
    switch (paragraphDirection) {
    case DirectionAutoLTR:
        return "Auto-LTR";
    case DirectionLTR:
        return "LTR";
    case DirectionRTL:
        return "RTL";
    }
    // Reached only for values that are not an enumerator (e.g. a combined
    // mask). Without this return, control would fall off the end of a
    // non-void function.
    return "Unknown";
}
| + |
// Parses BidiTest.txt-style input and hands each test case to a Runner.
//
// Runner must provide a runTest() member, which parse() invokes once for every
// test case and requested paragraph direction. The harness only stores a
// reference to the runner, so the runner must outlive the harness.
template<class Runner>
class Harness {
public:
    // explicit: a Runner should never be silently converted into a Harness.
    explicit Harness(Runner& runner)
        : m_runner(runner)
    {
    }

    // Reads test data from |bidiTestFile| and forwards each case to the runner.
    void parse(std::istream& bidiTestFile);

private:
    Runner& m_runner; // Not owned.
};
| + |
| +// This trim() example comes from: |
| +// http://stackoverflow.com/questions/216823/whats-the-best-way-to-trim-stdstring |
| +// We could use boost::trim, but no other part of Blink uses boost yet. |
| + |
// Removes leading whitespace (as classified by std::isspace) from |s| in place.
inline void ltrim(std::string& s)
{
    // std::isspace has undefined behavior for negative arguments other than
    // EOF, so each char is converted to unsigned char before classification.
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](char c) {
        return !std::isspace(static_cast<unsigned char>(c));
    }));
}
| + |
// Removes trailing whitespace (as classified by std::isspace) from |s| in place.
inline void rtrim(std::string& s)
{
    // Search backwards for the last non-space character; base() converts the
    // reverse iterator into the forward position just past that character.
    // The unsigned char conversion avoids undefined behavior in std::isspace
    // for negative char values.
    s.erase(std::find_if(s.rbegin(), s.rend(), [](char c) {
        return !std::isspace(static_cast<unsigned char>(c));
    }).base(), s.end());
}
| + |
| +inline void trim(std::string& s) |
| +{ |
| + rtrim(s); |
| + ltrim(s); |
| +} |
| + |
// Splits |str| on runs of spaces and tabs and returns the tokens in order.
// Leading and trailing separators are ignored, so no token is ever empty; an
// empty or all-separator string yields an empty vector.
inline std::vector<std::string> parseStringList(const std::string& str)
{
    static const char separators[] = " \t";

    std::vector<std::string> strings;
    size_t tokenStart = str.find_first_not_of(separators); // skip leading separators
    while (tokenStart != std::string::npos) {
        const size_t tokenEnd = str.find_first_of(separators, tokenStart);
        // When tokenEnd is npos the length is clamped by substr(), so the
        // final token runs to the end of the string.
        strings.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
        // Searching from npos returns npos, which ends the loop.
        tokenStart = str.find_first_not_of(separators, tokenEnd);
    }
    return strings;
}
| + |
| +static std::vector<int> parseIntList(const std::string& str) |
| +{ |
| + std::vector<int> ints; |
| + std::vector<std::string> strings = parseStringList(str); |
| + for (int x = 0; x < strings.size(); x++) { |
| + int i = atoi(strings[x].c_str()); |
| + ints.push_back(i); |
| + } |
| + return ints; |
| +} |
| + |
| +static std::vector<int> parseLevels(const std::string& line) |
| +{ |
| + std::vector<int> levels; |
| + std::vector<std::string> strings = parseStringList(line); |
| + for (int x = 0; x < strings.size(); x++) { |
| + const std::string& levelString = strings[x]; |
| + int i; |
| + if (levelString == "x") |
| + i = -1; |
| + else |
| + i = atoi(levelString.c_str()); |
| + levels.push_back(i); |
| + } |
| + return levels; |
| +} |
| + |
| +static std::basic_string<UChar> parseTestString(const std::string& line) |
| +{ |
| + std::basic_string<UChar> testString; |
| + // This static is not thread-safe, but currently that's not an issue. |
|
Jeffrey Yasskin
2013/10/29 23:16:03
Oh, I meant you should comment the function as not
eseidel
2013/10/30 23:04:11
Done.
|
| + static std::map<std::string, UChar> charClassExamples; |
| + if (charClassExamples.empty()) { |
| + charClassExamples.insert({"L", 0x6c}); // 'l' for L |
| + charClassExamples.insert({"R", 0x05D0}); // HEBREW ALEF |
| + charClassExamples.insert({"EN", 0x33}); // '3' for EN |
| + charClassExamples.insert({"ES", 0x2d}); // '-' for ES |
| + charClassExamples.insert({"ET", 0x25}); // '%' for ET |
| + charClassExamples.insert({"AN", 0x0660}); // arabic 0 |
| + charClassExamples.insert({"CS", 0x2c}); // ',' for CS |
| + charClassExamples.insert({"B", 0x0A}); // <control-000A> |
| + charClassExamples.insert({"S", 0x09}); // <control-0009> |
| + charClassExamples.insert({"WS", 0x20}); // ' ' for WS |
| + charClassExamples.insert({"ON", 0x3d}); // '=' for ON |
| + charClassExamples.insert({"NSM", 0x05BF}); // HEBREW POINT RAFE |
| + charClassExamples.insert({"AL", 0x0608}); // ARABIC RAY |
| + charClassExamples.insert({"BN", 0x00AD}); // SOFT HYPHEN |
| + charClassExamples.insert({"LRE", 0x202A}); |
| + charClassExamples.insert({"RLE", 0x202B}); |
| + charClassExamples.insert({"PDF", 0x202C}); |
| + charClassExamples.insert({"LRO", 0x202D}); |
| + charClassExamples.insert({"RLO", 0x202E}); |
| + charClassExamples.insert({"LRI", 0x2066}); |
| + charClassExamples.insert({"RLI", 0x2067}); |
| + charClassExamples.insert({"FSI", 0x2068}); |
| + charClassExamples.insert({"PDI", 0x2069}); |
| + } |
| + |
| + std::vector<std::string> charClasses = parseStringList(line); |
| + for (int i = 0; i < charClasses.size(); i++) { |
| + // FIXME: If the lookup failed we could return false for a parse error. |
| + testString.push_back(charClassExamples.find(charClasses[i])->second); |
| + } |
| + return testString; |
| +} |
| + |
| +static bool parseParagraphDirectionMask(const std::string& line, int& modeMask) |
| +{ |
| + modeMask = atoi(line.c_str()); |
| + return modeMask >= 1 && modeMask <= kMaxParagraphDirection; |
| +} |
| + |
// Prints a parse-error message for |line|, found at |lineNumber|, to stdout.
inline void parseError(const std::string& line, size_t lineNumber)
{
    // Use printf to avoid the expense of std::cout.
    // size_t does not match "%d"; the value is converted to unsigned long and
    // printed with "%lu" so the format and argument types agree on every
    // platform (including toolchains without "%zu" support).
    printf("Parse error, line %lu : %s\n", static_cast<unsigned long>(lineNumber), line.c_str());
}
| + |
| +template<class Runner> |
| +void Harness<Runner>::parse(std::istream& bidiTestFile) |
| +{ |
| + static const std::string levelsPrefix("@Levels"); |
| + static const std::string reorderPrefix("@Reorder"); |
| + |
| + // FIXME: UChar is an ICU type and cheating a bit to use here. |
| + // uint16_t might be more portable. |
| + std::basic_string<UChar> testString; |
| + std::vector<int> levels; |
| + std::vector<int> reorder; |
| + int paragraphDirectionMask; |
| + |
| + std::string originalLine; |
| + std::string line; |
| + size_t lineNumber = 0; |
| + while (std::getline(bidiTestFile, originalLine)) { |
| + lineNumber++; |
| + line = originalLine; |
| + size_t commentStart = originalLine.find_first_of('#'); |
| + if (commentStart != std::string::npos) |
| + line = line.substr(0, commentStart); |
| + trim(line); |
| + if (line.empty()) |
| + continue; |
| + if (line[0] == '@') { |
| + if (!line.find(levelsPrefix)) { |
| + levels = parseLevels(line.substr(levelsPrefix.length() + 1)); |
|
Jeffrey Yasskin
2013/10/29 23:16:03
Note that line.substr makes a copy of the data. Yo
eseidel
2013/10/30 23:04:11
Performance only sorta matters for this code. Ide
|
| + continue; |
| + } |
| + if (!line.find(reorderPrefix)) { |
| + reorder = parseIntList(line.substr(reorderPrefix.length() + 1)); |
| + continue; |
| + } |
| + } else { |
| + // Assume it's a data line. |
| + size_t seperatorIndex = line.find_first_of(';'); |
| + if (seperatorIndex == std::string::npos) { |
| + parseError(originalLine, lineNumber); |
| + continue; |
| + } |
| + testString = parseTestString(line.substr(0, seperatorIndex)); |
| + if (!parseParagraphDirectionMask(line.substr(seperatorIndex + 1), paragraphDirectionMask)) { |
| + parseError(originalLine, lineNumber); |
| + continue; |
| + } |
| + |
| + if (paragraphDirectionMask & DirectionAutoLTR) |
| + m_runner.runTest(testString, reorder, levels, DirectionAutoLTR, originalLine, lineNumber); |
| + if (paragraphDirectionMask & DirectionLTR) |
| + m_runner.runTest(testString, reorder, levels, DirectionLTR, originalLine, lineNumber); |
| + if (paragraphDirectionMask & DirectionRTL) |
| + m_runner.runTest(testString, reorder, levels, DirectionRTL, originalLine, lineNumber); |
| + } |
| + } |
| +} |
| + |
| +} // namespace bidi_test |
| + |
| +#endif // BidiTestHarness_h |