Source/platform/text/BidiTestHarness.h - Issue 39523002: Add harness for running Unicode.org Bidi tests.

Unified Diff: Source/platform/text/BidiTestHarness.h

Issue 39523002: Add harness for running Unicode.org Bidi tests. (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: update comments about reference implementations Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: Source/platform/text/BidiTestHarness.h

diff --git a/Source/platform/text/BidiTestHarness.h b/Source/platform/text/BidiTestHarness.h

new file mode 100644

index 0000000000000000000000000000000000000000..59632c8390acaf8883508c40f380c46913e65a6d

--- /dev/null

+++ b/Source/platform/text/BidiTestHarness.h

@@ -0,0 +1,270 @@

+/*

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ * notice, this list of conditions and the following disclaimer in the

+ * documentation and/or other materials provided with the distribution.

+ *

+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY

leviw_travelin_and_unemployed 2013/10/28 21:33:11 Nit: We don't need to use the "Provided by Apple"

+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR

+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR

+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY

+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ */

+#ifndef BidiTestHarness_h

+#define BidiTestHarness_h

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

+// Unicode.org provides a reference implementation, including parser:

+// http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c

+// I failed to notice that until after I had written this one.

+// This implementation was written in std:: c++ with the intention

+// of being upstreamed to unicode.org.

+// FIXME: We should probably replace this implementation with the one from unicode.org.

+// Other C/C++ implementations of this parser:

+// https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c

+// http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp

+// Both of those are too tied to their respective projects to be of use to Blink.

+// There are non-C implementations to parse BidiTest.txt as well, including:

+// https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb

+namespace bidi_test {

// Paragraph directions a test can be run with. Each enumerator is a distinct
// bit so that several directions can be combined into a single mask.
enum ParagraphDirection {
    DirectionAutoLTR = 1,
    DirectionLTR = 2,
    DirectionRTL = 4,
};

// Largest valid direction mask: every direction bit set.
const int kMaxParagraphDirection = DirectionAutoLTR | DirectionLTR | DirectionRTL;

// For error printing: returns a human-readable name for |paragraphDirection|.
// Marked inline because it is defined in a header; without it, including this
// header from more than one translation unit produces duplicate definitions.
inline std::string nameFromParagraphDirection(ParagraphDirection paragraphDirection)
{
    switch (paragraphDirection) {
    case DirectionAutoLTR:
        return "Auto-LTR";
    case DirectionLTR:
        return "LTR";
    case DirectionRTL:
        return "RTL";
    }
    // Only reachable for a value that is not one of the enumerators (for
    // example a combined mask). Falling off the end of a non-void function is
    // undefined behavior, so return a placeholder name instead.
    return "Unknown";
}

// Feeds the tests described by a bidi test stream to a Runner.
//
// parse() reports every test by calling
//   runner.runTest(testString, reorder, levels, direction, originalLine, lineNumber)
// so Runner must provide a compatible runTest() member.
template<class Runner>
class Harness {
public:
    // Stores a reference to |runner|; the runner must outlive this harness.
    // explicit: a Runner should never silently convert into a Harness.
    explicit Harness(Runner& runner)
        : m_runner(runner)
    {
    }

    // Reads |bidiTestFile| line by line and runs every test it describes.
    void parse(std::istream& bidiTestFile);

private:
    Runner& m_runner;
};

// This trim() example is adapted from:
// http://stackoverflow.com/questions/216823/whats-the-best-way-to-trim-stdstring
// We could use boost::trim, but no other part of Blink uses boost yet.
// Review note (leviw, 2013/10/28): jyasskin for this and the other std C++ stuff.

// Removes leading whitespace from |s| in place and returns |s|.
inline std::string& ltrim(std::string& s)
{
    // The lambda takes unsigned char because std::isspace() has undefined
    // behavior for negative values, which plain char can hold for non-ASCII
    // bytes. It also replaces std::not1/std::ptr_fun, which C++17 removed.
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char c) { return !std::isspace(c); }));
    return s;
}

// Removes trailing whitespace from |s| in place and returns |s|.
inline std::string& rtrim(std::string& s)
{
    // Search backwards for the last non-space character; base() converts that
    // reverse iterator into the forward position just past it. The lambda
    // takes unsigned char because std::isspace() has undefined behavior for
    // negative values.
    s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char c) { return !std::isspace(c); }).base(), s.end());
    return s;
}

+inline std::string& trim(std::string& s)

+ return ltrim(rtrim(s));

// Splits |str| on runs of spaces and tabs and stores the resulting non-empty
// tokens in |strings| (any previous contents are discarded).
// Always returns true; the bool return matches the other parse* helpers.
static bool parseStringList(const std::string& str, std::vector<std::string>& strings)
{
    static const char separators[] = " \t";
    strings.clear();

    // |tokenStart| is the first character of the next token, or npos when
    // only separators (or nothing) remain.
    std::string::size_type tokenStart = str.find_first_not_of(separators);
    while (tokenStart != std::string::npos) {
        // |tokenEnd| is npos for the final token; substr() then takes the
        // rest of the string and the following search yields npos as well.
        const std::string::size_type tokenEnd = str.find_first_of(separators, tokenStart);
        strings.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
        tokenStart = str.find_first_not_of(separators, tokenEnd);
    }
    return true;
}

+static bool parseIntList(std::string str, std::vector<int>& ints)

+ ints.clear();

+ std::vector<std::string> strings;

+ if (!parseStringList(str, strings))

+ return false;

+ for (int x = 0; x < strings.size(); x++) {

+ int i;

+ std::istringstream(strings[x]) >> i;

+ ints.push_back(i);

+ }

+ return true;

+static bool parseLevelRule(std::string line, std::vector<int>& levels)

+ levels.clear();

+ std::vector<std::string> strings;

+ if (!parseStringList(line, strings))

+ return false;

+ for (int x = 0; x < strings.size(); x++) {

+ const std::string& levelString = strings[x];

+ int i;

+ if (levelString == "x")

+ i = -1;

+ else

+ std::istringstream(levelString) >> i;

+ levels.push_back(i);

+ }

+ return true;}

+static bool parseReorderRule(std::string line, std::vector<int>& reorder)

+ return parseIntList(line, reorder);

+static bool parseTestString(std::string line, std::basic_string<UChar>& testString)

+ testString.clear();

+ static std::map<std::string, UChar> charClassExamples;

+ if (charClassExamples.empty()) {

+ charClassExamples.insert({"L", 0x6c}); // 'l' for L

+ charClassExamples.insert({"R", 0x05D0}); // HEBREW ALEF

+ charClassExamples.insert({"EN", 0x33}); // '3' for EN

+ charClassExamples.insert({"ES", 0x2d}); // '-' for ES

+ charClassExamples.insert({"ET", 0x25}); // '%' for ET

+ charClassExamples.insert({"AN", 0x0660}); // arabic 0

+ charClassExamples.insert({"CS", 0x2c}); // ',' for CS

+ charClassExamples.insert({"B", 0x0A}); // <control-000A>

+ charClassExamples.insert({"S", 0x09}); // <control-0009>

+ charClassExamples.insert({"WS", 0x20}); // ' ' for WS

+ charClassExamples.insert({"ON", 0x3d}); // '=' for ON

+ charClassExamples.insert({"NSM", 0x05BF}); // HEBREW POINT RAFE

+ charClassExamples.insert({"AL", 0x0608}); // ARABIC RAY

+ charClassExamples.insert({"BN", 0x00AD}); // SOFT HYPHEN

+ charClassExamples.insert({"LRE", 0x202A});

+ charClassExamples.insert({"RLE", 0x202B});

+ charClassExamples.insert({"PDF", 0x202C});

+ charClassExamples.insert({"LRO", 0x202D});

+ charClassExamples.insert({"RLO", 0x202E});

+ charClassExamples.insert({"LRI", 0x2066});

+ charClassExamples.insert({"RLI", 0x2067});

+ charClassExamples.insert({"FSI", 0x2068});

+ charClassExamples.insert({"PDI", 0x2069});

+ }

+ std::vector<std::string> charClasses;

+ parseStringList(line, charClasses);

+ for (int i = 0; i < charClasses.size(); i++) {

+ testString.push_back(charClassExamples.find(charClasses[i])->second);

+ }

+ return true;

+static bool parseParagraphDirectionMask(std::string line, int& modeMask)

+ std::istringstream(line) >> modeMask;

+ return modeMask >= 1 && modeMask <= kMaxParagraphDirection;

// Reports a line that could not be parsed by printing its 1-based
// |lineNumber| and its text to std::cout.
static void parseError(const std::string& line, size_t lineNumber)
{
    // |line| comes from std::getline() and so carries no newline of its own;
    // terminate the message so that successive errors do not run together.
    std::cout << "Parse error, line " << lineNumber << ":" << line << '\n';
}

+template<class Runner>

+void Harness<Runner>::parse(std::istream& bidiTestFile)

+ std::string levelsPrefix("@Levels");

+ std::string reorderPrefix("@Reorder");

+ // FIXME: UChar is an ICU type and cheating a bit to use here.

+ // uint16_t might be more portable.

+ std::basic_string<UChar> testString;

+ std::vector<int> levels;

+ std::vector<int> reorder;

+ int paragraphDirectionMask;

+ std::string originalLine;

+ std::string line;

+ size_t lineNumber = 0;

+ while (std::getline(bidiTestFile, originalLine)) {

+ lineNumber++;

+ line = originalLine;

+ size_t commentStart = originalLine.find_first_of('#');

+ if (commentStart != std::string::npos)

+ line = line.substr(0, commentStart);

+ line = trim(line);

+ if (line.empty())

+ continue;

+ if (line[0] == '@') {

+ if (!line.find(levelsPrefix)) {

+ if (!parseLevelRule(line.substr(levelsPrefix.length() + 1), levels))

+ parseError(originalLine, lineNumber);

+ continue;

+ }

+ if (!line.find(reorderPrefix)) {

+ if (!parseReorderRule(line.substr(reorderPrefix.length() + 1), reorder))

+ parseError(originalLine, lineNumber);

+ continue;

+ }

+ } else {

+ // Assume it's a data line.

+ size_t seperatorIndex = line.find_first_of(';');

+ if (seperatorIndex == std::string::npos) {

+ parseError(originalLine, lineNumber);

+ continue;

+ }

+ if (!parseTestString(line.substr(0, seperatorIndex), testString)) {

+ parseError(originalLine, lineNumber);

+ continue;

+ }

+ if (!parseParagraphDirectionMask(line.substr(seperatorIndex + 1), paragraphDirectionMask)) {

+ parseError(originalLine, lineNumber);

+ continue;

+ }

+ if (paragraphDirectionMask & DirectionAutoLTR)

+ m_runner.runTest(testString, reorder, levels, DirectionAutoLTR, originalLine, lineNumber);

+ if (paragraphDirectionMask & DirectionLTR)

+ m_runner.runTest(testString, reorder, levels, DirectionLTR, originalLine, lineNumber);

+ if (paragraphDirectionMask & DirectionRTL)

+ m_runner.runTest(testString, reorder, levels, DirectionRTL, originalLine, lineNumber);

+ }

+} // namespace bidi_test

+#endif // BidiTestHarness_h

« Source/platform/text/BidiResolverTest.cpp ('K') | « Source/platform/text/BidiResolverTest.cpp ('k') | no next file » | no next file with comments »