Index: Source/platform/text/BidiTestHarness.h |
diff --git a/Source/platform/text/BidiTestHarness.h b/Source/platform/text/BidiTestHarness.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..4cc8ed1cf50291705c713326ff95b3157361b479 |
--- /dev/null |
+++ b/Source/platform/text/BidiTestHarness.h |
@@ -0,0 +1,274 @@ |
+/* |
+ * Copyright (C) 2013 Google Inc. All rights reserved. |
+ * |
+ * Redistribution and use in source and binary forms, with or without |
+ * modification, are permitted provided that the following conditions are |
+ * met: |
+ * |
+ * * Redistributions of source code must retain the above copyright |
+ * notice, this list of conditions and the following disclaimer. |
+ * * Redistributions in binary form must reproduce the above |
+ * copyright notice, this list of conditions and the following disclaimer |
+ * in the documentation and/or other materials provided with the |
+ * distribution. |
+ * * Neither the name of Google Inc. nor the names of its |
+ * contributors may be used to endorse or promote products derived from |
+ * this software without specific prior written permission. |
+ * |
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
+ */ |
+ |
+#ifndef BidiTestHarness_h |
+#define BidiTestHarness_h |
+ |
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <istream>
#include <map>
#include <string>
#include <vector>
+ |
+// FIXME: We don't have any business owning this code. We should try to |
+// upstream this to unicode.org if possible (for other implementations to use). |
+// Unicode.org provides a reference implementation, including a parser: |
+// http://www.unicode.org/Public/PROGRAMS/BidiReferenceC/6.3.0/source/brtest.c |
+// But it, like the other implementations I've found, is rather tied to |
+// the algorithms it is testing. This file seeks to only implement the parser bits. |
+ |
+// Other C/C++ implementations of this parser: |
+// https://github.com/googlei18n/fribidi-vs-unicode/blob/master/test.c |
+// http://source.icu-project.org/repos/icu/icu/trunk/source/test/intltest/bidiconf.cpp |
+// Both of those are too tied to their respective projects to be of use to Blink. |
+ |
+// There are non-C implementations that parse BidiTest.txt as well, including: |
+// https://github.com/twitter/twitter-cldr-rb/blob/master/spec/bidi/bidi_spec.rb |
+ |
+namespace bidi_test { |
+ |
// Paragraph directions a test can be run with. Each enumerator is a distinct
// bit so that several directions can be combined into a single integer mask.
enum ParagraphDirection {
    DirectionAutoLTR = 1,
    DirectionLTR = 2,
    DirectionRTL = 4,
};

// Bitwise OR of every ParagraphDirection bit, i.e. the largest valid mask.
const int kMaxParagraphDirection = DirectionAutoLTR | DirectionLTR | DirectionRTL;

// For error printing: returns a human-readable name for |paragraphDirection|.
// Declared inline because this header may be included from more than one
// translation unit.
inline std::string nameFromParagraphDirection(ParagraphDirection paragraphDirection)
{
    switch (paragraphDirection) {
    case DirectionAutoLTR:
        return "Auto-LTR";
    case DirectionLTR:
        return "LTR";
    case DirectionRTL:
        return "RTL";
    }
    // Only reachable for a value that is not one of the enumerators above
    // (for example a combined mask cast to the enum). Returning a placeholder
    // avoids flowing off the end of a non-void function.
    return "Unknown";
}
+ |
// Parser front end for bidi conformance test data. The harness keeps a
// reference to a caller-owned |Runner| (so the runner must outlive the
// harness); parse() reports every test case it reads by calling
// runner.runTest(...).
template<class Runner>
class Harness {
public:
    // explicit: a Runner should never silently convert into a Harness.
    explicit Harness(Runner& runner)
        : m_runner(runner)
    {
    }

    // Reads test data from |bidiTestFile| until the stream is exhausted.
    void parse(std::istream& bidiTestFile);

private:
    Runner& m_runner;
};
+ |
+// This trim() example comes from: |
+// http://stackoverflow.com/questions/216823/whats-the-best-way-to-trim-stdstring |
+// We could use boost::trim, but no other part of Blink uses boost yet. |
+ |
// Removes leading whitespace (as classified by std::isspace) from |s| in
// place and returns a reference to |s| so calls can be chained.
inline std::string& ltrim(std::string& s)
{
    // The lambda takes unsigned char on purpose: passing a negative char to
    // std::isspace is undefined behaviour.
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char c) { return !std::isspace(c); }));
    return s;
}
+ |
// Removes trailing whitespace (as classified by std::isspace) from |s| in
// place and returns a reference to |s| so calls can be chained.
inline std::string& rtrim(std::string& s)
{
    // Search backwards for the last non-space character; .base() converts the
    // reverse iterator into the forward position just past that character.
    // The lambda takes unsigned char because passing a negative char to
    // std::isspace is undefined behaviour.
    s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char c) { return !std::isspace(c); }).base(), s.end());
    return s;
}
+ |
+inline std::string& trim(std::string& s) |
+{ |
+ return ltrim(rtrim(s)); |
+} |
+ |
// Splits |str| on runs of spaces and tabs and stores the non-empty tokens in
// |strings| (any previous contents are discarded). Leading and trailing
// separators are ignored. The function cannot fail and always returns true;
// the bool result is kept so it composes with the other parse* helpers.
inline bool parseStringList(const std::string& str, std::vector<std::string>& strings)
{
    static const char separators[] = " \t";
    strings.clear();

    size_t tokenStart = str.find_first_not_of(separators); // skip leading separators
    while (tokenStart != std::string::npos) {
        // npos here means the token runs to the end of the string; substr()
        // clamps the resulting oversized count.
        const size_t tokenEnd = str.find_first_of(separators, tokenStart);
        strings.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
        tokenStart = str.find_first_not_of(separators, tokenEnd);
    }
    return true;
}
+ |
+static bool parseIntList(std::string str, std::vector<int>& ints) |
+{ |
+ ints.clear(); |
+ std::vector<std::string> strings; |
+ if (!parseStringList(str, strings)) |
+ return false; |
+ for (int x = 0; x < strings.size(); x++) { |
+ int i = atoi(strings[x].c_str()); |
+ ints.push_back(i); |
+ } |
+ return true; |
+} |
+ |
+static bool parseLevelRule(std::string line, std::vector<int>& levels) |
Jeffrey Yasskin
2013/10/29 20:03:22
Generally pass strings as const std::string& to av
eseidel
2013/10/29 20:49:44
Done.
|
+{ |
+ levels.clear(); |
+ std::vector<std::string> strings; |
+ if (!parseStringList(line, strings)) |
+ return false; |
+ for (int x = 0; x < strings.size(); x++) { |
+ const std::string& levelString = strings[x]; |
+ int i; |
+ if (levelString == "x") |
+ i = -1; |
+ else |
+ i = atoi(levelString.c_str()); |
+ levels.push_back(i); |
+ } |
+ return true;} |
+ |
+static bool parseReorderRule(std::string line, std::vector<int>& reorder) |
+{ |
+ return parseIntList(line, reorder); |
+} |
+ |
+static bool parseTestString(std::string line, std::basic_string<UChar>& testString) |
+{ |
+ testString.clear(); |
+ static std::map<std::string, UChar> charClassExamples; |
+ if (charClassExamples.empty()) { |
Jeffrey Yasskin
2013/10/29 20:03:22
This isn't thread-safe, which you should comment.
eseidel
2013/10/29 20:49:44
Done.
|
+ charClassExamples.insert({"L", 0x6c}); // 'l' for L |
+ charClassExamples.insert({"R", 0x05D0}); // HEBREW ALEF |
+ charClassExamples.insert({"EN", 0x33}); // '3' for EN |
+ charClassExamples.insert({"ES", 0x2d}); // '-' for ES |
+ charClassExamples.insert({"ET", 0x25}); // '%' for ET |
+ charClassExamples.insert({"AN", 0x0660}); // arabic 0 |
+ charClassExamples.insert({"CS", 0x2c}); // ',' for CS |
+ charClassExamples.insert({"B", 0x0A}); // <control-000A> |
+ charClassExamples.insert({"S", 0x09}); // <control-0009> |
+ charClassExamples.insert({"WS", 0x20}); // ' ' for WS |
+ charClassExamples.insert({"ON", 0x3d}); // '=' for ON |
+ charClassExamples.insert({"NSM", 0x05BF}); // HEBREW POINT RAFE |
+ charClassExamples.insert({"AL", 0x0608}); // ARABIC RAY |
+ charClassExamples.insert({"BN", 0x00AD}); // SOFT HYPHEN |
+ charClassExamples.insert({"LRE", 0x202A}); |
+ charClassExamples.insert({"RLE", 0x202B}); |
+ charClassExamples.insert({"PDF", 0x202C}); |
+ charClassExamples.insert({"LRO", 0x202D}); |
+ charClassExamples.insert({"RLO", 0x202E}); |
+ charClassExamples.insert({"LRI", 0x2066}); |
+ charClassExamples.insert({"RLI", 0x2067}); |
+ charClassExamples.insert({"FSI", 0x2068}); |
+ charClassExamples.insert({"PDI", 0x2069}); |
+ } |
+ |
+ std::vector<std::string> charClasses; |
+ parseStringList(line, charClasses); |
+ for (int i = 0; i < charClasses.size(); i++) { |
+ testString.push_back(charClassExamples.find(charClasses[i])->second); |
+ } |
+ return true; |
+} |
+ |
+static bool parseParagraphDirectionMask(std::string line, int& modeMask) |
+{ |
+ |
+ modeMask = atoi(line.c_str()); |
Jeffrey Yasskin
2013/10/29 20:03:22
One cool thing you can do in C++ is use bitset<3>
eseidel
2013/10/29 20:49:44
Although these are both awesome suggestions, I'm g
|
+ return modeMask >= 1 && modeMask <= kMaxParagraphDirection; |
+} |
+ |
// Reports a line that could not be parsed: prints |lineNumber| and the raw
// text of |line| to stdout.
inline void parseError(const std::string& line, size_t lineNumber)
{
    // Use printf to avoid the expense of std::cout.
    // size_t has no portable pre-C99 conversion specifier, so widen the value
    // to unsigned long and print it with %lu; "%d" with a size_t argument is
    // a format/argument mismatch.
    std::printf("Parse error, line %lu : %s\n", static_cast<unsigned long>(lineNumber), line.c_str());
}
+ |
+template<class Runner> |
+void Harness<Runner>::parse(std::istream& bidiTestFile) |
+{ |
+ std::string levelsPrefix("@Levels"); |
Jeffrey Yasskin
2013/10/29 20:03:22
Make these const if you're not planning to change
eseidel
2013/10/29 20:49:44
Made them "static const".
|
+ std::string reorderPrefix("@Reorder"); |
+ |
+ // FIXME: UChar is an ICU type and cheating a bit to use here. |
+ // uint16_t might be more portable. |
+ std::basic_string<UChar> testString; |
+ std::vector<int> levels; |
+ std::vector<int> reorder; |
+ int paragraphDirectionMask; |
+ |
+ std::string originalLine; |
+ std::string line; |
+ size_t lineNumber = 0; |
+ while (std::getline(bidiTestFile, originalLine)) { |
+ lineNumber++; |
+ line = originalLine; |
Jeffrey Yasskin
2013/10/29 20:03:22
I'd usually declare variables where they're initia
eseidel
2013/10/29 20:49:44
That makes an extra copy of line though, no?
Jeffrey Yasskin
2013/10/29 23:16:03
It shouldn't make any more copies than you have in
|
+ size_t commentStart = originalLine.find_first_of('#'); |
+ if (commentStart != std::string::npos) |
+ line = line.substr(0, commentStart); |
+ line = trim(line); |
+ if (line.empty()) |
+ continue; |
+ if (line[0] == '@') { |
+ if (!line.find(levelsPrefix)) { |
+ if (!parseLevelRule(line.substr(levelsPrefix.length() + 1), levels)) |
+ parseError(originalLine, lineNumber); |
+ continue; |
+ } |
+ if (!line.find(reorderPrefix)) { |
+ if (!parseReorderRule(line.substr(reorderPrefix.length() + 1), reorder)) |
+ parseError(originalLine, lineNumber); |
+ continue; |
+ } |
+ } else { |
+ // Assume it's a data line. |
+ size_t seperatorIndex = line.find_first_of(';'); |
+ if (seperatorIndex == std::string::npos) { |
+ parseError(originalLine, lineNumber); |
+ continue; |
+ } |
+ if (!parseTestString(line.substr(0, seperatorIndex), testString)) { |
Jeffrey Yasskin
2013/10/29 20:03:22
sp: seperator
eseidel
2013/10/29 20:49:44
I'm considering taking 3rd grade again as a corres
|
+ parseError(originalLine, lineNumber); |
+ continue; |
+ } |
+ if (!parseParagraphDirectionMask(line.substr(seperatorIndex + 1), paragraphDirectionMask)) { |
+ parseError(originalLine, lineNumber); |
+ continue; |
+ } |
+ |
+ if (paragraphDirectionMask & DirectionAutoLTR) |
Jeffrey Yasskin
2013/10/29 20:03:22
With a bitset, this becomes paragraphDirectionMask
|
+ m_runner.runTest(testString, reorder, levels, DirectionAutoLTR, originalLine, lineNumber); |
Jeffrey Yasskin
2013/10/29 20:03:22
It doesn't look like |levels| has a value coming i
eseidel
2013/10/29 20:49:44
It's confusing. "levels" and "order" are both "gl
Jeffrey Yasskin
2013/10/29 23:16:03
Oh, right, I just missed the assignment above beca
|
+ if (paragraphDirectionMask & DirectionLTR) |
+ m_runner.runTest(testString, reorder, levels, DirectionLTR, originalLine, lineNumber); |
+ if (paragraphDirectionMask & DirectionRTL) |
+ m_runner.runTest(testString, reorder, levels, DirectionRTL, originalLine, lineNumber); |
+ } |
+ } |
+} |
+ |
+} // namespace bidi_test |
+ |
+#endif // BidiTestHarness_h |