| OLD | NEW |
| (Empty) |
| 1 // Copyright 2013 Google Inc. | |
| 2 // | |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not | |
| 4 // use this file except in compliance with the License. You may obtain a copy of | |
| 5 // the License at | |
| 6 // | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // | |
| 9 // Unless required by applicable law or agreed to in writing, software | |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | |
| 11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | |
| 12 // License for the specific language governing permissions and limitations under | |
| 13 // the License. | |
| 14 | |
| 15 #include "liblouis_wrapper.h" | |
| 16 | |
| 17 #include <cstddef> | |
| 18 | |
| 19 #include "liblouis/liblouis.h" | |
| 20 | |
| 21 namespace { | |
| 22 | |
// Decodes UTF-8 into 16-bit wide characters.
//
// On success, replaces the contents of |*out| with the decoded code points and
// returns true.  Returns false, leaving |*out| untouched, when the input
// contains an invalid lead byte, a truncated sequence, or a lead byte that is
// not followed by the expected number of continuation bytes (10xxxxxx).
// The decoder is still permissive in other respects: overlong encodings and
// encoded surrogates are not rejected.
// Well-formed sequences of four or more bytes encode characters which are not
// in the Unicode Basic Multilingual Plane; they are skipped because they do
// not fit in a 16-bit widechar.
// TODO(jbroman): Handle more than BMP if liblouis changes to accept UTF-16.
static bool DecodeUtf8(const std::string& in, std::vector<widechar>* out) {
  const std::size_t len = in.length();
  std::vector<widechar> result;
  result.reserve(len);
  std::size_t i = 0;
  while (i < len) {
    const unsigned int lead = static_cast<unsigned char>(in[i++]);
    unsigned int cp;
    std::size_t trailing;  // Number of continuation bytes that must follow.
    if ((lead & 0x80) == 0x00) {         // U+0000 - U+007F
      cp = lead;
      trailing = 0;
    } else if ((lead & 0xe0) == 0xc0) {  // U+0080 - U+07FF
      cp = lead & 0x1f;
      trailing = 1;
    } else if ((lead & 0xf0) == 0xe0) {  // U+0800 - U+FFFF
      cp = lead & 0x0f;
      trailing = 2;
    } else if ((lead & 0xf8) == 0xf0) {  // U+10000 - U+1FFFFF
      cp = 0;
      trailing = 3;
    } else if ((lead & 0xfc) == 0xf8) {  // U+200000 - U+3FFFFFF
      cp = 0;
      trailing = 4;
    } else if ((lead & 0xfe) == 0xfc) {  // U+4000000 - U+7FFFFFFF
      cp = 0;
      trailing = 5;
    } else {
      // Invalid first byte (stray continuation byte, 0xFE or 0xFF).
      return false;
    }

    // The sequence is cut off by the end of the input.
    if (len - i < trailing) {
      return false;
    }

    for (std::size_t k = 0; k < trailing; ++k) {
      const unsigned int next = static_cast<unsigned char>(in[i++]);
      // Without this check a malformed sequence would silently swallow the
      // bytes that follow the lead byte and produce a bogus code point.
      if ((next & 0xc0) != 0x80) {
        return false;
      }
      cp = (cp << 6) | (next & 0x3f);
    }

    // Anything longer than three bytes lies outside the BMP: skip it.
    if (trailing > 2) {
      continue;
    }
    result.push_back(static_cast<widechar>(cp));
  }
  out->swap(result);
  return true;
}
| 65 | |
// Encodes wide characters into UTF-8.
//
// On success, replaces the contents of |*out| with the encoded bytes and
// returns true.  Returns false, leaving |*out| untouched, if an input value is
// larger than U+10FFFF; this can only happen when widechar is wider than 16
// bits.
// This implementation is permissive: surrogate code units are encoded as-is
// rather than being rejected or combined into pairs.
static bool EncodeUtf8(const std::vector<widechar>& in, std::string* out) {
  std::string result;
  result.reserve(in.size() * 2);
  for (std::vector<widechar>::const_iterator it = in.begin(); it != in.end();
       ++it) {
    const unsigned long cp = *it;
    if (cp <= 0x007fUL) {            // U+0000 - U+007F
      result.push_back(static_cast<char>(cp));
    } else if (cp <= 0x07ffUL) {     // U+0080 - U+07FF
      result.push_back(static_cast<char>(0xc0 | ((cp >> 6) & 0x1f)));
      result.push_back(static_cast<char>(0x80 | (cp & 0x3f)));
    } else if (cp <= 0xffffUL) {     // U+0800 - U+FFFF
      result.push_back(static_cast<char>(0xe0 | ((cp >> 12) & 0x0f)));
      result.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3f)));
      result.push_back(static_cast<char>(0x80 | (cp & 0x3f)));
    } else if (cp <= 0x10ffffUL) {   // U+10000 - U+10FFFF
      // Only reachable when widechar is wider than 16 bits.
      result.push_back(static_cast<char>(0xf0 | ((cp >> 18) & 0x07)));
      result.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3f)));
      result.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3f)));
      result.push_back(static_cast<char>(0x80 | (cp & 0x3f)));
    } else {
      // Not a Unicode code point; report it instead of silently dropping it.
      return false;
    }
  }
  out->swap(result);
  return true;
}
| 93 | |
| 94 } // namespace | |
| 95 | |
| 96 | |
| 97 namespace liblouis_nacl { | |
| 98 | |
| 99 LibLouisWrapper::LibLouisWrapper() { | |
| 100 char data_path[] = "/"; // Needed because lou_setDataPath takes a char*. | |
| 101 lou_setDataPath(data_path); | |
| 102 } | |
| 103 | |
| 104 LibLouisWrapper::~LibLouisWrapper() { | |
| 105 lou_free(); | |
| 106 } | |
| 107 | |
| 108 const char* LibLouisWrapper::tables_dir() const { | |
| 109 return "/liblouis/tables"; | |
| 110 } | |
| 111 | |
| 112 bool LibLouisWrapper::CheckTable(const std::string& table_name) { | |
| 113 return lou_getTable(table_name.c_str()) != NULL; | |
| 114 } | |
| 115 | |
| 116 bool LibLouisWrapper::Translate(const TranslationParams& params, | |
| 117 TranslationResult* out) { | |
| 118 // Convert the character set of the input text. | |
| 119 std::vector<widechar> inbuf; | |
| 120 if (!DecodeUtf8(params.text, &inbuf)) { | |
| 121 // TODO(jbroman): log this | |
| 122 return false; | |
| 123 } | |
| 124 | |
| 125 int inlen = inbuf.size(); | |
| 126 int outlen = inlen * 2; // TODO(jbroman): choose this size more accurately. | |
| 127 std::vector<widechar> outbuf(outlen); | |
| 128 std::vector<int> text_to_braille(inlen); | |
| 129 std::vector<int> braille_to_text(outlen); | |
| 130 | |
| 131 // Compute the cursor position pointer to pass to liblouis. | |
| 132 int out_cursor_position; | |
| 133 int* out_cursor_position_ptr; | |
| 134 if (params.cursor_position < 0) { | |
| 135 out_cursor_position = -1; | |
| 136 out_cursor_position_ptr = NULL; | |
| 137 } else { | |
| 138 out_cursor_position = params.cursor_position; | |
| 139 out_cursor_position_ptr = &out_cursor_position; | |
| 140 } | |
| 141 | |
| 142 // Invoke liblouis. | |
| 143 int result = lou_translate(params.table_name.c_str(), | |
| 144 &inbuf[0], &inlen, &outbuf[0], &outlen, | |
| 145 NULL /* typeform */, NULL /* spacing */, | |
| 146 &text_to_braille[0], &braille_to_text[0], | |
| 147 out_cursor_position_ptr, dotsIO /* mode */); | |
| 148 if (result == 0) { | |
| 149 // TODO(jbroman): log this | |
| 150 return false; | |
| 151 } | |
| 152 | |
| 153 // Massage the result. | |
| 154 std::vector<unsigned char> cells; | |
| 155 cells.reserve(outlen); | |
| 156 for (int i = 0; i < outlen; i++) { | |
| 157 cells.push_back(outbuf[i]); | |
| 158 } | |
| 159 braille_to_text.resize(outlen); | |
| 160 | |
| 161 // Return the translation result. | |
| 162 out->cells.swap(cells); | |
| 163 out->text_to_braille.swap(text_to_braille); | |
| 164 out->braille_to_text.swap(braille_to_text); | |
| 165 out->cursor_position = out_cursor_position; | |
| 166 return true; | |
| 167 } | |
| 168 | |
| 169 bool LibLouisWrapper::BackTranslate(const std::string& table_name, | |
| 170 const std::vector<unsigned char>& cells, std::string* out) { | |
| 171 std::vector<widechar> inbuf; | |
| 172 inbuf.reserve(cells.size()); | |
| 173 for (std::vector<unsigned char>::const_iterator it = cells.begin(); | |
| 174 it != cells.end(); ++it) { | |
| 175 // Set the high-order bit to prevent liblouis from dropping empty cells. | |
| 176 inbuf.push_back(*it | 0x8000); | |
| 177 } | |
| 178 int inlen = inbuf.size(); | |
| 179 int outlen = inlen * 2; // TODO(jbroman): choose this size more accurately. | |
| 180 std::vector<widechar> outbuf(outlen); | |
| 181 | |
| 182 // Invoke liblouis. | |
| 183 int result = lou_backTranslateString(table_name.c_str(), | |
| 184 &inbuf[0], &inlen, &outbuf[0], &outlen, | |
| 185 NULL /* typeform */, NULL /* spacing */, dotsIO /* mode */); | |
| 186 if (result == 0) { | |
| 187 // TODO(njbroman): log this | |
| 188 return false; | |
| 189 } | |
| 190 | |
| 191 // Massage the result. | |
| 192 outbuf.resize(outlen); | |
| 193 std::string text; | |
| 194 if (!EncodeUtf8(outbuf, &text)) { | |
| 195 // TODO(jbroman): log this | |
| 196 return false; | |
| 197 } | |
| 198 | |
| 199 // Return the back translation result. | |
| 200 out->swap(text); | |
| 201 return true; | |
| 202 } | |
| 203 | |
| 204 } // namespace liblouis_nacl | |
| OLD | NEW |