| Index: third_party/liblouis/nacl_wrapper/liblouis_wrapper.cc
|
| diff --git a/third_party/liblouis/nacl_wrapper/liblouis_wrapper.cc b/third_party/liblouis/nacl_wrapper/liblouis_wrapper.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..390e192e3d24b678b9bb169b5bdf22b9f6738865
|
| --- /dev/null
|
| +++ b/third_party/liblouis/nacl_wrapper/liblouis_wrapper.cc
|
| @@ -0,0 +1,204 @@
|
| +// Copyright 2013 Google Inc.
|
| +//
|
| +// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
| +// use this file except in compliance with the License. You may obtain a copy of
|
| +// the License at
|
| +//
|
| +// http://www.apache.org/licenses/LICENSE-2.0
|
| +//
|
| +// Unless required by applicable law or agreed to in writing, software
|
| +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
| +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
| +// License for the specific language governing permissions and limitations under
|
| +// the License.
|
| +
|
| +#include "liblouis_wrapper.h"
|
| +
|
| +#include <cstddef>
|
| +
|
| +#include "liblouis/liblouis.h"
|
| +
|
| +namespace {
|
| +
|
| +// Converts a UTF-8 byte string into 16-bit wide characters (widechar).
|
| +// Only the lead byte of each sequence is examined to find its length;
|
| +// continuation bytes are not validated, so encoding errors may go unnoticed.
|
| +// Sequences of four or more bytes (characters outside the Unicode Basic
|
| +// Multilingual Plane) are skipped and produce no output.
|
| +// Returns false, leaving |out| untouched, when a lead byte is invalid or a
|
| +// sequence is cut short by the end of the input.
|
| +// TODO(jbroman): Handle more than BMP if liblouis changes to accept UTF-16.
|
| +static bool DecodeUtf8(const std::string& in, std::vector<widechar>* out) {
|
| +  const int len = in.length();
|
| +  std::vector<widechar> decoded;
|
| +  decoded.reserve(len);
|
| +  for (int pos = 0; pos < len;) {
|
| +    const unsigned int lead = static_cast<unsigned char>(in[pos++]);
|
| +
|
| +    // Number of continuation bytes announced by the lead byte.
|
| +    int trailing;
|
| +    if ((lead & 0x80) == 0x00) {         // U+0000 - U+007F
|
| +      trailing = 0;
|
| +    } else if ((lead & 0xe0) == 0xc0) {  // U+0080 - U+07FF
|
| +      trailing = 1;
|
| +    } else if ((lead & 0xf0) == 0xe0) {  // U+0800 - U+FFFF
|
| +      trailing = 2;
|
| +    } else if ((lead & 0xf8) == 0xf0) {  // U+10000 - U+1FFFFF
|
| +      trailing = 3;
|
| +    } else if ((lead & 0xfc) == 0xf8) {  // U+200000 - U+3FFFFFF
|
| +      trailing = 4;
|
| +    } else if ((lead & 0xfe) == 0xfc) {  // U+4000000 - U+7FFFFFFF
|
| +      trailing = 5;
|
| +    } else {
|
| +      // Not a valid lead byte.
|
| +      return false;
|
| +    }
|
| +    if (len - pos < trailing) {
|
| +      // The sequence runs past the end of the input.
|
| +      return false;
|
| +    }
|
| +    if (trailing > 2) {
|
| +      // Outside the BMP: skip the whole sequence.
|
| +      pos += trailing;
|
| +      continue;
|
| +    }
|
| +
|
| +    // Payload bits carried by the lead byte, indexed by |trailing|.
|
| +    static const unsigned int kLeadMask[] = {0x7f, 0x1f, 0x0f};
|
| +    unsigned int cp = lead & kLeadMask[trailing];
|
| +    for (int k = 0; k < trailing; ++k) {
|
| +      cp = (cp << 6) | (static_cast<unsigned char>(in[pos++]) & 0x3f);
|
| +    }
|
| +    decoded.push_back(static_cast<widechar>(cp));
|
| +  }
|
| +  out->swap(decoded);
|
| +  return true;
|
| +}
|
| +
|
| +// Serializes 16-bit wide characters (widechar) as UTF-8.
|
| +// Values up to U+FFFF become one-, two- or three-byte sequences; anything
|
| +// larger is silently omitted. Input code points are not validated.
|
| +// Always returns true.
|
| +// TODO(jbroman): Handle more than BMP if widechar ever becomes larger.
|
| +static bool EncodeUtf8(const std::vector<widechar>& in, std::string* out) {
|
| +  std::string encoded;
|
| +  encoded.reserve(in.size() * 2);
|
| +  for (std::size_t idx = 0; idx < in.size(); ++idx) {
|
| +    const unsigned int cp = in[idx];
|
| +    if (cp > 0xffff) {
|
| +      // This can't happen if widechar is 16 bits wide.
|
| +      // TODO(jbroman): assert this
|
| +      continue;
|
| +    }
|
| +    if (cp <= 0x007f) {  // U+0000 - U+007F
|
| +      encoded += static_cast<char>(cp);
|
| +      continue;
|
| +    }
|
| +    if (cp <= 0x07ff) {  // U+0080 - U+07FF
|
| +      encoded += static_cast<char>(0xc0 | ((cp >> 6) & 0x1f));
|
| +    } else {  // U+0800 - U+FFFF
|
| +      encoded += static_cast<char>(0xe0 | ((cp >> 12) & 0x0f));
|
| +      encoded += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
|
| +    }
|
| +    // Every multi-byte sequence ends with the low six bits.
|
| +    encoded += static_cast<char>(0x80 | (cp & 0x3f));
|
| +  }
|
| +  out->swap(encoded);
|
| +  return true;
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +
|
| +namespace liblouis_nacl {
|
| +
|
| +// Sets the liblouis data path to "/".
|
| +LibLouisWrapper::LibLouisWrapper() {
|
| +  // lou_setDataPath takes a char*, so pass a writable buffer holding the
|
| +  // path instead of a string literal.
|
| +  char root_path[2] = {'/', '\0'};
|
| +  lou_setDataPath(root_path);
|
| +}
|
| +
|
| +// Calls lou_free() when the wrapper is destroyed.
|
| +LibLouisWrapper::~LibLouisWrapper() {
|
| + lou_free();
|
| +}
|
| +
|
| +// Returns the fixed path of the tables directory, "/liblouis/tables".
|
| +const char* LibLouisWrapper::tables_dir() const {
|
| +  static const char kTablesDir[] = "/liblouis/tables";
|
| +  return kTablesDir;
|
| +}
|
| +
|
| +// Returns true if lou_getTable() yields a non-NULL result for |table_name|.
|
| +bool LibLouisWrapper::CheckTable(const std::string& table_name) {
|
| +  if (lou_getTable(table_name.c_str()) == NULL) {
|
| +    return false;
|
| +  }
|
| +  return true;
|
| +}
|
| +
|
| +// Translates params.text (UTF-8) to braille with the liblouis table named by
|
| +// params.table_name.
|
| +//
|
| +// On success returns true and fills |out| with:
|
| +//   cells            one byte per output cell (each liblouis output
|
| +//                    character narrowed to unsigned char),
|
| +//   text_to_braille  the per-input-character positions written by liblouis,
|
| +//                    one entry per decoded input character,
|
| +//   braille_to_text  the per-output-cell positions written by liblouis,
|
| +//                    one entry per output cell,
|
| +//   cursor_position  the value liblouis left in the cursor variable, or -1
|
| +//                    when params.cursor_position was negative.
|
| +// Returns false, leaving |out| untouched, if the text cannot be decoded or
|
| +// liblouis reports failure.
|
| +bool LibLouisWrapper::Translate(const TranslationParams& params,
|
| +                                TranslationResult* out) {
|
| +  // Convert the character set of the input text.
|
| +  std::vector<widechar> inbuf;
|
| +  if (!DecodeUtf8(params.text, &inbuf)) {
|
| +    // TODO(jbroman): log this
|
| +    return false;
|
| +  }
|
| +
|
| +  // Compute the cursor position pointer to pass to liblouis. A negative
|
| +  // position is passed on as NULL and reported back as -1.
|
| +  int out_cursor_position = -1;
|
| +  int* out_cursor_position_ptr = NULL;
|
| +  if (params.cursor_position >= 0) {
|
| +    out_cursor_position = params.cursor_position;
|
| +    out_cursor_position_ptr = &out_cursor_position;
|
| +  }
|
| +
|
| +  if (inbuf.empty()) {
|
| +    // Taking &v[0] of an empty vector is undefined behavior, so text that
|
| +    // decodes to nothing is answered here without calling lou_translate.
|
| +    // The table is still checked so that a bad table fails just as it does
|
| +    // for non-empty text.
|
| +    if (!CheckTable(params.table_name)) {
|
| +      // TODO(jbroman): log this
|
| +      return false;
|
| +    }
|
| +    out->cells.clear();
|
| +    out->text_to_braille.clear();
|
| +    out->braille_to_text.clear();
|
| +    out->cursor_position = out_cursor_position;
|
| +    return true;
|
| +  }
|
| +
|
| +  int inlen = inbuf.size();
|
| +  int outlen = inlen * 2;  // TODO(jbroman): choose this size more accurately.
|
| +  std::vector<widechar> outbuf(outlen);
|
| +  std::vector<int> text_to_braille(inlen);
|
| +  std::vector<int> braille_to_text(outlen);
|
| +
|
| +  // Invoke liblouis. inlen and outlen are passed by pointer and may be
|
| +  // updated by the call.
|
| +  // NOTE(review): if the output buffer is too small the result is presumably
|
| +  // truncated without an error; confirm and consider retrying with more room.
|
| +  int result = lou_translate(params.table_name.c_str(),
|
| +                             &inbuf[0], &inlen, &outbuf[0], &outlen,
|
| +                             NULL /* typeform */, NULL /* spacing */,
|
| +                             &text_to_braille[0], &braille_to_text[0],
|
| +                             out_cursor_position_ptr, dotsIO /* mode */);
|
| +  if (result == 0) {
|
| +    // TODO(jbroman): log this
|
| +    return false;
|
| +  }
|
| +
|
| +  // Massage the result: narrow each output character to one byte and trim
|
| +  // braille_to_text to the number of cells actually produced.
|
| +  std::vector<unsigned char> cells;
|
| +  cells.reserve(outlen);
|
| +  for (int i = 0; i < outlen; i++) {
|
| +    cells.push_back(outbuf[i]);
|
| +  }
|
| +  braille_to_text.resize(outlen);
|
| +
|
| +  // Return the translation result.
|
| +  out->cells.swap(cells);
|
| +  out->text_to_braille.swap(text_to_braille);
|
| +  out->braille_to_text.swap(braille_to_text);
|
| +  out->cursor_position = out_cursor_position;
|
| +  return true;
|
| +}
|
| +
|
| +// Translates braille |cells| back into UTF-8 text with the liblouis table
|
| +// named by |table_name|.
|
| +// Returns true and stores the text in |out| on success. Returns false,
|
| +// leaving |out| untouched, if liblouis reports failure or the result cannot
|
| +// be encoded.
|
| +bool LibLouisWrapper::BackTranslate(const std::string& table_name,
|
| +    const std::vector<unsigned char>& cells, std::string* out) {
|
| +  if (cells.empty()) {
|
| +    // Taking &v[0] of an empty vector is undefined behavior, so an empty
|
| +    // cell list is answered here without calling lou_backTranslateString.
|
| +    // The table is still checked so that a bad table fails just as it does
|
| +    // for non-empty input.
|
| +    if (!CheckTable(table_name)) {
|
| +      // TODO(jbroman): log this
|
| +      return false;
|
| +    }
|
| +    out->clear();
|
| +    return true;
|
| +  }
|
| +
|
| +  std::vector<widechar> inbuf;
|
| +  inbuf.reserve(cells.size());
|
| +  for (std::vector<unsigned char>::const_iterator it = cells.begin();
|
| +       it != cells.end(); ++it) {
|
| +    // Set the high-order bit to prevent liblouis from dropping empty cells.
|
| +    inbuf.push_back(*it | 0x8000);
|
| +  }
|
| +  int inlen = inbuf.size();
|
| +  int outlen = inlen * 2;  // TODO(jbroman): choose this size more accurately.
|
| +  std::vector<widechar> outbuf(outlen);
|
| +
|
| +  // Invoke liblouis. inlen and outlen are passed by pointer and may be
|
| +  // updated by the call.
|
| +  int result = lou_backTranslateString(table_name.c_str(),
|
| +      &inbuf[0], &inlen, &outbuf[0], &outlen,
|
| +      NULL /* typeform */, NULL /* spacing */, dotsIO /* mode */);
|
| +  if (result == 0) {
|
| +    // TODO(jbroman): log this
|
| +    return false;
|
| +  }
|
| +
|
| +  // Massage the result: keep only the characters liblouis produced, then
|
| +  // convert them to UTF-8.
|
| +  outbuf.resize(outlen);
|
| +  std::string text;
|
| +  if (!EncodeUtf8(outbuf, &text)) {
|
| +    // TODO(jbroman): log this
|
| +    return false;
|
| +  }
|
| +
|
| +  // Return the back translation result.
|
| +  out->swap(text);
|
| +  return true;
|
| +}
|
| +
|
| +} // namespace liblouis_nacl
|
|
|