OLD | NEW |
| (Empty) |
1 // Copyright 2013 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not | |
4 // use this file except in compliance with the License. You may obtain a copy of | |
5 // the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | |
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | |
12 // License for the specific language governing permissions and limitations under | |
13 // the License. | |
14 | |
15 #include "liblouis_wrapper.h" | |
16 | |
17 #include <cstddef> | |
18 | |
19 #include "liblouis/liblouis.h" | |
20 | |
21 namespace { | |
22 | |
23 // Decodes UTF-8 into 16-bit wide characters. | |
24 // This implementation is very permissive and may miss encoding errors. | |
25 // It ignores charaters which are not in the Unicode Basic Multilingual Plane. | |
26 // TODO(jbroman): Handle more than BMP if liblouis changes to accept UTF-16. | |
27 static bool DecodeUtf8(const std::string& in, std::vector<widechar>* out) { | |
28 int len = in.length(); | |
29 std::vector<widechar> result; | |
30 result.reserve(len); | |
31 int i = 0; | |
32 while (i < len) { | |
33 int ch = static_cast<unsigned char>(in[i++]); | |
34 widechar cp; | |
35 if ((ch & 0x80) == 0x00) { // U+0000 - U+007F | |
36 cp = ch; | |
37 } else if ((ch & 0xe0) == 0xc0 && i < len) { // U+0080 - U+07FF | |
38 cp = (ch & 0x1f) << 6; | |
39 ch = static_cast<unsigned char>(in[i++]); | |
40 cp |= (ch & 0x3f); | |
41 } else if ((ch & 0xf0) == 0xe0 && i+1 < len) { // U+0800 - U+FFFF | |
42 cp = (ch & 0x0f) << 12; | |
43 ch = static_cast<unsigned char>(in[i++]); | |
44 cp |= (ch & 0x3f) << 6; | |
45 ch = static_cast<unsigned char>(in[i++]); | |
46 cp |= (ch & 0x3f); | |
47 } else if ((ch & 0xf8) == 0xf0 && i+2 < len) { // U+10000 - U+1FFFFF | |
48 i += 3; | |
49 continue; | |
50 } else if ((ch & 0xfc) == 0xf8 && i+3 < len) { // U+200000 - U+3FFFFFF | |
51 i += 4; | |
52 continue; | |
53 } else if ((ch & 0xfe) == 0xfc && i+4 < len) { // U+4000000 - U+7FFFFFFF | |
54 i += 5; | |
55 continue; | |
56 } else { | |
57 // Invalid first code point. | |
58 return false; | |
59 } | |
60 result.push_back(cp); | |
61 } | |
62 out->swap(result); | |
63 return true; | |
64 } | |
65 | |
66 // Encodes 16-bit wide characters into UTF-8. | |
67 // This implementation is very permissive and may miss invalid code points in | |
68 // its input. | |
69 // TODO(jbroman): Handle more than BMP if widechar ever becomes larger. | |
70 static bool EncodeUtf8(const std::vector<widechar>& in, std::string* out) { | |
71 std::string result; | |
72 result.reserve(in.size() * 2); | |
73 for (std::vector<widechar>::const_iterator it = in.begin(); it != in.end(); | |
74 ++it) { | |
75 unsigned int cp = *it; | |
76 if (cp <= 0x007f) { // U+0000 - U+007F | |
77 result.push_back(static_cast<char>(cp)); | |
78 } else if (cp <= 0x07ff) { // U+0080 - U+07FF | |
79 result.push_back(static_cast<char>(0xc0 | ((cp >> 6) & 0x1f))); | |
80 result.push_back(static_cast<char>(0x80 | (cp & 0x3f))); | |
81 } else if (cp <= 0xffff) { // U+0800 - U+FFFF | |
82 result.push_back(static_cast<char>(0xe0 | ((cp >> 12) & 0x0f))); | |
83 result.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3f))); | |
84 result.push_back(static_cast<char>(0x80 | (cp & 0x3f))); | |
85 } else { | |
86 // This can't happen if widechar is 16 bits wide. | |
87 // TODO(jbroman): assert this | |
88 } | |
89 } | |
90 out->swap(result); | |
91 return true; | |
92 } | |
93 | |
94 } // namespace | |
95 | |
96 | |
97 namespace liblouis_nacl { | |
98 | |
99 LibLouisWrapper::LibLouisWrapper() { | |
100 char data_path[] = "/"; // Needed because lou_setDataPath takes a char*. | |
101 lou_setDataPath(data_path); | |
102 } | |
103 | |
104 LibLouisWrapper::~LibLouisWrapper() { | |
105 lou_free(); | |
106 } | |
107 | |
108 const char* LibLouisWrapper::tables_dir() const { | |
109 return "/liblouis/tables"; | |
110 } | |
111 | |
112 bool LibLouisWrapper::CheckTable(const std::string& table_name) { | |
113 return lou_getTable(table_name.c_str()) != NULL; | |
114 } | |
115 | |
116 bool LibLouisWrapper::Translate(const TranslationParams& params, | |
117 TranslationResult* out) { | |
118 // Convert the character set of the input text. | |
119 std::vector<widechar> inbuf; | |
120 if (!DecodeUtf8(params.text, &inbuf)) { | |
121 // TODO(jbroman): log this | |
122 return false; | |
123 } | |
124 | |
125 int inlen = inbuf.size(); | |
126 int outlen = inlen * 2; // TODO(jbroman): choose this size more accurately. | |
127 std::vector<widechar> outbuf(outlen); | |
128 std::vector<int> text_to_braille(inlen); | |
129 std::vector<int> braille_to_text(outlen); | |
130 | |
131 // Compute the cursor position pointer to pass to liblouis. | |
132 int out_cursor_position; | |
133 int* out_cursor_position_ptr; | |
134 if (params.cursor_position < 0) { | |
135 out_cursor_position = -1; | |
136 out_cursor_position_ptr = NULL; | |
137 } else { | |
138 out_cursor_position = params.cursor_position; | |
139 out_cursor_position_ptr = &out_cursor_position; | |
140 } | |
141 | |
142 // Invoke liblouis. | |
143 int result = lou_translate(params.table_name.c_str(), | |
144 &inbuf[0], &inlen, &outbuf[0], &outlen, | |
145 NULL /* typeform */, NULL /* spacing */, | |
146 &text_to_braille[0], &braille_to_text[0], | |
147 out_cursor_position_ptr, dotsIO /* mode */); | |
148 if (result == 0) { | |
149 // TODO(jbroman): log this | |
150 return false; | |
151 } | |
152 | |
153 // Massage the result. | |
154 std::vector<unsigned char> cells; | |
155 cells.reserve(outlen); | |
156 for (int i = 0; i < outlen; i++) { | |
157 cells.push_back(outbuf[i]); | |
158 } | |
159 braille_to_text.resize(outlen); | |
160 | |
161 // Return the translation result. | |
162 out->cells.swap(cells); | |
163 out->text_to_braille.swap(text_to_braille); | |
164 out->braille_to_text.swap(braille_to_text); | |
165 out->cursor_position = out_cursor_position; | |
166 return true; | |
167 } | |
168 | |
169 bool LibLouisWrapper::BackTranslate(const std::string& table_name, | |
170 const std::vector<unsigned char>& cells, std::string* out) { | |
171 std::vector<widechar> inbuf; | |
172 inbuf.reserve(cells.size()); | |
173 for (std::vector<unsigned char>::const_iterator it = cells.begin(); | |
174 it != cells.end(); ++it) { | |
175 // Set the high-order bit to prevent liblouis from dropping empty cells. | |
176 inbuf.push_back(*it | 0x8000); | |
177 } | |
178 int inlen = inbuf.size(); | |
179 int outlen = inlen * 2; // TODO(jbroman): choose this size more accurately. | |
180 std::vector<widechar> outbuf(outlen); | |
181 | |
182 // Invoke liblouis. | |
183 int result = lou_backTranslateString(table_name.c_str(), | |
184 &inbuf[0], &inlen, &outbuf[0], &outlen, | |
185 NULL /* typeform */, NULL /* spacing */, dotsIO /* mode */); | |
186 if (result == 0) { | |
187 // TODO(njbroman): log this | |
188 return false; | |
189 } | |
190 | |
191 // Massage the result. | |
192 outbuf.resize(outlen); | |
193 std::string text; | |
194 if (!EncodeUtf8(outbuf, &text)) { | |
195 // TODO(jbroman): log this | |
196 return false; | |
197 } | |
198 | |
199 // Return the back translation result. | |
200 out->swap(text); | |
201 return true; | |
202 } | |
203 | |
204 } // namespace liblouis_nacl | |
OLD | NEW |