third_party/hunspell/src/hunspell/hunspell.cxx - Issue 2544793003: [spellcheck] Updated Hunspell to 1.5.4

Unified Diff: third_party/hunspell/src/hunspell/hunspell.cxx

Issue 2544793003: [spellcheck] Updated Hunspell to 1.5.4 (Closed)

Patch Set: Updated patch with encoding change (created 4 years ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/hunspell/src/hunspell/hunspell.cxx

diff --git a/third_party/hunspell/src/hunspell/hunspell.cxx b/third_party/hunspell/src/hunspell/hunspell.cxx

index d9d60a48c6fead50ef6963ead2e7b9f0b7caa9b9..c8c5cf49521c0d78077b2cc0ccc1b51da905e9ba 100644

--- a/third_party/hunspell/src/hunspell/hunspell.cxx

+++ b/third_party/hunspell/src/hunspell/hunspell.cxx

@@ -1,102 +1,273 @@

-#include "license.hunspell"

-#include "license.myspell"

+/* ***** BEGIN LICENSE BLOCK *****

+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1

+ *

+ * The contents of this file are subject to the Mozilla Public License Version

+ * 1.1 (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ * http://www.mozilla.org/MPL/

+ *

+ * Software distributed under the License is distributed on an "AS IS" basis,

+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License

+ * for the specific language governing rights and limitations under the

+ * License.

+ *

+ * The Original Code is Hunspell, based on MySpell.

+ *

+ * The Initial Developers of the Original Code are

+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).

+ *

+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,

+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,

+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,

+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,

+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen

+ *

+ * Alternatively, the contents of this file may be used under the terms of

+ * either the GNU General Public License Version 2 or later (the "GPL"), or

+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),

+ * in which case the provisions of the GPL or the LGPL are applicable instead

+ * of those above. If you wish to allow use of your version of this file only

+ * under the terms of either the GPL or the LGPL, and not to allow others to

+ * use your version of this file under the terms of the MPL, indicate your

+ * decision by deleting the provisions above and replace them with the notice

+ * and other provisions required by the GPL or the LGPL. If you do not delete

+ * the provisions above, a recipient may use your version of this file under

+ * the terms of any one of the MPL, the GPL or the LGPL.

+ *

+ * ***** END LICENSE BLOCK ***** */

+/*

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ *

+ * 1. Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ *

+ * 2. Redistributions in binary form must reproduce the above copyright

+ * notice, this list of conditions and the following disclaimer in the

+ * documentation and/or other materials provided with the distribution.

+ *

+ * 3. All modifications to the source code must be clearly marked as

+ * such. Binary redistributions based on modified source code

+ * must be clearly marked as modified versions in the documentation

+ * and/or other materials provided with the distribution.

+ *

+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS

+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL

+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

+ * SUCH DAMAGE.

+ */

#include <stdlib.h>

#include <string.h>

#include <stdio.h>

+#include "affixmgr.hxx"

#include "hunspell.hxx"

+#include "suggestmgr.hxx"

#include "hunspell.h"

#ifndef HUNSPELL_CHROME_CLIENT

-#ifndef MOZILLA_CLIENT

# include "config.h"

#endif

-#endif

#include "csutil.hxx"

+#include <limits>

+#include <string>

+#define MAXWORDUTF8LEN (MAXWORDLEN * 3)

+class HunspellImpl

+public:

+#ifdef HUNSPELL_CHROME_CLIENT

+ HunspellImpl(const unsigned char* bdict_data, size_t bdict_length);

+#else

+ HunspellImpl(const char* affpath, const char* dpath, const char* key);

+#endif

+ ~HunspellImpl();

+#ifndef HUNSPELL_CHROME_CLIENT

+ int add_dic(const char* dpath, const char* key);

+#endif

+ std::vector<std::string> suffix_suggest(const std::string& root_word);

+ std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);

+ std::vector<std::string> generate(const std::string& word, const std::string& pattern);

+ std::vector<std::string> stem(const std::string& word);

+ std::vector<std::string> stem(const std::vector<std::string>& morph);

+ std::vector<std::string> analyze(const std::string& word);

+ int get_langnum() const;

+ bool input_conv(const std::string& word, std::string& dest);

+ bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);

+ std::vector<std::string> suggest(const std::string& word);

+ const std::string& get_wordchars() const;

+ const std::vector<w_char>& get_wordchars_utf16() const;

+ const std::string& get_dict_encoding() const;

+ int add(const std::string& word);

+ int add_with_affix(const std::string& word, const std::string& example);

+ int remove(const std::string& word);

+ const std::string& get_version() const;

+ struct cs_info* get_csconv();

+ std::vector<char> dic_encoding_vec;

+private:

+ AffixMgr* pAMgr;

+ std::vector<HashMgr*> m_HMgrs;

+ SuggestMgr* pSMgr;

+#ifndef HUNSPELL_CHROME_CLIENT // We are using BDict instead.

+ char* affixpath;

+#endif

+ std::string encoding;

+ struct cs_info* csconv;

+ int langnum;

+ int utf8;

+ int complexprefixes;

+ std::vector<std::string> wordbreak;

+#ifdef HUNSPELL_CHROME_CLIENT

+ // Not owned by us, owned by the Hunspell object.

+ hunspell::BDictReader* bdict_reader;

+#endif

+private:

+ void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);

+ size_t cleanword2(std::string& dest,

+ std::vector<w_char>& dest_u,

+ const std::string& src,

+ int* pcaptype,

+ size_t* pabbrev);

+ void mkinitcap(std::string& u8);

+ int mkinitcap2(std::string& u8, std::vector<w_char>& u16);

+ int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);

+ void mkallcap(std::string& u8);

+ int mkallsmall2(std::string& u8, std::vector<w_char>& u16);

+ struct hentry* checkword(const std::string& source, int* info, std::string* root);

+ std::string sharps_u8_l1(const std::string& source);

+ hentry*

+ spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);

+ int is_keepcase(const hentry* rv);

+ void insert_sug(std::vector<std::string>& slst, const std::string& word);

+ void cat_result(std::string& result, const std::string& st);

+ std::vector<std::string> spellml(const std::string& word);

+ std::string get_xml_par(const char* par);

+ const char* get_xml_pos(const char* s, const char* attr);

+ std::vector<std::string> get_xml_list(const char* list, const char* tag);

+ int check_xml_par(const char* q, const char* attr, const char* value);

+private:

+ HunspellImpl(const HunspellImpl&);

+ HunspellImpl& operator=(const HunspellImpl&);

+};

#ifdef HUNSPELL_CHROME_CLIENT

Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)

+ : m_Impl(new HunspellImpl(bdict_data, bdict_length)) {

#else

-Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)

+Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)

+ : m_Impl(new HunspellImpl(affpath, dpath, key)) {

#endif

- encoding = NULL;

- csconv = NULL;

- utf8 = 0;

- complexprefixes = 0;

+#ifdef HUNSPELL_CHROME_CLIENT

+HunspellImpl::HunspellImpl(const unsigned char* bdict_data, size_t bdict_length) {

+#else

+HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {

+#endif

+ csconv = NULL;

+ utf8 = 0;

+ complexprefixes = 0;

#ifndef HUNSPELL_CHROME_CLIENT

- affixpath = mystrdup(affpath);

+ affixpath = mystrdup(affpath);

#endif

- maxdic = 0;

#ifdef HUNSPELL_CHROME_CLIENT

- bdict_reader = new hunspell::BDictReader;

- bdict_reader->Init(bdict_data, bdict_length);

+ bdict_reader = new hunspell::BDictReader;

+ bdict_reader->Init(bdict_data, bdict_length);

- pHMgr[0] = new HashMgr(bdict_reader);

- if (pHMgr[0]) maxdic = 1;

+ /* first set up the hash manager */

+ m_HMgrs.push_back(new HashMgr(bdict_reader));

- pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);

+ pAMgr = new AffixMgr(bdict_reader, m_HMgrs); // TODO: 'key' ?

#else

- /* first set up the hash manager */

- pHMgr[0] = new HashMgr(dpath, affpath, key);

- if (pHMgr[0]) maxdic = 1;

+ /* first set up the hash manager */

+ m_HMgrs.push_back(new HashMgr(dpath, affpath, key));

- /* next set up the affix manager */

- /* it needs access to the hash manager lookup methods */

- pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);

+ /* next set up the affix manager */

+ /* it needs access to the hash manager lookup methods */

+ pAMgr = new AffixMgr(affpath, m_HMgrs, key);

#endif

- /* get the preferred try string and the dictionary */

- /* encoding from the Affix Manager for that dictionary */

- char * try_string = pAMgr->get_try_string();

- encoding = pAMgr->get_encoding();

- langnum = pAMgr->get_langnum();

- utf8 = pAMgr->get_utf8();

- if (!utf8)

- csconv = get_current_cs(encoding);

- complexprefixes = pAMgr->get_complexprefixes();

- wordbreak = pAMgr->get_breaktable();

- /* and finally set up the suggestion manager */

+ /* get the preferred try string and the dictionary */

+ /* encoding from the Affix Manager for that dictionary */

+ char* try_string = pAMgr->get_try_string();

+ encoding = pAMgr->get_encoding();

+ langnum = pAMgr->get_langnum();

+ utf8 = pAMgr->get_utf8();

+ if (!utf8)

+ csconv = get_current_cs(encoding);

+ complexprefixes = pAMgr->get_complexprefixes();

+ wordbreak = pAMgr->get_breaktable();

+ dic_encoding_vec.resize(encoding.size()+1);

+ strcpy(&dic_encoding_vec[0], encoding.c_str());

+ /* and finally set up the suggestion manager */

#ifdef HUNSPELL_CHROME_CLIENT

- pSMgr = new SuggestMgr(bdict_reader, try_string, MAXSUGGESTION, pAMgr);

+ pSMgr = new SuggestMgr(bdict_reader, try_string, MAXSUGGESTION, pAMgr);

#else

- pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);

+ pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);

#endif

- if (try_string) free(try_string);

+ if (try_string)

+ free(try_string);

}

-Hunspell::~Hunspell()

- if (pSMgr) delete pSMgr;

- if (pAMgr) delete pAMgr;

- for (int i = 0; i < maxdic; i++) delete pHMgr[i];

- maxdic = 0;

- pSMgr = NULL;

- pAMgr = NULL;

+Hunspell::~Hunspell() {

+ delete m_Impl;

+HunspellImpl::~HunspellImpl() {

+ delete pSMgr;

+ delete pAMgr;

+ for (size_t i = 0; i < m_HMgrs.size(); ++i)

+ delete m_HMgrs[i];

+ pSMgr = NULL;

+ pAMgr = NULL;

#ifdef MOZILLA_CLIENT

- delete [] csconv;

+ delete[] csconv;

#endif

- csconv= NULL;

- if (encoding) free(encoding);

- encoding = NULL;

+ csconv = NULL;

#ifdef HUNSPELL_CHROME_CLIENT

if (bdict_reader) delete bdict_reader;

bdict_reader = NULL;

#else

- if (affixpath) free(affixpath);

- affixpath = NULL;

+ if (affixpath)

+ free(affixpath);

+ affixpath = NULL;

#endif

}

#ifndef HUNSPELL_CHROME_CLIENT

// load extra dictionaries

-int Hunspell::add_dic(const char * dpath, const char * key) {

- if (maxdic == MAXDIC || !affixpath) return 1;

- pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);

- if (pHMgr[maxdic]) maxdic++; else return 1;

- return 0;

+int Hunspell::add_dic(const char* dpath, const char* key) {

+ return m_Impl->add_dic(dpath, key);

+// load extra dictionaries

+int HunspellImpl::add_dic(const char* dpath, const char* key) {

+ if (!affixpath)

+ return 1;

+ m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));

+ return 0;

}

#endif

@@ -107,516 +278,499 @@ int Hunspell::add_dic(const char * dpath, const char * key) {

// set the capitalization type

// return the length of the "cleaned" (and UTF-8 encoded) word

-int Hunspell::cleanword2(char * dest, const char * src,

- w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)

- unsigned char * p = (unsigned char *) dest;

- const unsigned char * q = (const unsigned char * ) src;

- // first skip over any leading blanks

- while ((*q != '\0') && (*q == ' ')) q++;

- // now strip off any trailing periods (recording their presence)

- *pabbrev = 0;

- int nl = strlen((const char *)q);

- while ((nl > 0) && (*(q+nl-1)=='.')) {

- nl--;

- (*pabbrev)++;

- }

- // if no characters are left it can't be capitalized

- if (nl <= 0) {

- *pcaptype = NOCAP;

- *p = '\0';

- return 0;

- }

- strncpy(dest, (char *) q, nl);

- *(dest + nl) = '\0';

- nl = strlen(dest);

- if (utf8) {

- *nc = u8_u16(dest_utf, MAXWORDLEN, dest);

- // don't check too long words

- if (*nc >= MAXWORDLEN) return 0;

- if (*nc == -1) { // big Unicode character (non BMP area)

- *pcaptype = NOCAP;

- return nl;

- }

- *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);

- } else {

- *pcaptype = get_captype(dest, nl, csconv);

- *nc = nl;

- }

- return nl;

+size_t HunspellImpl::cleanword2(std::string& dest,

+ std::vector<w_char>& dest_utf,

+ const std::string& src,

+ int* pcaptype,

+ size_t* pabbrev) {

+ dest.clear();

+ dest_utf.clear();

+ const char* q = src.c_str();

+ // first skip over any leading blanks

+ while ((*q != '\0') && (*q == ' '))

+ q++;

+ // now strip off any trailing periods (recording their presence)

+ *pabbrev = 0;

+ int nl = strlen(q);

+ while ((nl > 0) && (*(q + nl - 1) == '.')) {

+ nl--;

+ (*pabbrev)++;

+ }

-int Hunspell::cleanword(char * dest, const char * src,

- int * pcaptype, int * pabbrev)

- unsigned char * p = (unsigned char *) dest;

- const unsigned char * q = (const unsigned char * ) src;

- int firstcap = 0;

- // first skip over any leading blanks

- while ((*q != '\0') && (*q == ' ')) q++;

- // now strip off any trailing periods (recording their presence)

- *pabbrev = 0;

- int nl = strlen((const char *)q);

- while ((nl > 0) && (*(q+nl-1)=='.')) {

- nl--;

- (*pabbrev)++;

- }

- // if no characters are left it can't be capitalized

- if (nl <= 0) {

- *pcaptype = NOCAP;

- *p = '\0';

- return 0;

- }

- // now determine the capitalization type of the first nl letters

- int ncap = 0;

- int nneutral = 0;

- int nc = 0;

- if (!utf8) {

- while (nl > 0) {

- nc++;

- if (csconv[(*q)].ccase) ncap++;

- if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;

- *p++ = *q++;

- nl--;

- }

- // remember to terminate the destination string

- *p = '\0';

- firstcap = csconv[(unsigned char)(*dest)].ccase;

- } else {

- unsigned short idx;

- w_char t[MAXWORDLEN];

- nc = u8_u16(t, MAXWORDLEN, src);

- for (int i = 0; i < nc; i++) {

- idx = (t[i].h << 8) + t[i].l;

- unsigned short low = unicodetolower(idx, langnum);

- if (idx != low) ncap++;

- if (unicodetoupper(idx, langnum) == low) nneutral++;

- }

- u16_u8(dest, MAXWORDUTF8LEN, t, nc);

- if (ncap) {

- idx = (t[0].h << 8) + t[0].l;

- firstcap = (idx != unicodetolower(idx, langnum));

- }

- // now finally set the captype

- if (ncap == 0) {

- *pcaptype = NOCAP;

- } else if ((ncap == 1) && firstcap) {

- *pcaptype = INITCAP;

- } else if ((ncap == nc) || ((ncap + nneutral) == nc)){

- *pcaptype = ALLCAP;

- } else if ((ncap > 1) && firstcap) {

- *pcaptype = HUHINITCAP;

- } else {

- *pcaptype = HUHCAP;

- }

- return strlen(dest);

-void Hunspell::mkallcap(char * p)

+ // if no characters are left it can't be capitalized

+ if (nl <= 0) {

+ *pcaptype = NOCAP;

+ return 0;

+ }

+ dest.append(q, nl);

+ nl = dest.size();

if (utf8) {

- w_char u[MAXWORDLEN];

- int nc = u8_u16(u, MAXWORDLEN, p);

- unsigned short idx;

- for (int i = 0; i < nc; i++) {

- idx = (u[i].h << 8) + u[i].l;

- if (idx != unicodetoupper(idx, langnum)) {

- u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);

- u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);

- }

- u16_u8(p, MAXWORDUTF8LEN, u, nc);

+ u8_u16(dest_utf, dest);

+ *pcaptype = get_captype_utf8(dest_utf, langnum);

} else {

- while (*p != '\0') {

- *p = csconv[((unsigned char) *p)].cupper;

- p++;

- }

+ *pcaptype = get_captype(dest, csconv);

}

+ return nl;

}

-int Hunspell::mkallcap2(char * p, w_char * u, int nc)

- if (utf8) {

- unsigned short idx;

- for (int i = 0; i < nc; i++) {

- idx = (u[i].h << 8) + u[i].l;

- unsigned short up = unicodetoupper(idx, langnum);

- if (idx != up) {

- u[i].h = (unsigned char) (up >> 8);

- u[i].l = (unsigned char) (up & 0x00FF);

- }

- u16_u8(p, MAXWORDUTF8LEN, u, nc);

- return strlen(p);

+void HunspellImpl::cleanword(std::string& dest,

+ const std::string& src,

+ int* pcaptype,

+ int* pabbrev) {

+ dest.clear();

+ const unsigned char* q = (const unsigned char*)src.c_str();

+ int firstcap = 0;

+ // first skip over any leading blanks

+ while ((*q != '\0') && (*q == ' '))

+ q++;

+ // now strip off any trailing periods (recording their presence)

+ *pabbrev = 0;

+ int nl = strlen((const char*)q);

+ while ((nl > 0) && (*(q + nl - 1) == '.')) {

+ nl--;

+ (*pabbrev)++;

+ }

+ // if no characters are left it can't be capitalized

+ if (nl <= 0) {

+ *pcaptype = NOCAP;

+ return;

+ }

+ // now determine the capitalization type of the first nl letters

+ int ncap = 0;

+ int nneutral = 0;

+ int nc = 0;

+ if (!utf8) {

+ while (nl > 0) {

+ nc++;

+ if (csconv[(*q)].ccase)

+ ncap++;

+ if (csconv[(*q)].cupper == csconv[(*q)].clower)

+ nneutral++;

+ dest.push_back(*q++);

+ nl--;

+ }

+ // remember to terminate the destination string

+ firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;

} else {

- while (*p != '\0') {

- *p = csconv[((unsigned char) *p)].cupper;

- p++;

+ std::vector<w_char> t;

+ u8_u16(t, src);

+ for (size_t i = 0; i < t.size(); ++i) {

+ unsigned short idx = (t[i].h << 8) + t[i].l;

+ unsigned short low = unicodetolower(idx, langnum);

+ if (idx != low)

+ ncap++;

+ if (unicodetoupper(idx, langnum) == low)

+ nneutral++;

+ }

+ u16_u8(dest, t);

+ if (ncap) {

+ unsigned short idx = (t[0].h << 8) + t[0].l;

+ firstcap = (idx != unicodetolower(idx, langnum));

}

- return nc;

+ // now finally set the captype

+ if (ncap == 0) {

+ *pcaptype = NOCAP;

+ } else if ((ncap == 1) && firstcap) {

+ *pcaptype = INITCAP;

+ } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {

+ *pcaptype = ALLCAP;

+ } else if ((ncap > 1) && firstcap) {

+ *pcaptype = HUHINITCAP;

+ } else {

+ *pcaptype = HUHCAP;

+ }

-void Hunspell::mkallsmall(char * p)

- while (*p != '\0') {

- *p = csconv[((unsigned char) *p)].clower;

- p++;

- }

+void HunspellImpl::mkallcap(std::string& u8) {

+ if (utf8) {

+ std::vector<w_char> u16;

+ u8_u16(u16, u8);

+ ::mkallcap_utf(u16, langnum);

+ u16_u8(u8, u16);

+ } else {

+ ::mkallcap(u8, csconv);

+ }

}

-int Hunspell::mkallsmall2(char * p, w_char * u, int nc)

+int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {

if (utf8) {

- unsigned short idx;

- for (int i = 0; i < nc; i++) {

- idx = (u[i].h << 8) + u[i].l;

- unsigned short low = unicodetolower(idx, langnum);

- if (idx != low) {

- u[i].h = (unsigned char) (low >> 8);

- u[i].l = (unsigned char) (low & 0x00FF);

- }

- u16_u8(p, MAXWORDUTF8LEN, u, nc);

- return strlen(p);

+ ::mkallsmall_utf(u16, langnum);

+ u16_u8(u8, u16);

} else {

- while (*p != '\0') {

- *p = csconv[((unsigned char) *p)].clower;

- p++;

- }

+ ::mkallsmall(u8, csconv);

}

- return nc;

+ return u8.size();

}

// convert UTF-8 sharp S codes to latin 1

-char * Hunspell::sharps_u8_l1(char * dest, char * source) {

- char * p = dest;

- *p = *source;

- for (p++, source++; *(source - 1); p++, source++) {

- *p = *source;

- if (*source == '\x9F') *--p = '\xDF';

- }

- return dest;

+std::string HunspellImpl::sharps_u8_l1(const std::string& source) {

+ std::string dest(source);

+ mystrrep(dest, "\xC3\x9F", "\xDF");

+ return dest;

}

// recursive search for right ss - sharp s permutations

-hentry * Hunspell::spellsharps(char * base, char * pos, int n,

- int repnum, char * tmp, int * info, char **root) {

- pos = strstr(pos, "ss");

- if (pos && (n < MAXSHARPS)) {

- *pos = '\xC3';

- *(pos + 1) = '\x9F';

- hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);

- if (h) return h;

- *pos = 's';

- *(pos + 1) = 's';

- h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);

- if (h) return h;

- } else if (repnum > 0) {

- if (utf8) return checkword(base, info, root);

- return checkword(sharps_u8_l1(tmp, base), info, root);

- }

- return NULL;

+hentry* HunspellImpl::spellsharps(std::string& base,

+ size_t n_pos,

+ int n,

+ int repnum,

+ int* info,

+ std::string* root) {

+ size_t pos = base.find("ss", n_pos);

+ if (pos != std::string::npos && (n < MAXSHARPS)) {

+ base[pos] = '\xC3';

+ base[pos + 1] = '\x9F';

+ hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);

+ if (h)

+ return h;

+ base[pos] = 's';

+ base[pos + 1] = 's';

+ h = spellsharps(base, pos + 2, n + 1, repnum, info, root);

+ if (h)

+ return h;

+ } else if (repnum > 0) {

+ if (utf8)

+ return checkword(base, info, root);

+ std::string tmp(sharps_u8_l1(base));

+ return checkword(tmp, info, root);

+ }

+ return NULL;

}

-int Hunspell::is_keepcase(const hentry * rv) {

- return pAMgr && rv->astr && pAMgr->get_keepcase() &&

- TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);

+int HunspellImpl::is_keepcase(const hentry* rv) {

+ return pAMgr && rv->astr && pAMgr->get_keepcase() &&

+ TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);

}

-/* insert a word to the beginning of the suggestion array and return ns */

-int Hunspell::insert_sug(char ***slst, char * word, int ns) {

- char * dup = mystrdup(word);

- if (!dup) return ns;

- if (ns == MAXSUGGESTION) {

- ns--;

- free((*slst)[ns]);

- }

- for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];

- (*slst)[0] = dup;

- return ns + 1;

+/* insert a word to the beginning of the suggestion array */

+void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {

+ slst.insert(slst.begin(), word);

}

-int Hunspell::spell(const char * word, int * info, char ** root)

+bool Hunspell::spell(const std::string& word, int* info, std::string* root) {

+ return m_Impl->spell(word, info, root);

+bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {

#ifdef HUNSPELL_CHROME_CLIENT

- if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();

+ if (m_HMgrs[0]) m_HMgrs[0]->EmptyHentryCache();

#endif

- struct hentry * rv=NULL;

- // need larger vector. For example, Turkish capital letter I converted a

- // 2-byte UTF-8 character (dotless i) by mkallsmall.

- char cw[MAXWORDUTF8LEN];

- char wspace[MAXWORDUTF8LEN];

- w_char unicw[MAXWORDLEN];

+ struct hentry* rv = NULL;

+ int info2 = 0;

+ if (!info)

+ info = &info2;

+ else

+ *info = 0;

// Hunspell supports XML input of the simplified API (see manual)

- if (strcmp(word, SPELL_XML) == 0) return 1;

- int nc = strlen(word);

- int wl2 = 0;

+ if (word == SPELL_XML)

+ return true;

if (utf8) {

- if (nc >= MAXWORDUTF8LEN) return 0;

+ if (word.size() >= MAXWORDUTF8LEN)

+ return false;

} else {

- if (nc >= MAXWORDLEN) return 0;

+ if (word.size() >= MAXWORDLEN)

+ return false;

}

- int captype = 0;

- int abbv = 0;

- int wl = 0;

+ int captype = NOCAP;

+ size_t abbv = 0;

+ size_t wl = 0;

+ std::string scw;

+ std::vector<w_char> sunicw;

// input conversion

- RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

- if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);

- else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

+ RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;

+ {

+ std::string wspace;

+ bool convstatus = rl ? rl->conv(word, wspace) : false;

+ if (convstatus)

+ wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);

+ else

+ wl = cleanword2(scw, sunicw, word, &captype, &abbv);

+ }

- int info2 = 0;

- if (wl == 0 || maxdic == 0) return 1;

- if (root) *root = NULL;

+#ifdef MOZILLA_CLIENT

+ // accept the abbreviated words without dots

+ // workaround for the incomplete tokenization of Mozilla

+ abbv = 1;

+#endif

- // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)

+ if (wl == 0 || m_HMgrs.empty())

+ return true;

+ if (root)

+ root->clear();

+ // allow numbers with dots, dashes and commas (but forbid double separators:

+ // "..", "--" etc.)

enum { NBEGIN, NNUM, NSEP };

int nstate = NBEGIN;

- int i;

+ size_t i;

for (i = 0; (i < wl); i++) {

- if ((cw[i] <= '9') && (cw[i] >= '0')) {

- nstate = NNUM;

- } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {

- if ((nstate == NSEP) || (i == 0)) break;

- nstate = NSEP;

- } else break;

+ if ((scw[i] <= '9') && (scw[i] >= '0')) {

+ nstate = NNUM;

+ } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {

+ if ((nstate == NSEP) || (i == 0))

+ break;

+ nstate = NSEP;

+ } else

+ break;

}

- if ((i == wl) && (nstate == NNUM)) return 1;

- if (!info) info = &info2; else *info = 0;

- switch(captype) {

- case HUHCAP:

- case HUHINITCAP:

- *info += SPELL_ORIGCAP;

- case NOCAP: {

- rv = checkword(cw, info, root);

- if ((abbv) && !(rv)) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- rv = checkword(wspace, info, root);

- }

+ if ((i == wl) && (nstate == NNUM))

+ return true;

+ switch (captype) {

+ case HUHCAP:

+ /* FALLTHROUGH */

+ case HUHINITCAP:

+ *info += SPELL_ORIGCAP;

+ /* FALLTHROUGH */

+ case NOCAP:

+ rv = checkword(scw, info, root);

+ if ((abbv) && !(rv)) {

+ std::string u8buffer(scw);

+ u8buffer.push_back('.');

+ rv = checkword(u8buffer, info, root);

+ }

+ break;

+ case ALLCAP: {

+ *info += SPELL_ORIGCAP;

+ rv = checkword(scw, info, root);

+ if (rv)

+ break;

+ if (abbv) {

+ std::string u8buffer(scw);

+ u8buffer.push_back('.');

+ rv = checkword(u8buffer, info, root);

+ if (rv)

+ break;

+ }

+ // Spec. prefix handling for Catalan, French, Italian:

+ // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).

+ size_t apos = pAMgr ? scw.find('\'') : std::string::npos;

+ if (apos != std::string::npos) {

+ mkallsmall2(scw, sunicw);

+ //conversion may result in string with different len to pre-mkallsmall2

+ //so re-scan

+ if (apos != std::string::npos && apos < scw.size() - 1) {

+ std::string part1 = scw.substr(0, apos+1);

+ std::string part2 = scw.substr(apos+1);

+ if (utf8) {

+ std::vector<w_char> part1u, part2u;

+ u8_u16(part1u, part1);

+ u8_u16(part2u, part2);

+ mkinitcap2(part2, part2u);

+ scw = part1 + part2;

+ sunicw = part1u;

+ sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());

+ rv = checkword(scw, info, root);

+ if (rv)

+ break;

+ } else {

+ mkinitcap2(part2, sunicw);

+ scw = part1 + part2;

+ rv = checkword(scw, info, root);

+ if (rv)

+ break;

+ }

+ mkinitcap2(scw, sunicw);

+ rv = checkword(scw, info, root);

+ if (rv)

break;

- }

- case ALLCAP: {

- *info += SPELL_ORIGCAP;

- rv = checkword(cw, info, root);

- if (rv) break;

- if (abbv) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- rv = checkword(wspace, info, root);

- if (rv) break;

- }

- // Spec. prefix handling for Catalan, French, Italian:

- // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).

- if (pAMgr && strchr(cw, '\'')) {

- wl = mkallsmall2(cw, unicw, nc);

- //There are no really sane circumstances where this could fail,

- //but anyway...

- if (char * apostrophe = strchr(cw, '\'')) {

- if (utf8) {

- w_char tmpword[MAXWORDLEN];

- *apostrophe = '\0';

- wl2 = u8_u16(tmpword, MAXWORDLEN, cw);

- *apostrophe = '\'';

- if (wl2 < nc) {

- mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);

- rv = checkword(cw, info, root);

- if (rv) break;

- }

- } else {

- mkinitcap2(apostrophe + 1, unicw, nc);

- rv = checkword(cw, info, root);

- if (rv) break;

- }

- mkinitcap2(cw, unicw, nc);

- rv = checkword(cw, info, root);

- if (rv) break;

- }

- if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {

- char tmpword[MAXWORDUTF8LEN];

- wl = mkallsmall2(cw, unicw, nc);

- memcpy(wspace,cw,(wl+1));

- rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

- if (!rv) {

- wl2 = mkinitcap2(cw, unicw, nc);

- rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);

- }

- if ((abbv) && !(rv)) {

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

- if (!rv) {

- memcpy(wspace, cw, wl2);

- *(wspace+wl2) = '.';

- *(wspace+wl2+1) = '\0';

- rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

- }

- if (rv) break;

- }

}

- case INITCAP: {

- *info += SPELL_ORIGCAP;

- wl = mkallsmall2(cw, unicw, nc);

- memcpy(wspace,cw,(wl+1));

- wl2 = mkinitcap2(cw, unicw, nc);

- if (captype == INITCAP) *info += SPELL_INITCAP;

- rv = checkword(cw, info, root);

- if (captype == INITCAP) *info -= SPELL_INITCAP;

- // forbid bad capitalization

- // (for example, ijs -> Ijs instead of IJs in Dutch)

- // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)

- if (*info & SPELL_FORBIDDEN) {

- rv = NULL;

- break;

- }

- if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;

- if (rv) break;

- rv = checkword(wspace, info, root);

- if (abbv && !rv) {

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- rv = checkword(wspace, info, root);

- if (!rv) {

- memcpy(wspace, cw, wl2);

- *(wspace+wl2) = '.';

- *(wspace+wl2+1) = '\0';

- if (captype == INITCAP) *info += SPELL_INITCAP;

- rv = checkword(wspace, info, root);

- if (captype == INITCAP) *info -= SPELL_INITCAP;

- if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;

- break;

- }

- if (rv && is_keepcase(rv) &&

- ((captype == ALLCAP) ||

- // if CHECKSHARPS: KEEPCASE words with \xDF are allowed

- // in INITCAP form, too.

- !(pAMgr->get_checksharps() &&

- ((utf8 && strstr(wspace, "\xC3\x9F")) ||

- (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;

- break;

- }

+ }

+ if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {

+ mkallsmall2(scw, sunicw);

+ std::string u8buffer(scw);

+ rv = spellsharps(u8buffer, 0, 0, 0, info, root);

+ if (!rv) {

+ mkinitcap2(scw, sunicw);

+ rv = spellsharps(scw, 0, 0, 0, info, root);

+ }

+ if ((abbv) && !(rv)) {

+ u8buffer.push_back('.');

+ rv = spellsharps(u8buffer, 0, 0, 0, info, root);

+ if (!rv) {

+ u8buffer = std::string(scw);

+ u8buffer.push_back('.');

+ rv = spellsharps(u8buffer, 0, 0, 0, info, root);

+ }

+ if (rv)

+ break;

+ }

+ case INITCAP: {

+ *info += SPELL_ORIGCAP;

+ mkallsmall2(scw, sunicw);

+ std::string u8buffer(scw);

+ mkinitcap2(scw, sunicw);

+ if (captype == INITCAP)

+ *info += SPELL_INITCAP;

+ rv = checkword(scw, info, root);

+ if (captype == INITCAP)

+ *info -= SPELL_INITCAP;

+ // forbid bad capitalization

+ // (for example, ijs -> Ijs instead of IJs in Dutch)

+ // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)

+ if (*info & SPELL_FORBIDDEN) {

+ rv = NULL;

+ break;

+ }

+ if (rv && is_keepcase(rv) && (captype == ALLCAP))

+ rv = NULL;

+ if (rv)

+ break;

+ rv = checkword(u8buffer, info, root);

+ if (abbv && !rv) {

+ u8buffer.push_back('.');

+ rv = checkword(u8buffer, info, root);

+ if (!rv) {

+ u8buffer = scw;

+ u8buffer.push_back('.');

+ if (captype == INITCAP)

+ *info += SPELL_INITCAP;

+ rv = checkword(u8buffer, info, root);

+ if (captype == INITCAP)

+ *info -= SPELL_INITCAP;

+ if (rv && is_keepcase(rv) && (captype == ALLCAP))

+ rv = NULL;

+ break;

+ }

+ if (rv && is_keepcase(rv) &&

+ ((captype == ALLCAP) ||

+ // if CHECKSHARPS: KEEPCASE words with \xDF are allowed

+ // in INITCAP form, too.

+ !(pAMgr->get_checksharps() &&

+ ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||

+ (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))

+ rv = NULL;

+ break;

+ }

}

if (rv) {

- if (pAMgr && pAMgr->get_warn() && rv->astr &&

- TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {

- *info += SPELL_WARN;

- if (pAMgr->get_forbidwarn()) return 0;

- return HUNSPELL_OK_WARN;

- }

- return HUNSPELL_OK;

+ if (pAMgr && pAMgr->get_warn() && rv->astr &&

+ TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {

+ *info += SPELL_WARN;

+ if (pAMgr->get_forbidwarn())

+ return false;

+ return true;

+ }

+ return true;

}

// recursive breaking at break points

- if (wordbreak) {

- char * s;

- char r;

+ if (!wordbreak.empty()) {

int nbr = 0;

- wl = strlen(cw);

- int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;

+ wl = scw.size();

// calculate break points for recursion limit

- for (int j = 0; j < numbreak; j++) {

- s = cw;

- do {

- s = (char *) strstr(s, wordbreak[j]);

- if (s) {

- nbr++;

- s++;

- }

- } while (s);

- }

- if (nbr >= 10) return 0;

+ for (size_t j = 0; j < wordbreak.size(); ++j) {

+ size_t pos = 0;

+ while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {

+ ++nbr;

+ pos += wordbreak[j].size();

+ }

+ if (nbr >= 10)

+ return false;

// check boundary patterns (^begin and end$)

- for (int j = 0; j < numbreak; j++) {

- int plen = strlen(wordbreak[j]);

- if (plen == 1 || plen > wl) continue;

- if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0

- && spell(cw + plen - 1)) return 1;

+ for (size_t j = 0; j < wordbreak.size(); ++j) {

+ size_t plen = wordbreak[j].size();

+ if (plen == 1 || plen > wl)

+ continue;

+ if (wordbreak[j][0] == '^' &&

+ scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))

+ return true;

if (wordbreak[j][plen - 1] == '$' &&

- strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {

- r = cw[wl - plen + 1];

- cw[wl - plen + 1] = '\0';

- if (spell(cw)) return 1;

- cw[wl - plen + 1] = r;

- }

+ scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {

+ std::string suffix(scw.substr(wl - plen + 1));

+ scw.resize(wl - plen + 1);

+ if (spell(scw))

+ return true;

+ scw.append(suffix);

+ }

}

// other patterns

- for (int j = 0; j < numbreak; j++) {

- int plen = strlen(wordbreak[j]);

- s=(char *) strstr(cw, wordbreak[j]);

- if (s && (s > cw) && (s < cw + wl - plen)) {

- if (!spell(s + plen)) continue;

- r = *s;

- *s = '\0';

+ for (size_t j = 0; j < wordbreak.size(); ++j) {

+ size_t plen = wordbreak[j].size();

+ size_t found = scw.find(wordbreak[j]);

+ if ((found > 0) && (found < wl - plen)) {

+ if (!spell(scw.substr(found + plen)))

+ continue;

+ std::string suffix(scw.substr(found));

+ scw.resize(found);

// examine 2 sides of the break point

- if (spell(cw)) return 1;

- *s = r;

+ if (spell(scw))

+ return true;

+ scw.append(suffix);

// LANG_hu: spec. dash rule

- if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {

- r = s[1];

- s[1] = '\0';

- if (spell(cw)) return 1; // check the first part with dash

- s[1] = r;

- }

- // end of LANG speficic region

+ if (langnum == LANG_hu && wordbreak[j] == "-") {

+ suffix = scw.substr(found + 1);

+ scw.resize(found + 1);

+ if (spell(scw))

+ return true; // check the first part with dash

+ scw.append(suffix);

+ }

+ // end of LANG specific region

}

- return 0;

+ return false;

}

-struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)

- struct hentry * he = NULL;

- int len, i;

- char w2[MAXWORDUTF8LEN];

- const char * word;

+struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {

+ bool usebuffer = false;

+ std::string w2;

+ const char* word;

+ int len;

- char * ignoredchars = pAMgr->get_ignore();

+ const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;

if (ignoredchars != NULL) {

- strcpy(w2, w);

- if (utf8) {

- int ignoredchars_utf16_len;

- unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);

- remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);

- } else {

- remove_ignored_chars(w2,ignoredchars);

- }

- word = w2;

- } else word = w;

- len = strlen(word);

+ w2.assign(w);

+ if (utf8) {

+ const std::vector<w_char>& ignoredchars_utf16 =

+ pAMgr->get_ignore_utf16();

+ remove_ignored_chars_utf(w2, ignoredchars_utf16);

+ } else {

+ remove_ignored_chars(w2, ignoredchars);

+ }

+ word = w2.c_str();

+ len = w2.size();

+ usebuffer = true;

+ } else {

+ word = w.c_str();

+ len = w.size();

+ }

if (!len)

- return NULL;

+ return NULL;

#ifdef HUNSPELL_CHROME_CLIENT

// We need to check if the word length is valid to make coverity (Event

@@ -627,1430 +781,1310 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)

// word reversing wrapper for complex prefixes

if (complexprefixes) {

- if (word != w2) {

- strcpy(w2, word);

- word = w2;

+ if (!usebuffer) {

+ w2.assign(word);

+ usebuffer = true;

}

- if (utf8) reverseword_utf(w2); else reverseword(w2);

+ if (utf8)

+ reverseword_utf(w2);

+ else

+ reverseword(w2);

+ }

+ if (usebuffer) {

+ word = w2.c_str();

}

// look word in hash table

- for (i = 0; (i < maxdic) && !he; i ++) {

- he = (pHMgr[i])->lookup(word);

- // check forbidden and onlyincompound words

- if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {

- if (info) *info += SPELL_FORBIDDEN;

- // LANG_hu section: set dash information for suggestions

- if (langnum == LANG_hu) {

+ struct hentry* he = NULL;

+ for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {

+ he = m_HMgrs[i]->lookup(word);

+ // check forbidden and onlyincompound words

+ if ((he) && (he->astr) && (pAMgr) &&

+ TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {

+ if (info)

+ *info += SPELL_FORBIDDEN;

+ // LANG_hu section: set dash information for suggestions

+ if (langnum == LANG_hu) {

if (pAMgr->get_compoundflag() &&

TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {

- if (info) *info += SPELL_COMPOUND;

+ if (info)

+ *info += SPELL_COMPOUND;

}

+ }

+ return NULL;

}

- return NULL;

- }

- // he = next not needaffix, onlyincompound homonym or onlyupcase word

- while (he && (he->astr) &&

- ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||

- (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||

- (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))

- )) he = he->next_homonym;

+ // he = next not needaffix, onlyincompound homonym or onlyupcase word

+ while (he && (he->astr) && pAMgr &&

+ ((pAMgr->get_needaffix() &&

+ TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||

+ (pAMgr->get_onlyincompound() &&

+ TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||

+ (info && (*info & SPELL_INITCAP) &&

+ TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))

+ he = he->next_homonym;

}

// check with affixes

if (!he && pAMgr) {

- // try stripping off affixes */

- he = pAMgr->affix_check(word, len, 0);

- // check compound restriction and onlyupcase

- if (he && he->astr && (

- (pAMgr->get_onlyincompound() &&

- TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||

- (info && (*info & SPELL_INITCAP) &&

- TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {

- he = NULL;

- }

- if (he) {

- if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {

- if (info) *info += SPELL_FORBIDDEN;

- return NULL;

+ // try stripping off affixes

+ he = pAMgr->affix_check(word, len, 0);

+ // check compound restriction and onlyupcase

+ if (he && he->astr &&

+ ((pAMgr->get_onlyincompound() &&

+ TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||

+ (info && (*info & SPELL_INITCAP) &&

+ TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {

+ he = NULL;

+ }

+ if (he) {

+ if ((he->astr) && (pAMgr) &&

+ TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {

+ if (info)

+ *info += SPELL_FORBIDDEN;

+ return NULL;

+ }

+ if (root) {

+ root->assign(he->word);

+ if (complexprefixes) {

+ if (utf8)

+ reverseword_utf(*root);

+ else

+ reverseword(*root);

}

+ }

+ // try checking it as a compound word

+ } else if (pAMgr->get_compound()) {

+ struct hentry* rwords[100]; // buffer for COMPOUND pattern checking

+ he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);

+ // LANG_hu section: `moving rule' with last dash

+ if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {

+ std::string dup(word, len - 1);

+ he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);

+ }

+ // end of LANG specific region

+ if (he) {

if (root) {

- *root = mystrdup(he->word);

- if (*root && complexprefixes) {

- if (utf8) reverseword_utf(*root); else reverseword(*root);

- }

- // try check compound word

- } else if (pAMgr->get_compound()) {

- he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);

- // LANG_hu section: `moving rule' with last dash

- if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {

- char * dup = mystrdup(word);

- if (!dup) return NULL;

- dup[len-1] = '\0';

- he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);

- free(dup);

- }

- // end of LANG speficic region

- if (he) {

- if (root) {

- *root = mystrdup(he->word);

- if (*root && complexprefixes) {

- if (utf8) reverseword_utf(*root); else reverseword(*root);

- }

- if (info) *info += SPELL_COMPOUND;

+ root->assign(he->word);

+ if (complexprefixes) {

+ if (utf8)

+ reverseword_utf(*root);

+ else

+ reverseword(*root);

}

- }

+ }

+ if (info)

+ *info += SPELL_COMPOUND;

+ }

}

return he;

}

-int Hunspell::suggest(char*** slst, const char * word)

+std::vector<std::string> Hunspell::suggest(const std::string& word) {

+ return m_Impl->suggest(word);

+std::vector<std::string> HunspellImpl::suggest(const std::string& word) {

#ifdef HUNSPELL_CHROME_CLIENT

- if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();

+ if (m_HMgrs[0]) m_HMgrs[0]->EmptyHentryCache();

#endif

+ std::vector<std::string> slst;

int onlycmpdsug = 0;

- char cw[MAXWORDUTF8LEN];

- char wspace[MAXWORDUTF8LEN];

- if (!pSMgr || maxdic == 0) return 0;

- w_char unicw[MAXWORDLEN];

- *slst = NULL;

+ if (!pSMgr || m_HMgrs.empty())

+ return slst;

// process XML input of the simplified API (see manual)

- if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {

- return spellml(slst, word);

+ if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {

+ return spellml(word);

}

- int nc = strlen(word);

if (utf8) {

- if (nc >= MAXWORDUTF8LEN) return 0;

+ if (word.size() >= MAXWORDUTF8LEN)

+ return slst;

} else {

- if (nc >= MAXWORDLEN) return 0;

+ if (word.size() >= MAXWORDLEN)

+ return slst;

}

- int captype = 0;

- int abbv = 0;

- int wl = 0;

+ int captype = NOCAP;

+ size_t abbv = 0;

+ size_t wl = 0;

+ std::string scw;

+ std::vector<w_char> sunicw;

// input conversion

- RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

- if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);

- else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

+ RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

+ {

+ std::string wspace;

+ bool convstatus = rl ? rl->conv(word, wspace) : false;

+ if (convstatus)

+ wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);

+ else

+ wl = cleanword2(scw, sunicw, word, &captype, &abbv);

+ if (wl == 0)

+ return slst;

+ }

- if (wl == 0) return 0;

- int ns = 0;

int capwords = 0;

// check capitalized form for FORCEUCASE

if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {

int info = SPELL_ORIGCAP;

- char ** wlst;

- if (checkword(cw, &info, NULL)) {

- if (*slst) {

- wlst = *slst;

+ if (checkword(scw, &info, NULL)) {

+ std::string form(scw);

+ mkinitcap(form);

+ slst.push_back(form);

+ return slst;

+ }

+ switch (captype) {

+ case NOCAP: {

+ pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);

+ break;

+ }

+ case INITCAP: {

+ capwords = 1;

+ pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);

+ std::string wspace(scw);

+ mkallsmall2(wspace, sunicw);

+ pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);

+ break;

+ }

+ case HUHINITCAP:

+ capwords = 1;

+ case HUHCAP: {

+ pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);

+ // something.The -> something. The

+ size_t dot_pos = scw.find('.');

+ if (dot_pos != std::string::npos) {

+ std::string postdot = scw.substr(dot_pos + 1);

+ int captype_;

+ if (utf8) {

+ std::vector<w_char> postdotu;

+ u8_u16(postdotu, postdot);

+ captype_ = get_captype_utf8(postdotu, langnum);

} else {

- wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));

- if (wlst == NULL) return -1;

- *slst = wlst;

- for (int i = 0; i < MAXSUGGESTION; i++) {

- wlst[i] = NULL;

- }

+ captype_ = get_captype(postdot, csconv);

+ }

+ if (captype_ == INITCAP) {

+ std::string str(scw);

+ str.insert(dot_pos + 1, 1, ' ');

+ insert_sug(slst, str);

}

- wlst[0] = mystrdup(cw);

- mkinitcap(wlst[0]);

- return 1;

+ }

+ std::string wspace;

+ if (captype == HUHINITCAP) {

+ // TheOpenOffice.org -> The OpenOffice.org

+ wspace = scw;

+ mkinitsmall2(wspace, sunicw);

+ pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);

+ }

+ wspace = scw;

+ mkallsmall2(wspace, sunicw);

+ if (spell(wspace.c_str()))

+ insert_sug(slst, wspace);

+ size_t prevns = slst.size();

+ pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);

+ if (captype == HUHINITCAP) {

+ mkinitcap2(wspace, sunicw);

+ if (spell(wspace.c_str()))

+ insert_sug(slst, wspace);

+ pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);

+ }

+ // aNew -> "a New" (instead of "a new")

+ for (size_t j = prevns; j < slst.size(); ++j) {

+ const char* space = strchr(slst[j].c_str(), ' ');

+ if (space) {

+ size_t slen = strlen(space + 1);

+ // different case after space (need capitalisation)

+ if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {

+ std::string first(slst[j].c_str(), space + 1);

+ std::string second(space + 1);

+ std::vector<w_char> w;

+ if (utf8)

+ u8_u16(w, second);

+ mkinitcap2(second, w);

+ // set as first suggestion

+ slst.erase(slst.begin() + j);

+ slst.insert(slst.begin(), first + second);

+ }

+ break;

+ }

+ case ALLCAP: {

+ std::string wspace(scw);

+ mkallsmall2(wspace, sunicw);

+ pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);

+ if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))

+ insert_sug(slst, wspace);

+ mkinitcap2(wspace, sunicw);

+ pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);

+ for (size_t j = 0; j < slst.size(); ++j) {

+ mkallcap(slst[j]);

+ if (pAMgr && pAMgr->get_checksharps()) {

+ if (utf8) {

+ mystrrep(slst[j], "\xC3\x9F", "SS");

+ } else {

+ mystrrep(slst[j], "\xDF", "SS");

+ }

+ break;

}

- }

- switch(captype) {

- case NOCAP: {

- ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

- break;

- }

- case INITCAP: {

- capwords = 1;

- ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

- if (ns == -1) break;

- memcpy(wspace,cw,(wl+1));

- mkallsmall2(wspace, unicw, nc);

- ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

- break;

- }

- case HUHINITCAP:

- capwords = 1;

- case HUHCAP: {

- ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

- if (ns != -1) {

- int prevns;

- // something.The -> something. The

- char * dot = strchr(cw, '.');

- if (dot && (dot > cw)) {

- int captype_;

- if (utf8) {

- w_char w_[MAXWORDLEN];

- int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);

- captype_ = get_captype_utf8(w_, wl_, langnum);

- } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);

- if (captype_ == INITCAP) {

- char * st = mystrdup(cw);

- if (st) st = (char *) realloc(st, wl + 2);

- if (st) {

- st[(dot - cw) + 1] = ' ';

- strcpy(st + (dot - cw) + 2, dot + 1);

- ns = insert_sug(slst, st, ns);

- free(st);

- }

- if (captype == HUHINITCAP) {

- // TheOpenOffice.org -> The OpenOffice.org

- memcpy(wspace,cw,(wl+1));

- mkinitsmall2(wspace, unicw, nc);

- ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

- }

- memcpy(wspace,cw,(wl+1));

- mkallsmall2(wspace, unicw, nc);

- if (spell(wspace)) ns = insert_sug(slst, wspace, ns);

- prevns = ns;

- ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

- if (captype == HUHINITCAP) {

- mkinitcap2(wspace, unicw, nc);

- if (spell(wspace)) ns = insert_sug(slst, wspace, ns);

- ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

- }

- // aNew -> "a New" (instead of "a new")

- for (int j = prevns; j < ns; j++) {

- char * space = strchr((*slst)[j],' ');

- if (space) {

- int slen = strlen(space + 1);

- // different case after space (need capitalisation)

- if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {

- w_char w[MAXWORDLEN];

- int wc = 0;

- char * r = (*slst)[j];

- if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);

- mkinitcap2(space + 1, w, wc);

- // set as first suggestion

- for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];

- (*slst)[0] = r;

- }

- break;

- }

- case ALLCAP: {

- memcpy(wspace, cw, (wl+1));

- mkallsmall2(wspace, unicw, nc);

- ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

- if (ns == -1) break;

- if (pAMgr && pAMgr->get_keepcase() && spell(wspace))

- ns = insert_sug(slst, wspace, ns);

- mkinitcap2(wspace, unicw, nc);

- ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

- for (int j=0; j < ns; j++) {

- mkallcap((*slst)[j]);

- if (pAMgr && pAMgr->get_checksharps()) {

- char * pos;

- if (utf8) {

- pos = strstr((*slst)[j], "\xC3\x9F");

- while (pos) {

- *pos = 'S';

- *(pos+1) = 'S';

- pos = strstr(pos+2, "\xC3\x9F");

- }

- } else {

- pos = strchr((*slst)[j], '\xDF');

- while (pos) {

- (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);

- mystrrep((*slst)[j], "\xDF", "SS");

- pos = strchr((*slst)[j], '\xDF');

- }

- break;

- }

}

- // LANG_hu section: replace '-' with ' ' in Hungarian

+ // LANG_hu section: replace '-' with ' ' in Hungarian

if (langnum == LANG_hu) {

- for (int j=0; j < ns; j++) {

- char * pos = strchr((*slst)[j],'-');

- if (pos) {

- int info;

- char w[MAXWORDUTF8LEN];

- *pos = '\0';

- strcpy(w, (*slst)[j]);

- strcat(w, pos + 1);

- spell(w, &info, NULL);

- if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {

- *pos = ' ';

- } else *pos = '-';

- }

+ for (size_t j = 0; j < slst.size(); ++j) {

+ size_t pos = slst[j].find('-');

+ if (pos != std::string::npos) {

+ int info;

+ std::string w(slst[j].substr(0, pos));

+ w.append(slst[j].substr(pos + 1));

+ (void)spell(w, &info, NULL);

+ if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {

+ slst[j][pos] = ' ';

+ } else

+ slst[j][pos] = '-';

}

+ }

}

// END OF LANG_hu section

// try ngram approach since found nothing or only compound words

- if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {

- switch(captype) {

- case NOCAP: {

- ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);

- break;

- }

- case HUHINITCAP:

- capwords = 1;

- case HUHCAP: {

- memcpy(wspace,cw,(wl+1));

- mkallsmall2(wspace, unicw, nc);

- ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

- break;

- }

- case INITCAP: {

- capwords = 1;

- memcpy(wspace,cw,(wl+1));

- mkallsmall2(wspace, unicw, nc);

- ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

- break;

- }

- case ALLCAP: {

- memcpy(wspace,cw,(wl+1));

- mkallsmall2(wspace, unicw, nc);

- int oldns = ns;

- ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

- for (int j = oldns; j < ns; j++)

- mkallcap((*slst)[j]);

- break;

- }

+ if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {

+ switch (captype) {

+ case NOCAP: {

+ pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs);

+ break;

+ }

+ case HUHINITCAP:

+ capwords = 1;

+ case HUHCAP: {

+ std::string wspace(scw);

+ mkallsmall2(wspace, sunicw);

+ pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);

+ break;

+ }

+ case INITCAP: {

+ capwords = 1;

+ std::string wspace(scw);

+ mkallsmall2(wspace, sunicw);

+ pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);

+ break;

}

+ case ALLCAP: {

+ std::string wspace(scw);

+ mkallsmall2(wspace, sunicw);

+ size_t oldns = slst.size();

+ pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);

+ for (size_t j = oldns; j < slst.size(); ++j) {

+ mkallcap(slst[j]);

+ }

+ break;

+ }

}

// try dash suggestion (Afo-American -> Afro-American)

- if (char * pos = strchr(cw, '-')) {

- char * ppos = cw;

- int nodashsug = 1;

- char ** nlst = NULL;

- int nn = 0;

- int last = 0;

- if (*slst) {

- for (int j = 0; j < ns && nodashsug == 1; j++) {

- if (strchr((*slst)[j], '-')) nodashsug = 0;

- }

- while (nodashsug && !last) {

- if (*pos == '\0') last = 1; else *pos = '\0';

- if (!spell(ppos)) {

- nn = suggest(&nlst, ppos);

- for (int j = nn - 1; j >= 0; j--) {

- strncpy(wspace, cw, ppos - cw);

- strcpy(wspace + (ppos - cw), nlst[j]);

- if (!last) {

- strcat(wspace, "-");

- strcat(wspace, pos + 1);

- }

- ns = insert_sug(slst, wspace, ns);

- free(nlst[j]);

+ size_t dash_pos = scw.find('-');

+ if (dash_pos != std::string::npos) {

+ int nodashsug = 1;

+ for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {

+ if (slst[j].find('-') != std::string::npos)

+ nodashsug = 0;

+ }

+ size_t prev_pos = 0;

+ bool last = false;

+ while (nodashsug && !last) {

+ if (dash_pos == scw.size())

+ last = 1;

+ std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);

+ if (!spell(chunk.c_str())) {

+ std::vector<std::string> nlst = suggest(chunk.c_str());

+ for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {

+ std::string wspace = scw.substr(0, prev_pos);

+ wspace.append(*j);

+ if (!last) {

+ wspace.append("-");

+ wspace.append(scw.substr(dash_pos + 1));

}

- if (nlst != NULL) free(nlst);

- nodashsug = 0;

- }

- if (!last) {

- *pos = '-';

- ppos = pos + 1;

- pos = strchr(ppos, '-');

+ insert_sug(slst, wspace);

}

- if (!pos) pos = cw + strlen(cw);

- }

+ nodashsug = 0;

+ }

+ if (!last) {

+ prev_pos = dash_pos + 1;

+ dash_pos = scw.find('-', prev_pos);

+ }

+ if (dash_pos == std::string::npos)

+ dash_pos = scw.size();

+ }

}

// word reversing wrapper for complex prefixes

if (complexprefixes) {

- for (int j = 0; j < ns; j++) {

- if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);

+ for (size_t j = 0; j < slst.size(); ++j) {

+ if (utf8)

+ reverseword_utf(slst[j]);

+ else

+ reverseword(slst[j]);

}

// capitalize

- if (capwords) for (int j=0; j < ns; j++) {

- mkinitcap((*slst)[j]);

- }

+ if (capwords)

+ for (size_t j = 0; j < slst.size(); ++j) {

+ mkinitcap(slst[j]);

+ }

// expand suggestions with dot(s)

if (abbv && pAMgr && pAMgr->get_sugswithdots()) {

- for (int j = 0; j < ns; j++) {

- (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);

- strcat((*slst)[j], word + strlen(word) - abbv);

+ for (size_t j = 0; j < slst.size(); ++j) {

+ slst[j].append(word.substr(word.size() - abbv));

}

// remove bad capitalized and forbidden forms

if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {

- switch (captype) {

- case INITCAP:

- case ALLCAP: {

- int l = 0;

- for (int j=0; j < ns; j++) {

- if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {

- char s[MAXSWUTF8L];

- w_char w[MAXSWL];

- int len;

- if (utf8) {

- len = u8_u16(w, MAXSWL, (*slst)[j]);

- } else {

- strcpy(s, (*slst)[j]);

- len = strlen(s);

- }

- mkallsmall2(s, w, len);

- free((*slst)[j]);

- if (spell(s)) {

- (*slst)[l] = mystrdup(s);

- if ((*slst)[l]) l++;

- } else {

- mkinitcap2(s, w, len);

+ switch (captype) {

+ case INITCAP:

+ case ALLCAP: {

+ size_t l = 0;

+ for (size_t j = 0; j < slst.size(); ++j) {

+ if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {

+ std::string s;

+ std::vector<w_char> w;

+ if (utf8) {

+ u8_u16(w, slst[j]);

+ } else {

+ s = slst[j];

+ }

+ mkallsmall2(s, w);

if (spell(s)) {

- (*slst)[l] = mystrdup(s);

- if ((*slst)[l]) l++;

+ slst[l] = s;

+ ++l;

+ } else {

+ mkinitcap2(s, w);

+ if (spell(s)) {

+ slst[l] = s;

+ ++l;

+ }

}

+ } else {

+ slst[l] = slst[j];

+ ++l;

}

- } else {

- (*slst)[l] = (*slst)[j];

- l++;

}

+ slst.resize(l);

}

- ns = l;

}

- }

// remove duplications

- int l = 0;

- for (int j = 0; j < ns; j++) {

- (*slst)[l] = (*slst)[j];

- for (int k = 0; k < l; k++) {

- if (strcmp((*slst)[k], (*slst)[j]) == 0) {

- free((*slst)[j]);

- l--;

+ size_t l = 0;

+ for (size_t j = 0; j < slst.size(); ++j) {

+ slst[l] = slst[j];

+ for (size_t k = 0; k < l; ++k) {

+ if (slst[k] == slst[j]) {

+ --l;

break;

}

- l++;

+ ++l;

}

- ns = l;

+ slst.resize(l);

// output conversion

rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;

- for (int j = 0; rl && j < ns; j++) {

- if (rl->conv((*slst)[j], wspace)) {

- free((*slst)[j]);

- (*slst)[j] = mystrdup(wspace);

+ for (size_t j = 0; rl && j < slst.size(); ++j) {

+ std::string wspace;

+ if (rl->conv(slst[j], wspace)) {

+ slst[j] = wspace;

}

- // if suggestions removed by nosuggest, onlyincompound parameters

- if (l == 0 && *slst) {

- free(*slst);

- *slst = NULL;

- }

- return l;

+ return slst;

}

-void Hunspell::free_list(char *** slst, int n) {

- freelist(slst, n);

+const std::string& Hunspell::get_dict_encoding() const {

+ return m_Impl->get_dict_encoding();

}

-char * Hunspell::get_dic_encoding()

+const std::string& HunspellImpl::get_dict_encoding() const {

return encoding;

}

-#ifdef HUNSPELL_EXPERIMENTAL

-// XXX need UTF-8 support

-int Hunspell::suggest_auto(char*** slst, const char * word)

- char cw[MAXWORDUTF8LEN];

- char wspace[MAXWORDUTF8LEN];

- if (!pSMgr || maxdic == 0) return 0;

- int wl = strlen(word);

- if (utf8) {

- if (wl >= MAXWORDUTF8LEN) return 0;

- } else {

- if (wl >= MAXWORDLEN) return 0;

- }

- int captype = 0;

- int abbv = 0;

- wl = cleanword(cw, word, &captype, &abbv);

- if (wl == 0) return 0;

- int ns = 0;

- *slst = NULL; // HU, nsug in pSMgr->suggest

- switch(captype) {

- case NOCAP: {

- ns = pSMgr->suggest_auto(slst, cw, ns);

- if (ns>0) break;

- break;

- }

- case INITCAP: {

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- ns = pSMgr->suggest_auto(slst, wspace, ns);

- for (int j=0; j < ns; j++)

- mkinitcap((*slst)[j]);

- ns = pSMgr->suggest_auto(slst, cw, ns);

- break;

- }

- case HUHINITCAP:

- case HUHCAP: {

- ns = pSMgr->suggest_auto(slst, cw, ns);

- if (ns == 0) {

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- ns = pSMgr->suggest_auto(slst, wspace, ns);

- }

- break;

- }

- case ALLCAP: {

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- ns = pSMgr->suggest_auto(slst, wspace, ns);

- mkinitcap(wspace);

- ns = pSMgr->suggest_auto(slst, wspace, ns);

- for (int j=0; j < ns; j++)

- mkallcap((*slst)[j]);

- break;

- }

+std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {

+ return m_Impl->stem(desc);

- // word reversing wrapper for complex prefixes

- if (complexprefixes) {

- for (int j = 0; j < ns; j++) {

- if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);

- }

+std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {

+ std::vector<std::string> slst;

- // expand suggestions with dot(s)

- if (abbv && pAMgr && pAMgr->get_sugswithdots()) {

- for (int j = 0; j < ns; j++) {

- (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);

- strcat((*slst)[j], word + strlen(word) - abbv);

- }

+ std::string result2;

+ if (desc.empty())

+ return slst;

+ for (size_t i = 0; i < desc.size(); ++i) {

- // LANG_hu section: replace '-' with ' ' in Hungarian

- if (langnum == LANG_hu) {

- for (int j=0; j < ns; j++) {

- char * pos = strchr((*slst)[j],'-');

- if (pos) {

- int info;

- char w[MAXWORDUTF8LEN];

- *pos = '\0';

- strcpy(w, (*slst)[j]);

- strcat(w, pos + 1);

- spell(w, &info, NULL);

- if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {

- *pos = ' ';

- } else *pos = '-';

- }

- // END OF LANG_hu section

- return ns;

-#endif

+ std::string result;

-int Hunspell::stem(char*** slst, char ** desc, int n)

- char result[MAXLNLEN];

- char result2[MAXLNLEN];

- *slst = NULL;

- if (n == 0) return 0;

- *result2 = '\0';

- for (int i = 0; i < n; i++) {

- *result = '\0';

// add compound word parts (except the last one)

- char * s = (char *) desc[i];

- char * part = strstr(s, MORPH_PART);

+ const char* s = desc[i].c_str();

+ const char* part = strstr(s, MORPH_PART);

if (part) {

- char * nextpart = strstr(part + 1, MORPH_PART);

- while (nextpart) {

- copy_field(result + strlen(result), part, MORPH_PART);

- part = nextpart;

- nextpart = strstr(part + 1, MORPH_PART);

- }

- s = part;

+ const char* nextpart = strstr(part + 1, MORPH_PART);

+ while (nextpart) {

+ std::string field;

+ copy_field(field, part, MORPH_PART);

+ result.append(field);

+ part = nextpart;

+ nextpart = strstr(part + 1, MORPH_PART);

+ }

+ s = part;

}

- char **pl;

- char tok[MAXLNLEN];

- strcpy(tok, s);

- char * alt = strstr(tok, " | ");

- while (alt) {

- alt[1] = MSEP_ALT;

- alt = strstr(alt, " | ");

+ std::string tok(s);

+ size_t alt = 0;

+ while ((alt = tok.find(" | ", alt)) != std::string::npos) {

+ tok[alt + 1] = MSEP_ALT;

}

- int pln = line_tok(tok, &pl, MSEP_ALT);

- for (int k = 0; k < pln; k++) {

- // add derivational suffixes

- if (strstr(pl[k], MORPH_DERI_SFX)) {

- // remove inflectional suffixes

- char * is = strstr(pl[k], MORPH_INFL_SFX);

- if (is) *is = '\0';

- char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);

- if (sg) {

- char ** gen;

- int genl = line_tok(sg, &gen, MSEP_REC);

- free(sg);

- for (int j = 0; j < genl; j++) {

- sprintf(result2 + strlen(result2), "%c%s%s",

- MSEP_REC, result, gen[j]);

- }

- freelist(&gen, genl);

- }

- } else {

- sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);

- if (strstr(pl[k], MORPH_SURF_PFX)) {

- copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);

- }

- copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);

+ std::vector<std::string> pl = line_tok(tok, MSEP_ALT);

+ for (size_t k = 0; k < pl.size(); ++k) {

+ // add derivational suffixes

+ if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) {

+ // remove inflectional suffixes

+ const size_t is = pl[k].find(MORPH_INFL_SFX);

+ if (is != std::string::npos)

+ pl[k].resize(is);

+ std::vector<std::string> singlepl;

+ singlepl.push_back(pl[k]);

+ std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);

+ if (!sg.empty()) {

+ std::vector<std::string> gen = line_tok(sg, MSEP_REC);

+ for (size_t j = 0; j < gen.size(); ++j) {

+ result2.push_back(MSEP_REC);

+ result2.append(result);

+ result2.append(gen[j]);

+ }

+ } else {

+ result2.push_back(MSEP_REC);

+ result2.append(result);

+ if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) {

+ std::string field;

+ copy_field(field, pl[k], MORPH_SURF_PFX);

+ result2.append(field);

}

+ std::string field;

+ copy_field(field, pl[k], MORPH_STEM);

+ result2.append(field);

+ }

}

- freelist(&pl, pln);

}

- int sln = line_tok(result2, slst, MSEP_REC);

- return uniqlist(*slst, sln);

+ slst = line_tok(result2, MSEP_REC);

+ uniqlist(slst);

+ return slst;

}

-int Hunspell::stem(char*** slst, const char * word)

- char ** pl;

- int pln = analyze(&pl, word);

- int pln2 = stem(slst, pl, pln);

- freelist(&pl, pln);

- return pln2;

+std::vector<std::string> Hunspell::stem(const std::string& word) {

+ return m_Impl->stem(word);

}

-#ifdef HUNSPELL_EXPERIMENTAL

-int Hunspell::suggest_pos_stems(char*** slst, const char * word)

- char cw[MAXWORDUTF8LEN];

- char wspace[MAXWORDUTF8LEN];

- if (! pSMgr || maxdic == 0) return 0;

- int wl = strlen(word);

- if (utf8) {

- if (wl >= MAXWORDUTF8LEN) return 0;

- } else {

- if (wl >= MAXWORDLEN) return 0;

- }

- int captype = 0;

- int abbv = 0;

- wl = cleanword(cw, word, &captype, &abbv);

- if (wl == 0) return 0;

- int ns = 0; // ns=0 = normalized input

- *slst = NULL; // HU, nsug in pSMgr->suggest

- switch(captype) {

- case HUHCAP:

- case NOCAP: {

- ns = pSMgr->suggest_pos_stems(slst, cw, ns);

- if ((abbv) && (ns == 0)) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

- }

- break;

- }

- case INITCAP: {

+std::vector<std::string> HunspellImpl::stem(const std::string& word) {

+ return stem(analyze(word));

- ns = pSMgr->suggest_pos_stems(slst, cw, ns);

+const char* Hunspell::get_wordchars() const {

+ return m_Impl->get_wordchars().c_str();

- if (ns == 0 || ((*slst)[0][0] == '#')) {

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

- }

+const std::string& Hunspell::get_wordchars_cpp() const {

+ return m_Impl->get_wordchars();

- break;

+const std::string& HunspellImpl::get_wordchars() const {

+ return pAMgr->get_wordchars();

- }

+const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {

+ return m_Impl->get_wordchars_utf16();

- case ALLCAP: {

- ns = pSMgr->suggest_pos_stems(slst, cw, ns);

- if (ns != 0) break;

+const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {

+ return pAMgr->get_wordchars_utf16();

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

+void HunspellImpl::mkinitcap(std::string& u8) {

+ if (utf8) {

+ std::vector<w_char> u16;

+ u8_u16(u16, u8);

+ ::mkinitcap_utf(u16, langnum);

+ u16_u8(u8, u16);

+ } else {

+ ::mkinitcap(u8, csconv);

+ }

- if (ns == 0) {

- mkinitcap(wspace);

- ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

- }

- break;

- }

+int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {

+ if (utf8) {

+ ::mkinitcap_utf(u16, langnum);

+ u16_u8(u8, u16);

+ } else {

+ ::mkinitcap(u8, csconv);

}

+ return u8.size();

- return ns;

+int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {

+ if (utf8) {

+ ::mkinitsmall_utf(u16, langnum);

+ u16_u8(u8, u16);

+ } else {

+ ::mkinitsmall(u8, csconv);

+ }

+ return u8.size();

}

-#endif // END OF HUNSPELL_EXPERIMENTAL CODE

-const char * Hunspell::get_wordchars()

- return pAMgr->get_wordchars();

+int Hunspell::add(const std::string& word) {

+ return m_Impl->add(word);

}

-unsigned short * Hunspell::get_wordchars_utf16(int * len)

- return pAMgr->get_wordchars_utf16(len);

+int HunspellImpl::add(const std::string& word) {

+ if (!m_HMgrs.empty())

+ return m_HMgrs[0]->add(word);

+ return 0;

}

-void Hunspell::mkinitcap(char * p)

- if (!utf8) {

- if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;

- } else {

- int len;

- w_char u[MAXWORDLEN];

- len = u8_u16(u, MAXWORDLEN, p);

- unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);

- u[0].h = (unsigned char) (i >> 8);

- u[0].l = (unsigned char) (i & 0x00FF);

- u16_u8(p, MAXWORDUTF8LEN, u, len);

- }

+int Hunspell::add_with_affix(const std::string& word, const std::string& example) {

+ return m_Impl->add_with_affix(word, example);

}

-int Hunspell::mkinitcap2(char * p, w_char * u, int nc)

- if (!utf8) {

- if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;

- } else if (nc > 0) {

- unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);

- u[0].h = (unsigned char) (i >> 8);

- u[0].l = (unsigned char) (i & 0x00FF);

- u16_u8(p, MAXWORDUTF8LEN, u, nc);

- return strlen(p);

- }

- return nc;

+int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {

+ if (!m_HMgrs.empty())

+ return m_HMgrs[0]->add_with_affix(word, example);

+ return 0;

}

-int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)

- if (!utf8) {

- if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;

- } else if (nc > 0) {

- unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);

- u[0].h = (unsigned char) (i >> 8);

- u[0].l = (unsigned char) (i & 0x00FF);

- u16_u8(p, MAXWORDUTF8LEN, u, nc);

- return strlen(p);

- }

- return nc;

+int Hunspell::remove(const std::string& word) {

+ return m_Impl->remove(word);

}

-int Hunspell::add(const char * word)

- if (pHMgr[0]) return (pHMgr[0])->add(word);

- return 0;

+int HunspellImpl::remove(const std::string& word) {

+ if (!m_HMgrs.empty())

+ return m_HMgrs[0]->remove(word);

+ return 0;

}

-int Hunspell::add_with_affix(const char * word, const char * example)

- if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);

- return 0;

+const char* Hunspell::get_version() const {

+ return m_Impl->get_version().c_str();

}

-int Hunspell::remove(const char * word)

- if (pHMgr[0]) return (pHMgr[0])->remove(word);

- return 0;

+const std::string& Hunspell::get_version_cpp() const {

+ return m_Impl->get_version();

}

-const char * Hunspell::get_version()

+const std::string& HunspellImpl::get_version() const {

return pAMgr->get_version();

}

-struct cs_info * Hunspell::get_csconv()

+struct cs_info* HunspellImpl::get_csconv() {

return csconv;

}

-void Hunspell::cat_result(char * result, char * st)

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

+struct cs_info* Hunspell::get_csconv() {

+ return m_Impl->get_csconv();

}

-int Hunspell::analyze(char*** slst, const char * word)

- char cw[MAXWORDUTF8LEN];

- char wspace[MAXWORDUTF8LEN];

- w_char unicw[MAXWORDLEN];

- int wl2 = 0;

- *slst = NULL;

- if (! pSMgr || maxdic == 0) return 0;

- int nc = strlen(word);

+void HunspellImpl::cat_result(std::string& result, const std::string& st) {

+ if (!st.empty()) {

+ if (!result.empty())

+ result.append("\n");

+ result.append(st);

+ }

+std::vector<std::string> Hunspell::analyze(const std::string& word) {

+ return m_Impl->analyze(word);

+std::vector<std::string> HunspellImpl::analyze(const std::string& word) {

+ std::vector<std::string> slst;

+ if (!pSMgr || m_HMgrs.empty())

+ return slst;

if (utf8) {

- if (nc >= MAXWORDUTF8LEN) return 0;

+ if (word.size() >= MAXWORDUTF8LEN)

+ return slst;

} else {

- if (nc >= MAXWORDLEN) return 0;

+ if (word.size() >= MAXWORDLEN)

+ return slst;

}

- int captype = 0;

- int abbv = 0;

- int wl = 0;

+ int captype = NOCAP;

+ size_t abbv = 0;

+ size_t wl = 0;

+ std::string scw;

+ std::vector<w_char> sunicw;

// input conversion

- RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

- if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);

- else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

+ RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

+ {

+ std::string wspace;

+ bool convstatus = rl ? rl->conv(word, wspace) : false;

+ if (convstatus)

+ wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);

+ else

+ wl = cleanword2(scw, sunicw, word, &captype, &abbv);

+ }

if (wl == 0) {

- if (abbv) {

- for (wl = 0; wl < abbv; wl++) cw[wl] = '.';

- cw[wl] = '\0';

- abbv = 0;

- } else return 0;

+ if (abbv) {

+ scw.clear();

+ for (wl = 0; wl < abbv; wl++)

+ scw.push_back('.');

+ abbv = 0;

+ } else

+ return slst;

}

- char result[MAXLNLEN];

- char * st = NULL;

- *result = '\0';

- int n = 0;

- int n2 = 0;

- int n3 = 0;

+ std::string result;

+ size_t n = 0;

// test numbers

// LANG_hu section: set dash information for suggestions

if (langnum == LANG_hu) {

- while ((n < wl) &&

- (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {

- n++;

- if ((cw[n] == '.') || (cw[n] == ',')) {

- if (((n2 == 0) && (n > 3)) ||

- ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;

- n2++;

- n3 = n;

- }

+ size_t n2 = 0;

+ size_t n3 = 0;

+ while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||

+ (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {

+ n++;

+ if ((scw[n] == '.') || (scw[n] == ',')) {

+ if (((n2 == 0) && (n > 3)) ||

+ ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))

+ break;

+ n2++;

+ n3 = n;

+ }

- if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;

- if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {

- mystrcat(result, cw, MAXLNLEN);

- result[n - 1] = '\0';

- if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));

- else {

- char sign = cw[n];

- cw[n] = '\0';

- cat_result(result, pSMgr->suggest_morph(cw + n - 1));

- mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE

- cw[n] = sign;

- cat_result(result, pSMgr->suggest_morph(cw + n));

- }

- return line_tok(result, slst, MSEP_REC);

- }

+ if ((n == wl) && (n3 > 0) && (n - n3 > 3))

+ return slst;

+ if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&

+ checkword(scw.substr(n), NULL, NULL))) {

+ result.append(scw);

+ result.resize(n - 1);

+ if (n == wl)

+ cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));

+ else {

+ std::string chunk = scw.substr(n - 1, 1);

+ cat_result(result, pSMgr->suggest_morph(chunk));

+ result.push_back('+'); // XXX SPEC. MORPHCODE

+ cat_result(result, pSMgr->suggest_morph(scw.substr(n)));

+ }

+ return line_tok(result, MSEP_REC);

+ }

}

// END OF LANG_hu section

- switch(captype) {

- case HUHCAP:

- case HUHINITCAP:

- case NOCAP: {

- cat_result(result, pSMgr->suggest_morph(cw));

- if (abbv) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- cat_result(result, pSMgr->suggest_morph(wspace));

- }

- break;

- }

- case INITCAP: {

- wl = mkallsmall2(cw, unicw, nc);

- memcpy(wspace,cw,(wl+1));

- wl2 = mkinitcap2(cw, unicw, nc);

- cat_result(result, pSMgr->suggest_morph(wspace));

- cat_result(result, pSMgr->suggest_morph(cw));

- if (abbv) {

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- cat_result(result, pSMgr->suggest_morph(wspace));

- memcpy(wspace, cw, wl2);

- *(wspace+wl2) = '.';

- *(wspace+wl2+1) = '\0';

- cat_result(result, pSMgr->suggest_morph(wspace));

- }

- break;

- }

- case ALLCAP: {

- cat_result(result, pSMgr->suggest_morph(cw));

- if (abbv) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- cat_result(result, pSMgr->suggest_morph(cw));

- }

- wl = mkallsmall2(cw, unicw, nc);

- memcpy(wspace,cw,(wl+1));

- wl2 = mkinitcap2(cw, unicw, nc);

- cat_result(result, pSMgr->suggest_morph(wspace));

- cat_result(result, pSMgr->suggest_morph(cw));

- if (abbv) {

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- cat_result(result, pSMgr->suggest_morph(wspace));

- memcpy(wspace, cw, wl2);

- *(wspace+wl2) = '.';

- *(wspace+wl2+1) = '\0';

- cat_result(result, pSMgr->suggest_morph(wspace));

- }

- break;

- }

+ switch (captype) {

+ case HUHCAP:

+ case HUHINITCAP:

+ case NOCAP: {

+ cat_result(result, pSMgr->suggest_morph(scw));

+ if (abbv) {

+ std::string u8buffer(scw);

+ u8buffer.push_back('.');

+ cat_result(result, pSMgr->suggest_morph(u8buffer));

+ }

+ break;

+ }

+ case INITCAP: {

+ mkallsmall2(scw, sunicw);

+ std::string u8buffer(scw);

+ mkinitcap2(scw, sunicw);

+ cat_result(result, pSMgr->suggest_morph(u8buffer));

+ cat_result(result, pSMgr->suggest_morph(scw));

+ if (abbv) {

+ u8buffer.push_back('.');

+ cat_result(result, pSMgr->suggest_morph(u8buffer));

+ u8buffer = scw;

+ u8buffer.push_back('.');

+ cat_result(result, pSMgr->suggest_morph(u8buffer));

+ }

+ break;

+ }

+ case ALLCAP: {

+ cat_result(result, pSMgr->suggest_morph(scw));

+ if (abbv) {

+ std::string u8buffer(scw);

+ u8buffer.push_back('.');

+ cat_result(result, pSMgr->suggest_morph(u8buffer));

+ }

+ mkallsmall2(scw, sunicw);

+ std::string u8buffer(scw);

+ mkinitcap2(scw, sunicw);

+ cat_result(result, pSMgr->suggest_morph(u8buffer));

+ cat_result(result, pSMgr->suggest_morph(scw));

+ if (abbv) {

+ u8buffer.push_back('.');

+ cat_result(result, pSMgr->suggest_morph(u8buffer));

+ u8buffer = scw;

+ u8buffer.push_back('.');

+ cat_result(result, pSMgr->suggest_morph(u8buffer));

+ }

+ break;

+ }

}

- if (*result) {

+ if (!result.empty()) {

// word reversing wrapper for complex prefixes

if (complexprefixes) {

- if (utf8) reverseword_utf(result); else reverseword(result);

+ if (utf8)

+ reverseword_utf(result);

+ else

+ reverseword(result);

}

- return line_tok(result, slst, MSEP_REC);

+ return line_tok(result, MSEP_REC);

}

// compound word with dash (HU) I18n

- char * dash = NULL;

- int nresult = 0;

// LANG_hu section: set dash information for suggestions

- if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');

- if ((langnum == LANG_hu) && dash) {

- *dash='\0';

- // examine 2 sides of the dash

- if (dash[1] == '\0') { // base word ending with dash

- if (spell(cw)) {

- char * p = pSMgr->suggest_morph(cw);

- if (p) {

- int ret = line_tok(p, slst, MSEP_REC);

- free(p);

- return ret;

- }

- } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.

- if (spell(cw) && (spell("-e"))) {

- st = pSMgr->suggest_morph(cw);

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE

- st = pSMgr->suggest_morph("-e");

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- return line_tok(result, slst, MSEP_REC);

- }

- } else {

+ size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;

+ if (dash_pos != std::string::npos) {

+ int nresult = 0;

+ std::string part1 = scw.substr(0, dash_pos);

+ std::string part2 = scw.substr(dash_pos+1);

+ // examine 2 sides of the dash

+ if (part2.empty()) { // base word ending with dash

+ if (spell(part1)) {

+ std::string p = pSMgr->suggest_morph(part1);

+ if (!p.empty()) {

+ slst = line_tok(p, MSEP_REC);

+ return slst;

+ }

+ } else if (part2.size() == 1 && part2[0] == 'e') { // XXX (HU) -e hat.

+ if (spell(part1) && (spell("-e"))) {

+ std::string st = pSMgr->suggest_morph(part1);

+ if (!st.empty()) {

+ result.append(st);

+ }

+ result.push_back('+'); // XXX spec. separator in MORPHCODE

+ st = pSMgr->suggest_morph("-e");

+ if (!st.empty()) {

+ result.append(st);

+ }

+ return line_tok(result, MSEP_REC);

+ }

+ } else {

// first word ending with dash: word- XXX ???

- char r2 = *(dash + 1);

- dash[0]='-';

- dash[1]='\0';

- nresult = spell(cw);

- dash[1] = r2;

- dash[0]='\0';

- if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||

- ((dash[1] > '0') && (dash[1] < '9')))) {

- st = pSMgr->suggest_morph(cw);

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE

- }

- st = pSMgr->suggest_morph(dash+1);

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- return line_tok(result, slst, MSEP_REC);

- }

+ part1.push_back(' ');

+ nresult = spell(part1);

+ part1.erase(part1.size() - 1);

+ if (nresult && spell(part2) &&

+ ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {

+ std::string st = pSMgr->suggest_morph(part1);

+ if (!st.empty()) {

+ result.append(st);

+ result.push_back('+'); // XXX spec. separator in MORPHCODE

+ }

+ st = pSMgr->suggest_morph(part2);

+ if (!st.empty()) {

+ result.append(st);

+ }

+ return line_tok(result, MSEP_REC);

}

- // affixed number in correct word

- if (nresult && (dash > cw) && (((*(dash-1)<='9') &&

- (*(dash-1)>='0')) || (*(dash-1)=='.'))) {

- *dash='-';

- n = 1;

- if (*(dash - n) == '.') n++;

- // search first not a number character to left from dash

- while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {

- n++;

- }

- if ((dash - n) < cw) n--;

- // numbers: valami1000000-hoz

- // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,

- // 56-hoz, 6-hoz

- for(; n >= 1; n--) {

- if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {

- mystrcat(result, cw, MAXLNLEN);

- result[dash - cw - n] = '\0';

- st = pSMgr->suggest_morph(dash - n);

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- return line_tok(result, slst, MSEP_REC);

- }

+ }

+ // affixed number in correct word

+ if (nresult && (dash_pos > 0) &&

+ (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||

+ (scw[dash_pos - 1] == '.'))) {

+ n = 1;

+ if (scw[dash_pos - n] == '.')

+ n++;

+ // search first not a number character to left from dash

+ while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&

+ (n < 6)) {

+ n++;

+ }

+ if (dash_pos < n)

+ n--;

+ // numbers: valami1000000-hoz

+ // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,

+ // 56-hoz, 6-hoz

+ for (; n >= 1; n--) {

+ if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {

+ continue;

+ }

+ std::string chunk = scw.substr(dash_pos - n);

+ if (checkword(chunk, NULL, NULL)) {

+ result.append(chunk);

+ std::string st = pSMgr->suggest_morph(chunk);

+ if (!st.empty()) {

+ result.append(st);

+ }

+ return line_tok(result, MSEP_REC);

+ }

}

- return 0;

+ return slst;

}

-int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)

- *slst = NULL;

- if (!pSMgr || !pln) return 0;

- char **pl2;

- int pl2n = analyze(&pl2, word);

- int captype = 0;

+std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {

+ return m_Impl->generate(word, pl);

+std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {

+ std::vector<std::string> slst;

+ if (!pSMgr || pl.empty())

+ return slst;

+ std::vector<std::string> pl2 = analyze(word);

+ int captype = NOCAP;

int abbv = 0;

- char cw[MAXWORDUTF8LEN];

+ std::string cw;

cleanword(cw, word, &captype, &abbv);

- char result[MAXLNLEN];

- *result = '\0';

+ std::string result;

- for (int i = 0; i < pln; i++) {

- cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));

+ for (size_t i = 0; i < pl.size(); ++i) {

+ cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));

}

- freelist(&pl2, pl2n);

- if (*result) {

+ if (!result.empty()) {

// allcap

- if (captype == ALLCAP) mkallcap(result);

+ if (captype == ALLCAP)

+ mkallcap(result);

// line split

- int linenum = line_tok(result, slst, MSEP_REC);

+ slst = line_tok(result, MSEP_REC);

// capitalize

if (captype == INITCAP || captype == HUHINITCAP) {

- for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);

+ for (size_t j = 0; j < slst.size(); ++j) {

+ mkinitcap(slst[j]);

+ }

}

// temporary filtering of prefix related errors (eg.

// generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")

- int r = 0;

- for (int j=0; j < linenum; j++) {

- if (!spell((*slst)[j])) {

- free((*slst)[j]);

- (*slst)[j] = NULL;

- } else {

- if (r < j) (*slst)[r] = (*slst)[j];

- r++;

- }

+ std::vector<std::string>::iterator it = slst.begin();

+ while (it != slst.end()) {

+ if (!spell(*it)) {

+ it = slst.erase(it);

+ } else {

+ ++it;

+ }

}

- if (r > 0) return r;

- free(*slst);

- *slst = NULL;

}

- return 0;

+ return slst;

}

-int Hunspell::generate(char*** slst, const char * word, const char * pattern)

- char **pl;

- int pln = analyze(&pl, pattern);

- int n = generate(slst, word, pl, pln);

- freelist(&pl, pln);

- return uniqlist(*slst, n);

+std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {

+ return m_Impl->generate(word, pattern);

+std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {

+ std::vector<std::string> pl = analyze(pattern);

+ std::vector<std::string> slst = generate(word, pl);

+ uniqlist(slst);

+ return slst;

}

// minimal XML parser functions

-int Hunspell::get_xml_par(char * dest, const char * par, int max)

- char * d = dest;

- if (!par) return 0;

- char end = *par;

- char * dmax = dest + max;

- if (end == '>') end = '<';

- else if (end != '\'' && end != '"') return 0; // bad XML

- for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;

- *d = '\0';

- mystrrep(dest, "&lt;", "<");

- mystrrep(dest, "&amp;", "&");

- return (int)(d - dest);

-int Hunspell::get_langnum() const

- return langnum;

+std::string HunspellImpl::get_xml_par(const char* par) {

+ std::string dest;

+ if (!par)

+ return dest;

+ char end = *par;

+ if (end == '>')

+ end = '<';

+ else if (end != '\'' && end != '"')

+ return 0; // bad XML

+ for (par++; *par != '\0' && *par != end; ++par) {

+ dest.push_back(*par);

+ }

+ mystrrep(dest, "&lt;", "<");

+ mystrrep(dest, "&amp;", "&");

+ return dest;

+int Hunspell::get_langnum() const {

+ return m_Impl->get_langnum();

+int HunspellImpl::get_langnum() const {

+ return langnum;

+bool Hunspell::input_conv(const std::string& word, std::string& dest) {

+ return m_Impl->input_conv(word, dest);

+int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {

+ std::string d;

+ bool ret = input_conv(word, d);

+ if (ret && d.size() < destsize) {

+ strncpy(dest, d.c_str(), destsize);

+ return 1;

+ }

+ return 0;

+bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {

+ RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;

+ if (rl) {

+ return rl->conv(word, dest);

+ }

+ dest.assign(word);

+ return false;

}

// return the beginning of the element (attr == NULL) or the attribute

-const char * Hunspell::get_xml_pos(const char * s, const char * attr)

- const char * end = strchr(s, '>');

- const char * p = s;

- if (attr == NULL) return end;

+const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) {

+ const char* end = strchr(s, '>');

+ const char* p = s;

+ if (attr == NULL)

+ return end;

do {

p = strstr(p, attr);

- if (!p || p >= end) return 0;

- } while (*(p-1) != ' ' && *(p-1) != '\n');

+ if (!p || p >= end)

+ return 0;

+ } while (*(p - 1) != ' ' && *(p - 1) != '\n');

return p + strlen(attr);

}

-int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {

- char cw[MAXWORDUTF8LEN];

- if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&

- strcmp(cw, value) == 0) return 1;

+int HunspellImpl::check_xml_par(const char* q,

+ const char* attr,

+ const char* value) {

+ std::string cw = get_xml_par(get_xml_pos(q, attr));

+ if (cw == value)

+ return 1;

return 0;

}

-int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {

- int n = 0;

- char * p;

- if (!list) return 0;

- for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++;

- if (n == 0) return 0;

- *slst = (char **) malloc(sizeof(char *) * n);

- if (!*slst) return 0;

- for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) {

- int l = strlen(p);

- (*slst)[n] = (char *) malloc(l + 1);

- if (!(*slst)[n]) return n;

- if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {

- free((*slst)[n]);

- break;

- }

+std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) {

+ std::vector<std::string> slst;

+ if (!list)

+ return slst;

+ const char* p = list;

+ for (size_t n = 0; ((p = strstr(p, tag)) != NULL); ++p, ++n) {

+ std::string cw = get_xml_par(p + strlen(tag) - 1);

+ if (cw.empty()) {

+ break;

}

- return n;

+ slst.push_back(cw);

+ }

+ return slst;

}

-int Hunspell::spellml(char*** slst, const char * word)

- char *q, *q2;

- char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];

- q = (char *) strstr(word, "<query");

- if (!q) return 0; // bad XML input

- q2 = strchr(q, '>');

- if (!q2) return 0; // bad XML input

+std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {

+ std::vector<std::string> slst;

+ const char* word = in_word.c_str();

+ const char* q = strstr(word, "<query");

+ if (!q)

+ return slst; // bad XML input

+ const char* q2 = strchr(q, '>');

+ if (!q2)

+ return slst; // bad XML input

q2 = strstr(q2, "<word");

- if (!q2) return 0; // bad XML input

+ if (!q2)

+ return slst; // bad XML input

if (check_xml_par(q, "type=", "analyze")) {

- int n = 0, s = 0;

- if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);

- if (n == 0) return 0;

- // convert the result to <code><a>ana1</a><a>ana2</a></code> format

- for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);

- char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;

- if (!r) return 0;

- strcpy(r, "<code>");

- for (int i = 0; i < n; i++) {

- int l = strlen(r);

- strcpy(r + l, "<a>");

- strcpy(r + l + 3, (*slst)[i]);

- mystrrep(r + l + 3, "\t", " ");

- mystrrep(r + l + 3, "<", "&lt;");

- mystrrep(r + l + 3, "&", "&amp;");

- strcat(r, "</a>");

- free((*slst)[i]);

- }

- strcat(r, "</code>");

- (*slst)[0] = r;

- return 1;

+ std::string cw = get_xml_par(strchr(q2, '>'));

+ if (!cw.empty())

+ slst = analyze(cw);

+ if (slst.empty())

+ return slst;

+ // convert the result to <code><a>ana1</a><a>ana2</a></code> format

+ std::string r;

+ r.append("<code>");

+ for (size_t i = 0; i < slst.size(); ++i) {

+ r.append("<a>");

+ std::string entry(slst[i]);

+ mystrrep(entry, "\t", " ");

+ mystrrep(entry, "&", "&amp;");

+ mystrrep(entry, "<", "&lt;");

+ r.append(entry);

+ r.append("</a>");

+ }

+ r.append("</code>");

+ slst.clear();

+ slst.push_back(r);

+ return slst;

} else if (check_xml_par(q, "type=", "stem")) {

- if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);

+ std::string cw = get_xml_par(strchr(q2, '>'));

+ if (!cw.empty())

+ return stem(cw);

} else if (check_xml_par(q, "type=", "generate")) {

- int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);

- if (n == 0) return 0;

- char * q3 = strstr(q2 + 1, "<word");

- if (q3) {

- if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {

- return generate(slst, cw, cw2);

- }

- } else {

- if ((q2 = strstr(q2 + 1, "<code")) != NULL) {

- char ** slst2;

- if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) {

- int n2 = generate(slst, cw, slst2, n);

- freelist(&slst2, n);

- return uniqlist(*slst, n2);

- }

- freelist(&slst2, n);

+ std::string cw = get_xml_par(strchr(q2, '>'));

+ if (cw.empty())

+ return slst;

+ const char* q3 = strstr(q2 + 1, "<word");

+ if (q3) {

+ std::string cw2 = get_xml_par(strchr(q3, '>'));

+ if (!cw2.empty()) {

+ return generate(cw, cw2);

+ }

+ } else {

+ if ((q2 = strstr(q2 + 1, "<code")) != NULL) {

+ std::vector<std::string> slst2 = get_xml_list(strchr(q2, '>'), "<a>");

+ if (!slst2.empty()) {

+ slst = generate(cw, slst2);

+ uniqlist(slst);

+ return slst;

}

+ }

}

- return 0;

+ return slst;

}

-#ifdef HUNSPELL_EXPERIMENTAL

-// XXX need UTF-8 support

-char * Hunspell::morph_with_correction(const char * word)

- char cw[MAXWORDUTF8LEN];

- char wspace[MAXWORDUTF8LEN];

- if (! pSMgr || maxdic == 0) return NULL;

- int wl = strlen(word);

- if (utf8) {

- if (wl >= MAXWORDUTF8LEN) return NULL;

- } else {

- if (wl >= MAXWORDLEN) return NULL;

+int Hunspell::spell(const char* word, int* info, char** root) {

+ std::string sroot;

+ bool ret = m_Impl->spell(word, info, root ? &sroot : NULL);

+ if (root) {

+ if (sroot.empty()) {

+ *root = NULL;

+ } else {

+ *root = mystrdup(sroot.c_str());

+ }

}

- int captype = 0;

- int abbv = 0;

- wl = cleanword(cw, word, &captype, &abbv);

- if (wl == 0) return NULL;

- char result[MAXLNLEN];

- char * st = NULL;

- *result = '\0';

- switch(captype) {

- case NOCAP: {

- st = pSMgr->suggest_morph_for_spelling_error(cw);

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- if (abbv) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- break;

- }

- case INITCAP: {

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- st = pSMgr->suggest_morph_for_spelling_error(cw);

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- if (abbv) {

- memcpy(wspace,cw,wl);

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- mkallsmall(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- mkinitcap(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- break;

- }

- case HUHCAP: {

- st = pSMgr->suggest_morph_for_spelling_error(cw);

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- memcpy(wspace,cw,(wl+1));

- mkallsmall(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- break;

- }

- case ALLCAP: {

- memcpy(wspace,cw,(wl+1));

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- mkallsmall(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- mkinitcap(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- if (abbv) {

- memcpy(wspace,cw,(wl+1));

- *(wspace+wl) = '.';

- *(wspace+wl+1) = '\0';

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- mkallsmall(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- mkinitcap(wspace);

- st = pSMgr->suggest_morph_for_spelling_error(wspace);

- if (st) {

- if (*result) mystrcat(result, "\n", MAXLNLEN);

- mystrcat(result, st, MAXLNLEN);

- free(st);

- }

- break;

- }

+ return ret;

+namespace {

+ int munge_vector(char*** slst, const std::vector<std::string>& items) {

+ if (items.empty()) {

+ *slst = NULL;

+ return 0;

+ } else {

+ *slst = (char**)malloc(sizeof(char*) * items.size());

+ if (!*slst)

+ return 0;

+ for (size_t i = 0; i < items.size(); ++i)

+ (*slst)[i] = mystrdup(items[i].c_str());

+ }

+ return items.size();

}

- if (*result) return mystrdup(result);

- return NULL;

+void Hunspell::free_list(char*** slst, int n) {

+ Hunspell_free_list((Hunhandle*)(this), slst, n);

}

-#endif // END OF HUNSPELL_EXPERIMENTAL CODE

+int Hunspell::suggest(char*** slst, const char* word) {

+ return Hunspell_suggest((Hunhandle*)(this), slst, word);

-Hunhandle *Hunspell_create(const char * affpath, const char * dpath)

+int Hunspell::suffix_suggest(char*** slst, const char* root_word) {

+ std::vector<std::string> stems = m_Impl->suffix_suggest(root_word);

+ return munge_vector(slst, stems);

+char* Hunspell::get_dic_encoding() {

+ return &(m_Impl->dic_encoding_vec[0]);

+int Hunspell::stem(char*** slst, char** desc, int n) {

+ return Hunspell_stem2((Hunhandle*)(this), slst, desc, n);

+int Hunspell::stem(char*** slst, const char* word) {

+ return Hunspell_stem((Hunhandle*)(this), slst, word);

+int Hunspell::analyze(char*** slst, const char* word) {

+ return Hunspell_analyze((Hunhandle*)(this), slst, word);

+int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {

+ return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln);

+int Hunspell::generate(char*** slst, const char* word, const char* pattern) {

+ return Hunspell_generate((Hunhandle*)(this), slst, word, pattern);

+Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {

#ifdef HUNSPELL_CHROME_CLIENT

return NULL;

#else

- return (Hunhandle*)(new Hunspell(affpath, dpath));

+ return (Hunhandle*)(new Hunspell(affpath, dpath));

#endif

}

-Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,

- const char * key)

+Hunhandle* Hunspell_create_key(const char* affpath,

+ const char* dpath,

+ const char* key) {

#ifdef HUNSPELL_CHROME_CLIENT

return NULL;

#else

- return (Hunhandle*)(new Hunspell(affpath, dpath, key));

+ return reinterpret_cast<Hunhandle*>(new Hunspell(affpath, dpath, key));

#endif

}

-void Hunspell_destroy(Hunhandle *pHunspell)

- delete (Hunspell*)(pHunspell);

+void Hunspell_destroy(Hunhandle* pHunspell) {

+ delete reinterpret_cast<Hunspell*>(pHunspell);

}

-int Hunspell_spell(Hunhandle *pHunspell, const char *word)

- return ((Hunspell*)pHunspell)->spell(word);

+#ifndef HUNSPELL_CHROME_CLIENT

+int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {

+ return reinterpret_cast<Hunspell*>(pHunspell)->add_dic(dpath);

}

+#endif

-char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)

- return ((Hunspell*)pHunspell)->get_dic_encoding();

+int Hunspell_spell(Hunhandle* pHunspell, const char* word) {

+ return reinterpret_cast<Hunspell*>(pHunspell)->spell(std::string(word));

}

-int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)

- return ((Hunspell*)pHunspell)->suggest(slst, word);

+char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {

+ return reinterpret_cast<Hunspell*>(pHunspell)->get_dic_encoding();

}

-int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)

- return ((Hunspell*)pHunspell)->analyze(slst, word);

+int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {

+ std::vector<std::string> suggests = reinterpret_cast<Hunspell*>(pHunspell)->suggest(word);

+ return munge_vector(slst, suggests);

}

-int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)

- return ((Hunspell*)pHunspell)->stem(slst, word);

+int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {

+ std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->analyze(word);

+ return munge_vector(slst, stems);

}

-int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)

- return ((Hunspell*)pHunspell)->stem(slst, desc, n);

+int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {

+ std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(word);

+ return munge_vector(slst, stems);

}

-int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,

- const char * word2)

- return ((Hunspell*)pHunspell)->generate(slst, word, word2);

+int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {

+ std::vector<std::string> morph;

+ for (int i = 0; i < n; ++i)

+ morph.push_back(desc[i]);

+ std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(morph);

+ return munge_vector(slst, stems);

}

-int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,

- char** desc, int n)

- return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);

+int Hunspell_generate(Hunhandle* pHunspell,

+ char*** slst,

+ const char* word,

+ const char* pattern) {

+ std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, pattern);

+ return munge_vector(slst, stems);

+int Hunspell_generate2(Hunhandle* pHunspell,

+ char*** slst,

+ const char* word,

+ char** desc,

+ int n) {

+ std::vector<std::string> morph;

+ for (int i = 0; i < n; ++i)

+ morph.push_back(desc[i]);

+ std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, morph);

+ return munge_vector(slst, stems);

}

- /* functions for run-time modification of the dictionary */

+/* functions for run-time modification of the dictionary */

- /* add word to the run-time dictionary */

+/* add word to the run-time dictionary */

-int Hunspell_add(Hunhandle *pHunspell, const char * word) {

- return ((Hunspell*)pHunspell)->add(word);

+int Hunspell_add(Hunhandle* pHunspell, const char* word) {

+ return reinterpret_cast<Hunspell*>(pHunspell)->add(word);

}

- /* add word to the run-time dictionary with affix flags of

- * the example (a dictionary word): Hunspell will recognize

- * affixed forms of the new word, too.

- */

+/* add word to the run-time dictionary with affix flags of

+ * the example (a dictionary word): Hunspell will recognize

+ * affixed forms of the new word, too.

+ */

-int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,

- const char * example) {

- return ((Hunspell*)pHunspell)->add_with_affix(word, example);

+int Hunspell_add_with_affix(Hunhandle* pHunspell,

+ const char* word,

+ const char* example) {

+ return reinterpret_cast<Hunspell*>(pHunspell)->add_with_affix(word, example);

}

- /* remove word from the run-time dictionary */

+/* remove word from the run-time dictionary */

-int Hunspell_remove(Hunhandle *pHunspell, const char * word) {

- return ((Hunspell*)pHunspell)->remove(word);

+int Hunspell_remove(Hunhandle* pHunspell, const char* word) {

+ return reinterpret_cast<Hunspell*>(pHunspell)->remove(word);

}

-void Hunspell_free_list(Hunhandle *, char *** slst, int n) {

- freelist(slst, n);

+void Hunspell_free_list(Hunhandle*, char*** list, int n) {

+ if (list && *list) {

+ for (int i = 0; i < n; i++)

+ free((*list)[i]);

+ free(*list);

+ *list = NULL;

+ }

+std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {

+ return m_Impl->suffix_suggest(root_word);

+std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {

+ std::vector<std::string> slst;

+ struct hentry* he = NULL;

+ int len;

+ std::string w2;

+ const char* word;

+ const char* ignoredchars = pAMgr->get_ignore();

+ if (ignoredchars != NULL) {

+ w2.assign(root_word);

+ if (utf8) {

+ const std::vector<w_char>& ignoredchars_utf16 =

+ pAMgr->get_ignore_utf16();

+ remove_ignored_chars_utf(w2, ignoredchars_utf16);

+ } else {

+ remove_ignored_chars(w2, ignoredchars);

+ }

+ word = w2.c_str();

+ } else

+ word = root_word.c_str();

+ len = strlen(word);

+ if (!len)

+ return slst;

+ for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {

+ he = m_HMgrs[i]->lookup(word);

+ }

+ if (he) {

+ slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());

+ }

+ return slst;

}

« no previous file with comments | « third_party/hunspell/src/hunspell/hunspell.hxx ('k') | third_party/hunspell/src/hunspell/hunspell.dsp » ('j') | no next file with comments »