| Index: third_party/hunspell/src/hunspell/hunspell.cxx
|
| diff --git a/third_party/hunspell/src/hunspell/hunspell.cxx b/third_party/hunspell/src/hunspell/hunspell.cxx
|
| index d9d60a48c6fead50ef6963ead2e7b9f0b7caa9b9..c8c5cf49521c0d78077b2cc0ccc1b51da905e9ba 100644
|
| --- a/third_party/hunspell/src/hunspell/hunspell.cxx
|
| +++ b/third_party/hunspell/src/hunspell/hunspell.cxx
|
| @@ -1,102 +1,273 @@
|
| -#include "license.hunspell"
|
| -#include "license.myspell"
|
| +/* ***** BEGIN LICENSE BLOCK *****
|
| + * Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
| + *
|
| + * The contents of this file are subject to the Mozilla Public License Version
|
| + * 1.1 (the "License"); you may not use this file except in compliance with
|
| + * the License. You may obtain a copy of the License at
|
| + * http://www.mozilla.org/MPL/
|
| + *
|
| + * Software distributed under the License is distributed on an "AS IS" basis,
|
| + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
| + * for the specific language governing rights and limitations under the
|
| + * License.
|
| + *
|
| + * The Original Code is Hunspell, based on MySpell.
|
| + *
|
| + * The Initial Developers of the Original Code are
|
| + * Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
| + * Portions created by the Initial Developers are Copyright (C) 2002-2005
|
| + * the Initial Developers. All Rights Reserved.
|
| + *
|
| + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
| + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
| + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
| + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
| + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
| + *
|
| + * Alternatively, the contents of this file may be used under the terms of
|
| + * either the GNU General Public License Version 2 or later (the "GPL"), or
|
| + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
| + * in which case the provisions of the GPL or the LGPL are applicable instead
|
| + * of those above. If you wish to allow use of your version of this file only
|
| + * under the terms of either the GPL or the LGPL, and not to allow others to
|
| + * use your version of this file under the terms of the MPL, indicate your
|
| + * decision by deleting the provisions above and replace them with the notice
|
| + * and other provisions required by the GPL or the LGPL. If you do not delete
|
| + * the provisions above, a recipient may use your version of this file under
|
| + * the terms of any one of the MPL, the GPL or the LGPL.
|
| + *
|
| + * ***** END LICENSE BLOCK ***** */
|
| +/*
|
| + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
| + * And Contributors. All rights reserved.
|
| + *
|
| + * Redistribution and use in source and binary forms, with or without
|
| + * modification, are permitted provided that the following conditions
|
| + * are met:
|
| + *
|
| + * 1. Redistributions of source code must retain the above copyright
|
| + * notice, this list of conditions and the following disclaimer.
|
| + *
|
| + * 2. Redistributions in binary form must reproduce the above copyright
|
| + * notice, this list of conditions and the following disclaimer in the
|
| + * documentation and/or other materials provided with the distribution.
|
| + *
|
| + * 3. All modifications to the source code must be clearly marked as
|
| + * such. Binary redistributions based on modified source code
|
| + * must be clearly marked as modified versions in the documentation
|
| + * and/or other materials provided with the distribution.
|
| + *
|
| + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
| + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
| + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
| + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
| + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
| + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
| + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
| + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
| + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
| + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
| + * SUCH DAMAGE.
|
| + */
|
|
|
| #include <stdlib.h>
|
| #include <string.h>
|
| #include <stdio.h>
|
|
|
| +#include "affixmgr.hxx"
|
| #include "hunspell.hxx"
|
| +#include "suggestmgr.hxx"
|
| #include "hunspell.h"
|
| #ifndef HUNSPELL_CHROME_CLIENT
|
| -#ifndef MOZILLA_CLIENT
|
| # include "config.h"
|
| #endif
|
| -#endif
|
| #include "csutil.hxx"
|
|
|
| +#include <limits>
|
| +#include <string>
|
| +
|
| +#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
|
| +
|
| +class HunspellImpl
|
| +{
|
| +public:
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| + HunspellImpl(const unsigned char* bdict_data, size_t bdict_length);
|
| +#else
|
| + HunspellImpl(const char* affpath, const char* dpath, const char* key);
|
| +#endif
|
| + ~HunspellImpl();
|
| +#ifndef HUNSPELL_CHROME_CLIENT
|
| + int add_dic(const char* dpath, const char* key);
|
| +#endif
|
| + std::vector<std::string> suffix_suggest(const std::string& root_word);
|
| + std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
|
| + std::vector<std::string> generate(const std::string& word, const std::string& pattern);
|
| + std::vector<std::string> stem(const std::string& word);
|
| + std::vector<std::string> stem(const std::vector<std::string>& morph);
|
| + std::vector<std::string> analyze(const std::string& word);
|
| + int get_langnum() const;
|
| + bool input_conv(const std::string& word, std::string& dest);
|
| + bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
|
| + std::vector<std::string> suggest(const std::string& word);
|
| + const std::string& get_wordchars() const;
|
| + const std::vector<w_char>& get_wordchars_utf16() const;
|
| + const std::string& get_dict_encoding() const;
|
| + int add(const std::string& word);
|
| + int add_with_affix(const std::string& word, const std::string& example);
|
| + int remove(const std::string& word);
|
| + const std::string& get_version() const;
|
| + struct cs_info* get_csconv();
|
| + std::vector<char> dic_encoding_vec;
|
| +
|
| +private:
|
| + AffixMgr* pAMgr;
|
| + std::vector<HashMgr*> m_HMgrs;
|
| + SuggestMgr* pSMgr;
|
| +#ifndef HUNSPELL_CHROME_CLIENT // We are using BDict instead.
|
| + char* affixpath;
|
| +#endif
|
| + std::string encoding;
|
| + struct cs_info* csconv;
|
| + int langnum;
|
| + int utf8;
|
| + int complexprefixes;
|
| + std::vector<std::string> wordbreak;
|
| +
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| + // Not owned by us, owned by the Hunspell object.
|
| + hunspell::BDictReader* bdict_reader;
|
| +#endif
|
| +
|
| +private:
|
| + void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
|
| + size_t cleanword2(std::string& dest,
|
| + std::vector<w_char>& dest_u,
|
| + const std::string& src,
|
| + int* pcaptype,
|
| + size_t* pabbrev);
|
| + void mkinitcap(std::string& u8);
|
| + int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
|
| + int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
|
| + void mkallcap(std::string& u8);
|
| + int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
|
| + struct hentry* checkword(const std::string& source, int* info, std::string* root);
|
| + std::string sharps_u8_l1(const std::string& source);
|
| + hentry*
|
| + spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
|
| + int is_keepcase(const hentry* rv);
|
| + void insert_sug(std::vector<std::string>& slst, const std::string& word);
|
| + void cat_result(std::string& result, const std::string& st);
|
| + std::vector<std::string> spellml(const std::string& word);
|
| + std::string get_xml_par(const char* par);
|
| + const char* get_xml_pos(const char* s, const char* attr);
|
| + std::vector<std::string> get_xml_list(const char* list, const char* tag);
|
| + int check_xml_par(const char* q, const char* attr, const char* value);
|
| +private:
|
| + HunspellImpl(const HunspellImpl&);
|
| + HunspellImpl& operator=(const HunspellImpl&);
|
| +};
|
| +
|
| #ifdef HUNSPELL_CHROME_CLIENT
|
| Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)
|
| + : m_Impl(new HunspellImpl(bdict_data, bdict_length)) {
|
| #else
|
| -Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
|
| +Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
|
| + : m_Impl(new HunspellImpl(affpath, dpath, key)) {
|
| #endif
|
| -{
|
| - encoding = NULL;
|
| - csconv = NULL;
|
| - utf8 = 0;
|
| - complexprefixes = 0;
|
| +}
|
| +
|
| +#ifdef HUNSPELL_CHROME_CLIENT
|
| +HunspellImpl::HunspellImpl(const unsigned char* bdict_data, size_t bdict_length) {
|
| +#else
|
| +HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {
|
| +#endif
|
| + csconv = NULL;
|
| + utf8 = 0;
|
| + complexprefixes = 0;
|
| #ifndef HUNSPELL_CHROME_CLIENT
|
| - affixpath = mystrdup(affpath);
|
| + affixpath = mystrdup(affpath);
|
| #endif
|
| - maxdic = 0;
|
|
|
| #ifdef HUNSPELL_CHROME_CLIENT
|
| - bdict_reader = new hunspell::BDictReader;
|
| - bdict_reader->Init(bdict_data, bdict_length);
|
| + bdict_reader = new hunspell::BDictReader;
|
| + bdict_reader->Init(bdict_data, bdict_length);
|
|
|
| - pHMgr[0] = new HashMgr(bdict_reader);
|
| - if (pHMgr[0]) maxdic = 1;
|
| + /* first set up the hash manager */
|
| + m_HMgrs.push_back(new HashMgr(bdict_reader));
|
|
|
| - pAMgr = new AffixMgr(bdict_reader, pHMgr, &maxdic);
|
| + pAMgr = new AffixMgr(bdict_reader, m_HMgrs); // TODO: 'key' ?
|
| #else
|
| - /* first set up the hash manager */
|
| - pHMgr[0] = new HashMgr(dpath, affpath, key);
|
| - if (pHMgr[0]) maxdic = 1;
|
| + /* first set up the hash manager */
|
| + m_HMgrs.push_back(new HashMgr(dpath, affpath, key));
|
|
|
| - /* next set up the affix manager */
|
| - /* it needs access to the hash manager lookup methods */
|
| - pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
|
| + /* next set up the affix manager */
|
| + /* it needs access to the hash manager lookup methods */
|
| + pAMgr = new AffixMgr(affpath, m_HMgrs, key);
|
| #endif
|
|
|
| - /* get the preferred try string and the dictionary */
|
| - /* encoding from the Affix Manager for that dictionary */
|
| - char * try_string = pAMgr->get_try_string();
|
| - encoding = pAMgr->get_encoding();
|
| - langnum = pAMgr->get_langnum();
|
| - utf8 = pAMgr->get_utf8();
|
| - if (!utf8)
|
| - csconv = get_current_cs(encoding);
|
| - complexprefixes = pAMgr->get_complexprefixes();
|
| - wordbreak = pAMgr->get_breaktable();
|
| -
|
| - /* and finally set up the suggestion manager */
|
| + /* get the preferred try string and the dictionary */
|
| + /* encoding from the Affix Manager for that dictionary */
|
| + char* try_string = pAMgr->get_try_string();
|
| + encoding = pAMgr->get_encoding();
|
| + langnum = pAMgr->get_langnum();
|
| + utf8 = pAMgr->get_utf8();
|
| + if (!utf8)
|
| + csconv = get_current_cs(encoding);
|
| + complexprefixes = pAMgr->get_complexprefixes();
|
| + wordbreak = pAMgr->get_breaktable();
|
| +
|
| + dic_encoding_vec.resize(encoding.size()+1);
|
| + strcpy(&dic_encoding_vec[0], encoding.c_str());
|
| +
|
| + /* and finally set up the suggestion manager */
|
| #ifdef HUNSPELL_CHROME_CLIENT
|
| - pSMgr = new SuggestMgr(bdict_reader, try_string, MAXSUGGESTION, pAMgr);
|
| + pSMgr = new SuggestMgr(bdict_reader, try_string, MAXSUGGESTION, pAMgr);
|
| #else
|
| - pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
|
| + pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
|
| #endif
|
| - if (try_string) free(try_string);
|
| + if (try_string)
|
| + free(try_string);
|
| }
|
|
|
| -Hunspell::~Hunspell()
|
| -{
|
| - if (pSMgr) delete pSMgr;
|
| - if (pAMgr) delete pAMgr;
|
| - for (int i = 0; i < maxdic; i++) delete pHMgr[i];
|
| - maxdic = 0;
|
| - pSMgr = NULL;
|
| - pAMgr = NULL;
|
| +Hunspell::~Hunspell() {
|
| + delete m_Impl;
|
| +}
|
| +
|
| +HunspellImpl::~HunspellImpl() {
|
| + delete pSMgr;
|
| + delete pAMgr;
|
| + for (size_t i = 0; i < m_HMgrs.size(); ++i)
|
| + delete m_HMgrs[i];
|
| + pSMgr = NULL;
|
| + pAMgr = NULL;
|
| #ifdef MOZILLA_CLIENT
|
| - delete [] csconv;
|
| + delete[] csconv;
|
| #endif
|
| - csconv= NULL;
|
| - if (encoding) free(encoding);
|
| - encoding = NULL;
|
| + csconv = NULL;
|
| #ifdef HUNSPELL_CHROME_CLIENT
|
| if (bdict_reader) delete bdict_reader;
|
| bdict_reader = NULL;
|
| #else
|
| - if (affixpath) free(affixpath);
|
| - affixpath = NULL;
|
| + if (affixpath)
|
| + free(affixpath);
|
| + affixpath = NULL;
|
| #endif
|
| }
|
|
|
| #ifndef HUNSPELL_CHROME_CLIENT
|
| // load extra dictionaries
|
| -int Hunspell::add_dic(const char * dpath, const char * key) {
|
| - if (maxdic == MAXDIC || !affixpath) return 1;
|
| - pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
|
| - if (pHMgr[maxdic]) maxdic++; else return 1;
|
| - return 0;
|
| +int Hunspell::add_dic(const char* dpath, const char* key) {
|
| + return m_Impl->add_dic(dpath, key);
|
| +}
|
| +
|
| +// load extra dictionaries
|
| +int HunspellImpl::add_dic(const char* dpath, const char* key) {
|
| + if (!affixpath)
|
| + return 1;
|
| + m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
|
| + return 0;
|
| }
|
| #endif
|
|
|
| @@ -107,516 +278,499 @@ int Hunspell::add_dic(const char * dpath, const char * key) {
|
| // set the capitalization type
|
| // return the length of the "cleaned" (and UTF-8 encoded) word
|
|
|
| -int Hunspell::cleanword2(char * dest, const char * src,
|
| - w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
|
| -{
|
| - unsigned char * p = (unsigned char *) dest;
|
| - const unsigned char * q = (const unsigned char * ) src;
|
| -
|
| - // first skip over any leading blanks
|
| - while ((*q != '\0') && (*q == ' ')) q++;
|
| -
|
| - // now strip off any trailing periods (recording their presence)
|
| - *pabbrev = 0;
|
| - int nl = strlen((const char *)q);
|
| - while ((nl > 0) && (*(q+nl-1)=='.')) {
|
| - nl--;
|
| - (*pabbrev)++;
|
| - }
|
| -
|
| - // if no characters are left it can't be capitalized
|
| - if (nl <= 0) {
|
| - *pcaptype = NOCAP;
|
| - *p = '\0';
|
| - return 0;
|
| - }
|
| -
|
| - strncpy(dest, (char *) q, nl);
|
| - *(dest + nl) = '\0';
|
| - nl = strlen(dest);
|
| - if (utf8) {
|
| - *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
|
| - // don't check too long words
|
| - if (*nc >= MAXWORDLEN) return 0;
|
| - if (*nc == -1) { // big Unicode character (non BMP area)
|
| - *pcaptype = NOCAP;
|
| - return nl;
|
| - }
|
| - *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
|
| - } else {
|
| - *pcaptype = get_captype(dest, nl, csconv);
|
| - *nc = nl;
|
| - }
|
| - return nl;
|
| -}
|
| +size_t HunspellImpl::cleanword2(std::string& dest,
|
| + std::vector<w_char>& dest_utf,
|
| + const std::string& src,
|
| + int* pcaptype,
|
| + size_t* pabbrev) {
|
| + dest.clear();
|
| + dest_utf.clear();
|
| +
|
| + const char* q = src.c_str();
|
| +
|
| + // first skip over any leading blanks
|
| + while ((*q != '\0') && (*q == ' '))
|
| + q++;
|
| +
|
| + // now strip off any trailing periods (recording their presence)
|
| + *pabbrev = 0;
|
| + int nl = strlen(q);
|
| + while ((nl > 0) && (*(q + nl - 1) == '.')) {
|
| + nl--;
|
| + (*pabbrev)++;
|
| + }
|
|
|
| -int Hunspell::cleanword(char * dest, const char * src,
|
| - int * pcaptype, int * pabbrev)
|
| -{
|
| - unsigned char * p = (unsigned char *) dest;
|
| - const unsigned char * q = (const unsigned char * ) src;
|
| - int firstcap = 0;
|
| -
|
| - // first skip over any leading blanks
|
| - while ((*q != '\0') && (*q == ' ')) q++;
|
| -
|
| - // now strip off any trailing periods (recording their presence)
|
| - *pabbrev = 0;
|
| - int nl = strlen((const char *)q);
|
| - while ((nl > 0) && (*(q+nl-1)=='.')) {
|
| - nl--;
|
| - (*pabbrev)++;
|
| - }
|
| -
|
| - // if no characters are left it can't be capitalized
|
| - if (nl <= 0) {
|
| - *pcaptype = NOCAP;
|
| - *p = '\0';
|
| - return 0;
|
| - }
|
| -
|
| - // now determine the capitalization type of the first nl letters
|
| - int ncap = 0;
|
| - int nneutral = 0;
|
| - int nc = 0;
|
| -
|
| - if (!utf8) {
|
| - while (nl > 0) {
|
| - nc++;
|
| - if (csconv[(*q)].ccase) ncap++;
|
| - if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
|
| - *p++ = *q++;
|
| - nl--;
|
| - }
|
| - // remember to terminate the destination string
|
| - *p = '\0';
|
| - firstcap = csconv[(unsigned char)(*dest)].ccase;
|
| - } else {
|
| - unsigned short idx;
|
| - w_char t[MAXWORDLEN];
|
| - nc = u8_u16(t, MAXWORDLEN, src);
|
| - for (int i = 0; i < nc; i++) {
|
| - idx = (t[i].h << 8) + t[i].l;
|
| - unsigned short low = unicodetolower(idx, langnum);
|
| - if (idx != low) ncap++;
|
| - if (unicodetoupper(idx, langnum) == low) nneutral++;
|
| - }
|
| - u16_u8(dest, MAXWORDUTF8LEN, t, nc);
|
| - if (ncap) {
|
| - idx = (t[0].h << 8) + t[0].l;
|
| - firstcap = (idx != unicodetolower(idx, langnum));
|
| - }
|
| - }
|
| -
|
| - // now finally set the captype
|
| - if (ncap == 0) {
|
| - *pcaptype = NOCAP;
|
| - } else if ((ncap == 1) && firstcap) {
|
| - *pcaptype = INITCAP;
|
| - } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
|
| - *pcaptype = ALLCAP;
|
| - } else if ((ncap > 1) && firstcap) {
|
| - *pcaptype = HUHINITCAP;
|
| - } else {
|
| - *pcaptype = HUHCAP;
|
| - }
|
| - return strlen(dest);
|
| -}
|
| -
|
| -void Hunspell::mkallcap(char * p)
|
| -{
|
| + // if no characters are left it can't be capitalized
|
| + if (nl <= 0) {
|
| + *pcaptype = NOCAP;
|
| + return 0;
|
| + }
|
| +
|
| + dest.append(q, nl);
|
| + nl = dest.size();
|
| if (utf8) {
|
| - w_char u[MAXWORDLEN];
|
| - int nc = u8_u16(u, MAXWORDLEN, p);
|
| - unsigned short idx;
|
| - for (int i = 0; i < nc; i++) {
|
| - idx = (u[i].h << 8) + u[i].l;
|
| - if (idx != unicodetoupper(idx, langnum)) {
|
| - u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
|
| - u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
|
| - }
|
| - }
|
| - u16_u8(p, MAXWORDUTF8LEN, u, nc);
|
| + u8_u16(dest_utf, dest);
|
| + *pcaptype = get_captype_utf8(dest_utf, langnum);
|
| } else {
|
| - while (*p != '\0') {
|
| - *p = csconv[((unsigned char) *p)].cupper;
|
| - p++;
|
| - }
|
| + *pcaptype = get_captype(dest, csconv);
|
| }
|
| + return nl;
|
| }
|
|
|
| -int Hunspell::mkallcap2(char * p, w_char * u, int nc)
|
| -{
|
| - if (utf8) {
|
| - unsigned short idx;
|
| - for (int i = 0; i < nc; i++) {
|
| - idx = (u[i].h << 8) + u[i].l;
|
| - unsigned short up = unicodetoupper(idx, langnum);
|
| - if (idx != up) {
|
| - u[i].h = (unsigned char) (up >> 8);
|
| - u[i].l = (unsigned char) (up & 0x00FF);
|
| - }
|
| - }
|
| - u16_u8(p, MAXWORDUTF8LEN, u, nc);
|
| - return strlen(p);
|
| +void HunspellImpl::cleanword(std::string& dest,
|
| + const std::string& src,
|
| + int* pcaptype,
|
| + int* pabbrev) {
|
| + dest.clear();
|
| + const unsigned char* q = (const unsigned char*)src.c_str();
|
| + int firstcap = 0;
|
| +
|
| + // first skip over any leading blanks
|
| + while ((*q != '\0') && (*q == ' '))
|
| + q++;
|
| +
|
| + // now strip off any trailing periods (recording their presence)
|
| + *pabbrev = 0;
|
| + int nl = strlen((const char*)q);
|
| + while ((nl > 0) && (*(q + nl - 1) == '.')) {
|
| + nl--;
|
| + (*pabbrev)++;
|
| + }
|
| +
|
| + // if no characters are left it can't be capitalized
|
| + if (nl <= 0) {
|
| + *pcaptype = NOCAP;
|
| + return;
|
| + }
|
| +
|
| + // now determine the capitalization type of the first nl letters
|
| + int ncap = 0;
|
| + int nneutral = 0;
|
| + int nc = 0;
|
| +
|
| + if (!utf8) {
|
| + while (nl > 0) {
|
| + nc++;
|
| + if (csconv[(*q)].ccase)
|
| + ncap++;
|
| + if (csconv[(*q)].cupper == csconv[(*q)].clower)
|
| + nneutral++;
|
| + dest.push_back(*q++);
|
| + nl--;
|
| + }
|
| + // remember to terminate the destination string
|
| + firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
|
| } else {
|
| - while (*p != '\0') {
|
| - *p = csconv[((unsigned char) *p)].cupper;
|
| - p++;
|
| + std::vector<w_char> t;
|
| + u8_u16(t, src);
|
| + for (size_t i = 0; i < t.size(); ++i) {
|
| + unsigned short idx = (t[i].h << 8) + t[i].l;
|
| + unsigned short low = unicodetolower(idx, langnum);
|
| + if (idx != low)
|
| + ncap++;
|
| + if (unicodetoupper(idx, langnum) == low)
|
| + nneutral++;
|
| + }
|
| + u16_u8(dest, t);
|
| + if (ncap) {
|
| + unsigned short idx = (t[0].h << 8) + t[0].l;
|
| + firstcap = (idx != unicodetolower(idx, langnum));
|
| }
|
| }
|
| - return nc;
|
| -}
|
|
|
| + // now finally set the captype
|
| + if (ncap == 0) {
|
| + *pcaptype = NOCAP;
|
| + } else if ((ncap == 1) && firstcap) {
|
| + *pcaptype = INITCAP;
|
| + } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
|
| + *pcaptype = ALLCAP;
|
| + } else if ((ncap > 1) && firstcap) {
|
| + *pcaptype = HUHINITCAP;
|
| + } else {
|
| + *pcaptype = HUHCAP;
|
| + }
|
| +}
|
|
|
| -void Hunspell::mkallsmall(char * p)
|
| -{
|
| - while (*p != '\0') {
|
| - *p = csconv[((unsigned char) *p)].clower;
|
| - p++;
|
| - }
|
| +void HunspellImpl::mkallcap(std::string& u8) {
|
| + if (utf8) {
|
| + std::vector<w_char> u16;
|
| + u8_u16(u16, u8);
|
| + ::mkallcap_utf(u16, langnum);
|
| + u16_u8(u8, u16);
|
| + } else {
|
| + ::mkallcap(u8, csconv);
|
| + }
|
| }
|
|
|
| -int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
|
| -{
|
| +int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
|
| if (utf8) {
|
| - unsigned short idx;
|
| - for (int i = 0; i < nc; i++) {
|
| - idx = (u[i].h << 8) + u[i].l;
|
| - unsigned short low = unicodetolower(idx, langnum);
|
| - if (idx != low) {
|
| - u[i].h = (unsigned char) (low >> 8);
|
| - u[i].l = (unsigned char) (low & 0x00FF);
|
| - }
|
| - }
|
| - u16_u8(p, MAXWORDUTF8LEN, u, nc);
|
| - return strlen(p);
|
| + ::mkallsmall_utf(u16, langnum);
|
| + u16_u8(u8, u16);
|
| } else {
|
| - while (*p != '\0') {
|
| - *p = csconv[((unsigned char) *p)].clower;
|
| - p++;
|
| - }
|
| + ::mkallsmall(u8, csconv);
|
| }
|
| - return nc;
|
| + return u8.size();
|
| }
|
|
|
| // convert UTF-8 sharp S codes to latin 1
|
| -char * Hunspell::sharps_u8_l1(char * dest, char * source) {
|
| - char * p = dest;
|
| - *p = *source;
|
| - for (p++, source++; *(source - 1); p++, source++) {
|
| - *p = *source;
|
| - if (*source == '\x9F') *--p = '\xDF';
|
| - }
|
| - return dest;
|
| +std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
|
| + std::string dest(source);
|
| + mystrrep(dest, "\xC3\x9F", "\xDF");
|
| + return dest;
|
| }
|
|
|
| // recursive search for right ss - sharp s permutations
|
| -hentry * Hunspell::spellsharps(char * base, char * pos, int n,
|
| - int repnum, char * tmp, int * info, char **root) {
|
| - pos = strstr(pos, "ss");
|
| - if (pos && (n < MAXSHARPS)) {
|
| - *pos = '\xC3';
|
| - *(pos + 1) = '\x9F';
|
| - hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
|
| - if (h) return h;
|
| - *pos = 's';
|
| - *(pos + 1) = 's';
|
| - h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
|
| - if (h) return h;
|
| - } else if (repnum > 0) {
|
| - if (utf8) return checkword(base, info, root);
|
| - return checkword(sharps_u8_l1(tmp, base), info, root);
|
| - }
|
| - return NULL;
|
| +hentry* HunspellImpl::spellsharps(std::string& base,
|
| + size_t n_pos,
|
| + int n,
|
| + int repnum,
|
| + int* info,
|
| + std::string* root) {
|
| + size_t pos = base.find("ss", n_pos);
|
| + if (pos != std::string::npos && (n < MAXSHARPS)) {
|
| + base[pos] = '\xC3';
|
| + base[pos + 1] = '\x9F';
|
| + hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
|
| + if (h)
|
| + return h;
|
| + base[pos] = 's';
|
| + base[pos + 1] = 's';
|
| + h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
|
| + if (h)
|
| + return h;
|
| + } else if (repnum > 0) {
|
| + if (utf8)
|
| + return checkword(base, info, root);
|
| + std::string tmp(sharps_u8_l1(base));
|
| + return checkword(tmp, info, root);
|
| + }
|
| + return NULL;
|
| }
|
|
|
| -int Hunspell::is_keepcase(const hentry * rv) {
|
| - return pAMgr && rv->astr && pAMgr->get_keepcase() &&
|
| - TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
|
| +int HunspellImpl::is_keepcase(const hentry* rv) {
|
| + return pAMgr && rv->astr && pAMgr->get_keepcase() &&
|
| + TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
|
| }
|
|
|
| -/* insert a word to the beginning of the suggestion array and return ns */
|
| -int Hunspell::insert_sug(char ***slst, char * word, int ns) {
|
| - char * dup = mystrdup(word);
|
| - if (!dup) return ns;
|
| - if (ns == MAXSUGGESTION) {
|
| - ns--;
|
| - free((*slst)[ns]);
|
| - }
|
| - for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
|
| - (*slst)[0] = dup;
|
| - return ns + 1;
|
| +/* insert a word to the beginning of the suggestion array */
|
| +void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
|
| + slst.insert(slst.begin(), word);
|
| }
|
|
|
| -int Hunspell::spell(const char * word, int * info, char ** root)
|
| -{
|
| +bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
|
| + return m_Impl->spell(word, info, root);
|
| +}
|
| +
|
| +bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
|
| #ifdef HUNSPELL_CHROME_CLIENT
|
| - if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();
|
| + if (m_HMgrs[0]) m_HMgrs[0]->EmptyHentryCache();
|
| #endif
|
| - struct hentry * rv=NULL;
|
| - // need larger vector. For example, Turkish capital letter I converted a
|
| - // 2-byte UTF-8 character (dotless i) by mkallsmall.
|
| - char cw[MAXWORDUTF8LEN];
|
| - char wspace[MAXWORDUTF8LEN];
|
| - w_char unicw[MAXWORDLEN];
|
| + struct hentry* rv = NULL;
|
| +
|
| + int info2 = 0;
|
| + if (!info)
|
| + info = &info2;
|
| + else
|
| + *info = 0;
|
| +
|
| // Hunspell supports XML input of the simplified API (see manual)
|
| - if (strcmp(word, SPELL_XML) == 0) return 1;
|
| - int nc = strlen(word);
|
| - int wl2 = 0;
|
| + if (word == SPELL_XML)
|
| + return true;
|
| if (utf8) {
|
| - if (nc >= MAXWORDUTF8LEN) return 0;
|
| + if (word.size() >= MAXWORDUTF8LEN)
|
| + return false;
|
| } else {
|
| - if (nc >= MAXWORDLEN) return 0;
|
| + if (word.size() >= MAXWORDLEN)
|
| + return false;
|
| }
|
| - int captype = 0;
|
| - int abbv = 0;
|
| - int wl = 0;
|
| + int captype = NOCAP;
|
| + size_t abbv = 0;
|
| + size_t wl = 0;
|
| +
|
| + std::string scw;
|
| + std::vector<w_char> sunicw;
|
|
|
| // input conversion
|
| - RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
|
| - if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
|
| - else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
|
| + RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
|
| + {
|
| + std::string wspace;
|
| +
|
| + bool convstatus = rl ? rl->conv(word, wspace) : false;
|
| + if (convstatus)
|
| + wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
|
| + else
|
| + wl = cleanword2(scw, sunicw, word, &captype, &abbv);
|
| + }
|
|
|
| - int info2 = 0;
|
| - if (wl == 0 || maxdic == 0) return 1;
|
| - if (root) *root = NULL;
|
| +#ifdef MOZILLA_CLIENT
|
| + // accept the abbreviated words without dots
|
| + // workaround for the incomplete tokenization of Mozilla
|
| + abbv = 1;
|
| +#endif
|
|
|
| - // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
|
| + if (wl == 0 || m_HMgrs.empty())
|
| + return true;
|
| + if (root)
|
| + root->clear();
|
| +
|
| + // allow numbers with dots, dashes and commas (but forbid double separators:
|
| + // "..", "--" etc.)
|
| enum { NBEGIN, NNUM, NSEP };
|
| int nstate = NBEGIN;
|
| - int i;
|
| + size_t i;
|
|
|
| for (i = 0; (i < wl); i++) {
|
| - if ((cw[i] <= '9') && (cw[i] >= '0')) {
|
| - nstate = NNUM;
|
| - } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
|
| - if ((nstate == NSEP) || (i == 0)) break;
|
| - nstate = NSEP;
|
| - } else break;
|
| + if ((scw[i] <= '9') && (scw[i] >= '0')) {
|
| + nstate = NNUM;
|
| + } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
|
| + if ((nstate == NSEP) || (i == 0))
|
| + break;
|
| + nstate = NSEP;
|
| + } else
|
| + break;
|
| }
|
| - if ((i == wl) && (nstate == NNUM)) return 1;
|
| - if (!info) info = &info2; else *info = 0;
|
| -
|
| - switch(captype) {
|
| - case HUHCAP:
|
| - case HUHINITCAP:
|
| - *info += SPELL_ORIGCAP;
|
| - case NOCAP: {
|
| - rv = checkword(cw, info, root);
|
| - if ((abbv) && !(rv)) {
|
| - memcpy(wspace,cw,wl);
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - rv = checkword(wspace, info, root);
|
| - }
|
| + if ((i == wl) && (nstate == NNUM))
|
| + return true;
|
| +
|
| + switch (captype) {
|
| + case HUHCAP:
|
| + /* FALLTHROUGH */
|
| + case HUHINITCAP:
|
| + *info += SPELL_ORIGCAP;
|
| + /* FALLTHROUGH */
|
| + case NOCAP:
|
| + rv = checkword(scw, info, root);
|
| + if ((abbv) && !(rv)) {
|
| + std::string u8buffer(scw);
|
| + u8buffer.push_back('.');
|
| + rv = checkword(u8buffer, info, root);
|
| + }
|
| + break;
|
| + case ALLCAP: {
|
| + *info += SPELL_ORIGCAP;
|
| + rv = checkword(scw, info, root);
|
| + if (rv)
|
| + break;
|
| + if (abbv) {
|
| + std::string u8buffer(scw);
|
| + u8buffer.push_back('.');
|
| + rv = checkword(u8buffer, info, root);
|
| + if (rv)
|
| + break;
|
| + }
|
| + // Spec. prefix handling for Catalan, French, Italian:
|
| + // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
|
| + size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
|
| + if (apos != std::string::npos) {
|
| + mkallsmall2(scw, sunicw);
|
| + //conversion may result in string with different len to pre-mkallsmall2
|
| + //so re-scan
|
| + if (apos != std::string::npos && apos < scw.size() - 1) {
|
| + std::string part1 = scw.substr(0, apos+1);
|
| + std::string part2 = scw.substr(apos+1);
|
| + if (utf8) {
|
| + std::vector<w_char> part1u, part2u;
|
| + u8_u16(part1u, part1);
|
| + u8_u16(part2u, part2);
|
| + mkinitcap2(part2, part2u);
|
| + scw = part1 + part2;
|
| + sunicw = part1u;
|
| + sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
|
| + rv = checkword(scw, info, root);
|
| + if (rv)
|
| + break;
|
| + } else {
|
| + mkinitcap2(part2, sunicw);
|
| + scw = part1 + part2;
|
| + rv = checkword(scw, info, root);
|
| + if (rv)
|
| + break;
|
| + }
|
| + mkinitcap2(scw, sunicw);
|
| + rv = checkword(scw, info, root);
|
| + if (rv)
|
| break;
|
| - }
|
| - case ALLCAP: {
|
| - *info += SPELL_ORIGCAP;
|
| - rv = checkword(cw, info, root);
|
| - if (rv) break;
|
| - if (abbv) {
|
| - memcpy(wspace,cw,wl);
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - rv = checkword(wspace, info, root);
|
| - if (rv) break;
|
| - }
|
| - // Spec. prefix handling for Catalan, French, Italian:
|
| - // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
|
| - if (pAMgr && strchr(cw, '\'')) {
|
| - wl = mkallsmall2(cw, unicw, nc);
|
| - //There are no really sane circumstances where this could fail,
|
| - //but anyway...
|
| - if (char * apostrophe = strchr(cw, '\'')) {
|
| - if (utf8) {
|
| - w_char tmpword[MAXWORDLEN];
|
| - *apostrophe = '\0';
|
| - wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
|
| - *apostrophe = '\'';
|
| - if (wl2 < nc) {
|
| - mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
|
| - rv = checkword(cw, info, root);
|
| - if (rv) break;
|
| - }
|
| - } else {
|
| - mkinitcap2(apostrophe + 1, unicw, nc);
|
| - rv = checkword(cw, info, root);
|
| - if (rv) break;
|
| - }
|
| - }
|
| - mkinitcap2(cw, unicw, nc);
|
| - rv = checkword(cw, info, root);
|
| - if (rv) break;
|
| - }
|
| - if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
|
| - char tmpword[MAXWORDUTF8LEN];
|
| - wl = mkallsmall2(cw, unicw, nc);
|
| - memcpy(wspace,cw,(wl+1));
|
| - rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
|
| - if (!rv) {
|
| - wl2 = mkinitcap2(cw, unicw, nc);
|
| - rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
|
| - }
|
| - if ((abbv) && !(rv)) {
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
|
| - if (!rv) {
|
| - memcpy(wspace, cw, wl2);
|
| - *(wspace+wl2) = '.';
|
| - *(wspace+wl2+1) = '\0';
|
| - rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
|
| - }
|
| - }
|
| - if (rv) break;
|
| - }
|
| }
|
| - case INITCAP: {
|
| - *info += SPELL_ORIGCAP;
|
| - wl = mkallsmall2(cw, unicw, nc);
|
| - memcpy(wspace,cw,(wl+1));
|
| - wl2 = mkinitcap2(cw, unicw, nc);
|
| - if (captype == INITCAP) *info += SPELL_INITCAP;
|
| - rv = checkword(cw, info, root);
|
| - if (captype == INITCAP) *info -= SPELL_INITCAP;
|
| - // forbid bad capitalization
|
| - // (for example, ijs -> Ijs instead of IJs in Dutch)
|
| - // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
|
| - if (*info & SPELL_FORBIDDEN) {
|
| - rv = NULL;
|
| - break;
|
| - }
|
| - if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
|
| - if (rv) break;
|
| -
|
| - rv = checkword(wspace, info, root);
|
| - if (abbv && !rv) {
|
| -
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - rv = checkword(wspace, info, root);
|
| - if (!rv) {
|
| - memcpy(wspace, cw, wl2);
|
| - *(wspace+wl2) = '.';
|
| - *(wspace+wl2+1) = '\0';
|
| - if (captype == INITCAP) *info += SPELL_INITCAP;
|
| - rv = checkword(wspace, info, root);
|
| - if (captype == INITCAP) *info -= SPELL_INITCAP;
|
| - if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
|
| - break;
|
| - }
|
| - }
|
| - if (rv && is_keepcase(rv) &&
|
| - ((captype == ALLCAP) ||
|
| - // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
|
| - // in INITCAP form, too.
|
| - !(pAMgr->get_checksharps() &&
|
| - ((utf8 && strstr(wspace, "\xC3\x9F")) ||
|
| - (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
|
| - break;
|
| - }
|
| + }
|
| + if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
|
| +
|
| + mkallsmall2(scw, sunicw);
|
| + std::string u8buffer(scw);
|
| + rv = spellsharps(u8buffer, 0, 0, 0, info, root);
|
| + if (!rv) {
|
| + mkinitcap2(scw, sunicw);
|
| + rv = spellsharps(scw, 0, 0, 0, info, root);
|
| + }
|
| + if ((abbv) && !(rv)) {
|
| + u8buffer.push_back('.');
|
| + rv = spellsharps(u8buffer, 0, 0, 0, info, root);
|
| + if (!rv) {
|
| + u8buffer = std::string(scw);
|
| + u8buffer.push_back('.');
|
| + rv = spellsharps(u8buffer, 0, 0, 0, info, root);
|
| + }
|
| + }
|
| + if (rv)
|
| + break;
|
| + }
|
| + }
|
| + case INITCAP: {
|
| +
|
| + *info += SPELL_ORIGCAP;
|
| + mkallsmall2(scw, sunicw);
|
| + std::string u8buffer(scw);
|
| + mkinitcap2(scw, sunicw);
|
| + if (captype == INITCAP)
|
| + *info += SPELL_INITCAP;
|
| + rv = checkword(scw, info, root);
|
| + if (captype == INITCAP)
|
| + *info -= SPELL_INITCAP;
|
| + // forbid bad capitalization
|
| + // (for example, ijs -> Ijs instead of IJs in Dutch)
|
| + // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
|
| + if (*info & SPELL_FORBIDDEN) {
|
| + rv = NULL;
|
| + break;
|
| + }
|
| + if (rv && is_keepcase(rv) && (captype == ALLCAP))
|
| + rv = NULL;
|
| + if (rv)
|
| + break;
|
| +
|
| + rv = checkword(u8buffer, info, root);
|
| + if (abbv && !rv) {
|
| + u8buffer.push_back('.');
|
| + rv = checkword(u8buffer, info, root);
|
| + if (!rv) {
|
| + u8buffer = scw;
|
| + u8buffer.push_back('.');
|
| + if (captype == INITCAP)
|
| + *info += SPELL_INITCAP;
|
| + rv = checkword(u8buffer, info, root);
|
| + if (captype == INITCAP)
|
| + *info -= SPELL_INITCAP;
|
| + if (rv && is_keepcase(rv) && (captype == ALLCAP))
|
| + rv = NULL;
|
| + break;
|
| + }
|
| + }
|
| + if (rv && is_keepcase(rv) &&
|
| + ((captype == ALLCAP) ||
|
| + // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
|
| + // in INITCAP form, too.
|
| + !(pAMgr->get_checksharps() &&
|
| + ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
|
| + (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
|
| + rv = NULL;
|
| + break;
|
| + }
|
| }
|
|
|
| if (rv) {
|
| - if (pAMgr && pAMgr->get_warn() && rv->astr &&
|
| - TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
|
| - *info += SPELL_WARN;
|
| - if (pAMgr->get_forbidwarn()) return 0;
|
| - return HUNSPELL_OK_WARN;
|
| - }
|
| - return HUNSPELL_OK;
|
| + if (pAMgr && pAMgr->get_warn() && rv->astr &&
|
| + TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
|
| + *info += SPELL_WARN;
|
| + if (pAMgr->get_forbidwarn())
|
| + return false;
|
| + return true;
|
| + }
|
| + return true;
|
| }
|
|
|
| // recursive breaking at break points
|
| - if (wordbreak) {
|
| - char * s;
|
| - char r;
|
| + if (!wordbreak.empty()) {
|
| +
|
| int nbr = 0;
|
| - wl = strlen(cw);
|
| - int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
|
| + wl = scw.size();
|
|
|
| // calculate break points for recursion limit
|
| - for (int j = 0; j < numbreak; j++) {
|
| - s = cw;
|
| - do {
|
| - s = (char *) strstr(s, wordbreak[j]);
|
| - if (s) {
|
| - nbr++;
|
| - s++;
|
| - }
|
| - } while (s);
|
| - }
|
| - if (nbr >= 10) return 0;
|
| + for (size_t j = 0; j < wordbreak.size(); ++j) {
|
| + size_t pos = 0;
|
| + while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
|
| + ++nbr;
|
| + pos += wordbreak[j].size();
|
| + }
|
| + }
|
| + if (nbr >= 10)
|
| + return false;
|
|
|
| // check boundary patterns (^begin and end$)
|
| - for (int j = 0; j < numbreak; j++) {
|
| - int plen = strlen(wordbreak[j]);
|
| - if (plen == 1 || plen > wl) continue;
|
| - if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
|
| - && spell(cw + plen - 1)) return 1;
|
| + for (size_t j = 0; j < wordbreak.size(); ++j) {
|
| + size_t plen = wordbreak[j].size();
|
| + if (plen == 1 || plen > wl)
|
| + continue;
|
| +
|
| + if (wordbreak[j][0] == '^' &&
|
| + scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
|
| + return true;
|
| +
|
| if (wordbreak[j][plen - 1] == '$' &&
|
| - strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
|
| - r = cw[wl - plen + 1];
|
| - cw[wl - plen + 1] = '\0';
|
| - if (spell(cw)) return 1;
|
| - cw[wl - plen + 1] = r;
|
| - }
|
| + scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
|
| + std::string suffix(scw.substr(wl - plen + 1));
|
| + scw.resize(wl - plen + 1);
|
| + if (spell(scw))
|
| + return true;
|
| + scw.append(suffix);
|
| + }
|
| }
|
|
|
| // other patterns
|
| - for (int j = 0; j < numbreak; j++) {
|
| - int plen = strlen(wordbreak[j]);
|
| - s=(char *) strstr(cw, wordbreak[j]);
|
| - if (s && (s > cw) && (s < cw + wl - plen)) {
|
| - if (!spell(s + plen)) continue;
|
| - r = *s;
|
| - *s = '\0';
|
| + for (size_t j = 0; j < wordbreak.size(); ++j) {
|
| + size_t plen = wordbreak[j].size();
|
| + size_t found = scw.find(wordbreak[j]);
|
| + if ((found > 0) && (found < wl - plen)) {
|
| + if (!spell(scw.substr(found + plen)))
|
| + continue;
|
| + std::string suffix(scw.substr(found));
|
| + scw.resize(found);
|
| // examine 2 sides of the break point
|
| - if (spell(cw)) return 1;
|
| - *s = r;
|
| + if (spell(scw))
|
| + return true;
|
| + scw.append(suffix);
|
|
|
| // LANG_hu: spec. dash rule
|
| - if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
|
| - r = s[1];
|
| - s[1] = '\0';
|
| - if (spell(cw)) return 1; // check the first part with dash
|
| - s[1] = r;
|
| - }
|
| - // end of LANG speficic region
|
| -
|
| + if (langnum == LANG_hu && wordbreak[j] == "-") {
|
| + suffix = scw.substr(found + 1);
|
| + scw.resize(found + 1);
|
| + if (spell(scw))
|
| + return true; // check the first part with dash
|
| + scw.append(suffix);
|
| + }
|
| + // end of LANG specific region
|
| }
|
| }
|
| }
|
|
|
| - return 0;
|
| + return false;
|
| }
|
|
|
| -struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
|
| -{
|
| - struct hentry * he = NULL;
|
| - int len, i;
|
| - char w2[MAXWORDUTF8LEN];
|
| - const char * word;
|
| +struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
|
| + bool usebuffer = false;
|
| + std::string w2;
|
| + const char* word;
|
| + int len;
|
|
|
| - char * ignoredchars = pAMgr->get_ignore();
|
| + const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
|
| if (ignoredchars != NULL) {
|
| - strcpy(w2, w);
|
| - if (utf8) {
|
| - int ignoredchars_utf16_len;
|
| - unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
|
| - remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
|
| - } else {
|
| - remove_ignored_chars(w2,ignoredchars);
|
| - }
|
| - word = w2;
|
| - } else word = w;
|
| -
|
| - len = strlen(word);
|
| + w2.assign(w);
|
| + if (utf8) {
|
| + const std::vector<w_char>& ignoredchars_utf16 =
|
| + pAMgr->get_ignore_utf16();
|
| + remove_ignored_chars_utf(w2, ignoredchars_utf16);
|
| + } else {
|
| + remove_ignored_chars(w2, ignoredchars);
|
| + }
|
| + word = w2.c_str();
|
| + len = w2.size();
|
| + usebuffer = true;
|
| + } else {
|
| + word = w.c_str();
|
| + len = w.size();
|
| + }
|
|
|
| if (!len)
|
| - return NULL;
|
| + return NULL;
|
|
|
| #ifdef HUNSPELL_CHROME_CLIENT
|
| // We need to check if the word length is valid to make coverity (Event
|
| @@ -627,1430 +781,1310 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
|
|
|
| // word reversing wrapper for complex prefixes
|
| if (complexprefixes) {
|
| - if (word != w2) {
|
| - strcpy(w2, word);
|
| - word = w2;
|
| + if (!usebuffer) {
|
| + w2.assign(word);
|
| + usebuffer = true;
|
| }
|
| - if (utf8) reverseword_utf(w2); else reverseword(w2);
|
| + if (utf8)
|
| + reverseword_utf(w2);
|
| + else
|
| + reverseword(w2);
|
| + }
|
| +
|
| + if (usebuffer) {
|
| + word = w2.c_str();
|
| }
|
|
|
| // look word in hash table
|
| - for (i = 0; (i < maxdic) && !he; i ++) {
|
| - he = (pHMgr[i])->lookup(word);
|
| -
|
| - // check forbidden and onlyincompound words
|
| - if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
|
| - if (info) *info += SPELL_FORBIDDEN;
|
| - // LANG_hu section: set dash information for suggestions
|
| - if (langnum == LANG_hu) {
|
| + struct hentry* he = NULL;
|
| + for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
|
| + he = m_HMgrs[i]->lookup(word);
|
| +
|
| + // check forbidden and onlyincompound words
|
| + if ((he) && (he->astr) && (pAMgr) &&
|
| + TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
|
| + if (info)
|
| + *info += SPELL_FORBIDDEN;
|
| + // LANG_hu section: set dash information for suggestions
|
| + if (langnum == LANG_hu) {
|
| if (pAMgr->get_compoundflag() &&
|
| TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
|
| - if (info) *info += SPELL_COMPOUND;
|
| + if (info)
|
| + *info += SPELL_COMPOUND;
|
| }
|
| + }
|
| + return NULL;
|
| }
|
| - return NULL;
|
| - }
|
|
|
| - // he = next not needaffix, onlyincompound homonym or onlyupcase word
|
| - while (he && (he->astr) &&
|
| - ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
|
| - (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
|
| - (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
|
| - )) he = he->next_homonym;
|
| + // advance he past homonyms flagged needaffix or onlyincompound, or onlyupcase when SPELL_INITCAP is set
|
| + while (he && (he->astr) && pAMgr &&
|
| + ((pAMgr->get_needaffix() &&
|
| + TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
|
| + (pAMgr->get_onlyincompound() &&
|
| + TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
|
| + (info && (*info & SPELL_INITCAP) &&
|
| + TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
|
| + he = he->next_homonym;
|
| }
|
|
|
| // check with affixes
|
| if (!he && pAMgr) {
|
| - // try stripping off affixes */
|
| - he = pAMgr->affix_check(word, len, 0);
|
| -
|
| - // check compound restriction and onlyupcase
|
| - if (he && he->astr && (
|
| - (pAMgr->get_onlyincompound() &&
|
| - TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
|
| - (info && (*info & SPELL_INITCAP) &&
|
| - TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
|
| - he = NULL;
|
| - }
|
| -
|
| - if (he) {
|
| - if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
|
| - if (info) *info += SPELL_FORBIDDEN;
|
| - return NULL;
|
| + // try stripping off affixes
|
| + he = pAMgr->affix_check(word, len, 0);
|
| +
|
| + // check compound restriction and onlyupcase
|
| + if (he && he->astr &&
|
| + ((pAMgr->get_onlyincompound() &&
|
| + TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
|
| + (info && (*info & SPELL_INITCAP) &&
|
| + TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
|
| + he = NULL;
|
| + }
|
| +
|
| + if (he) {
|
| + if ((he->astr) && (pAMgr) &&
|
| + TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
|
| + if (info)
|
| + *info += SPELL_FORBIDDEN;
|
| + return NULL;
|
| + }
|
| + if (root) {
|
| + root->assign(he->word);
|
| + if (complexprefixes) {
|
| + if (utf8)
|
| + reverseword_utf(*root);
|
| + else
|
| + reverseword(*root);
|
| }
|
| + }
|
| + // otherwise try to check it as a compound word
|
| + } else if (pAMgr->get_compound()) {
|
| + struct hentry* rwords[100]; // buffer for COMPOUND pattern checking
|
| + he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
|
| + // LANG_hu section: `moving rule' with last dash
|
| + if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
|
| + std::string dup(word, len - 1);
|
| + he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
|
| + }
|
| + // end of LANG specific region
|
| + if (he) {
|
| if (root) {
|
| - *root = mystrdup(he->word);
|
| - if (*root && complexprefixes) {
|
| - if (utf8) reverseword_utf(*root); else reverseword(*root);
|
| - }
|
| - }
|
| - // try check compound word
|
| - } else if (pAMgr->get_compound()) {
|
| - he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
|
| - // LANG_hu section: `moving rule' with last dash
|
| - if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
|
| - char * dup = mystrdup(word);
|
| - if (!dup) return NULL;
|
| - dup[len-1] = '\0';
|
| - he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
|
| - free(dup);
|
| - }
|
| - // end of LANG speficic region
|
| - if (he) {
|
| - if (root) {
|
| - *root = mystrdup(he->word);
|
| - if (*root && complexprefixes) {
|
| - if (utf8) reverseword_utf(*root); else reverseword(*root);
|
| - }
|
| - }
|
| - if (info) *info += SPELL_COMPOUND;
|
| + root->assign(he->word);
|
| + if (complexprefixes) {
|
| + if (utf8)
|
| + reverseword_utf(*root);
|
| + else
|
| + reverseword(*root);
|
| }
|
| - }
|
| -
|
| + }
|
| + if (info)
|
| + *info += SPELL_COMPOUND;
|
| + }
|
| + }
|
| }
|
|
|
| return he;
|
| }
|
|
|
| -int Hunspell::suggest(char*** slst, const char * word)
|
| -{
|
| +std::vector<std::string> Hunspell::suggest(const std::string& word) {
|
| + return m_Impl->suggest(word);
|
| +}
|
| +
|
| +std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
|
| #ifdef HUNSPELL_CHROME_CLIENT
|
| - if (pHMgr[0]) pHMgr[0]->EmptyHentryCache();
|
| + if (m_HMgrs[0]) m_HMgrs[0]->EmptyHentryCache();
|
| #endif
|
| + std::vector<std::string> slst;
|
| +
|
| int onlycmpdsug = 0;
|
| - char cw[MAXWORDUTF8LEN];
|
| - char wspace[MAXWORDUTF8LEN];
|
| - if (!pSMgr || maxdic == 0) return 0;
|
| - w_char unicw[MAXWORDLEN];
|
| - *slst = NULL;
|
| + if (!pSMgr || m_HMgrs.empty())
|
| + return slst;
|
| +
|
| // process XML input of the simplified API (see manual)
|
| - if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
|
| - return spellml(slst, word);
|
| + if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
|
| + return spellml(word);
|
| }
|
| - int nc = strlen(word);
|
| if (utf8) {
|
| - if (nc >= MAXWORDUTF8LEN) return 0;
|
| + if (word.size() >= MAXWORDUTF8LEN)
|
| + return slst;
|
| } else {
|
| - if (nc >= MAXWORDLEN) return 0;
|
| + if (word.size() >= MAXWORDLEN)
|
| + return slst;
|
| }
|
| - int captype = 0;
|
| - int abbv = 0;
|
| - int wl = 0;
|
| + int captype = NOCAP;
|
| + size_t abbv = 0;
|
| + size_t wl = 0;
|
| +
|
| + std::string scw;
|
| + std::vector<w_char> sunicw;
|
|
|
| // input conversion
|
| - RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
|
| - if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
|
| - else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
|
| + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
|
| + {
|
| + std::string wspace;
|
| +
|
| + bool convstatus = rl ? rl->conv(word, wspace) : false;
|
| + if (convstatus)
|
| + wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
|
| + else
|
| + wl = cleanword2(scw, sunicw, word, &captype, &abbv);
|
| +
|
| + if (wl == 0)
|
| + return slst;
|
| + }
|
|
|
| - if (wl == 0) return 0;
|
| - int ns = 0;
|
| int capwords = 0;
|
|
|
| // check capitalized form for FORCEUCASE
|
| if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
|
| int info = SPELL_ORIGCAP;
|
| - char ** wlst;
|
| - if (checkword(cw, &info, NULL)) {
|
| - if (*slst) {
|
| - wlst = *slst;
|
| + if (checkword(scw, &info, NULL)) {
|
| + std::string form(scw);
|
| + mkinitcap(form);
|
| + slst.push_back(form);
|
| + return slst;
|
| + }
|
| + }
|
| +
|
| + switch (captype) {
|
| + case NOCAP: {
|
| + pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
|
| + break;
|
| + }
|
| +
|
| + case INITCAP: {
|
| + capwords = 1;
|
| + pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
|
| + std::string wspace(scw);
|
| + mkallsmall2(wspace, sunicw);
|
| + pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
|
| + break;
|
| + }
|
| + case HUHINITCAP:
|
| + capwords = 1;
|
| + case HUHCAP: {
|
| + pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
|
| + // something.The -> something. The
|
| + size_t dot_pos = scw.find('.');
|
| + if (dot_pos != std::string::npos) {
|
| + std::string postdot = scw.substr(dot_pos + 1);
|
| + int captype_;
|
| + if (utf8) {
|
| + std::vector<w_char> postdotu;
|
| + u8_u16(postdotu, postdot);
|
| + captype_ = get_captype_utf8(postdotu, langnum);
|
| } else {
|
| - wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
|
| - if (wlst == NULL) return -1;
|
| - *slst = wlst;
|
| - for (int i = 0; i < MAXSUGGESTION; i++) {
|
| - wlst[i] = NULL;
|
| - }
|
| + captype_ = get_captype(postdot, csconv);
|
| + }
|
| + if (captype_ == INITCAP) {
|
| + std::string str(scw);
|
| + str.insert(dot_pos + 1, 1, ' ');
|
| + insert_sug(slst, str);
|
| }
|
| - wlst[0] = mystrdup(cw);
|
| - mkinitcap(wlst[0]);
|
| - return 1;
|
| + }
|
| +
|
| + std::string wspace;
|
| +
|
| + if (captype == HUHINITCAP) {
|
| + // TheOpenOffice.org -> The OpenOffice.org
|
| + wspace = scw;
|
| + mkinitsmall2(wspace, sunicw);
|
| + pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
|
| + }
|
| + wspace = scw;
|
| + mkallsmall2(wspace, sunicw);
|
| + if (spell(wspace.c_str()))
|
| + insert_sug(slst, wspace);
|
| + size_t prevns = slst.size();
|
| + pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
|
| + if (captype == HUHINITCAP) {
|
| + mkinitcap2(wspace, sunicw);
|
| + if (spell(wspace.c_str()))
|
| + insert_sug(slst, wspace);
|
| + pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
|
| + }
|
| + // aNew -> "a New" (instead of "a new")
|
| + for (size_t j = prevns; j < slst.size(); ++j) {
|
| + const char* space = strchr(slst[j].c_str(), ' ');
|
| + if (space) {
|
| + size_t slen = strlen(space + 1);
|
| + // different case after space (need capitalisation)
|
| + if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
|
| + std::string first(slst[j].c_str(), space + 1);
|
| + std::string second(space + 1);
|
| + std::vector<w_char> w;
|
| + if (utf8)
|
| + u8_u16(w, second);
|
| + mkinitcap2(second, w);
|
| + // set as first suggestion
|
| + slst.erase(slst.begin() + j);
|
| + slst.insert(slst.begin(), first + second);
|
| + }
|
| + }
|
| + }
|
| + break;
|
| + }
|
| +
|
| + case ALLCAP: {
|
| + std::string wspace(scw);
|
| + mkallsmall2(wspace, sunicw);
|
| + pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
|
| + if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
|
| + insert_sug(slst, wspace);
|
| + mkinitcap2(wspace, sunicw);
|
| + pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
|
| + for (size_t j = 0; j < slst.size(); ++j) {
|
| + mkallcap(slst[j]);
|
| + if (pAMgr && pAMgr->get_checksharps()) {
|
| + if (utf8) {
|
| + mystrrep(slst[j], "\xC3\x9F", "SS");
|
| + } else {
|
| + mystrrep(slst[j], "\xDF", "SS");
|
| + }
|
| + }
|
| + }
|
| + break;
|
| }
|
| - }
|
| -
|
| - switch(captype) {
|
| - case NOCAP: {
|
| - ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
|
| - break;
|
| - }
|
| -
|
| - case INITCAP: {
|
| - capwords = 1;
|
| - ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
|
| - if (ns == -1) break;
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall2(wspace, unicw, nc);
|
| - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
|
| - break;
|
| - }
|
| - case HUHINITCAP:
|
| - capwords = 1;
|
| - case HUHCAP: {
|
| - ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
|
| - if (ns != -1) {
|
| - int prevns;
|
| - // something.The -> something. The
|
| - char * dot = strchr(cw, '.');
|
| - if (dot && (dot > cw)) {
|
| - int captype_;
|
| - if (utf8) {
|
| - w_char w_[MAXWORDLEN];
|
| - int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
|
| - captype_ = get_captype_utf8(w_, wl_, langnum);
|
| - } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
|
| - if (captype_ == INITCAP) {
|
| - char * st = mystrdup(cw);
|
| - if (st) st = (char *) realloc(st, wl + 2);
|
| - if (st) {
|
| - st[(dot - cw) + 1] = ' ';
|
| - strcpy(st + (dot - cw) + 2, dot + 1);
|
| - ns = insert_sug(slst, st, ns);
|
| - free(st);
|
| - }
|
| - }
|
| - }
|
| - if (captype == HUHINITCAP) {
|
| - // TheOpenOffice.org -> The OpenOffice.org
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkinitsmall2(wspace, unicw, nc);
|
| - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
|
| - }
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall2(wspace, unicw, nc);
|
| - if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
|
| - prevns = ns;
|
| - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
|
| - if (captype == HUHINITCAP) {
|
| - mkinitcap2(wspace, unicw, nc);
|
| - if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
|
| - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
|
| - }
|
| - // aNew -> "a New" (instead of "a new")
|
| - for (int j = prevns; j < ns; j++) {
|
| - char * space = strchr((*slst)[j],' ');
|
| - if (space) {
|
| - int slen = strlen(space + 1);
|
| - // different case after space (need capitalisation)
|
| - if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
|
| - w_char w[MAXWORDLEN];
|
| - int wc = 0;
|
| - char * r = (*slst)[j];
|
| - if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
|
| - mkinitcap2(space + 1, w, wc);
|
| - // set as first suggestion
|
| - for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
|
| - (*slst)[0] = r;
|
| - }
|
| - }
|
| - }
|
| - }
|
| - break;
|
| - }
|
| -
|
| - case ALLCAP: {
|
| - memcpy(wspace, cw, (wl+1));
|
| - mkallsmall2(wspace, unicw, nc);
|
| - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
|
| - if (ns == -1) break;
|
| - if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
|
| - ns = insert_sug(slst, wspace, ns);
|
| - mkinitcap2(wspace, unicw, nc);
|
| - ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
|
| - for (int j=0; j < ns; j++) {
|
| - mkallcap((*slst)[j]);
|
| - if (pAMgr && pAMgr->get_checksharps()) {
|
| - char * pos;
|
| - if (utf8) {
|
| - pos = strstr((*slst)[j], "\xC3\x9F");
|
| - while (pos) {
|
| - *pos = 'S';
|
| - *(pos+1) = 'S';
|
| - pos = strstr(pos+2, "\xC3\x9F");
|
| - }
|
| - } else {
|
| - pos = strchr((*slst)[j], '\xDF');
|
| - while (pos) {
|
| - (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
|
| - mystrrep((*slst)[j], "\xDF", "SS");
|
| - pos = strchr((*slst)[j], '\xDF');
|
| - }
|
| - }
|
| - }
|
| - }
|
| - break;
|
| - }
|
| }
|
|
|
| - // LANG_hu section: replace '-' with ' ' in Hungarian
|
| + // LANG_hu section: replace '-' with ' ' in Hungarian
|
| if (langnum == LANG_hu) {
|
| - for (int j=0; j < ns; j++) {
|
| - char * pos = strchr((*slst)[j],'-');
|
| - if (pos) {
|
| - int info;
|
| - char w[MAXWORDUTF8LEN];
|
| - *pos = '\0';
|
| - strcpy(w, (*slst)[j]);
|
| - strcat(w, pos + 1);
|
| - spell(w, &info, NULL);
|
| - if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
|
| - *pos = ' ';
|
| - } else *pos = '-';
|
| - }
|
| + for (size_t j = 0; j < slst.size(); ++j) {
|
| + size_t pos = slst[j].find('-');
|
| + if (pos != std::string::npos) {
|
| + int info;
|
| + std::string w(slst[j].substr(0, pos));
|
| + w.append(slst[j].substr(pos + 1));
|
| + (void)spell(w, &info, NULL);
|
| + if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
|
| + slst[j][pos] = ' ';
|
| + } else
|
| + slst[j][pos] = '-';
|
| }
|
| + }
|
| }
|
| // END OF LANG_hu section
|
|
|
| // try ngram approach since found nothing or only compound words
|
| - if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
|
| - switch(captype) {
|
| - case NOCAP: {
|
| - ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
|
| - break;
|
| - }
|
| - case HUHINITCAP:
|
| - capwords = 1;
|
| - case HUHCAP: {
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall2(wspace, unicw, nc);
|
| - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
|
| - break;
|
| - }
|
| - case INITCAP: {
|
| - capwords = 1;
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall2(wspace, unicw, nc);
|
| - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
|
| - break;
|
| - }
|
| - case ALLCAP: {
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall2(wspace, unicw, nc);
|
| - int oldns = ns;
|
| - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
|
| - for (int j = oldns; j < ns; j++)
|
| - mkallcap((*slst)[j]);
|
| - break;
|
| - }
|
| + if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
|
| + switch (captype) {
|
| + case NOCAP: {
|
| + pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs);
|
| + break;
|
| + }
|
| + case HUHINITCAP:
|
| + capwords = 1;
|
| + case HUHCAP: {
|
| + std::string wspace(scw);
|
| + mkallsmall2(wspace, sunicw);
|
| + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
|
| + break;
|
| + }
|
| + case INITCAP: {
|
| + capwords = 1;
|
| + std::string wspace(scw);
|
| + mkallsmall2(wspace, sunicw);
|
| + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
|
| + break;
|
| }
|
| + case ALLCAP: {
|
| + std::string wspace(scw);
|
| + mkallsmall2(wspace, sunicw);
|
| + size_t oldns = slst.size();
|
| + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
|
| + for (size_t j = oldns; j < slst.size(); ++j) {
|
| + mkallcap(slst[j]);
|
| + }
|
| + break;
|
| + }
|
| + }
|
| }
|
|
|
| // try dash suggestion (Afo-American -> Afro-American)
|
| - if (char * pos = strchr(cw, '-')) {
|
| - char * ppos = cw;
|
| - int nodashsug = 1;
|
| - char ** nlst = NULL;
|
| - int nn = 0;
|
| - int last = 0;
|
| - if (*slst) {
|
| - for (int j = 0; j < ns && nodashsug == 1; j++) {
|
| - if (strchr((*slst)[j], '-')) nodashsug = 0;
|
| - }
|
| - }
|
| - while (nodashsug && !last) {
|
| - if (*pos == '\0') last = 1; else *pos = '\0';
|
| - if (!spell(ppos)) {
|
| - nn = suggest(&nlst, ppos);
|
| - for (int j = nn - 1; j >= 0; j--) {
|
| - strncpy(wspace, cw, ppos - cw);
|
| - strcpy(wspace + (ppos - cw), nlst[j]);
|
| - if (!last) {
|
| - strcat(wspace, "-");
|
| - strcat(wspace, pos + 1);
|
| - }
|
| - ns = insert_sug(slst, wspace, ns);
|
| - free(nlst[j]);
|
| + size_t dash_pos = scw.find('-');
|
| + if (dash_pos != std::string::npos) {
|
| + int nodashsug = 1;
|
| + for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
|
| + if (slst[j].find('-') != std::string::npos)
|
| + nodashsug = 0;
|
| + }
|
| +
|
| + size_t prev_pos = 0;
|
| + bool last = false;
|
| +
|
| + while (nodashsug && !last) {
|
| + if (dash_pos == scw.size())
|
| + last = 1;
|
| + std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
|
| + if (!spell(chunk.c_str())) {
|
| + std::vector<std::string> nlst = suggest(chunk.c_str());
|
| + for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
|
| + std::string wspace = scw.substr(0, prev_pos);
|
| + wspace.append(*j);
|
| + if (!last) {
|
| + wspace.append("-");
|
| + wspace.append(scw.substr(dash_pos + 1));
|
| }
|
| - if (nlst != NULL) free(nlst);
|
| - nodashsug = 0;
|
| - }
|
| - if (!last) {
|
| - *pos = '-';
|
| - ppos = pos + 1;
|
| - pos = strchr(ppos, '-');
|
| + insert_sug(slst, wspace);
|
| }
|
| - if (!pos) pos = cw + strlen(cw);
|
| - }
|
| + nodashsug = 0;
|
| + }
|
| + if (!last) {
|
| + prev_pos = dash_pos + 1;
|
| + dash_pos = scw.find('-', prev_pos);
|
| + }
|
| + if (dash_pos == std::string::npos)
|
| + dash_pos = scw.size();
|
| + }
|
| }
|
|
|
| // word reversing wrapper for complex prefixes
|
| if (complexprefixes) {
|
| - for (int j = 0; j < ns; j++) {
|
| - if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
|
| + for (size_t j = 0; j < slst.size(); ++j) {
|
| + if (utf8)
|
| + reverseword_utf(slst[j]);
|
| + else
|
| + reverseword(slst[j]);
|
| }
|
| }
|
|
|
| // capitalize
|
| - if (capwords) for (int j=0; j < ns; j++) {
|
| - mkinitcap((*slst)[j]);
|
| - }
|
| + if (capwords)
|
| + for (size_t j = 0; j < slst.size(); ++j) {
|
| + mkinitcap(slst[j]);
|
| + }
|
|
|
| // expand suggestions with dot(s)
|
| if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
|
| - for (int j = 0; j < ns; j++) {
|
| - (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
|
| - strcat((*slst)[j], word + strlen(word) - abbv);
|
| + for (size_t j = 0; j < slst.size(); ++j) {
|
| + slst[j].append(word.substr(word.size() - abbv));
|
| }
|
| }
|
|
|
| // remove bad capitalized and forbidden forms
|
| if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
|
| - switch (captype) {
|
| - case INITCAP:
|
| - case ALLCAP: {
|
| - int l = 0;
|
| - for (int j=0; j < ns; j++) {
|
| - if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
|
| - char s[MAXSWUTF8L];
|
| - w_char w[MAXSWL];
|
| - int len;
|
| - if (utf8) {
|
| - len = u8_u16(w, MAXSWL, (*slst)[j]);
|
| - } else {
|
| - strcpy(s, (*slst)[j]);
|
| - len = strlen(s);
|
| - }
|
| - mkallsmall2(s, w, len);
|
| - free((*slst)[j]);
|
| - if (spell(s)) {
|
| - (*slst)[l] = mystrdup(s);
|
| - if ((*slst)[l]) l++;
|
| - } else {
|
| - mkinitcap2(s, w, len);
|
| + switch (captype) {
|
| + case INITCAP:
|
| + case ALLCAP: {
|
| + size_t l = 0;
|
| + for (size_t j = 0; j < slst.size(); ++j) {
|
| + if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
|
| + std::string s;
|
| + std::vector<w_char> w;
|
| + if (utf8) {
|
| + u8_u16(w, slst[j]);
|
| + } else {
|
| + s = slst[j];
|
| + }
|
| + mkallsmall2(s, w);
|
| if (spell(s)) {
|
| - (*slst)[l] = mystrdup(s);
|
| - if ((*slst)[l]) l++;
|
| + slst[l] = s;
|
| + ++l;
|
| + } else {
|
| + mkinitcap2(s, w);
|
| + if (spell(s)) {
|
| + slst[l] = s;
|
| + ++l;
|
| + }
|
| }
|
| + } else {
|
| + slst[l] = slst[j];
|
| + ++l;
|
| }
|
| - } else {
|
| - (*slst)[l] = (*slst)[j];
|
| - l++;
|
| }
|
| + slst.resize(l);
|
| }
|
| - ns = l;
|
| }
|
| }
|
| - }
|
|
|
| // remove duplications
|
| - int l = 0;
|
| - for (int j = 0; j < ns; j++) {
|
| - (*slst)[l] = (*slst)[j];
|
| - for (int k = 0; k < l; k++) {
|
| - if (strcmp((*slst)[k], (*slst)[j]) == 0) {
|
| - free((*slst)[j]);
|
| - l--;
|
| + size_t l = 0;
|
| + for (size_t j = 0; j < slst.size(); ++j) {
|
| + slst[l] = slst[j];
|
| + for (size_t k = 0; k < l; ++k) {
|
| + if (slst[k] == slst[j]) {
|
| + --l;
|
| break;
|
| }
|
| }
|
| - l++;
|
| + ++l;
|
| }
|
| - ns = l;
|
| + slst.resize(l);
|
|
|
| // output conversion
|
| rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
|
| - for (int j = 0; rl && j < ns; j++) {
|
| - if (rl->conv((*slst)[j], wspace)) {
|
| - free((*slst)[j]);
|
| - (*slst)[j] = mystrdup(wspace);
|
| + for (size_t j = 0; rl && j < slst.size(); ++j) {
|
| + std::string wspace;
|
| + if (rl->conv(slst[j], wspace)) {
|
| + slst[j] = wspace;
|
| }
|
| }
|
|
|
| - // if suggestions removed by nosuggest, onlyincompound parameters
|
| - if (l == 0 && *slst) {
|
| - free(*slst);
|
| - *slst = NULL;
|
| - }
|
| - return l;
|
| + return slst;
|
| }
|
|
|
| -void Hunspell::free_list(char *** slst, int n) {
|
| - freelist(slst, n);
|
| +const std::string& Hunspell::get_dict_encoding() const {
|
| + return m_Impl->get_dict_encoding();
|
| }
|
|
|
| -char * Hunspell::get_dic_encoding()
|
| -{
|
| +const std::string& HunspellImpl::get_dict_encoding() const {
|
| return encoding;
|
| }
|
|
|
| -#ifdef HUNSPELL_EXPERIMENTAL
|
| -// XXX need UTF-8 support
|
| -int Hunspell::suggest_auto(char*** slst, const char * word)
|
| -{
|
| - char cw[MAXWORDUTF8LEN];
|
| - char wspace[MAXWORDUTF8LEN];
|
| - if (!pSMgr || maxdic == 0) return 0;
|
| - int wl = strlen(word);
|
| - if (utf8) {
|
| - if (wl >= MAXWORDUTF8LEN) return 0;
|
| - } else {
|
| - if (wl >= MAXWORDLEN) return 0;
|
| - }
|
| - int captype = 0;
|
| - int abbv = 0;
|
| - wl = cleanword(cw, word, &captype, &abbv);
|
| - if (wl == 0) return 0;
|
| - int ns = 0;
|
| - *slst = NULL; // HU, nsug in pSMgr->suggest
|
| -
|
| - switch(captype) {
|
| - case NOCAP: {
|
| - ns = pSMgr->suggest_auto(slst, cw, ns);
|
| - if (ns>0) break;
|
| - break;
|
| - }
|
| -
|
| - case INITCAP: {
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall(wspace);
|
| - ns = pSMgr->suggest_auto(slst, wspace, ns);
|
| - for (int j=0; j < ns; j++)
|
| - mkinitcap((*slst)[j]);
|
| - ns = pSMgr->suggest_auto(slst, cw, ns);
|
| - break;
|
| -
|
| - }
|
| -
|
| - case HUHINITCAP:
|
| - case HUHCAP: {
|
| - ns = pSMgr->suggest_auto(slst, cw, ns);
|
| - if (ns == 0) {
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall(wspace);
|
| - ns = pSMgr->suggest_auto(slst, wspace, ns);
|
| - }
|
| - break;
|
| - }
|
| -
|
| - case ALLCAP: {
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall(wspace);
|
| - ns = pSMgr->suggest_auto(slst, wspace, ns);
|
| -
|
| - mkinitcap(wspace);
|
| - ns = pSMgr->suggest_auto(slst, wspace, ns);
|
| -
|
| - for (int j=0; j < ns; j++)
|
| - mkallcap((*slst)[j]);
|
| - break;
|
| - }
|
| - }
|
| +std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
|
| + return m_Impl->stem(desc);
|
| +}
|
|
|
| - // word reversing wrapper for complex prefixes
|
| - if (complexprefixes) {
|
| - for (int j = 0; j < ns; j++) {
|
| - if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
|
| - }
|
| - }
|
| +std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
|
| + std::vector<std::string> slst;
|
|
|
| - // expand suggestions with dot(s)
|
| - if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
|
| - for (int j = 0; j < ns; j++) {
|
| - (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
|
| - strcat((*slst)[j], word + strlen(word) - abbv);
|
| - }
|
| - }
|
| + std::string result2;
|
| + if (desc.empty())
|
| + return slst;
|
| + for (size_t i = 0; i < desc.size(); ++i) {
|
|
|
| - // LANG_hu section: replace '-' with ' ' in Hungarian
|
| - if (langnum == LANG_hu) {
|
| - for (int j=0; j < ns; j++) {
|
| - char * pos = strchr((*slst)[j],'-');
|
| - if (pos) {
|
| - int info;
|
| - char w[MAXWORDUTF8LEN];
|
| - *pos = '\0';
|
| - strcpy(w, (*slst)[j]);
|
| - strcat(w, pos + 1);
|
| - spell(w, &info, NULL);
|
| - if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
|
| - *pos = ' ';
|
| - } else *pos = '-';
|
| - }
|
| - }
|
| - }
|
| - // END OF LANG_hu section
|
| - return ns;
|
| -}
|
| -#endif
|
| + std::string result;
|
|
|
| -int Hunspell::stem(char*** slst, char ** desc, int n)
|
| -{
|
| - char result[MAXLNLEN];
|
| - char result2[MAXLNLEN];
|
| - *slst = NULL;
|
| - if (n == 0) return 0;
|
| - *result2 = '\0';
|
| - for (int i = 0; i < n; i++) {
|
| - *result = '\0';
|
| // add compound word parts (except the last one)
|
| - char * s = (char *) desc[i];
|
| - char * part = strstr(s, MORPH_PART);
|
| + const char* s = desc[i].c_str();
|
| + const char* part = strstr(s, MORPH_PART);
|
| if (part) {
|
| - char * nextpart = strstr(part + 1, MORPH_PART);
|
| - while (nextpart) {
|
| - copy_field(result + strlen(result), part, MORPH_PART);
|
| - part = nextpart;
|
| - nextpart = strstr(part + 1, MORPH_PART);
|
| - }
|
| - s = part;
|
| + const char* nextpart = strstr(part + 1, MORPH_PART);
|
| + while (nextpart) {
|
| + std::string field;
|
| + copy_field(field, part, MORPH_PART);
|
| + result.append(field);
|
| + part = nextpart;
|
| + nextpart = strstr(part + 1, MORPH_PART);
|
| + }
|
| + s = part;
|
| }
|
|
|
| - char **pl;
|
| - char tok[MAXLNLEN];
|
| - strcpy(tok, s);
|
| - char * alt = strstr(tok, " | ");
|
| - while (alt) {
|
| - alt[1] = MSEP_ALT;
|
| - alt = strstr(alt, " | ");
|
| + std::string tok(s);
|
| + size_t alt = 0;
|
| + while ((alt = tok.find(" | ", alt)) != std::string::npos) {
|
| + tok[alt + 1] = MSEP_ALT;
|
| }
|
| - int pln = line_tok(tok, &pl, MSEP_ALT);
|
| - for (int k = 0; k < pln; k++) {
|
| - // add derivational suffixes
|
| - if (strstr(pl[k], MORPH_DERI_SFX)) {
|
| - // remove inflectional suffixes
|
| - char * is = strstr(pl[k], MORPH_INFL_SFX);
|
| - if (is) *is = '\0';
|
| - char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
|
| - if (sg) {
|
| - char ** gen;
|
| - int genl = line_tok(sg, &gen, MSEP_REC);
|
| - free(sg);
|
| - for (int j = 0; j < genl; j++) {
|
| - sprintf(result2 + strlen(result2), "%c%s%s",
|
| - MSEP_REC, result, gen[j]);
|
| - }
|
| - freelist(&gen, genl);
|
| - }
|
| - } else {
|
| - sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
|
| - if (strstr(pl[k], MORPH_SURF_PFX)) {
|
| - copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
|
| - }
|
| - copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
|
| + std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
|
| + for (size_t k = 0; k < pl.size(); ++k) {
|
| + // add derivational suffixes
|
| + if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) {
|
| + // remove inflectional suffixes
|
| + const size_t is = pl[k].find(MORPH_INFL_SFX);
|
| + if (is != std::string::npos)
|
| + pl[k].resize(is);
|
| + std::vector<std::string> singlepl;
|
| + singlepl.push_back(pl[k]);
|
| + std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);
|
| + if (!sg.empty()) {
|
| + std::vector<std::string> gen = line_tok(sg, MSEP_REC);
|
| + for (size_t j = 0; j < gen.size(); ++j) {
|
| + result2.push_back(MSEP_REC);
|
| + result2.append(result);
|
| + result2.append(gen[j]);
|
| + }
|
| + }
|
| + } else {
|
| + result2.push_back(MSEP_REC);
|
| + result2.append(result);
|
| + if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) {
|
| + std::string field;
|
| + copy_field(field, pl[k], MORPH_SURF_PFX);
|
| + result2.append(field);
|
| }
|
| + std::string field;
|
| + copy_field(field, pl[k], MORPH_STEM);
|
| + result2.append(field);
|
| + }
|
| }
|
| - freelist(&pl, pln);
|
| }
|
| - int sln = line_tok(result2, slst, MSEP_REC);
|
| - return uniqlist(*slst, sln);
|
| -
|
| + slst = line_tok(result2, MSEP_REC);
|
| + uniqlist(slst);
|
| + return slst;
|
| }
|
|
|
| -int Hunspell::stem(char*** slst, const char * word)
|
| -{
|
| - char ** pl;
|
| - int pln = analyze(&pl, word);
|
| - int pln2 = stem(slst, pl, pln);
|
| - freelist(&pl, pln);
|
| - return pln2;
|
| +std::vector<std::string> Hunspell::stem(const std::string& word) {
|
| + return m_Impl->stem(word);
|
| }
|
|
|
| -#ifdef HUNSPELL_EXPERIMENTAL
|
| -int Hunspell::suggest_pos_stems(char*** slst, const char * word)
|
| -{
|
| - char cw[MAXWORDUTF8LEN];
|
| - char wspace[MAXWORDUTF8LEN];
|
| - if (! pSMgr || maxdic == 0) return 0;
|
| - int wl = strlen(word);
|
| - if (utf8) {
|
| - if (wl >= MAXWORDUTF8LEN) return 0;
|
| - } else {
|
| - if (wl >= MAXWORDLEN) return 0;
|
| - }
|
| - int captype = 0;
|
| - int abbv = 0;
|
| - wl = cleanword(cw, word, &captype, &abbv);
|
| - if (wl == 0) return 0;
|
| -
|
| - int ns = 0; // ns=0 = normalized input
|
| -
|
| - *slst = NULL; // HU, nsug in pSMgr->suggest
|
| -
|
| - switch(captype) {
|
| - case HUHCAP:
|
| - case NOCAP: {
|
| - ns = pSMgr->suggest_pos_stems(slst, cw, ns);
|
| -
|
| - if ((abbv) && (ns == 0)) {
|
| - memcpy(wspace,cw,wl);
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
|
| - }
|
| -
|
| - break;
|
| - }
|
| -
|
| - case INITCAP: {
|
| +std::vector<std::string> HunspellImpl::stem(const std::string& word) {
|
| + return stem(analyze(word));
|
| +}
|
|
|
| - ns = pSMgr->suggest_pos_stems(slst, cw, ns);
|
| +const char* Hunspell::get_wordchars() const {
|
| + return m_Impl->get_wordchars().c_str();
|
| +}
|
|
|
| - if (ns == 0 || ((*slst)[0][0] == '#')) {
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall(wspace);
|
| - ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
|
| - }
|
| +const std::string& Hunspell::get_wordchars_cpp() const {
|
| + return m_Impl->get_wordchars();
|
| +}
|
|
|
| - break;
|
| +const std::string& HunspellImpl::get_wordchars() const {
|
| + return pAMgr->get_wordchars();
|
| +}
|
|
|
| - }
|
| +const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
|
| + return m_Impl->get_wordchars_utf16();
|
| +}
|
|
|
| - case ALLCAP: {
|
| - ns = pSMgr->suggest_pos_stems(slst, cw, ns);
|
| - if (ns != 0) break;
|
| +const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
|
| + return pAMgr->get_wordchars_utf16();
|
| +}
|
|
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall(wspace);
|
| - ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
|
| +void HunspellImpl::mkinitcap(std::string& u8) {
|
| + if (utf8) {
|
| + std::vector<w_char> u16;
|
| + u8_u16(u16, u8);
|
| + ::mkinitcap_utf(u16, langnum);
|
| + u16_u8(u8, u16);
|
| + } else {
|
| + ::mkinitcap(u8, csconv);
|
| + }
|
| +}
|
|
|
| - if (ns == 0) {
|
| - mkinitcap(wspace);
|
| - ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
|
| - }
|
| - break;
|
| - }
|
| +int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
|
| + if (utf8) {
|
| + ::mkinitcap_utf(u16, langnum);
|
| + u16_u8(u8, u16);
|
| + } else {
|
| + ::mkinitcap(u8, csconv);
|
| }
|
| + return u8.size();
|
| +}
|
|
|
| - return ns;
|
| +int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
|
| + if (utf8) {
|
| + ::mkinitsmall_utf(u16, langnum);
|
| + u16_u8(u8, u16);
|
| + } else {
|
| + ::mkinitsmall(u8, csconv);
|
| + }
|
| + return u8.size();
|
| }
|
| -#endif // END OF HUNSPELL_EXPERIMENTAL CODE
|
|
|
| -const char * Hunspell::get_wordchars()
|
| -{
|
| - return pAMgr->get_wordchars();
|
| +int Hunspell::add(const std::string& word) {
|
| + return m_Impl->add(word);
|
| }
|
|
|
| -unsigned short * Hunspell::get_wordchars_utf16(int * len)
|
| -{
|
| - return pAMgr->get_wordchars_utf16(len);
|
| +int HunspellImpl::add(const std::string& word) {
|
| + if (!m_HMgrs.empty())
|
| + return m_HMgrs[0]->add(word);
|
| + return 0;
|
| }
|
|
|
| -void Hunspell::mkinitcap(char * p)
|
| -{
|
| - if (!utf8) {
|
| - if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
|
| - } else {
|
| - int len;
|
| - w_char u[MAXWORDLEN];
|
| - len = u8_u16(u, MAXWORDLEN, p);
|
| - unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
|
| - u[0].h = (unsigned char) (i >> 8);
|
| - u[0].l = (unsigned char) (i & 0x00FF);
|
| - u16_u8(p, MAXWORDUTF8LEN, u, len);
|
| - }
|
| +int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
|
| + return m_Impl->add_with_affix(word, example);
|
| }
|
|
|
| -int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
|
| -{
|
| - if (!utf8) {
|
| - if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
|
| - } else if (nc > 0) {
|
| - unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
|
| - u[0].h = (unsigned char) (i >> 8);
|
| - u[0].l = (unsigned char) (i & 0x00FF);
|
| - u16_u8(p, MAXWORDUTF8LEN, u, nc);
|
| - return strlen(p);
|
| - }
|
| - return nc;
|
| +int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
|
| + if (!m_HMgrs.empty())
|
| + return m_HMgrs[0]->add_with_affix(word, example);
|
| + return 0;
|
| }
|
|
|
| -int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
|
| -{
|
| - if (!utf8) {
|
| - if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
|
| - } else if (nc > 0) {
|
| - unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
|
| - u[0].h = (unsigned char) (i >> 8);
|
| - u[0].l = (unsigned char) (i & 0x00FF);
|
| - u16_u8(p, MAXWORDUTF8LEN, u, nc);
|
| - return strlen(p);
|
| - }
|
| - return nc;
|
| +int Hunspell::remove(const std::string& word) {
|
| + return m_Impl->remove(word);
|
| }
|
|
|
| -int Hunspell::add(const char * word)
|
| -{
|
| - if (pHMgr[0]) return (pHMgr[0])->add(word);
|
| - return 0;
|
| +int HunspellImpl::remove(const std::string& word) {
|
| + if (!m_HMgrs.empty())
|
| + return m_HMgrs[0]->remove(word);
|
| + return 0;
|
| }
|
|
|
| -int Hunspell::add_with_affix(const char * word, const char * example)
|
| -{
|
| - if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
|
| - return 0;
|
| +const char* Hunspell::get_version() const {
|
| + return m_Impl->get_version().c_str();
|
| }
|
|
|
| -int Hunspell::remove(const char * word)
|
| -{
|
| - if (pHMgr[0]) return (pHMgr[0])->remove(word);
|
| - return 0;
|
| +const std::string& Hunspell::get_version_cpp() const {
|
| + return m_Impl->get_version();
|
| }
|
|
|
| -const char * Hunspell::get_version()
|
| -{
|
| +const std::string& HunspellImpl::get_version() const {
|
| return pAMgr->get_version();
|
| }
|
|
|
| -struct cs_info * Hunspell::get_csconv()
|
| -{
|
| +struct cs_info* HunspellImpl::get_csconv() {
|
| return csconv;
|
| }
|
|
|
| -void Hunspell::cat_result(char * result, char * st)
|
| -{
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| +struct cs_info* Hunspell::get_csconv() {
|
| + return m_Impl->get_csconv();
|
| }
|
|
|
| -int Hunspell::analyze(char*** slst, const char * word)
|
| -{
|
| - char cw[MAXWORDUTF8LEN];
|
| - char wspace[MAXWORDUTF8LEN];
|
| - w_char unicw[MAXWORDLEN];
|
| - int wl2 = 0;
|
| - *slst = NULL;
|
| - if (! pSMgr || maxdic == 0) return 0;
|
| - int nc = strlen(word);
|
| +void HunspellImpl::cat_result(std::string& result, const std::string& st) {
|
| + if (!st.empty()) {
|
| + if (!result.empty())
|
| + result.append("\n");
|
| + result.append(st);
|
| + }
|
| +}
|
| +
|
| +std::vector<std::string> Hunspell::analyze(const std::string& word) {
|
| + return m_Impl->analyze(word);
|
| +}
|
| +
|
| +std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
|
| + std::vector<std::string> slst;
|
| + if (!pSMgr || m_HMgrs.empty())
|
| + return slst;
|
| if (utf8) {
|
| - if (nc >= MAXWORDUTF8LEN) return 0;
|
| + if (word.size() >= MAXWORDUTF8LEN)
|
| + return slst;
|
| } else {
|
| - if (nc >= MAXWORDLEN) return 0;
|
| + if (word.size() >= MAXWORDLEN)
|
| + return slst;
|
| }
|
| - int captype = 0;
|
| - int abbv = 0;
|
| - int wl = 0;
|
| + int captype = NOCAP;
|
| + size_t abbv = 0;
|
| + size_t wl = 0;
|
| +
|
| + std::string scw;
|
| + std::vector<w_char> sunicw;
|
|
|
| // input conversion
|
| - RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
|
| - if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
|
| - else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
|
| + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
|
| + {
|
| + std::string wspace;
|
| +
|
| + bool convstatus = rl ? rl->conv(word, wspace) : false;
|
| + if (convstatus)
|
| + wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
|
| + else
|
| + wl = cleanword2(scw, sunicw, word, &captype, &abbv);
|
| + }
|
|
|
| if (wl == 0) {
|
| - if (abbv) {
|
| - for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
|
| - cw[wl] = '\0';
|
| - abbv = 0;
|
| - } else return 0;
|
| + if (abbv) {
|
| + scw.clear();
|
| + for (wl = 0; wl < abbv; wl++)
|
| + scw.push_back('.');
|
| + abbv = 0;
|
| + } else
|
| + return slst;
|
| }
|
|
|
| - char result[MAXLNLEN];
|
| - char * st = NULL;
|
| -
|
| - *result = '\0';
|
| -
|
| - int n = 0;
|
| - int n2 = 0;
|
| - int n3 = 0;
|
| + std::string result;
|
|
|
| + size_t n = 0;
|
| // test numbers
|
| // LANG_hu section: set dash information for suggestions
|
| if (langnum == LANG_hu) {
|
| - while ((n < wl) &&
|
| - (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
|
| - n++;
|
| - if ((cw[n] == '.') || (cw[n] == ',')) {
|
| - if (((n2 == 0) && (n > 3)) ||
|
| - ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
|
| - n2++;
|
| - n3 = n;
|
| - }
|
| - }
|
| + size_t n2 = 0;
|
| + size_t n3 = 0;
|
| +
|
| + while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
|
| + (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
|
| + n++;
|
| + if ((scw[n] == '.') || (scw[n] == ',')) {
|
| + if (((n2 == 0) && (n > 3)) ||
|
| + ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
|
| + break;
|
| + n2++;
|
| + n3 = n;
|
| + }
|
| + }
|
|
|
| - if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
|
| - if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
|
| - mystrcat(result, cw, MAXLNLEN);
|
| - result[n - 1] = '\0';
|
| - if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
|
| - else {
|
| - char sign = cw[n];
|
| - cw[n] = '\0';
|
| - cat_result(result, pSMgr->suggest_morph(cw + n - 1));
|
| - mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
|
| - cw[n] = sign;
|
| - cat_result(result, pSMgr->suggest_morph(cw + n));
|
| - }
|
| - return line_tok(result, slst, MSEP_REC);
|
| - }
|
| + if ((n == wl) && (n3 > 0) && (n - n3 > 3))
|
| + return slst;
|
| + if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
|
| + checkword(scw.substr(n), NULL, NULL))) {
|
| + result.append(scw);
|
| + result.resize(n - 1);
|
| + if (n == wl)
|
| + cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
|
| + else {
|
| + std::string chunk = scw.substr(n - 1, 1);
|
| + cat_result(result, pSMgr->suggest_morph(chunk));
|
| + result.push_back('+'); // XXX SPEC. MORPHCODE
|
| + cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
|
| + }
|
| + return line_tok(result, MSEP_REC);
|
| + }
|
| }
|
| // END OF LANG_hu section
|
|
|
| - switch(captype) {
|
| - case HUHCAP:
|
| - case HUHINITCAP:
|
| - case NOCAP: {
|
| - cat_result(result, pSMgr->suggest_morph(cw));
|
| - if (abbv) {
|
| - memcpy(wspace,cw,wl);
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - cat_result(result, pSMgr->suggest_morph(wspace));
|
| - }
|
| - break;
|
| - }
|
| - case INITCAP: {
|
| - wl = mkallsmall2(cw, unicw, nc);
|
| - memcpy(wspace,cw,(wl+1));
|
| - wl2 = mkinitcap2(cw, unicw, nc);
|
| - cat_result(result, pSMgr->suggest_morph(wspace));
|
| - cat_result(result, pSMgr->suggest_morph(cw));
|
| - if (abbv) {
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - cat_result(result, pSMgr->suggest_morph(wspace));
|
| -
|
| - memcpy(wspace, cw, wl2);
|
| - *(wspace+wl2) = '.';
|
| - *(wspace+wl2+1) = '\0';
|
| -
|
| - cat_result(result, pSMgr->suggest_morph(wspace));
|
| - }
|
| - break;
|
| - }
|
| - case ALLCAP: {
|
| - cat_result(result, pSMgr->suggest_morph(cw));
|
| - if (abbv) {
|
| - memcpy(wspace,cw,wl);
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - cat_result(result, pSMgr->suggest_morph(cw));
|
| - }
|
| - wl = mkallsmall2(cw, unicw, nc);
|
| - memcpy(wspace,cw,(wl+1));
|
| - wl2 = mkinitcap2(cw, unicw, nc);
|
| -
|
| - cat_result(result, pSMgr->suggest_morph(wspace));
|
| - cat_result(result, pSMgr->suggest_morph(cw));
|
| - if (abbv) {
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - cat_result(result, pSMgr->suggest_morph(wspace));
|
| -
|
| - memcpy(wspace, cw, wl2);
|
| - *(wspace+wl2) = '.';
|
| - *(wspace+wl2+1) = '\0';
|
| -
|
| - cat_result(result, pSMgr->suggest_morph(wspace));
|
| - }
|
| - break;
|
| - }
|
| + switch (captype) {
|
| + case HUHCAP:
|
| + case HUHINITCAP:
|
| + case NOCAP: {
|
| + cat_result(result, pSMgr->suggest_morph(scw));
|
| + if (abbv) {
|
| + std::string u8buffer(scw);
|
| + u8buffer.push_back('.');
|
| + cat_result(result, pSMgr->suggest_morph(u8buffer));
|
| + }
|
| + break;
|
| + }
|
| + case INITCAP: {
|
| + mkallsmall2(scw, sunicw);
|
| + std::string u8buffer(scw);
|
| + mkinitcap2(scw, sunicw);
|
| + cat_result(result, pSMgr->suggest_morph(u8buffer));
|
| + cat_result(result, pSMgr->suggest_morph(scw));
|
| + if (abbv) {
|
| + u8buffer.push_back('.');
|
| + cat_result(result, pSMgr->suggest_morph(u8buffer));
|
| +
|
| + u8buffer = scw;
|
| + u8buffer.push_back('.');
|
| +
|
| + cat_result(result, pSMgr->suggest_morph(u8buffer));
|
| + }
|
| + break;
|
| + }
|
| + case ALLCAP: {
|
| + cat_result(result, pSMgr->suggest_morph(scw));
|
| + if (abbv) {
|
| + std::string u8buffer(scw);
|
| + u8buffer.push_back('.');
|
| + cat_result(result, pSMgr->suggest_morph(u8buffer));
|
| + }
|
| + mkallsmall2(scw, sunicw);
|
| + std::string u8buffer(scw);
|
| + mkinitcap2(scw, sunicw);
|
| +
|
| + cat_result(result, pSMgr->suggest_morph(u8buffer));
|
| + cat_result(result, pSMgr->suggest_morph(scw));
|
| + if (abbv) {
|
| + u8buffer.push_back('.');
|
| + cat_result(result, pSMgr->suggest_morph(u8buffer));
|
| +
|
| + u8buffer = scw;
|
| + u8buffer.push_back('.');
|
| +
|
| + cat_result(result, pSMgr->suggest_morph(u8buffer));
|
| + }
|
| + break;
|
| + }
|
| }
|
|
|
| - if (*result) {
|
| + if (!result.empty()) {
|
| // word reversing wrapper for complex prefixes
|
| if (complexprefixes) {
|
| - if (utf8) reverseword_utf(result); else reverseword(result);
|
| + if (utf8)
|
| + reverseword_utf(result);
|
| + else
|
| + reverseword(result);
|
| }
|
| - return line_tok(result, slst, MSEP_REC);
|
| + return line_tok(result, MSEP_REC);
|
| }
|
|
|
| // compound word with dash (HU) I18n
|
| - char * dash = NULL;
|
| - int nresult = 0;
|
| // LANG_hu section: set dash information for suggestions
|
| - if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
|
| - if ((langnum == LANG_hu) && dash) {
|
| - *dash='\0';
|
| - // examine 2 sides of the dash
|
| - if (dash[1] == '\0') { // base word ending with dash
|
| - if (spell(cw)) {
|
| - char * p = pSMgr->suggest_morph(cw);
|
| - if (p) {
|
| - int ret = line_tok(p, slst, MSEP_REC);
|
| - free(p);
|
| - return ret;
|
| - }
|
| -
|
| - }
|
| - } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
|
| - if (spell(cw) && (spell("-e"))) {
|
| - st = pSMgr->suggest_morph(cw);
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
|
| - st = pSMgr->suggest_morph("-e");
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - return line_tok(result, slst, MSEP_REC);
|
| - }
|
| - } else {
|
| +
|
| + size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
|
| + if (dash_pos != std::string::npos) {
|
| + int nresult = 0;
|
| +
|
| + std::string part1 = scw.substr(0, dash_pos);
|
| + std::string part2 = scw.substr(dash_pos+1);
|
| +
|
| + // examine 2 sides of the dash
|
| + if (part2.empty()) { // base word ending with dash
|
| + if (spell(part1)) {
|
| + std::string p = pSMgr->suggest_morph(part1);
|
| + if (!p.empty()) {
|
| + slst = line_tok(p, MSEP_REC);
|
| + return slst;
|
| + }
|
| + }
|
| + } else if (part2.size() == 1 && part2[0] == 'e') { // XXX (HU) -e hat.
|
| + if (spell(part1) && (spell("-e"))) {
|
| + std::string st = pSMgr->suggest_morph(part1);
|
| + if (!st.empty()) {
|
| + result.append(st);
|
| + }
|
| + result.push_back('+'); // XXX spec. separator in MORPHCODE
|
| + st = pSMgr->suggest_morph("-e");
|
| + if (!st.empty()) {
|
| + result.append(st);
|
| + }
|
| + return line_tok(result, MSEP_REC);
|
| + }
|
| + } else {
|
| // first word ending with dash: word- XXX ???
|
| - char r2 = *(dash + 1);
|
| - dash[0]='-';
|
| - dash[1]='\0';
|
| - nresult = spell(cw);
|
| - dash[1] = r2;
|
| - dash[0]='\0';
|
| - if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
|
| - ((dash[1] > '0') && (dash[1] < '9')))) {
|
| - st = pSMgr->suggest_morph(cw);
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
|
| - }
|
| - st = pSMgr->suggest_morph(dash+1);
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - return line_tok(result, slst, MSEP_REC);
|
| - }
|
| + part1.push_back(' ');
|
| + nresult = spell(part1);
|
| + part1.erase(part1.size() - 1);
|
| + if (nresult && spell(part2) &&
|
| + ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
|
| + std::string st = pSMgr->suggest_morph(part1);
|
| + if (!st.empty()) {
|
| + result.append(st);
|
| + result.push_back('+'); // XXX spec. separator in MORPHCODE
|
| + }
|
| + st = pSMgr->suggest_morph(part2);
|
| + if (!st.empty()) {
|
| + result.append(st);
|
| + }
|
| + return line_tok(result, MSEP_REC);
|
| }
|
| - // affixed number in correct word
|
| - if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
|
| - (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
|
| - *dash='-';
|
| - n = 1;
|
| - if (*(dash - n) == '.') n++;
|
| - // search first not a number character to left from dash
|
| - while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
|
| - n++;
|
| - }
|
| - if ((dash - n) < cw) n--;
|
| - // numbers: valami1000000-hoz
|
| - // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
|
| - // 56-hoz, 6-hoz
|
| - for(; n >= 1; n--) {
|
| - if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
|
| - mystrcat(result, cw, MAXLNLEN);
|
| - result[dash - cw - n] = '\0';
|
| - st = pSMgr->suggest_morph(dash - n);
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - return line_tok(result, slst, MSEP_REC);
|
| - }
|
| - }
|
| - }
|
| + }
|
| + // affixed number in correct word
|
| + if (nresult && (dash_pos > 0) &&
|
| + (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
|
| + (scw[dash_pos - 1] == '.'))) {
|
| + n = 1;
|
| + if (scw[dash_pos - n] == '.')
|
| + n++;
|
| + // search first not a number character to left from dash
|
| + while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
|
| + (n < 6)) {
|
| + n++;
|
| + }
|
| + if (dash_pos < n)
|
| + n--;
|
| + // numbers: valami1000000-hoz
|
| + // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
|
| + // 56-hoz, 6-hoz
|
| + for (; n >= 1; n--) {
|
| + if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
|
| + continue;
|
| + }
|
| + std::string chunk = scw.substr(dash_pos - n);
|
| + if (checkword(chunk, NULL, NULL)) {
|
| + result.append(chunk);
|
| + std::string st = pSMgr->suggest_morph(chunk);
|
| + if (!st.empty()) {
|
| + result.append(st);
|
| + }
|
| + return line_tok(result, MSEP_REC);
|
| + }
|
| + }
|
| + }
|
| }
|
| - return 0;
|
| + return slst;
|
| }
|
|
|
| -int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
|
| -{
|
| - *slst = NULL;
|
| - if (!pSMgr || !pln) return 0;
|
| - char **pl2;
|
| - int pl2n = analyze(&pl2, word);
|
| - int captype = 0;
|
| +std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
|
| + return m_Impl->generate(word, pl);
|
| +}
|
| +
|
| +std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
|
| + std::vector<std::string> slst;
|
| + if (!pSMgr || pl.empty())
|
| + return slst;
|
| + std::vector<std::string> pl2 = analyze(word);
|
| + int captype = NOCAP;
|
| int abbv = 0;
|
| - char cw[MAXWORDUTF8LEN];
|
| + std::string cw;
|
| cleanword(cw, word, &captype, &abbv);
|
| - char result[MAXLNLEN];
|
| - *result = '\0';
|
| + std::string result;
|
|
|
| - for (int i = 0; i < pln; i++) {
|
| - cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
|
| + for (size_t i = 0; i < pl.size(); ++i) {
|
| + cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
|
| }
|
| - freelist(&pl2, pl2n);
|
|
|
| - if (*result) {
|
| + if (!result.empty()) {
|
| // allcap
|
| - if (captype == ALLCAP) mkallcap(result);
|
| + if (captype == ALLCAP)
|
| + mkallcap(result);
|
|
|
| // line split
|
| - int linenum = line_tok(result, slst, MSEP_REC);
|
| + slst = line_tok(result, MSEP_REC);
|
|
|
| // capitalize
|
| if (captype == INITCAP || captype == HUHINITCAP) {
|
| - for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
|
| + for (size_t j = 0; j < slst.size(); ++j) {
|
| + mkinitcap(slst[j]);
|
| + }
|
| }
|
|
|
| // temporary filtering of prefix related errors (eg.
|
| // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
|
| -
|
| - int r = 0;
|
| - for (int j=0; j < linenum; j++) {
|
| - if (!spell((*slst)[j])) {
|
| - free((*slst)[j]);
|
| - (*slst)[j] = NULL;
|
| - } else {
|
| - if (r < j) (*slst)[r] = (*slst)[j];
|
| - r++;
|
| - }
|
| + std::vector<std::string>::iterator it = slst.begin();
|
| + while (it != slst.end()) {
|
| + if (!spell(*it)) {
|
| + it = slst.erase(it);
|
| + } else {
|
| + ++it;
|
| + }
|
| }
|
| - if (r > 0) return r;
|
| - free(*slst);
|
| - *slst = NULL;
|
| }
|
| - return 0;
|
| + return slst;
|
| }
|
|
|
| -int Hunspell::generate(char*** slst, const char * word, const char * pattern)
|
| -{
|
| - char **pl;
|
| - int pln = analyze(&pl, pattern);
|
| - int n = generate(slst, word, pl, pln);
|
| - freelist(&pl, pln);
|
| - return uniqlist(*slst, n);
|
| +std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
|
| + return m_Impl->generate(word, pattern);
|
| +}
|
| +
|
| +std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
|
| + std::vector<std::string> pl = analyze(pattern);
|
| + std::vector<std::string> slst = generate(word, pl);
|
| + uniqlist(slst);
|
| + return slst;
|
| }
|
|
|
| // minimal XML parser functions
|
| -int Hunspell::get_xml_par(char * dest, const char * par, int max)
|
| -{
|
| - char * d = dest;
|
| - if (!par) return 0;
|
| - char end = *par;
|
| - char * dmax = dest + max;
|
| - if (end == '>') end = '<';
|
| - else if (end != '\'' && end != '"') return 0; // bad XML
|
| - for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
|
| - *d = '\0';
|
| - mystrrep(dest, "&lt;", "<");
|
| - mystrrep(dest, "&amp;", "&");
|
| - return (int)(d - dest);
|
| -}
|
| -
|
| -int Hunspell::get_langnum() const
|
| -{
|
| - return langnum;
|
| +std::string HunspellImpl::get_xml_par(const char* par) {
|
| + std::string dest;
|
| + if (!par)
|
| + return dest;
|
| + char end = *par;
|
| + if (end == '>')
|
| + end = '<';
|
| + else if (end != '\'' && end != '"')
|
| + return 0; // bad XML
|
| + for (par++; *par != '\0' && *par != end; ++par) {
|
| + dest.push_back(*par);
|
| + }
|
| + mystrrep(dest, "&lt;", "<");
|
| + mystrrep(dest, "&amp;", "&");
|
| + return dest;
|
| +}
|
| +
|
| +int Hunspell::get_langnum() const {
|
| + return m_Impl->get_langnum();
|
| +}
|
| +
|
| +int HunspellImpl::get_langnum() const {
|
| + return langnum;
|
| +}
|
| +
|
| +bool Hunspell::input_conv(const std::string& word, std::string& dest) {
|
| + return m_Impl->input_conv(word, dest);
|
| +}
|
| +
|
| +int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
|
| + std::string d;
|
| + bool ret = input_conv(word, d);
|
| + if (ret && d.size() < destsize) {
|
| + strncpy(dest, d.c_str(), destsize);
|
| + return 1;
|
| + }
|
| + return 0;
|
| +}
|
| +
|
| +bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
|
| + RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
|
| + if (rl) {
|
| + return rl->conv(word, dest);
|
| + }
|
| + dest.assign(word);
|
| + return false;
|
| }
|
|
|
| // return the beginning of the element (attr == NULL) or the attribute
|
| -const char * Hunspell::get_xml_pos(const char * s, const char * attr)
|
| -{
|
| - const char * end = strchr(s, '>');
|
| - const char * p = s;
|
| - if (attr == NULL) return end;
|
| +const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) {
|
| + const char* end = strchr(s, '>');
|
| + const char* p = s;
|
| + if (attr == NULL)
|
| + return end;
|
| do {
|
| p = strstr(p, attr);
|
| - if (!p || p >= end) return 0;
|
| - } while (*(p-1) != ' ' && *(p-1) != '\n');
|
| + if (!p || p >= end)
|
| + return 0;
|
| + } while (*(p - 1) != ' ' && *(p - 1) != '\n');
|
| return p + strlen(attr);
|
| }
|
|
|
| -int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
|
| - char cw[MAXWORDUTF8LEN];
|
| - if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
|
| - strcmp(cw, value) == 0) return 1;
|
| +int HunspellImpl::check_xml_par(const char* q,
|
| + const char* attr,
|
| + const char* value) {
|
| + std::string cw = get_xml_par(get_xml_pos(q, attr));
|
| + if (cw == value)
|
| + return 1;
|
| return 0;
|
| }
|
|
|
| -int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
|
| - int n = 0;
|
| - char * p;
|
| - if (!list) return 0;
|
| - for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++;
|
| - if (n == 0) return 0;
|
| - *slst = (char **) malloc(sizeof(char *) * n);
|
| - if (!*slst) return 0;
|
| - for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) {
|
| - int l = strlen(p);
|
| - (*slst)[n] = (char *) malloc(l + 1);
|
| - if (!(*slst)[n]) return n;
|
| - if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
|
| - free((*slst)[n]);
|
| - break;
|
| - }
|
| +std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) {
|
| + std::vector<std::string> slst;
|
| + if (!list)
|
| + return slst;
|
| + const char* p = list;
|
| + for (size_t n = 0; ((p = strstr(p, tag)) != NULL); ++p, ++n) {
|
| + std::string cw = get_xml_par(p + strlen(tag) - 1);
|
| + if (cw.empty()) {
|
| + break;
|
| }
|
| - return n;
|
| + slst.push_back(cw);
|
| + }
|
| + return slst;
|
| }
|
|
|
| -int Hunspell::spellml(char*** slst, const char * word)
|
| -{
|
| - char *q, *q2;
|
| - char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
|
| - q = (char *) strstr(word, "<query");
|
| - if (!q) return 0; // bad XML input
|
| - q2 = strchr(q, '>');
|
| - if (!q2) return 0; // bad XML input
|
| +std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
|
| + std::vector<std::string> slst;
|
| +
|
| + const char* word = in_word.c_str();
|
| +
|
| + const char* q = strstr(word, "<query");
|
| + if (!q)
|
| + return slst; // bad XML input
|
| + const char* q2 = strchr(q, '>');
|
| + if (!q2)
|
| + return slst; // bad XML input
|
| q2 = strstr(q2, "<word");
|
| - if (!q2) return 0; // bad XML input
|
| + if (!q2)
|
| + return slst; // bad XML input
|
| if (check_xml_par(q, "type=", "analyze")) {
|
| - int n = 0, s = 0;
|
| - if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
|
| - if (n == 0) return 0;
|
| - // convert the result to <code><a>ana1</a><a>ana2</a></code> format
|
| - for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
|
| - char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&
|
| - if (!r) return 0;
|
| - strcpy(r, "<code>");
|
| - for (int i = 0; i < n; i++) {
|
| - int l = strlen(r);
|
| - strcpy(r + l, "<a>");
|
| - strcpy(r + l + 3, (*slst)[i]);
|
| - mystrrep(r + l + 3, "\t", " ");
|
| - mystrrep(r + l + 3, "<", "<");
|
| - mystrrep(r + l + 3, "&", "&");
|
| - strcat(r, "</a>");
|
| - free((*slst)[i]);
|
| - }
|
| - strcat(r, "</code>");
|
| - (*slst)[0] = r;
|
| - return 1;
|
| + std::string cw = get_xml_par(strchr(q2, '>'));
|
| + if (!cw.empty())
|
| + slst = analyze(cw);
|
| + if (slst.empty())
|
| + return slst;
|
| + // convert the result to <code><a>ana1</a><a>ana2</a></code> format
|
| + std::string r;
|
| + r.append("<code>");
|
| + for (size_t i = 0; i < slst.size(); ++i) {
|
| + r.append("<a>");
|
| +
|
| + std::string entry(slst[i]);
|
| + mystrrep(entry, "\t", " ");
|
| + mystrrep(entry, "&", "&");
|
| + mystrrep(entry, "<", "<");
|
| + r.append(entry);
|
| +
|
| + r.append("</a>");
|
| + }
|
| + r.append("</code>");
|
| + slst.clear();
|
| + slst.push_back(r);
|
| + return slst;
|
| } else if (check_xml_par(q, "type=", "stem")) {
|
| - if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
|
| + std::string cw = get_xml_par(strchr(q2, '>'));
|
| + if (!cw.empty())
|
| + return stem(cw);
|
| } else if (check_xml_par(q, "type=", "generate")) {
|
| - int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
|
| - if (n == 0) return 0;
|
| - char * q3 = strstr(q2 + 1, "<word");
|
| - if (q3) {
|
| - if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
|
| - return generate(slst, cw, cw2);
|
| - }
|
| - } else {
|
| - if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
|
| - char ** slst2;
|
| - if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) {
|
| - int n2 = generate(slst, cw, slst2, n);
|
| - freelist(&slst2, n);
|
| - return uniqlist(*slst, n2);
|
| - }
|
| - freelist(&slst2, n);
|
| + std::string cw = get_xml_par(strchr(q2, '>'));
|
| + if (cw.empty())
|
| + return slst;
|
| + const char* q3 = strstr(q2 + 1, "<word");
|
| + if (q3) {
|
| + std::string cw2 = get_xml_par(strchr(q3, '>'));
|
| + if (!cw2.empty()) {
|
| + return generate(cw, cw2);
|
| + }
|
| + } else {
|
| + if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
|
| + std::vector<std::string> slst2 = get_xml_list(strchr(q2, '>'), "<a>");
|
| + if (!slst2.empty()) {
|
| + slst = generate(cw, slst2);
|
| + uniqlist(slst);
|
| + return slst;
|
| }
|
| }
|
| + }
|
| }
|
| - return 0;
|
| + return slst;
|
| }
|
|
|
| -
|
| -#ifdef HUNSPELL_EXPERIMENTAL
|
| -// XXX need UTF-8 support
|
| -char * Hunspell::morph_with_correction(const char * word)
|
| -{
|
| - char cw[MAXWORDUTF8LEN];
|
| - char wspace[MAXWORDUTF8LEN];
|
| - if (! pSMgr || maxdic == 0) return NULL;
|
| - int wl = strlen(word);
|
| - if (utf8) {
|
| - if (wl >= MAXWORDUTF8LEN) return NULL;
|
| - } else {
|
| - if (wl >= MAXWORDLEN) return NULL;
|
| +int Hunspell::spell(const char* word, int* info, char** root) {
|
| + std::string sroot;
|
| + bool ret = m_Impl->spell(word, info, root ? &sroot : NULL);
|
| + if (root) {
|
| + if (sroot.empty()) {
|
| + *root = NULL;
|
| + } else {
|
| + *root = mystrdup(sroot.c_str());
|
| + }
|
| }
|
| - int captype = 0;
|
| - int abbv = 0;
|
| - wl = cleanword(cw, word, &captype, &abbv);
|
| - if (wl == 0) return NULL;
|
| -
|
| - char result[MAXLNLEN];
|
| - char * st = NULL;
|
| -
|
| - *result = '\0';
|
| -
|
| -
|
| - switch(captype) {
|
| - case NOCAP: {
|
| - st = pSMgr->suggest_morph_for_spelling_error(cw);
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - if (abbv) {
|
| - memcpy(wspace,cw,wl);
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - }
|
| - break;
|
| - }
|
| - case INITCAP: {
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall(wspace);
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - st = pSMgr->suggest_morph_for_spelling_error(cw);
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - if (abbv) {
|
| - memcpy(wspace,cw,wl);
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - mkallsmall(wspace);
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - mkinitcap(wspace);
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - }
|
| - break;
|
| - }
|
| - case HUHCAP: {
|
| - st = pSMgr->suggest_morph_for_spelling_error(cw);
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - memcpy(wspace,cw,(wl+1));
|
| - mkallsmall(wspace);
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - break;
|
| - }
|
| - case ALLCAP: {
|
| - memcpy(wspace,cw,(wl+1));
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - mkallsmall(wspace);
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - mkinitcap(wspace);
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - if (abbv) {
|
| - memcpy(wspace,cw,(wl+1));
|
| - *(wspace+wl) = '.';
|
| - *(wspace+wl+1) = '\0';
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - mkallsmall(wspace);
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - mkinitcap(wspace);
|
| - st = pSMgr->suggest_morph_for_spelling_error(wspace);
|
| - if (st) {
|
| - if (*result) mystrcat(result, "\n", MAXLNLEN);
|
| - mystrcat(result, st, MAXLNLEN);
|
| - free(st);
|
| - }
|
| - }
|
| - break;
|
| - }
|
| + return ret;
|
| +}
|
| +
|
| +namespace {
|
| + int munge_vector(char*** slst, const std::vector<std::string>& items) {
|
| + if (items.empty()) {
|
| + *slst = NULL;
|
| + return 0;
|
| + } else {
|
| + *slst = (char**)malloc(sizeof(char*) * items.size());
|
| + if (!*slst)
|
| + return 0;
|
| + for (size_t i = 0; i < items.size(); ++i)
|
| + (*slst)[i] = mystrdup(items[i].c_str());
|
| + }
|
| + return items.size();
|
| }
|
| +}
|
|
|
| - if (*result) return mystrdup(result);
|
| - return NULL;
|
| +void Hunspell::free_list(char*** slst, int n) {
|
| + Hunspell_free_list((Hunhandle*)(this), slst, n);
|
| }
|
|
|
| -#endif // END OF HUNSPELL_EXPERIMENTAL CODE
|
| +int Hunspell::suggest(char*** slst, const char* word) {
|
| + return Hunspell_suggest((Hunhandle*)(this), slst, word);
|
| +}
|
|
|
| -Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
|
| -{
|
| +int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
|
| + std::vector<std::string> stems = m_Impl->suffix_suggest(root_word);
|
| + return munge_vector(slst, stems);
|
| +}
|
| +
|
| +char* Hunspell::get_dic_encoding() {
|
| + return &(m_Impl->dic_encoding_vec[0]);
|
| +}
|
| +
|
| +int Hunspell::stem(char*** slst, char** desc, int n) {
|
| + return Hunspell_stem2((Hunhandle*)(this), slst, desc, n);
|
| +}
|
| +
|
| +int Hunspell::stem(char*** slst, const char* word) {
|
| + return Hunspell_stem((Hunhandle*)(this), slst, word);
|
| +}
|
| +
|
| +int Hunspell::analyze(char*** slst, const char* word) {
|
| + return Hunspell_analyze((Hunhandle*)(this), slst, word);
|
| +}
|
| +
|
| +int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
|
| + return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln);
|
| +}
|
| +
|
| +int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
|
| + return Hunspell_generate((Hunhandle*)(this), slst, word, pattern);
|
| +}
|
| +
|
| +Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
|
| #ifdef HUNSPELL_CHROME_CLIENT
|
| return NULL;
|
| #else
|
| - return (Hunhandle*)(new Hunspell(affpath, dpath));
|
| + return (Hunhandle*)(new Hunspell(affpath, dpath));
|
| #endif
|
| }
|
|
|
| -Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
|
| - const char * key)
|
| -{
|
| +Hunhandle* Hunspell_create_key(const char* affpath,
|
| + const char* dpath,
|
| + const char* key) {
|
| #ifdef HUNSPELL_CHROME_CLIENT
|
| return NULL;
|
| #else
|
| - return (Hunhandle*)(new Hunspell(affpath, dpath, key));
|
| + return reinterpret_cast<Hunhandle*>(new Hunspell(affpath, dpath, key));
|
| #endif
|
| }
|
|
|
| -void Hunspell_destroy(Hunhandle *pHunspell)
|
| -{
|
| - delete (Hunspell*)(pHunspell);
|
| +void Hunspell_destroy(Hunhandle* pHunspell) {
|
| + delete reinterpret_cast<Hunspell*>(pHunspell);
|
| }
|
|
|
| -int Hunspell_spell(Hunhandle *pHunspell, const char *word)
|
| -{
|
| - return ((Hunspell*)pHunspell)->spell(word);
|
| +#ifndef HUNSPELL_CHROME_CLIENT
|
| +int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
|
| + return reinterpret_cast<Hunspell*>(pHunspell)->add_dic(dpath);
|
| }
|
| +#endif
|
|
|
| -char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
|
| -{
|
| - return ((Hunspell*)pHunspell)->get_dic_encoding();
|
| +int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
|
| + return reinterpret_cast<Hunspell*>(pHunspell)->spell(std::string(word));
|
| }
|
|
|
| -int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
|
| -{
|
| - return ((Hunspell*)pHunspell)->suggest(slst, word);
|
| +char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
|
| + return reinterpret_cast<Hunspell*>(pHunspell)->get_dic_encoding();
|
| }
|
|
|
| -int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
|
| -{
|
| - return ((Hunspell*)pHunspell)->analyze(slst, word);
|
| +int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
|
| + std::vector<std::string> suggests = reinterpret_cast<Hunspell*>(pHunspell)->suggest(word);
|
| + return munge_vector(slst, suggests);
|
| }
|
|
|
| -int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
|
| -{
|
| - return ((Hunspell*)pHunspell)->stem(slst, word);
|
| +int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
|
| + std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->analyze(word);
|
| + return munge_vector(slst, stems);
|
| }
|
|
|
| -int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
|
| -{
|
| - return ((Hunspell*)pHunspell)->stem(slst, desc, n);
|
| +int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
|
| +
|
| + std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(word);
|
| + return munge_vector(slst, stems);
|
| }
|
|
|
| -int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
|
| - const char * word2)
|
| -{
|
| - return ((Hunspell*)pHunspell)->generate(slst, word, word2);
|
| +int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
|
| + std::vector<std::string> morph;
|
| + for (int i = 0; i < n; ++i)
|
| + morph.push_back(desc[i]);
|
| +
|
| + std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(morph);
|
| + return munge_vector(slst, stems);
|
| }
|
|
|
| -int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
|
| - char** desc, int n)
|
| -{
|
| - return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
|
| +int Hunspell_generate(Hunhandle* pHunspell,
|
| + char*** slst,
|
| + const char* word,
|
| + const char* pattern) {
|
| + std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, pattern);
|
| + return munge_vector(slst, stems);
|
| +}
|
| +
|
| +int Hunspell_generate2(Hunhandle* pHunspell,
|
| + char*** slst,
|
| + const char* word,
|
| + char** desc,
|
| + int n) {
|
| + std::vector<std::string> morph;
|
| + for (int i = 0; i < n; ++i)
|
| + morph.push_back(desc[i]);
|
| +
|
| + std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, morph);
|
| + return munge_vector(slst, stems);
|
| }
|
|
|
| - /* functions for run-time modification of the dictionary */
|
| +/* functions for run-time modification of the dictionary */
|
|
|
| - /* add word to the run-time dictionary */
|
| +/* add word to the run-time dictionary */
|
|
|
| -int Hunspell_add(Hunhandle *pHunspell, const char * word) {
|
| - return ((Hunspell*)pHunspell)->add(word);
|
| +int Hunspell_add(Hunhandle* pHunspell, const char* word) {
|
| + return reinterpret_cast<Hunspell*>(pHunspell)->add(word);
|
| }
|
|
|
| - /* add word to the run-time dictionary with affix flags of
|
| - * the example (a dictionary word): Hunspell will recognize
|
| - * affixed forms of the new word, too.
|
| - */
|
| +/* add word to the run-time dictionary with affix flags of
|
| + * the example (a dictionary word): Hunspell will recognize
|
| + * affixed forms of the new word, too.
|
| + */
|
|
|
| -int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
|
| - const char * example) {
|
| - return ((Hunspell*)pHunspell)->add_with_affix(word, example);
|
| +int Hunspell_add_with_affix(Hunhandle* pHunspell,
|
| + const char* word,
|
| + const char* example) {
|
| + return reinterpret_cast<Hunspell*>(pHunspell)->add_with_affix(word, example);
|
| }
|
|
|
| - /* remove word from the run-time dictionary */
|
| +/* remove word from the run-time dictionary */
|
|
|
| -int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
|
| - return ((Hunspell*)pHunspell)->remove(word);
|
| +int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
|
| + return reinterpret_cast<Hunspell*>(pHunspell)->remove(word);
|
| }
|
|
|
| -void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
|
| - freelist(slst, n);
|
| +void Hunspell_free_list(Hunhandle*, char*** list, int n) {
|
| + if (list && *list) {
|
| + for (int i = 0; i < n; i++)
|
| + free((*list)[i]);
|
| + free(*list);
|
| + *list = NULL;
|
| + }
|
| +}
|
| +
|
| +std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
|
| + return m_Impl->suffix_suggest(root_word);
|
| +}
|
| +
|
| +std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
|
| + std::vector<std::string> slst;
|
| + struct hentry* he = NULL;
|
| + int len;
|
| + std::string w2;
|
| + const char* word;
|
| + const char* ignoredchars = pAMgr->get_ignore();
|
| + if (ignoredchars != NULL) {
|
| + w2.assign(root_word);
|
| + if (utf8) {
|
| + const std::vector<w_char>& ignoredchars_utf16 =
|
| + pAMgr->get_ignore_utf16();
|
| + remove_ignored_chars_utf(w2, ignoredchars_utf16);
|
| + } else {
|
| + remove_ignored_chars(w2, ignoredchars);
|
| + }
|
| + word = w2.c_str();
|
| + } else
|
| + word = root_word.c_str();
|
| +
|
| + len = strlen(word);
|
| +
|
| + if (!len)
|
| + return slst;
|
| +
|
| + for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
|
| + he = m_HMgrs[i]->lookup(word);
|
| + }
|
| + if (he) {
|
| + slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
|
| + }
|
| + return slst;
|
| }
|
|
|