3rdparty moved to inputcontext/3rdparty.
The 3rdparty files are now compiled as part of the corresponding input method, so that the project files can be written without using platform-specific linker flags.
This commit is contained in:
parent
9bf518145d
commit
59b516a118
2
3rdparty/3rdparty.pro
vendored
2
3rdparty/3rdparty.pro
vendored
@ -1,2 +0,0 @@
|
||||
# Aggregate project: only descends into the sub-projects listed below.
TEMPLATE = subdirs

SUBDIRS = \
    hunspell \
    pinyin
|
7
3rdparty/hunspell/hunspell.pri
vendored
7
3rdparty/hunspell/hunspell.pri
vendored
@ -1,7 +0,0 @@
|
||||
# Shared helper functions (addModule is presumably defined there - confirm).
include( $${GLOBAL_PRI_FOLDER}/functions.pri )

# Both lists are intentionally empty for this module.
DEPS =
INCS =

# Register this .pri file as module "inputcontext_hunspell", version 0.0.0.
REGISTERED_MODULES += $$addModule( inputcontext_hunspell, 0.0.0, $${_FILE_}, $${INCS}, $${DEPS} )
|
49
3rdparty/hunspell/hunspell.pro
vendored
49
3rdparty/hunspell/hunspell.pro
vendored
@ -1,49 +0,0 @@
|
||||
# Builds the hunspell sources listed below as a static library named
# qskinputcontext_hunspell.

# Output locations, derived from this project's build directory.
QSK_OUT_ROOT   = $${OUT_PWD}/../..
QSK_PLUGIN_DIR = $${QSK_OUT_ROOT}/plugins

TEMPLATE = lib
TARGET   = qskinputcontext_hunspell

# warn_off: compiler warnings are switched off for this target.
CONFIG += static precompile_header warn_off

MODULE_INCLUDEPATH = $${PWD}/src

DESTDIR = $${QSK_PLUGIN_DIR}/platforminputcontexts

# NOTE(review): DESTDIR_LIBS is not assigned anywhere in this file;
# presumably it is provided by a shared configuration - confirm.
QMAKE_RPATHDIR *= $${DESTDIR_LIBS}

SOURCES += \
    src/hunspell/affentry.cxx \
    src/hunspell/affixmgr.cxx \
    src/hunspell/csutil.cxx \
    src/hunspell/filemgr.cxx \
    src/hunspell/hashmgr.cxx \
    src/hunspell/hunspell.cxx \
    src/hunspell/hunzip.cxx \
    src/hunspell/phonet.cxx \
    src/hunspell/replist.cxx \
    src/hunspell/suggestmgr.cxx

HEADERS += \
    src/hunspell/affentry.hxx \
    src/hunspell/affixmgr.hxx \
    src/hunspell/atypes.hxx \
    src/hunspell/baseaffix.hxx \
    src/hunspell/csutil.hxx \
    src/hunspell/filemgr.hxx \
    src/hunspell/hashmgr.hxx \
    src/hunspell/htypes.hxx \
    src/hunspell/hunspell.h \
    src/hunspell/hunspell.hxx \
    src/hunspell/hunvisapi.h \
    src/hunspell/hunzip.hxx \
    src/hunspell/langnum.hxx \
    src/hunspell/phonet.hxx \
    src/hunspell/replist.hxx \
    src/hunspell/suggestmgr.hxx \
    src/hunspell/w_char.hxx

# Listed for the IDE only; not compiled as part of SOURCES.
OTHER_FILES += \
    src/hunspell/license.hunspell \
    src/hunspell/license.myspell \
    src/hunspell/utf_info.cxx
|
983
3rdparty/hunspell/src/hunspell/affentry.cxx
vendored
983
3rdparty/hunspell/src/hunspell/affentry.cxx
vendored
@ -1,983 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "affentry.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
// Frees the heap buffers referenced by this entry:
//  - c.l.conds2, when the aeLONGCOND option bit is set;
//  - morphcode, unless the aeALIASM option bit is set;
//  - contclass, unless the aeALIASF option bit is set.
// (Presumably the alias bits mean the buffer belongs to an alias table
// elsewhere - confirm against the affix manager.)
AffEntry::~AffEntry() {
  const bool has_long_cond = (opts & aeLONGCOND) != 0;
  const bool morph_is_alias = (opts & aeALIASM) != 0;
  const bool cont_is_alias = (opts & aeALIASF) != 0;

  if (has_long_cond) {
    free(c.l.conds2);
  }
  if (morphcode && !morph_is_alias) {
    free(morphcode);
  }
  if (contclass && !cont_is_alias) {
    free(contclass);
  }
}
|
||||
|
||||
// Creates a prefix entry bound to the affix manager `pmgr`.
// The four link pointers (next, nexteq, nextne, flgnxt) start out as NULL.
PfxEntry::PfxEntry(AffixMgr* pmgr)
    : pmyMgr(pmgr), next(NULL), nexteq(NULL), nextne(NULL), flgnxt(NULL) {}
|
||||
|
||||
// add prefix to this word assuming conditions hold
|
||||
std::string PfxEntry::add(const char* word, size_t len) {
|
||||
std::string result;
|
||||
if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(len >= numconds) && test_condition(word) &&
|
||||
(!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0))) {
|
||||
/* we have a match so add prefix */
|
||||
result.assign(appnd);
|
||||
result.append(word + strip.size());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
inline char* PfxEntry::nextchar(char* p) {
|
||||
if (p) {
|
||||
p++;
|
||||
if (opts & aeLONGCOND) {
|
||||
// jump to the 2nd part of the condition
|
||||
if (p == c.conds + MAXCONDLEN_1)
|
||||
return c.l.conds2;
|
||||
// end of the MAXCONDLEN length condition
|
||||
} else if (p == c.conds + MAXCONDLEN)
|
||||
return NULL;
|
||||
return *p ? p : NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline int PfxEntry::test_condition(const char* st) {
|
||||
const char* pos = NULL; // group with pos input position
|
||||
bool neg = false; // complementer
|
||||
bool ingroup = false; // character in the group
|
||||
if (numconds == 0)
|
||||
return 1;
|
||||
char* p = c.conds;
|
||||
while (1) {
|
||||
switch (*p) {
|
||||
case '\0':
|
||||
return 1;
|
||||
case '[': {
|
||||
neg = false;
|
||||
ingroup = false;
|
||||
p = nextchar(p);
|
||||
pos = st;
|
||||
break;
|
||||
}
|
||||
case '^': {
|
||||
p = nextchar(p);
|
||||
neg = true;
|
||||
break;
|
||||
}
|
||||
case ']': {
|
||||
if ((neg && ingroup) || (!neg && !ingroup))
|
||||
return 0;
|
||||
pos = NULL;
|
||||
p = nextchar(p);
|
||||
// skip the next character
|
||||
if (!ingroup && *st)
|
||||
for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
|
||||
;
|
||||
if (*st == '\0' && p)
|
||||
return 0; // word <= condition
|
||||
break;
|
||||
}
|
||||
case '.':
|
||||
if (!pos) { // dots are not metacharacters in groups: [.]
|
||||
p = nextchar(p);
|
||||
// skip the next character
|
||||
for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
|
||||
;
|
||||
if (*st == '\0' && p)
|
||||
return 0; // word <= condition
|
||||
break;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
default: {
|
||||
if (*st == *p) {
|
||||
st++;
|
||||
p = nextchar(p);
|
||||
if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte
|
||||
while (p && (*p & 0xc0) == 0x80) { // character
|
||||
if (*p != *st) {
|
||||
if (!pos)
|
||||
return 0;
|
||||
st = pos;
|
||||
break;
|
||||
}
|
||||
p = nextchar(p);
|
||||
st++;
|
||||
}
|
||||
if (pos && st != pos) {
|
||||
ingroup = true;
|
||||
while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
|
||||
}
|
||||
}
|
||||
} else if (pos) {
|
||||
ingroup = true;
|
||||
while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
|
||||
}
|
||||
}
|
||||
} else if (pos) { // group
|
||||
p = nextchar(p);
|
||||
} else
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (!p)
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// check if this prefix entry matches
|
||||
struct hentry* PfxEntry::checkword(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag) {
|
||||
struct hentry* he; // hash entry of root word or NULL
|
||||
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
|
||||
int tmpl = len - appnd.size(); // length of tmpword
|
||||
|
||||
if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
|
||||
// generate new root word by removing prefix and adding
|
||||
// back any characters that would have been stripped
|
||||
|
||||
std::string tmpword(strip);
|
||||
tmpword.append(word + appnd.size());
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
// are met. Please see the appendix at the end of
|
||||
// this file for more info on exactly what is being
|
||||
// tested
|
||||
|
||||
// if all conditions are met then check if resulting
|
||||
// root word in the dictionary
|
||||
|
||||
if (test_condition(tmpword.c_str())) {
|
||||
tmpl += strip.size();
|
||||
if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
|
||||
do {
|
||||
if (TESTAFF(he->astr, aflag, he->alen) &&
|
||||
// forbid single prefixes with needaffix flag
|
||||
!TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
|
||||
// needflag
|
||||
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
|
||||
(contclass && TESTAFF(contclass, needflag, contclasslen))))
|
||||
return he;
|
||||
he = he->next_homonym; // check homonyms
|
||||
} while (he);
|
||||
}
|
||||
|
||||
// prefix matched but no root word was found
|
||||
// if aeXPRODUCT is allowed, try again but now
|
||||
// ross checked combined with a suffix
|
||||
|
||||
// if ((opts & aeXPRODUCT) && in_compound) {
|
||||
if ((opts & aeXPRODUCT)) {
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this,
|
||||
FLAG_NULL, needflag, in_compound);
|
||||
if (he)
|
||||
return he;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// check if this prefix entry matches
|
||||
struct hentry* PfxEntry::check_twosfx(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag) {
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
|
||||
int tmpl = len - appnd.size(); // length of tmpword
|
||||
|
||||
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(tmpl + strip.size() >= numconds)) {
|
||||
// generate new root word by removing prefix and adding
|
||||
// back any characters that would have been stripped
|
||||
|
||||
std::string tmpword(strip);
|
||||
tmpword.append(word + appnd.size());
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
// are met. Please see the appendix at the end of
|
||||
// this file for more info on exactly what is being
|
||||
// tested
|
||||
|
||||
// if all conditions are met then check if resulting
|
||||
// root word in the dictionary
|
||||
|
||||
if (test_condition(tmpword.c_str())) {
|
||||
tmpl += strip.size();
|
||||
|
||||
// prefix matched but no root word was found
|
||||
// if aeXPRODUCT is allowed, try again but now
|
||||
// cross checked combined with a suffix
|
||||
|
||||
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
|
||||
// hash entry of root word or NULL
|
||||
struct hentry* he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
|
||||
needflag);
|
||||
if (he)
|
||||
return he;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// check if this prefix entry matches
|
||||
std::string PfxEntry::check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag) {
|
||||
std::string result;
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
int tmpl = len - appnd.size(); // length of tmpword
|
||||
|
||||
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(tmpl + strip.size() >= numconds)) {
|
||||
// generate new root word by removing prefix and adding
|
||||
// back any characters that would have been stripped
|
||||
|
||||
std::string tmpword(strip);
|
||||
tmpword.append(word + appnd.size());
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
// are met. Please see the appendix at the end of
|
||||
// this file for more info on exactly what is being
|
||||
// tested
|
||||
|
||||
// if all conditions are met then check if resulting
|
||||
// root word in the dictionary
|
||||
|
||||
if (test_condition(tmpword.c_str())) {
|
||||
tmpl += strip.size();
|
||||
|
||||
// prefix matched but no root word was found
|
||||
// if aeXPRODUCT is allowed, try again but now
|
||||
// ross checked combined with a suffix
|
||||
|
||||
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
|
||||
result = pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
|
||||
aeXPRODUCT,
|
||||
this, needflag);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// check if this prefix entry matches
|
||||
std::string PfxEntry::check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag) {
|
||||
std::string result;
|
||||
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
|
||||
int tmpl = len - appnd.size(); // length of tmpword
|
||||
|
||||
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(tmpl + strip.size() >= numconds)) {
|
||||
// generate new root word by removing prefix and adding
|
||||
// back any characters that would have been stripped
|
||||
|
||||
std::string tmpword(strip);
|
||||
tmpword.append(word + appnd.size());
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
// are met. Please see the appendix at the end of
|
||||
// this file for more info on exactly what is being
|
||||
// tested
|
||||
|
||||
// if all conditions are met then check if resulting
|
||||
// root word in the dictionary
|
||||
|
||||
if (test_condition(tmpword.c_str())) {
|
||||
tmpl += strip.size();
|
||||
struct hentry* he; // hash entry of root word or NULL
|
||||
if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
|
||||
do {
|
||||
if (TESTAFF(he->astr, aflag, he->alen) &&
|
||||
// forbid single prefixes with needaffix flag
|
||||
!TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
|
||||
// needflag
|
||||
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
|
||||
(contclass && TESTAFF(contclass, needflag, contclasslen)))) {
|
||||
if (morphcode) {
|
||||
result.append(" ");
|
||||
result.append(morphcode);
|
||||
} else
|
||||
result.append(getKey());
|
||||
if (!HENTRY_FIND(he, MORPH_STEM)) {
|
||||
result.append(" ");
|
||||
result.append(MORPH_STEM);
|
||||
result.append(HENTRY_WORD(he));
|
||||
}
|
||||
// store the pointer of the hash entry
|
||||
if (HENTRY_DATA(he)) {
|
||||
result.append(" ");
|
||||
result.append(HENTRY_DATA2(he));
|
||||
} else {
|
||||
// return with debug information
|
||||
char* flag = pmyMgr->encode_flag(getFlag());
|
||||
result.append(" ");
|
||||
result.append(MORPH_FLAG);
|
||||
result.append(flag);
|
||||
free(flag);
|
||||
}
|
||||
result.append("\n");
|
||||
}
|
||||
he = he->next_homonym;
|
||||
} while (he);
|
||||
}
|
||||
|
||||
// prefix matched but no root word was found
|
||||
// if aeXPRODUCT is allowed, try again but now
|
||||
// ross checked combined with a suffix
|
||||
|
||||
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
|
||||
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
|
||||
FLAG_NULL, needflag);
|
||||
if (!st.empty()) {
|
||||
result.append(st);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Creates a suffix entry bound to the affix manager `pmgr`.
// All link pointers (next, nexteq, nextne, flgnxt, l_morph, r_morph,
// eq_morph) start out as NULL.
SfxEntry::SfxEntry(AffixMgr* pmgr)
    : pmyMgr(pmgr),
      next(NULL), nexteq(NULL), nextne(NULL), flgnxt(NULL),
      l_morph(NULL), r_morph(NULL), eq_morph(NULL) {}
|
||||
|
||||
// add suffix to this word assuming conditions hold
|
||||
std::string SfxEntry::add(const char* word, size_t len) {
|
||||
std::string result;
|
||||
/* make sure all conditions match */
|
||||
if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(len >= numconds) && test_condition(word + len, word) &&
|
||||
(!strip.size() ||
|
||||
(strcmp(word + len - strip.size(), strip.c_str()) == 0))) {
|
||||
result.assign(word);
|
||||
/* we have a match so add suffix */
|
||||
result.replace(len - strip.size(), std::string::npos, appnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
inline char* SfxEntry::nextchar(char* p) {
|
||||
if (p) {
|
||||
p++;
|
||||
if (opts & aeLONGCOND) {
|
||||
// jump to the 2nd part of the condition
|
||||
if (p == c.l.conds1 + MAXCONDLEN_1)
|
||||
return c.l.conds2;
|
||||
// end of the MAXCONDLEN length condition
|
||||
} else if (p == c.conds + MAXCONDLEN)
|
||||
return NULL;
|
||||
return *p ? p : NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline int SfxEntry::test_condition(const char* st, const char* beg) {
|
||||
const char* pos = NULL; // group with pos input position
|
||||
bool neg = false; // complementer
|
||||
bool ingroup = false; // character in the group
|
||||
if (numconds == 0)
|
||||
return 1;
|
||||
char* p = c.conds;
|
||||
st--;
|
||||
int i = 1;
|
||||
while (1) {
|
||||
switch (*p) {
|
||||
case '\0':
|
||||
return 1;
|
||||
case '[':
|
||||
p = nextchar(p);
|
||||
pos = st;
|
||||
break;
|
||||
case '^':
|
||||
p = nextchar(p);
|
||||
neg = true;
|
||||
break;
|
||||
case ']':
|
||||
if (!neg && !ingroup)
|
||||
return 0;
|
||||
i++;
|
||||
// skip the next character
|
||||
if (!ingroup) {
|
||||
for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--)
|
||||
;
|
||||
st--;
|
||||
}
|
||||
pos = NULL;
|
||||
neg = false;
|
||||
ingroup = false;
|
||||
p = nextchar(p);
|
||||
if (st < beg && p)
|
||||
return 0; // word <= condition
|
||||
break;
|
||||
case '.':
|
||||
if (!pos) {
|
||||
// dots are not metacharacters in groups: [.]
|
||||
p = nextchar(p);
|
||||
// skip the next character
|
||||
for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80;
|
||||
st--)
|
||||
;
|
||||
if (st < beg) { // word <= condition
|
||||
if (p)
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character
|
||||
st--;
|
||||
if (st < beg) { // word <= condition
|
||||
if (p)
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
default: {
|
||||
if (*st == *p) {
|
||||
p = nextchar(p);
|
||||
if ((opts & aeUTF8) && (*st & 0x80)) {
|
||||
st--;
|
||||
while (p && (st >= beg)) {
|
||||
if (*p != *st) {
|
||||
if (!pos)
|
||||
return 0;
|
||||
st = pos;
|
||||
break;
|
||||
}
|
||||
// first byte of the UTF-8 multibyte character
|
||||
if ((*p & 0xc0) != 0x80)
|
||||
break;
|
||||
p = nextchar(p);
|
||||
st--;
|
||||
}
|
||||
if (pos && st != pos) {
|
||||
if (neg)
|
||||
return 0;
|
||||
else if (i == numconds)
|
||||
return 1;
|
||||
ingroup = true;
|
||||
while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
|
||||
}
|
||||
st--;
|
||||
}
|
||||
if (p && *p != ']')
|
||||
p = nextchar(p);
|
||||
} else if (pos) {
|
||||
if (neg)
|
||||
return 0;
|
||||
else if (i == numconds)
|
||||
return 1;
|
||||
ingroup = true;
|
||||
while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
|
||||
}
|
||||
// if (p && *p != ']') p = nextchar(p);
|
||||
st--;
|
||||
}
|
||||
if (!pos) {
|
||||
i++;
|
||||
st--;
|
||||
}
|
||||
if (st < beg && p && *p != ']')
|
||||
return 0; // word <= condition
|
||||
} else if (pos) { // group
|
||||
p = nextchar(p);
|
||||
} else
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (!p)
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// see if this suffix is present in the word
|
||||
struct hentry* SfxEntry::checkword(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass,
|
||||
const FLAG needflag,
|
||||
const FLAG badflag) {
|
||||
struct hentry* he; // hash entry pointer
|
||||
PfxEntry* ep = ppfx;
|
||||
|
||||
// if this suffix is being cross checked with a prefix
|
||||
// but it does not support cross products skip it
|
||||
|
||||
if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
|
||||
return NULL;
|
||||
|
||||
// upon entry suffix is 0 length or already matches the end of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
|
||||
int tmpl = len - appnd.size(); // length of tmpword
|
||||
// the second condition is not enough for UTF-8 strings
|
||||
// it checked in test_condition()
|
||||
|
||||
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(tmpl + strip.size() >= numconds)) {
|
||||
// generate new root word by removing suffix and adding
|
||||
// back any characters that would have been stripped or
|
||||
// or null terminating the shorter string
|
||||
|
||||
std::string tmpstring(word, tmpl);
|
||||
if (strip.size()) {
|
||||
tmpstring.append(strip);
|
||||
}
|
||||
|
||||
const char* tmpword = tmpstring.c_str();
|
||||
const char* endword = tmpword + tmpstring.size();
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
// are met. Please see the appendix at the end of
|
||||
// this file for more info on exactly what is being
|
||||
// tested
|
||||
|
||||
// if all conditions are met then check if resulting
|
||||
// root word in the dictionary
|
||||
|
||||
if (test_condition(endword, tmpword)) {
|
||||
#ifdef SZOSZABLYA_POSSIBLE_ROOTS
|
||||
fprintf(stdout, "%s %s %c\n", word, tmpword, aflag);
|
||||
#endif
|
||||
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
|
||||
do {
|
||||
// check conditional suffix (enabled by prefix)
|
||||
if ((TESTAFF(he->astr, aflag, he->alen) ||
|
||||
(ep && ep->getCont() &&
|
||||
TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
|
||||
(((optflags & aeXPRODUCT) == 0) ||
|
||||
(ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
|
||||
// enabled by prefix
|
||||
((contclass) &&
|
||||
(ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))) &&
|
||||
// handle cont. class
|
||||
((!cclass) ||
|
||||
((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
|
||||
// check only in compound homonyms (bad flags)
|
||||
(!badflag || !TESTAFF(he->astr, badflag, he->alen)) &&
|
||||
// handle required flag
|
||||
((!needflag) ||
|
||||
(TESTAFF(he->astr, needflag, he->alen) ||
|
||||
((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
|
||||
return he;
|
||||
he = he->next_homonym; // check homonyms
|
||||
} while (he);
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// see if two-level suffix is present in the word
|
||||
struct hentry* SfxEntry::check_twosfx(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag) {
|
||||
PfxEntry* ep = ppfx;
|
||||
|
||||
// if this suffix is being cross checked with a prefix
|
||||
// but it does not support cross products skip it
|
||||
|
||||
if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
|
||||
return NULL;
|
||||
|
||||
// upon entry suffix is 0 length or already matches the end of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
|
||||
int tmpl = len - appnd.size(); // length of tmpword
|
||||
|
||||
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(tmpl + strip.size() >= numconds)) {
|
||||
// generate new root word by removing suffix and adding
|
||||
// back any characters that would have been stripped or
|
||||
// or null terminating the shorter string
|
||||
|
||||
std::string tmpword(word);
|
||||
tmpword.resize(tmpl);
|
||||
tmpword.append(strip);
|
||||
tmpl += strip.size();
|
||||
|
||||
const char* beg = tmpword.c_str();
|
||||
const char* end = beg + tmpl;
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
// are met. Please see the appendix at the end of
|
||||
// this file for more info on exactly what is being
|
||||
// tested
|
||||
|
||||
// if all conditions are met then recall suffix_check
|
||||
|
||||
if (test_condition(end, beg)) {
|
||||
struct hentry* he; // hash entry pointer
|
||||
if (ppfx) {
|
||||
// handle conditional suffix
|
||||
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
|
||||
(FLAG)aflag, needflag, IN_CPD_NOT);
|
||||
else
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx,
|
||||
(FLAG)aflag, needflag, IN_CPD_NOT);
|
||||
} else {
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
|
||||
(FLAG)aflag, needflag, IN_CPD_NOT);
|
||||
}
|
||||
if (he)
|
||||
return he;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// see if two-level suffix is present in the word
|
||||
std::string SfxEntry::check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag) {
|
||||
PfxEntry* ep = ppfx;
|
||||
|
||||
std::string result;
|
||||
|
||||
// if this suffix is being cross checked with a prefix
|
||||
// but it does not support cross products skip it
|
||||
|
||||
if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
|
||||
return result;
|
||||
|
||||
// upon entry suffix is 0 length or already matches the end of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
|
||||
int tmpl = len - appnd.size(); // length of tmpword
|
||||
|
||||
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(tmpl + strip.size() >= numconds)) {
|
||||
// generate new root word by removing suffix and adding
|
||||
// back any characters that would have been stripped or
|
||||
// or null terminating the shorter string
|
||||
|
||||
std::string tmpword(word);
|
||||
tmpword.resize(tmpl);
|
||||
tmpword.append(strip);
|
||||
tmpl += strip.size();
|
||||
|
||||
const char* beg = tmpword.c_str();
|
||||
const char* end = beg + tmpl;
|
||||
|
||||
// now make sure all of the conditions on characters
|
||||
// are met. Please see the appendix at the end of
|
||||
// this file for more info on exactly what is being
|
||||
// tested
|
||||
|
||||
// if all conditions are met then recall suffix_check
|
||||
|
||||
if (test_condition(end, beg)) {
|
||||
if (ppfx) {
|
||||
// handle conditional suffix
|
||||
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
|
||||
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
|
||||
needflag);
|
||||
if (!st.empty()) {
|
||||
if (ppfx->getMorph()) {
|
||||
result.append(ppfx->getMorph());
|
||||
result.append(" ");
|
||||
}
|
||||
result.append(st);
|
||||
mychomp(result);
|
||||
}
|
||||
} else {
|
||||
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
|
||||
needflag);
|
||||
if (!st.empty()) {
|
||||
result.append(st);
|
||||
mychomp(result);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
|
||||
if (!st.empty()) {
|
||||
result.append(st);
|
||||
mychomp(result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// get next homonym with same affix
|
||||
struct hentry* SfxEntry::get_next_homonym(struct hentry* he,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass,
|
||||
const FLAG needflag) {
|
||||
PfxEntry* ep = ppfx;
|
||||
FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
|
||||
|
||||
while (he->next_homonym) {
|
||||
he = he->next_homonym;
|
||||
if ((TESTAFF(he->astr, aflag, he->alen) ||
|
||||
(ep && ep->getCont() &&
|
||||
TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
|
||||
((optflags & aeXPRODUCT) == 0 || TESTAFF(he->astr, eFlag, he->alen) ||
|
||||
// handle conditional suffix
|
||||
((contclass) && TESTAFF(contclass, eFlag, contclasslen))) &&
|
||||
// handle cont. class
|
||||
((!cclass) ||
|
||||
((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
|
||||
// handle required flag
|
||||
((!needflag) ||
|
||||
(TESTAFF(he->astr, needflag, he->alen) ||
|
||||
((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
|
||||
return he;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Copies the suffix text (appnd) into rappnd and then passes rappnd to
// reverseword(). Presumably reverseword() reverses its argument in place,
// leaving rappnd as the reversed suffix - confirm against its definition.
void SfxEntry::initReverseWord() {
|
||||
rappnd = appnd;
|
||||
reverseword(rappnd);
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
Appendix: Understanding Affix Code
|
||||
|
||||
|
||||
An affix is either a prefix or a suffix attached to root words to make
|
||||
other words.
|
||||
|
||||
Basically a Prefix or a Suffix is set of AffEntry objects
|
||||
which store information about the prefix or suffix along
|
||||
with supporting routines to check if a word has a particular
|
||||
prefix or suffix or a combination.
|
||||
|
||||
The structure affentry is defined as follows:
|
||||
|
||||
struct affentry
|
||||
{
|
||||
unsigned short aflag; // ID used to represent the affix
|
||||
std::string strip; // string to strip before adding affix
|
||||
std::string appnd; // the affix string to add
|
||||
char numconds; // the number of conditions that must be met
|
||||
char opts; // flag: aeXPRODUCT- combine both prefix and suffix
|
||||
char conds[SETSIZE]; // array which encodes the conditions to be met
|
||||
};
|
||||
|
||||
|
||||
Here is a suffix borrowed from the en_US.aff file. This file
|
||||
is whitespace delimited.
|
||||
|
||||
SFX D Y 4
|
||||
SFX D 0 d e
|
||||
SFX D y ied [^aeiou]y
|
||||
SFX D 0 ed [^ey]
|
||||
SFX D 0 ed [aeiou]y
|
||||
|
||||
This information can be interpreted as follows:
|
||||
|
||||
The first line has 4 fields
|
||||
|
||||
Field
|
||||
-----
|
||||
1 SFX - indicates this is a suffix
|
||||
2 D - is the name of the character flag which represents this suffix
|
||||
3 Y - indicates it can be combined with prefixes (cross product)
|
||||
4 4 - indicates that sequence of 4 affentry structures are needed to
|
||||
properly store the affix information
|
||||
|
||||
The remaining lines describe the unique information for the 4 SfxEntry
|
||||
objects that make up this affix. Each line can be interpreted
|
||||
as follows: (note fields 1 and 2 are as a check against line 1 info)
|
||||
|
||||
Field
|
||||
-----
|
||||
1 SFX - indicates this is a suffix
|
||||
2 D - is the name of the character flag for this affix
|
||||
3 y - the string of chars to strip off before adding affix
|
||||
(a 0 here indicates the NULL string)
|
||||
4 ied - the string of affix characters to add
|
||||
5 [^aeiou]y - the conditions which must be met before the affix
|
||||
can be applied
|
||||
|
||||
Field 5 is interesting. Since this is a suffix, field 5 tells us that
|
||||
there are 2 conditions that must be met. The first condition is that
|
||||
the next to the last character in the word must *NOT* be any of the
|
||||
following "a", "e", "i", "o" or "u". The second condition is that
|
||||
the last character of the word must be "y".
|
||||
|
||||
So how can we encode this information concisely and be able to
|
||||
test for both conditions in a fast manner? The answer is found
|
||||
by studying the wonderful ispell code of Geoff Kuenning, et al.
|
||||
(now available under a normal BSD license).
|
||||
|
||||
If we set up a conds array of 256 bytes indexed (0 to 255) and access it
|
||||
using a character (cast to an unsigned char) of a string, we have 8 bits
|
||||
of information we can store about that character. Specifically we
|
||||
could use each bit to say if that character is allowed in any of the
|
||||
last (or first for prefixes) 8 characters of the word.
|
||||
|
||||
Basically, each character at one end of the word (up to the number
|
||||
of conditions) is used to index into the conds array and the resulting
|
||||
value found there says whether that character is valid for a
|
||||
specific character position in the word.
|
||||
|
||||
For prefixes, it does this by setting bit 0 if that char is valid
|
||||
in the first position, bit 1 if valid in the second position, and so on.
|
||||
|
||||
If a bit is not set, then that char is not valid for that position in the
|
||||
word.
|
||||
|
||||
If working with suffixes bit 0 is used for the character closest
|
||||
to the front, bit 1 for the next character towards the end, ...,
|
||||
with bit numconds-1 representing the last char at the end of the string.
|
||||
|
||||
Note: since entries in the conds[] are 8 bits, only 8 conditions
|
||||
(read that only 8 character positions) can be examined at one
|
||||
end of a word (the beginning for prefixes and the end for suffixes).
|
||||
|
||||
So to make this clearer, let's encode the conds array values for the
|
||||
first two affentries for the suffix D described earlier.
|
||||
|
||||
|
||||
For the first affentry:
|
||||
numconds = 1 (only examine the last character)
|
||||
|
||||
conds['e'] = (1 << 0) (the word must end in an E)
|
||||
all others are all 0
|
||||
|
||||
For the second affentry:
|
||||
numconds = 2 (only examine the last two characters)
|
||||
|
||||
conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
|
||||
where X is all characters *but* a, e, i, o, or u
|
||||
|
||||
|
||||
conds['y'] = (1 << 1) (the last char must be a y)
|
||||
all other bits for all other entries in the conds array are zero
|
||||
|
||||
#endif
|
223
3rdparty/hunspell/src/hunspell/affentry.hxx
vendored
223
3rdparty/hunspell/src/hunspell/affentry.hxx
vendored
@ -1,223 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef AFFIX_HXX_
|
||||
#define AFFIX_HXX_
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "baseaffix.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
|
||||
/* A Prefix Entry */
|
||||
|
||||
class PfxEntry : public AffEntry {
|
||||
private:
|
||||
PfxEntry(const PfxEntry&);
|
||||
PfxEntry& operator=(const PfxEntry&);
|
||||
|
||||
private:
|
||||
AffixMgr* pmyMgr;
|
||||
|
||||
PfxEntry* next;
|
||||
PfxEntry* nexteq;
|
||||
PfxEntry* nextne;
|
||||
PfxEntry* flgnxt;
|
||||
|
||||
public:
|
||||
explicit PfxEntry(AffixMgr* pmgr);
|
||||
|
||||
bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry* checkword(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
struct hentry* check_twosfx(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
FLAG getFlag() { return aflag; }
|
||||
const char* getKey() { return appnd.c_str(); }
|
||||
std::string add(const char* word, size_t len);
|
||||
|
||||
inline short getKeyLen() { return appnd.size(); }
|
||||
|
||||
inline const char* getMorph() { return morphcode; }
|
||||
|
||||
inline const unsigned short* getCont() { return contclass; }
|
||||
inline short getContLen() { return contclasslen; }
|
||||
|
||||
inline PfxEntry* getNext() { return next; }
|
||||
inline PfxEntry* getNextNE() { return nextne; }
|
||||
inline PfxEntry* getNextEQ() { return nexteq; }
|
||||
inline PfxEntry* getFlgNxt() { return flgnxt; }
|
||||
|
||||
inline void setNext(PfxEntry* ptr) { next = ptr; }
|
||||
inline void setNextNE(PfxEntry* ptr) { nextne = ptr; }
|
||||
inline void setNextEQ(PfxEntry* ptr) { nexteq = ptr; }
|
||||
inline void setFlgNxt(PfxEntry* ptr) { flgnxt = ptr; }
|
||||
|
||||
inline char* nextchar(char* p);
|
||||
inline int test_condition(const char* st);
|
||||
};
|
||||
|
||||
/* A Suffix Entry */
|
||||
|
||||
class SfxEntry : public AffEntry {
|
||||
private:
|
||||
SfxEntry(const SfxEntry&);
|
||||
SfxEntry& operator=(const SfxEntry&);
|
||||
|
||||
private:
|
||||
AffixMgr* pmyMgr;
|
||||
std::string rappnd;
|
||||
|
||||
SfxEntry* next;
|
||||
SfxEntry* nexteq;
|
||||
SfxEntry* nextne;
|
||||
SfxEntry* flgnxt;
|
||||
|
||||
SfxEntry* l_morph;
|
||||
SfxEntry* r_morph;
|
||||
SfxEntry* eq_morph;
|
||||
|
||||
public:
|
||||
explicit SfxEntry(AffixMgr* pmgr);
|
||||
|
||||
bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry* checkword(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass,
|
||||
const FLAG needflag,
|
||||
const FLAG badflag);
|
||||
|
||||
struct hentry* check_twosfx(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
struct hentry* get_next_homonym(struct hentry* he);
|
||||
struct hentry* get_next_homonym(struct hentry* word,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass,
|
||||
const FLAG needflag);
|
||||
|
||||
FLAG getFlag() { return aflag; }
|
||||
const char* getKey() { return rappnd.c_str(); }
|
||||
std::string add(const char* word, size_t len);
|
||||
|
||||
inline const char* getMorph() { return morphcode; }
|
||||
|
||||
inline const unsigned short* getCont() { return contclass; }
|
||||
inline short getContLen() { return contclasslen; }
|
||||
inline const char* getAffix() { return appnd.c_str(); }
|
||||
|
||||
inline short getKeyLen() { return appnd.size(); }
|
||||
|
||||
inline SfxEntry* getNext() { return next; }
|
||||
inline SfxEntry* getNextNE() { return nextne; }
|
||||
inline SfxEntry* getNextEQ() { return nexteq; }
|
||||
|
||||
inline SfxEntry* getLM() { return l_morph; }
|
||||
inline SfxEntry* getRM() { return r_morph; }
|
||||
inline SfxEntry* getEQM() { return eq_morph; }
|
||||
inline SfxEntry* getFlgNxt() { return flgnxt; }
|
||||
|
||||
inline void setNext(SfxEntry* ptr) { next = ptr; }
|
||||
inline void setNextNE(SfxEntry* ptr) { nextne = ptr; }
|
||||
inline void setNextEQ(SfxEntry* ptr) { nexteq = ptr; }
|
||||
inline void setFlgNxt(SfxEntry* ptr) { flgnxt = ptr; }
|
||||
void initReverseWord();
|
||||
|
||||
inline char* nextchar(char* p);
|
||||
inline int test_condition(const char* st, const char* begin);
|
||||
};
|
||||
|
||||
#endif
|
4894
3rdparty/hunspell/src/hunspell/affixmgr.cxx
vendored
4894
3rdparty/hunspell/src/hunspell/affixmgr.cxx
vendored
File diff suppressed because it is too large
Load Diff
369
3rdparty/hunspell/src/hunspell/affixmgr.hxx
vendored
369
3rdparty/hunspell/src/hunspell/affixmgr.hxx
vendored
@ -1,369 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef AFFIXMGR_HXX_
|
||||
#define AFFIXMGR_HXX_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "baseaffix.hxx"
|
||||
#include "hashmgr.hxx"
|
||||
#include "phonet.hxx"
|
||||
#include "replist.hxx"
|
||||
|
||||
// check flag duplication
|
||||
#define dupSFX (1 << 0)
|
||||
#define dupPFX (1 << 1)
|
||||
|
||||
class PfxEntry;
|
||||
class SfxEntry;
|
||||
|
||||
class AffixMgr {
|
||||
PfxEntry* pStart[SETSIZE];
|
||||
SfxEntry* sStart[SETSIZE];
|
||||
PfxEntry* pFlag[SETSIZE];
|
||||
SfxEntry* sFlag[SETSIZE];
|
||||
const std::vector<HashMgr*>& alldic;
|
||||
const HashMgr* pHMgr;
|
||||
std::string keystring;
|
||||
std::string trystring;
|
||||
std::string encoding;
|
||||
struct cs_info* csconv;
|
||||
int utf8;
|
||||
int complexprefixes;
|
||||
FLAG compoundflag;
|
||||
FLAG compoundbegin;
|
||||
FLAG compoundmiddle;
|
||||
FLAG compoundend;
|
||||
FLAG compoundroot;
|
||||
FLAG compoundforbidflag;
|
||||
FLAG compoundpermitflag;
|
||||
int compoundmoresuffixes;
|
||||
int checkcompounddup;
|
||||
int checkcompoundrep;
|
||||
int checkcompoundcase;
|
||||
int checkcompoundtriple;
|
||||
int simplifiedtriple;
|
||||
FLAG forbiddenword;
|
||||
FLAG nosuggest;
|
||||
FLAG nongramsuggest;
|
||||
FLAG needaffix;
|
||||
int cpdmin;
|
||||
bool parsedrep;
|
||||
std::vector<replentry> reptable;
|
||||
RepList* iconvtable;
|
||||
RepList* oconvtable;
|
||||
bool parsedmaptable;
|
||||
std::vector<mapentry> maptable;
|
||||
bool parsedbreaktable;
|
||||
std::vector<std::string> breaktable;
|
||||
bool parsedcheckcpd;
|
||||
std::vector<patentry> checkcpdtable;
|
||||
int simplifiedcpd;
|
||||
bool parseddefcpd;
|
||||
std::vector<flagentry> defcpdtable;
|
||||
phonetable* phone;
|
||||
int maxngramsugs;
|
||||
int maxcpdsugs;
|
||||
int maxdiff;
|
||||
int onlymaxdiff;
|
||||
int nosplitsugs;
|
||||
int sugswithdots;
|
||||
int cpdwordmax;
|
||||
int cpdmaxsyllable;
|
||||
std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit,
|
||||
std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding
|
||||
std::string cpdsyllablenum; // syllable count incrementing flag
|
||||
const char* pfxappnd; // BUG: not stateless
|
||||
const char* sfxappnd; // BUG: not stateless
|
||||
int sfxextra; // BUG: not stateless
|
||||
FLAG sfxflag; // BUG: not stateless
|
||||
char* derived; // BUG: not stateless
|
||||
SfxEntry* sfx; // BUG: not stateless
|
||||
PfxEntry* pfx; // BUG: not stateless
|
||||
int checknum;
|
||||
std::string wordchars; // letters + spec. word characters
|
||||
std::vector<w_char> wordchars_utf16;
|
||||
std::string ignorechars; // letters + spec. word characters
|
||||
std::vector<w_char> ignorechars_utf16;
|
||||
std::string version; // affix and dictionary file version string
|
||||
std::string lang; // language
|
||||
int langnum;
|
||||
FLAG lemma_present;
|
||||
FLAG circumfix;
|
||||
FLAG onlyincompound;
|
||||
FLAG keepcase;
|
||||
FLAG forceucase;
|
||||
FLAG warn;
|
||||
int forbidwarn;
|
||||
FLAG substandard;
|
||||
int checksharps;
|
||||
int fullstrip;
|
||||
|
||||
int havecontclass; // boolean variable
|
||||
char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold
|
||||
// affix)
|
||||
|
||||
public:
|
||||
AffixMgr(const char* affpath, const std::vector<HashMgr*>& ptr, const char* key = NULL);
|
||||
~AffixMgr();
|
||||
struct hentry* affix_check(const char* word,
|
||||
int len,
|
||||
const unsigned short needflag = (unsigned short)0,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
struct hentry* prefix_check(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
inline int isSubset(const char* s1, const char* s2);
|
||||
struct hentry* prefix_check_twosfx(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
inline int isRevSubset(const char* s1, const char* end_of_s2, int len);
|
||||
struct hentry* suffix_check(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
struct hentry* suffix_check_twosfx(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string affix_check_morph(const char* word,
|
||||
int len,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
std::string prefix_check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
std::string suffix_check_morph(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
|
||||
std::string prefix_check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
std::string suffix_check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string morphgen(const char* ts,
|
||||
int wl,
|
||||
const unsigned short* ap,
|
||||
unsigned short al,
|
||||
const char* morph,
|
||||
const char* targetmorph,
|
||||
int level);
|
||||
|
||||
int expand_rootword(struct guessword* wlst,
|
||||
int maxn,
|
||||
const char* ts,
|
||||
int wl,
|
||||
const unsigned short* ap,
|
||||
unsigned short al,
|
||||
const char* bad,
|
||||
int,
|
||||
const char*);
|
||||
|
||||
short get_syllable(const std::string& word);
|
||||
int cpdrep_check(const char* word, int len);
|
||||
int cpdpat_check(const char* word,
|
||||
int len,
|
||||
hentry* r1,
|
||||
hentry* r2,
|
||||
const char affixed);
|
||||
int defcpd_check(hentry*** words,
|
||||
short wnum,
|
||||
hentry* rv,
|
||||
hentry** rwords,
|
||||
char all);
|
||||
int cpdcase_check(const char* word, int len);
|
||||
inline int candidate_check(const char* word, int len);
|
||||
void setcminmax(int* cmin, int* cmax, const char* word, int len);
|
||||
struct hentry* compound_check(const std::string& word,
|
||||
short wordnum,
|
||||
short numsyllable,
|
||||
short maxwordnum,
|
||||
short wnum,
|
||||
hentry** words,
|
||||
hentry** rwords,
|
||||
char hu_mov_rule,
|
||||
char is_sug,
|
||||
int* info);
|
||||
|
||||
int compound_check_morph(const char* word,
|
||||
int len,
|
||||
short wordnum,
|
||||
short numsyllable,
|
||||
short maxwordnum,
|
||||
short wnum,
|
||||
hentry** words,
|
||||
hentry** rwords,
|
||||
char hu_mov_rule,
|
||||
std::string& result,
|
||||
const std::string* partresult);
|
||||
|
||||
std::vector<std::string> get_suffix_words(short unsigned* suff,
|
||||
int len,
|
||||
const char* root_word);
|
||||
|
||||
struct hentry* lookup(const char* word);
|
||||
const std::vector<replentry>& get_reptable() const;
|
||||
RepList* get_iconvtable() const;
|
||||
RepList* get_oconvtable() const;
|
||||
struct phonetable* get_phonetable() const;
|
||||
const std::vector<mapentry>& get_maptable() const;
|
||||
const std::vector<std::string>& get_breaktable() const;
|
||||
const std::string& get_encoding();
|
||||
int get_langnum() const;
|
||||
char* get_key_string();
|
||||
char* get_try_string() const;
|
||||
const std::string& get_wordchars() const;
|
||||
const std::vector<w_char>& get_wordchars_utf16() const;
|
||||
const char* get_ignore() const;
|
||||
const std::vector<w_char>& get_ignore_utf16() const;
|
||||
int get_compound() const;
|
||||
FLAG get_compoundflag() const;
|
||||
FLAG get_forbiddenword() const;
|
||||
FLAG get_nosuggest() const;
|
||||
FLAG get_nongramsuggest() const;
|
||||
FLAG get_needaffix() const;
|
||||
FLAG get_onlyincompound() const;
|
||||
const char* get_derived() const;
|
||||
const std::string& get_version() const;
|
||||
int have_contclass() const;
|
||||
int get_utf8() const;
|
||||
int get_complexprefixes() const;
|
||||
char* get_suffixed(char) const;
|
||||
int get_maxngramsugs() const;
|
||||
int get_maxcpdsugs() const;
|
||||
int get_maxdiff() const;
|
||||
int get_onlymaxdiff() const;
|
||||
int get_nosplitsugs() const;
|
||||
int get_sugswithdots(void) const;
|
||||
FLAG get_keepcase(void) const;
|
||||
FLAG get_forceucase(void) const;
|
||||
FLAG get_warn(void) const;
|
||||
int get_forbidwarn(void) const;
|
||||
int get_checksharps(void) const;
|
||||
char* encode_flag(unsigned short aflag) const;
|
||||
int get_fullstrip() const;
|
||||
|
||||
private:
|
||||
int parse_file(const char* affpath, const char* key);
|
||||
bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af);
|
||||
bool parse_num(const std::string& line, int* out, FileMgr* af);
|
||||
bool parse_cpdsyllable(const std::string& line, FileMgr* af);
|
||||
bool parse_reptable(const std::string& line, FileMgr* af);
|
||||
bool parse_convtable(const std::string& line,
|
||||
FileMgr* af,
|
||||
RepList** rl,
|
||||
const std::string& keyword);
|
||||
bool parse_phonetable(const std::string& line, FileMgr* af);
|
||||
bool parse_maptable(const std::string& line, FileMgr* af);
|
||||
bool parse_breaktable(const std::string& line, FileMgr* af);
|
||||
bool parse_checkcpdtable(const std::string& line, FileMgr* af);
|
||||
bool parse_defcpdtable(const std::string& line, FileMgr* af);
|
||||
bool parse_affix(const std::string& line, const char at, FileMgr* af, char* dupflags);
|
||||
|
||||
void reverse_condition(std::string&);
|
||||
std::string& debugflag(std::string& result, unsigned short flag);
|
||||
int condlen(const char*);
|
||||
int encodeit(AffEntry& entry, const char* cs);
|
||||
int build_pfxtree(PfxEntry* pfxptr);
|
||||
int build_sfxtree(SfxEntry* sfxptr);
|
||||
int process_pfx_order();
|
||||
int process_sfx_order();
|
||||
PfxEntry* process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr);
|
||||
SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr);
|
||||
int process_pfx_tree_to_list();
|
||||
int process_sfx_tree_to_list();
|
||||
int redundant_condition(char, const char* strip, int stripl, const char* cond, int);
|
||||
void finishFileMgr(FileMgr* afflst);
|
||||
};
|
||||
|
||||
#endif
|
119
3rdparty/hunspell/src/hunspell/atypes.hxx
vendored
119
3rdparty/hunspell/src/hunspell/atypes.hxx
vendored
@ -1,119 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef ATYPES_HXX_
|
||||
#define ATYPES_HXX_
|
||||
|
||||
// Diagnostic hook. Unless HUNSPELL_WARNING is already defined as a macro,
// it becomes fprintf when HUNSPELL_WARNING_ON is defined, and a function
// with an empty body otherwise.
#if !defined(HUNSPELL_WARNING)
#include <stdio.h>
#if defined(HUNSPELL_WARNING_ON)
#define HUNSPELL_WARNING fprintf
#else
// empty inline function to switch off warnings (instead of the C99 standard
// variadic macros)
static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
#endif
#endif
|
||||
|
||||
// HUNSTEM def.
|
||||
#define HUNSTEM
|
||||
|
||||
#include "w_char.hxx"
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define SETSIZE 256
|
||||
#define CONTSIZE 65536
|
||||
|
||||
// AffEntry options
|
||||
#define aeXPRODUCT (1 << 0)
|
||||
#define aeUTF8 (1 << 1)
|
||||
#define aeALIASF (1 << 2)
|
||||
#define aeALIASM (1 << 3)
|
||||
#define aeLONGCOND (1 << 4)
|
||||
|
||||
// compound options
|
||||
#define IN_CPD_NOT 0
|
||||
#define IN_CPD_BEGIN 1
|
||||
#define IN_CPD_END 2
|
||||
#define IN_CPD_OTHER 3
|
||||
|
||||
// info options
|
||||
#define SPELL_COMPOUND (1 << 0)
|
||||
#define SPELL_FORBIDDEN (1 << 1)
|
||||
#define SPELL_ALLCAP (1 << 2)
|
||||
#define SPELL_NOCAP (1 << 3)
|
||||
#define SPELL_INITCAP (1 << 4)
|
||||
#define SPELL_ORIGCAP (1 << 5)
|
||||
#define SPELL_WARN (1 << 6)
|
||||
|
||||
#define MINCPDLEN 3
|
||||
#define MAXCOMPOUND 10
|
||||
#define MAXCONDLEN 20
|
||||
#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char*))
|
||||
|
||||
#define MAXACC 1000
|
||||
|
||||
// Integer type holding a single flag value.
#define FLAG unsigned short
// Null (zero) flag value.
#define FLAG_NULL 0x00
// Resets the flag lvalue `a` to zero. The expansion is fully parenthesized so
// it behaves as one expression wherever it is used.
#define FREE_FLAG(a) ((a) = 0)

// True when flag `b` is found among the `c` flags starting at `a`. The lookup
// is a binary search, so the range must be sorted in ascending order.
// Every argument is parenthesized so that expression arguments (for example a
// conditional passed as `c`) expand with the intended precedence.
#define TESTAFF(a, b, c) (std::binary_search((a), (a) + (c), (b)))
|
||||
|
||||
// Plain record made of two C-string pointers and a boolean.
// NOTE(review): ownership of `word` and `orig` is not visible in this header;
// confirm at the call sites before freeing or copying.
struct guessword {
  char* word;  // NOTE(review): presumably the candidate word - confirm
  bool allow;  // NOTE(review): presumably whether the candidate is allowed - confirm
  char* orig;  // NOTE(review): presumably the original form - confirm
};
|
||||
|
||||
typedef std::vector<std::string> mapentry;
|
||||
typedef std::vector<FLAG> flagentry;
|
||||
|
||||
struct patentry {
|
||||
std::string pattern;
|
||||
std::string pattern2;
|
||||
std::string pattern3;
|
||||
FLAG cond;
|
||||
FLAG cond2;
|
||||
patentry()
|
||||
: cond(FLAG_NULL)
|
||||
, cond2(FLAG_NULL) {
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
74
3rdparty/hunspell/src/hunspell/baseaffix.hxx
vendored
74
3rdparty/hunspell/src/hunspell/baseaffix.hxx
vendored
@ -1,74 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef BASEAFF_HXX_
|
||||
#define BASEAFF_HXX_
|
||||
|
||||
#include <string>
|
||||
|
||||
class AffEntry {
|
||||
private:
|
||||
AffEntry(const AffEntry&);
|
||||
AffEntry& operator=(const AffEntry&);
|
||||
|
||||
public:
|
||||
AffEntry()
|
||||
: numconds(0),
|
||||
opts(0),
|
||||
aflag(0),
|
||||
morphcode(0),
|
||||
contclass(NULL),
|
||||
contclasslen(0) {}
|
||||
virtual ~AffEntry();
|
||||
std::string appnd;
|
||||
std::string strip;
|
||||
unsigned char numconds;
|
||||
char opts;
|
||||
unsigned short aflag;
|
||||
union {
|
||||
char conds[MAXCONDLEN];
|
||||
struct {
|
||||
char conds1[MAXCONDLEN_1];
|
||||
char* conds2;
|
||||
} l;
|
||||
} c;
|
||||
char* morphcode;
|
||||
unsigned short* contclass;
|
||||
short contclasslen;
|
||||
};
|
||||
|
||||
#endif
|
2640
3rdparty/hunspell/src/hunspell/csutil.cxx
vendored
2640
3rdparty/hunspell/src/hunspell/csutil.cxx
vendored
File diff suppressed because it is too large
Load Diff
314
3rdparty/hunspell/src/hunspell/csutil.hxx
vendored
314
3rdparty/hunspell/src/hunspell/csutil.hxx
vendored
@ -1,314 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CSUTIL_HXX_
|
||||
#define CSUTIL_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
// First some base level utility routines
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <string.h>
|
||||
#include "w_char.hxx"
|
||||
#include "htypes.hxx"
|
||||
|
||||
#ifdef MOZILLA_CLIENT
|
||||
#include "nscore.h" // for mozalloc headers
|
||||
#endif
|
||||
|
||||
// casing
|
||||
#define NOCAP 0
|
||||
#define INITCAP 1
|
||||
#define ALLCAP 2
|
||||
#define HUHCAP 3
|
||||
#define HUHINITCAP 4
|
||||
|
||||
// default encoding and keystring
|
||||
#define SPELL_ENCODING "ISO8859-1"
|
||||
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
|
||||
|
||||
// default morphological fields
|
||||
#define MORPH_STEM "st:"
|
||||
#define MORPH_ALLOMORPH "al:"
|
||||
#define MORPH_POS "po:"
|
||||
#define MORPH_DERI_PFX "dp:"
|
||||
#define MORPH_INFL_PFX "ip:"
|
||||
#define MORPH_TERM_PFX "tp:"
|
||||
#define MORPH_DERI_SFX "ds:"
|
||||
#define MORPH_INFL_SFX "is:"
|
||||
#define MORPH_TERM_SFX "ts:"
|
||||
#define MORPH_SURF_PFX "sp:"
|
||||
#define MORPH_FREQ "fr:"
|
||||
#define MORPH_PHON "ph:"
|
||||
#define MORPH_HYPH "hy:"
|
||||
#define MORPH_PART "pa:"
|
||||
#define MORPH_FLAG "fl:"
|
||||
#define MORPH_HENTRY "_H:"
|
||||
#define MORPH_TAG_LEN strlen(MORPH_STEM)
|
||||
|
||||
#define MSEP_FLD ' '
|
||||
#define MSEP_REC '\n'
|
||||
#define MSEP_ALT '\v'
|
||||
|
||||
// default flags
|
||||
#define DEFAULTFLAGS 65510
|
||||
#define FORBIDDENWORD 65510
|
||||
#define ONLYUPCASEFLAG 65511
|
||||
|
||||
// fix long pathname problem of WIN32 by using w_char std::fstream::open override
|
||||
LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path,
|
||||
std::ios_base::openmode mode);
|
||||
|
||||
// convert UTF-16 characters to UTF-8
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
|
||||
const std::vector<w_char>& src);
|
||||
|
||||
// convert UTF-8 characters to UTF-16
|
||||
LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
|
||||
const std::string& src);
|
||||
|
||||
// remove end of line char(s)
|
||||
LIBHUNSPELL_DLL_EXPORTED void mychomp(std::string& s);
|
||||
|
||||
// duplicate string
|
||||
LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s);
|
||||
|
||||
// parse into tokens with char delimiter
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string::const_iterator mystrsep(const std::string &str,
|
||||
std::string::const_iterator& start);
|
||||
|
||||
// replace pat by rep in word and return word
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str,
|
||||
const std::string& search,
|
||||
const std::string& replace);
|
||||
|
||||
// append apd to the end of every line in str
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str,
|
||||
const std::string& apd);
|
||||
|
||||
// tokenize into lines with new line
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<std::string> line_tok(const std::string& text,
|
||||
char breakchar);
|
||||
|
||||
// tokenize into lines with new line and uniq in place
|
||||
LIBHUNSPELL_DLL_EXPORTED void line_uniq(std::string& text, char breakchar);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void line_uniq_app(std::string& text, char breakchar);
|
||||
|
||||
// reverse word
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
|
||||
|
||||
// reverse word (UTF-8 encoded)
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&);
|
||||
|
||||
// remove duplicates
|
||||
LIBHUNSPELL_DLL_EXPORTED void uniqlist(std::vector<std::string>& list);
|
||||
|
||||
// character encoding information
|
||||
// Per-character record of three byte values for an 8-bit encoding.
// NOTE(review): field meanings are inferred from their names - confirm
// against the tables in csutil.cxx.
struct cs_info {
  unsigned char ccase;   // presumably a case indicator for the character
  unsigned char clower;  // presumably the lowercase counterpart
  unsigned char cupper;  // presumably the uppercase counterpart
};
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void initialize_utf_tbl();
|
||||
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
|
||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c,
|
||||
int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED w_char upper_utf(w_char u, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED w_char lower_utf(w_char u, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,
|
||||
int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const std::string& es);
|
||||
|
||||
// get language identifiers of language codes
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const std::string& lang);
|
||||
|
||||
// get characters of the given 8bit encoding with lower- and uppercase forms
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string get_casechars(const char* enc);
|
||||
|
||||
// convert std::string to all caps
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s,
|
||||
const struct cs_info* csconv);
|
||||
|
||||
// convert std::string to all small (lowercase) letters
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mkallsmall(std::string& s,
|
||||
const struct cs_info* csconv);
|
||||
|
||||
// convert first letter of string to little
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mkinitsmall(std::string& s,
|
||||
const struct cs_info* csconv);
|
||||
|
||||
// convert first letter of string to capital
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s,
|
||||
const struct cs_info* csconv);
|
||||
|
||||
// convert first letter of UTF-8 string to capital
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||
mkinitcap_utf(std::vector<w_char>& u, int langnum);
|
||||
|
||||
// convert UTF-8 string to little
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||
mkallsmall_utf(std::vector<w_char>& u, int langnum);
|
||||
|
||||
// convert first letter of UTF-8 string to little
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||
mkinitsmall_utf(std::vector<w_char>& u, int langnum);
|
||||
|
||||
// convert UTF-8 string to capital
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||
mkallcap_utf(std::vector<w_char>& u, int langnum);
|
||||
|
||||
// get type of capitalization
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
|
||||
|
||||
// get type of capitalization (UTF-8)
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
|
||||
|
||||
// strip all ignored characters in the string
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(
|
||||
std::string& word,
|
||||
const std::vector<w_char>& ignored_chars);
|
||||
|
||||
// strip all ignored characters in the string
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
|
||||
std::string& word,
|
||||
const std::string& ignored_chars);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line,
|
||||
std::string& out,
|
||||
int ln);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line,
|
||||
std::string& out,
|
||||
std::vector<w_char>& out_utf16,
|
||||
int utf8,
|
||||
int ln);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char* r);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest,
|
||||
const std::string& morph,
|
||||
const std::string& var);
|
||||
|
||||
// conversion function for protected memory
|
||||
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source);
|
||||
|
||||
// conversion function for protected memory
|
||||
LIBHUNSPELL_DLL_EXPORTED char* get_stored_pointer(const char* s);
|
||||
|
||||
// hash entry helpers (inline functions)
|
||||
// Returns the extra data attached to hash entry `h`, or NULL when h->var is
// zero. The data sits blen + 1 bytes past the start of the word; when
// H_OPT_ALIASM is set in h->var, that location is passed through
// get_stored_pointer() and its result is returned instead.
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry* h) {
  if (!h->var)
    return NULL;
  char* data = HENTRY_WORD(h) + h->blen + 1;
  return (h->var & H_OPT_ALIASM) ? get_stored_pointer(data) : data;
}
|
||||
|
||||
// Const overload: returns the extra data attached to hash entry `h`, or NULL
// when h->var is zero. The data sits blen + 1 bytes past the start of the
// word; when H_OPT_ALIASM is set in h->var, that location is passed through
// get_stored_pointer() and its result is returned instead.
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA(
    const struct hentry* h) {
  if (!h->var)
    return NULL;
  const char* data = HENTRY_WORD(h) + h->blen + 1;
  return (h->var & H_OPT_ALIASM) ? get_stored_pointer(data) : data;
}
|
||||
|
||||
// NULL-free version for warning-free OOo build
|
||||
// Same lookup as the const HENTRY_DATA, except that an entry whose h->var is
// zero yields an empty string rather than NULL.
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(
    const struct hentry* h) {
  if (!h->var)
    return "";
  const char* data = HENTRY_WORD(h) + h->blen + 1;
  return (h->var & H_OPT_ALIASM) ? get_stored_pointer(data) : data;
}
|
||||
|
||||
// Searches the extra data of hash entry `h` for the substring `p`.
// Returns a pointer to the first occurrence, or NULL when the entry has no
// data or `p` does not occur in it.
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry* h,
                                                  const char* p) {
  // Look the data up once; the previous form evaluated HENTRY_DATA(h) twice.
  char* data = HENTRY_DATA(h);
  return data ? strstr(data, p) : NULL;
}
|
||||
|
||||
#endif
|
117
3rdparty/hunspell/src/hunspell/filemgr.cxx
vendored
117
3rdparty/hunspell/src/hunspell/filemgr.cxx
vendored
@ -1,117 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "filemgr.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
// Writes an error message to stderr and returns -1.
// `err` is used directly as the printf-style format string and `par` is its
// only argument, so callers must pass a format that consumes at most one
// string argument.
int FileMgr::fail(const char* err, const char* par) {
  fprintf(stderr, err, par);
  return -1;
}
|
||||
|
||||
// Opens `file` for line-wise reading.
// The plain file is tried first via myopen(). If that fails, the name with
// HZIP_EXTENSION appended is opened through Hunzip, passing `key` along.
// When neither source can be opened, MSG_OPEN is reported through fail().
FileMgr::FileMgr(const char* file, const char* key) : hin(NULL), linenum(0) {
  in[0] = '\0';

  myopen(fin, file, std::ios_base::in);
  if (fin.is_open())
    return;  // plain file is available; hin stays NULL

  // fall back to the hzipped variant of the file
  std::string zipped(file);
  zipped.append(HZIP_EXTENSION);
  hin = new Hunzip(zipped.c_str(), key);
  if (!hin->is_open())
    fail(MSG_OPEN, file);
}
|
||||
|
||||
// Releases the Hunzip reader, if one was created by the constructor.
FileMgr::~FileMgr() {
  delete hin;  // deleting a NULL pointer is a no-op
}
|
||||
|
||||
bool FileMgr::getline(std::string& dest) {
|
||||
bool ret = false;
|
||||
++linenum;
|
||||
if (fin.is_open()) {
|
||||
ret = static_cast<bool>(std::getline(fin, dest));
|
||||
} else if (hin->is_open()) {
|
||||
ret = hin->getline(dest);
|
||||
}
|
||||
if (!ret) {
|
||||
--linenum;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int FileMgr::getlinenum() {
|
||||
return linenum;
|
||||
}
|
98
3rdparty/hunspell/src/hunspell/filemgr.hxx
vendored
98
3rdparty/hunspell/src/hunspell/filemgr.hxx
vendored
@ -1,98 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* file manager class - read lines of files [filename] OR [filename.hz] */
|
||||
#ifndef FILEMGR_HXX_
|
||||
#define FILEMGR_HXX_
|
||||
|
||||
#include "hunzip.hxx"
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
class FileMgr {
|
||||
private:
|
||||
FileMgr(const FileMgr&);
|
||||
FileMgr& operator=(const FileMgr&);
|
||||
|
||||
protected:
|
||||
std::ifstream fin;
|
||||
Hunzip* hin;
|
||||
char in[BUFSIZE + 50]; // input buffer
|
||||
int fail(const char* err, const char* par);
|
||||
int linenum;
|
||||
|
||||
public:
|
||||
FileMgr(const char* filename, const char* key = NULL);
|
||||
~FileMgr();
|
||||
bool getline(std::string&);
|
||||
int getlinenum();
|
||||
};
|
||||
#endif
|
1193
3rdparty/hunspell/src/hunspell/hashmgr.cxx
vendored
1193
3rdparty/hunspell/src/hunspell/hashmgr.cxx
vendored
File diff suppressed because it is too large
Load Diff
145
3rdparty/hunspell/src/hunspell/hashmgr.hxx
vendored
145
3rdparty/hunspell/src/hunspell/hashmgr.hxx
vendored
@ -1,145 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HASHMGR_HXX_
|
||||
#define HASHMGR_HXX_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "htypes.hxx"
|
||||
#include "filemgr.hxx"
|
||||
#include "w_char.hxx"
|
||||
|
||||
// Flag encoding modes.
// NOTE(review): the meaning of each mode is inferred from its name - confirm
// against the flag-decoding code in hashmgr.cxx.
enum flag {
  FLAG_CHAR = 0,
  FLAG_LONG = 1,
  FLAG_NUM = 2,
  FLAG_UNI = 3
};
|
||||
|
||||
class HashMgr {
|
||||
int tablesize;
|
||||
struct hentry** tableptr;
|
||||
flag flag_mode;
|
||||
int complexprefixes;
|
||||
int utf8;
|
||||
unsigned short forbiddenword;
|
||||
int langnum;
|
||||
std::string enc;
|
||||
std::string lang;
|
||||
struct cs_info* csconv;
|
||||
std::string ignorechars;
|
||||
std::vector<w_char> ignorechars_utf16;
|
||||
int numaliasf; // flag vector `compression' with aliases
|
||||
unsigned short** aliasf;
|
||||
unsigned short* aliasflen;
|
||||
int numaliasm; // morphological desciption `compression' with aliases
|
||||
char** aliasm;
|
||||
|
||||
public:
|
||||
HashMgr(const char* tpath, const char* apath, const char* key = NULL);
|
||||
~HashMgr();
|
||||
|
||||
struct hentry* lookup(const char*) const;
|
||||
int hash(const char*) const;
|
||||
struct hentry* walk_hashtable(int& col, struct hentry* hp) const;
|
||||
|
||||
int add(const std::string& word);
|
||||
int add_with_affix(const std::string& word, const std::string& pattern);
|
||||
int remove(const std::string& word);
|
||||
int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const;
|
||||
bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const;
|
||||
unsigned short decode_flag(const char* flag) const;
|
||||
char* encode_flag(unsigned short flag) const;
|
||||
int is_aliasf() const;
|
||||
int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const;
|
||||
int is_aliasm() const;
|
||||
char* get_aliasm(int index) const;
|
||||
|
||||
private:
|
||||
int get_clen_and_captype(const std::string& word, int* captype);
|
||||
int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf);
|
||||
int load_tables(const char* tpath, const char* key);
|
||||
int add_word(const std::string& word,
|
||||
int wcl,
|
||||
unsigned short* ap,
|
||||
int al,
|
||||
const std::string* desc,
|
||||
bool onlyupcase);
|
||||
int load_config(const char* affpath, const char* key);
|
||||
bool parse_aliasf(const std::string& line, FileMgr* af);
|
||||
int add_hidden_capitalized_word(const std::string& word,
|
||||
int wcl,
|
||||
unsigned short* flags,
|
||||
int al,
|
||||
const std::string* dp,
|
||||
int captype);
|
||||
bool parse_aliasm(const std::string& line, FileMgr* af);
|
||||
int remove_forbidden_flag(const std::string& word);
|
||||
};
|
||||
|
||||
#endif
|
68
3rdparty/hunspell/src/hunspell/htypes.hxx
vendored
68
3rdparty/hunspell/src/hunspell/htypes.hxx
vendored
@ -1,68 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef HTYPES_HXX_
|
||||
#define HTYPES_HXX_
|
||||
|
||||
#define ROTATE_LEN 5

// Rotate `v` left by `q` bits in place; the `32 - (q)` term assumes `v` is
// 32 bits wide. Every use of `q` is parenthesized so that an expression
// argument such as `a + b` is evaluated as a unit.
// The macro deliberately keeps its historical trailing semicolon so existing
// call sites (with or without their own `;`) keep compiling unchanged.
#define ROTATE(v, q) \
  (v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q)) - 1));

// hentry options
#define H_OPT (1 << 0)
#define H_OPT_ALIASM (1 << 1)
#define H_OPT_PHON (1 << 2)

// Pointer to the first character of the word stored in the entry `h`.
// The argument and the whole expansion are parenthesized so pointer
// expressions (e.g. `p + 1`) can be passed safely.
// see also csutil.hxx
#define HENTRY_WORD(h) (&((h)->word[0]))

// approx. number of user defined words
#define USERWORD 1000
|
||||
|
||||
// One dictionary entry of the hash table. `word` is declared with a single
// element but documented as variable-length, so entries are expected to be
// allocated with extra room behind the struct.
struct hentry {
  unsigned char blen;    // length of the word in bytes
  unsigned char clen;    // length of the word in characters (differs from blen for UTF-8)
  short alen;            // number of entries in the affix flag vector
  unsigned short* astr;  // the affix flag vector
  hentry* next;          // next word with the same hash code
  hentry* next_homonym;  // next homonym of this word (same hash code)
  char var;              // variable fields (so far only special pronunciation)
  char word[1];          // variable-length word (8-bit or UTF-8 encoding)
};
|
||||
|
||||
#endif
|
2019
3rdparty/hunspell/src/hunspell/hunspell.cxx
vendored
2019
3rdparty/hunspell/src/hunspell/hunspell.cxx
vendored
File diff suppressed because it is too large
Load Diff
162
3rdparty/hunspell/src/hunspell/hunspell.h
vendored
162
3rdparty/hunspell/src/hunspell/hunspell.h
vendored
@ -1,162 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef MYSPELLMGR_H_
|
||||
#define MYSPELLMGR_H_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct Hunhandle Hunhandle;
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create(const char* affpath,
|
||||
const char* dpath);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create_key(const char* affpath,
|
||||
const char* dpath,
|
||||
const char* key);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle* pHunspell);
|
||||
|
||||
/* load extra dictionaries (only dic files)
|
||||
* output: 0 = additional dictionary slots available, 1 = slots are now full*/
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_dic(Hunhandle* pHunspell,
|
||||
const char* dpath);
|
||||
|
||||
/* spell(word) - spellcheck word
|
||||
* output: 0 = bad word, not 0 = good word
|
||||
*/
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle* pHunspell, const char*);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED char* Hunspell_get_dic_encoding(Hunhandle* pHunspell);
|
||||
|
||||
/* suggest(suggestions, word) - search suggestions
|
||||
* input: pointer to an array of strings pointer and the (bad) word
|
||||
* array of strings pointer (here *slst) may not be initialized
|
||||
* output: number of suggestions in string array, and suggestions in
|
||||
* a newly allocated array of strings (*slts will be NULL when number
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word);
|
||||
|
||||
/* morphological functions */
|
||||
|
||||
/* analyze(result, word) - morphological analysis of the word */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word);
|
||||
|
||||
/* stem(result, word) - stemmer function */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word);
|
||||
|
||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
||||
* example:
|
||||
* char ** result, result2;
|
||||
* int n1 = Hunspell_analyze(result, "words");
|
||||
* int n2 = Hunspell_stem2(result2, result, n1);
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
char** desc,
|
||||
int n);
|
||||
|
||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word,
|
||||
const char* word2);
|
||||
|
||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||
* example:
|
||||
* char ** result;
|
||||
* char * affix = "is:plural"; // description depends from dictionaries, too
|
||||
* int n = Hunspell_generate2(result, "word", &affix, 1);
|
||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word,
|
||||
char** desc,
|
||||
int n);
|
||||
|
||||
/* functions for run-time modification of the dictionary */
|
||||
|
||||
/* add word to the run-time dictionary */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle* pHunspell,
|
||||
const char* word);
|
||||
|
||||
/* add word to the run-time dictionary with affix flags of
|
||||
* the example (a dictionary word): Hunspell will recognize
|
||||
* affixed forms of the new word, too.
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle* pHunspell,
|
||||
const char* word,
|
||||
const char* example);
|
||||
|
||||
/* remove word from the run-time dictionary */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle* pHunspell,
|
||||
const char* word);
|
||||
|
||||
/* free suggestion lists */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
int n);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
229
3rdparty/hunspell/src/hunspell/hunspell.hxx
vendored
229
3rdparty/hunspell/src/hunspell/hunspell.hxx
vendored
@ -1,229 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef MYSPELLMGR_HXX_
|
||||
#define MYSPELLMGR_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
#include "w_char.hxx"
|
||||
#include "atypes.hxx"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define SPELL_XML "<?xml?>"

#define MAXSUGGESTION 15
#define MAXSHARPS 5

// MAXWORDLEN may be predefined by the build; 100 is only the fallback.
#if !defined(MAXWORDLEN)
#define MAXWORDLEN 100
#endif

// H_DEPRECATED expands to the compiler's "deprecated" marker where one is
// known (GCC >= 3.1, MSVC >= 1300) and to nothing everywhere else.
#if defined(__GNUC__) && \
    (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
#define H_DEPRECATED __attribute__((__deprecated__))
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
#define H_DEPRECATED __declspec(deprecated)
#else
#define H_DEPRECATED
#endif
|
||||
|
||||
class HunspellImpl;
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
private:
|
||||
Hunspell(const Hunspell&);
|
||||
Hunspell& operator=(const Hunspell&);
|
||||
|
||||
private:
|
||||
HunspellImpl* m_Impl;
|
||||
|
||||
public:
|
||||
/* Hunspell(aff, dic) - constructor of Hunspell class
|
||||
* input: path of affix file and dictionary file
|
||||
*
|
||||
* In WIN32 environment, use UTF-8 encoded paths started with the long path
|
||||
* prefix \\\\?\\ to handle system-independent character encoding and very
|
||||
* long path names (without the long path prefix Hunspell will use fopen()
|
||||
* with system-dependent character encoding instead of _wfopen()).
|
||||
*/
|
||||
Hunspell(const char* affpath, const char* dpath, const char* key = NULL);
|
||||
~Hunspell();
|
||||
|
||||
/* load extra dictionaries (only dic files) */
|
||||
int add_dic(const char* dpath, const char* key = NULL);
|
||||
|
||||
/* spell(word) - spellcheck word
|
||||
* output: false = bad word, true = good word
|
||||
*
|
||||
* plus output:
|
||||
* info: information bit array, fields:
|
||||
* SPELL_COMPOUND = a compound word
|
||||
* SPELL_FORBIDDEN = an explicit forbidden word
|
||||
* root: root (stem), when input is a word with affix(es)
|
||||
*/
|
||||
bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
|
||||
H_DEPRECATED int spell(const char* word, int* info = NULL, char** root = NULL);
|
||||
|
||||
/* suggest(suggestions, word) - search suggestions
|
||||
* input: pointer to an array of strings pointer and the (bad) word
|
||||
* array of strings pointer (here *slst) may not be initialized
|
||||
* output: number of suggestions in string array, and suggestions in
|
||||
* a newly allocated array of strings (*slts will be NULL when number
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
std::vector<std::string> suggest(const std::string& word);
|
||||
H_DEPRECATED int suggest(char*** slst, const char* word);
|
||||
|
||||
/* Suggest words from suffix rules
|
||||
* suffix_suggest(suggestions, root_word)
|
||||
* input: pointer to an array of strings pointer and the word
|
||||
* array of strings pointer (here *slst) may not be initialized
|
||||
* output: number of suggestions in string array, and suggestions in
|
||||
* a newly allocated array of strings (*slts will be NULL when number
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
std::vector<std::string> suffix_suggest(const std::string& root_word);
|
||||
H_DEPRECATED int suffix_suggest(char*** slst, const char* root_word);
|
||||
|
||||
/* deallocate suggestion lists */
|
||||
H_DEPRECATED void free_list(char*** slst, int n);
|
||||
|
||||
const std::string& get_dict_encoding() const;
|
||||
char* get_dic_encoding();
|
||||
|
||||
/* morphological functions */
|
||||
|
||||
/* analyze(result, word) - morphological analysis of the word */
|
||||
std::vector<std::string> analyze(const std::string& word);
|
||||
H_DEPRECATED int analyze(char*** slst, const char* word);
|
||||
|
||||
/* stem(word) - stemmer function */
|
||||
std::vector<std::string> stem(const std::string& word);
|
||||
H_DEPRECATED int stem(char*** slst, const char* word);
|
||||
|
||||
/* stem(analysis, n) - get stems from a morph. analysis
|
||||
* example:
|
||||
* char ** result, result2;
|
||||
* int n1 = analyze(&result, "words");
|
||||
* int n2 = stem(&result2, result, n1);
|
||||
*/
|
||||
std::vector<std::string> stem(const std::vector<std::string>& morph);
|
||||
H_DEPRECATED int stem(char*** slst, char** morph, int n);
|
||||
|
||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||
std::vector<std::string> generate(const std::string& word, const std::string& word2);
|
||||
H_DEPRECATED int generate(char*** slst, const char* word, const char* word2);
|
||||
|
||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||
* example:
|
||||
* char ** result;
|
||||
* char * affix = "is:plural"; // description depends from dictionaries, too
|
||||
* int n = generate(&result, "word", &affix, 1);
|
||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||
*/
|
||||
std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
|
||||
H_DEPRECATED int generate(char*** slst, const char* word, char** desc, int n);
|
||||
|
||||
/* functions for run-time modification of the dictionary */
|
||||
|
||||
/* add word to the run-time dictionary */
|
||||
|
||||
int add(const std::string& word);
|
||||
|
||||
/* add word to the run-time dictionary with affix flags of
|
||||
* the example (a dictionary word): Hunspell will recognize
|
||||
* affixed forms of the new word, too.
|
||||
*/
|
||||
|
||||
int add_with_affix(const std::string& word, const std::string& example);
|
||||
|
||||
/* remove word from the run-time dictionary */
|
||||
|
||||
int remove(const std::string& word);
|
||||
|
||||
/* other */
|
||||
|
||||
/* get extra word characters definied in affix file for tokenization */
|
||||
const char* get_wordchars() const;
|
||||
const std::string& get_wordchars_cpp() const;
|
||||
const std::vector<w_char>& get_wordchars_utf16() const;
|
||||
|
||||
struct cs_info* get_csconv();
|
||||
|
||||
const char* get_version() const;
|
||||
const std::string& get_version_cpp() const;
|
||||
|
||||
int get_langnum() const;
|
||||
|
||||
/* need for putdic */
|
||||
bool input_conv(const std::string& word, std::string& dest);
|
||||
H_DEPRECATED int input_conv(const char* word, char* dest, size_t destsize);
|
||||
};
|
||||
|
||||
#endif
|
18
3rdparty/hunspell/src/hunspell/hunvisapi.h
vendored
18
3rdparty/hunspell/src/hunspell/hunvisapi.h
vendored
@ -1,18 +0,0 @@
|
||||
#ifndef HUNSPELL_VISIBILITY_H_
|
||||
#define HUNSPELL_VISIBILITY_H_
|
||||
|
||||
/* LIBHUNSPELL_DLL_EXPORTED: export/import decoration for the public API.
 *   HUNSPELL_STATIC defined           -> nothing
 *   MSVC, building the library        -> dllexport
 *   MSVC, using the library           -> dllimport
 *   other compiler, building library  -> default symbol visibility
 *   anything else                     -> nothing
 */
#if defined(HUNSPELL_STATIC)
#define LIBHUNSPELL_DLL_EXPORTED
#elif defined(_MSC_VER) && defined(BUILDING_LIBHUNSPELL)
#define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
#elif defined(_MSC_VER)
#define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
#elif defined(BUILDING_LIBHUNSPELL) && 1
#define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
#else
#define LIBHUNSPELL_DLL_EXPORTED
#endif
|
||||
|
||||
#endif
|
18
3rdparty/hunspell/src/hunspell/hunvisapi.h.in
vendored
18
3rdparty/hunspell/src/hunspell/hunvisapi.h.in
vendored
@ -1,18 +0,0 @@
|
||||
#ifndef HUNSPELL_VISIBILITY_H_
|
||||
#define HUNSPELL_VISIBILITY_H_
|
||||
|
||||
/* Template for LIBHUNSPELL_DLL_EXPORTED; @HAVE_VISIBILITY@ is a placeholder
 * that has to be substituted before this header can be compiled.
 *   HUNSPELL_STATIC defined           -> nothing
 *   MSVC, building the library        -> dllexport
 *   MSVC, using the library           -> dllimport
 *   other compiler, building library  -> default symbol visibility
 *                                        (only if @HAVE_VISIBILITY@ is non-zero)
 *   anything else                     -> nothing
 */
#if defined(HUNSPELL_STATIC)
#define LIBHUNSPELL_DLL_EXPORTED
#elif defined(_MSC_VER) && defined(BUILDING_LIBHUNSPELL)
#define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
#elif defined(_MSC_VER)
#define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
#elif defined(BUILDING_LIBHUNSPELL) && @HAVE_VISIBILITY@
#define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
#else
#define LIBHUNSPELL_DLL_EXPORTED
#endif
|
||||
|
||||
#endif
|
256
3rdparty/hunspell/src/hunspell/hunzip.cxx
vendored
256
3rdparty/hunspell/src/hunspell/hunzip.cxx
vendored
@ -1,256 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "hunzip.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
#define CODELEN 65536

// Initial size of the code table `dec`, and the step by which
// Hunzip::getcode() grows it when it runs full.
#define BASEBITREC 5000

#define UNCOMPRESSED '\002'

// File signatures checked by Hunzip::getcode(): plain and encrypted archive.
#define MAGIC "hz0"
#define MAGIC_ENCRYPT "hz1"
// Signature length without the terminating NUL.
#define MAGICLEN (sizeof(MAGIC) - 1)
|
||||
|
||||
// Report an error and hand back the failure code.
//
// err: printf-style format string, passed to fprintf as the format
// par: the single argument supplied for that format
// Returns -1 in every case, so callers can write `return fail(...)`.
int Hunzip::fail(const char* err, const char* par) {
  const int failure = -1;
  fprintf(stderr, err, par);
  return failure;
}
|
||||
|
||||
// Open `file` and prepare it for reading.
//
// The file name is duplicated with mystrdup(), the code table is read with
// getcode(key) and, if that succeeds, the first output buffer is produced
// with getbuf(). bufsiz ends up as -1 when getcode() fails, otherwise as
// whatever getbuf() returned.
Hunzip::Hunzip(const char* file, const char* key)
    : bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0) {
  // start with empty buffers
  line[0] = '\0';
  out[0] = '\0';
  in[0] = '\0';

  filename = mystrdup(file);
  bufsiz = (getcode(key) == -1) ? -1 : getbuf();
}
|
||||
|
||||
int Hunzip::getcode(const char* key) {
|
||||
unsigned char c[2];
|
||||
int i, j, n;
|
||||
int allocatedbit = BASEBITREC;
|
||||
const char* enc = key;
|
||||
|
||||
if (!filename)
|
||||
return -1;
|
||||
|
||||
myopen(fin, filename, std::ios_base::in | std::ios_base::binary);
|
||||
if (!fin.is_open())
|
||||
return -1;
|
||||
|
||||
// read magic number
|
||||
if (!fin.read(in, 3) ||
|
||||
!(strncmp(MAGIC, in, MAGICLEN) == 0 ||
|
||||
strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
|
||||
return fail(MSG_FORMAT, filename);
|
||||
}
|
||||
|
||||
// check encryption
|
||||
if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
|
||||
unsigned char cs;
|
||||
if (!key)
|
||||
return fail(MSG_KEY, filename);
|
||||
if (!fin.read(reinterpret_cast<char*>(c), 1))
|
||||
return fail(MSG_FORMAT, filename);
|
||||
for (cs = 0; *enc; enc++)
|
||||
cs ^= *enc;
|
||||
if (cs != c[0])
|
||||
return fail(MSG_KEY, filename);
|
||||
enc = key;
|
||||
} else
|
||||
key = NULL;
|
||||
|
||||
// read record count
|
||||
if (!fin.read(reinterpret_cast<char*>(c), 2))
|
||||
return fail(MSG_FORMAT, filename);
|
||||
|
||||
if (key) {
|
||||
c[0] ^= *enc;
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
c[1] ^= *enc;
|
||||
}
|
||||
|
||||
n = ((int)c[0] << 8) + c[1];
|
||||
dec.resize(BASEBITREC);
|
||||
dec[0].v[0] = 0;
|
||||
dec[0].v[1] = 0;
|
||||
|
||||
// read codes
|
||||
for (i = 0; i < n; i++) {
|
||||
unsigned char l;
|
||||
if (!fin.read(reinterpret_cast<char*>(c), 2))
|
||||
return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
c[0] ^= *enc;
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
c[1] ^= *enc;
|
||||
}
|
||||
if (!fin.read(reinterpret_cast<char*>(&l), 1))
|
||||
return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
l ^= *enc;
|
||||
}
|
||||
if (!fin.read(in, l / 8 + 1))
|
||||
return fail(MSG_FORMAT, filename);
|
||||
if (key)
|
||||
for (j = 0; j <= l / 8; j++) {
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
in[j] ^= *enc;
|
||||
}
|
||||
int p = 0;
|
||||
for (j = 0; j < l; j++) {
|
||||
int b = (in[j / 8] & (1 << (7 - (j % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
p = dec[p].v[b];
|
||||
if (p == 0) {
|
||||
lastbit++;
|
||||
if (lastbit == allocatedbit) {
|
||||
allocatedbit += BASEBITREC;
|
||||
dec.resize(allocatedbit);
|
||||
}
|
||||
dec[lastbit].v[0] = 0;
|
||||
dec[lastbit].v[1] = 0;
|
||||
dec[oldp].v[b] = lastbit;
|
||||
p = lastbit;
|
||||
}
|
||||
}
|
||||
dec[p].c[0] = c[0];
|
||||
dec[p].c[1] = c[1];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Release the file name copy made by the constructor.
Hunzip::~Hunzip() {
  // free() accepts a null pointer, so no separate check is needed
  free(filename);
}
|
||||
|
||||
int Hunzip::getbuf() {
|
||||
int p = 0;
|
||||
int o = 0;
|
||||
do {
|
||||
if (inc == 0) {
|
||||
fin.read(in, BUFSIZE);
|
||||
inbits = fin.gcount() * 8;
|
||||
}
|
||||
for (; inc < inbits; inc++) {
|
||||
int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
p = dec[p].v[b];
|
||||
if (p == 0) {
|
||||
if (oldp == lastbit) {
|
||||
fin.close();
|
||||
// add last odd byte
|
||||
if (dec[lastbit].c[0])
|
||||
out[o++] = dec[lastbit].c[1];
|
||||
return o;
|
||||
}
|
||||
out[o++] = dec[oldp].c[0];
|
||||
out[o++] = dec[oldp].c[1];
|
||||
if (o == BUFSIZE)
|
||||
return o;
|
||||
p = dec[p].v[b];
|
||||
}
|
||||
}
|
||||
inc = 0;
|
||||
} while (inbits == BUFSIZE * 8);
|
||||
return fail(MSG_FORMAT, filename);
|
||||
}
|
||||
|
||||
bool Hunzip::getline(std::string& dest) {
|
||||
char linebuf[BUFSIZE];
|
||||
int l = 0, eol = 0, left = 0, right = 0;
|
||||
if (bufsiz == -1)
|
||||
return false;
|
||||
while (l < bufsiz && !eol) {
|
||||
linebuf[l++] = out[outc];
|
||||
switch (out[outc]) {
|
||||
case '\t':
|
||||
break;
|
||||
case 31: { // escape
|
||||
if (++outc == bufsiz) {
|
||||
bufsiz = getbuf();
|
||||
outc = 0;
|
||||
}
|
||||
linebuf[l - 1] = out[outc];
|
||||
break;
|
||||
}
|
||||
case ' ':
|
||||
break;
|
||||
default:
|
||||
if (((unsigned char)out[outc]) < 47) {
|
||||
if (out[outc] > 32) {
|
||||
right = out[outc] - 31;
|
||||
if (++outc == bufsiz) {
|
||||
bufsiz = getbuf();
|
||||
outc = 0;
|
||||
}
|
||||
}
|
||||
if (out[outc] == 30)
|
||||
left = 9;
|
||||
else
|
||||
left = out[outc];
|
||||
linebuf[l - 1] = '\n';
|
||||
eol = 1;
|
||||
}
|
||||
}
|
||||
if (++outc == bufsiz) {
|
||||
outc = 0;
|
||||
bufsiz = fin.is_open() ? getbuf() : -1;
|
||||
}
|
||||
}
|
||||
if (right)
|
||||
strcpy(linebuf + l - 1, line + strlen(line) - right - 1);
|
||||
else
|
||||
linebuf[l] = '\0';
|
||||
strcpy(line + left, linebuf);
|
||||
dest.assign(line);
|
||||
return true;
|
||||
}
|
87
3rdparty/hunspell/src/hunspell/hunzip.hxx
vendored
87
3rdparty/hunspell/src/hunspell/hunzip.hxx
vendored
@ -1,87 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
/* hunzip: file decompression for sorted dictionaries with optional encryption,
|
||||
* algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
|
||||
|
||||
#ifndef HUNZIP_HXX_
|
||||
#define HUNZIP_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#define BUFSIZE 65536
|
||||
#define HZIP_EXTENSION ".hz"
|
||||
|
||||
#define MSG_OPEN "error: %s: cannot open\n"
|
||||
#define MSG_FORMAT "error: %s: not in hzip format\n"
|
||||
#define MSG_MEMORY "error: %s: missing memory\n"
|
||||
#define MSG_KEY "error: %s: missing or bad password\n"
|
||||
|
||||
// One entry of the Huffman code table (`Hunzip::dec`).
struct bit {
|
||||
// Output bytes stored for this entry; the decoder copies c[0] and c[1] to the
// output buffer when a code ends at this entry.
unsigned char c[2];
|
||||
// Table index to continue with for an input bit of 0 (v[0]) or 1 (v[1]);
// the decoder treats index 0 as "code complete".
int v[2];
|
||||
};
|
||||
|
||||
// Reader for hzip-compressed dictionary files: Huffman decoding of the input
// stream followed by prefix/suffix reconstruction of each line against the
// previously returned line.
class LIBHUNSPELL_DLL_EXPORTED Hunzip {
|
||||
private:
|
||||
// Copy operations are declared private (presumably to forbid copying; no
// definition is visible here).
Hunzip(const Hunzip&);
|
||||
Hunzip& operator=(const Hunzip&);
|
||||
|
||||
protected:
|
||||
char* filename;
|
||||
std::ifstream fin;
|
||||
// bufsiz:  number of valid bytes in `out`; -1 means no more data
// lastbit: code-table index that marks the end of the stream
// inc:     index of the next unread bit in `in`
// inbits:  number of valid bits currently held in `in`
// outc:    index of the next unread byte in `out`
int bufsiz, lastbit, inc, inbits, outc;
|
||||
std::vector<bit> dec; // code table
|
||||
char in[BUFSIZE]; // input buffer
|
||||
char out[BUFSIZE + 1]; // Huffman-decoded buffer
|
||||
// Holds the most recently decoded line; getline() reuses its prefix and
// suffix when building the next line.
char line[BUFSIZE + 50]; // decoded line
|
||||
int getcode(const char* key);
|
||||
// Refills `out` from the compressed stream and returns the number of bytes
// produced; the visible error path returns the result of fail().
int getbuf();
|
||||
int fail(const char* err, const char* par);
|
||||
|
||||
public:
|
||||
Hunzip(const char* filename, const char* key = NULL);
|
||||
~Hunzip();
|
||||
// True while the underlying input stream is open.
bool is_open() { return fin.is_open(); }
|
||||
// Stores the next decoded line in `dest`; returns false once no data is left.
bool getline(std::string& dest);
|
||||
};
|
||||
|
||||
#endif
|
75
3rdparty/hunspell/src/hunspell/langnum.hxx
vendored
75
3rdparty/hunspell/src/hunspell/langnum.hxx
vendored
@ -1,75 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef LANGNUM_HXX_
|
||||
#define LANGNUM_HXX_
|
||||
|
||||
/*
|
||||
language numbers for language specific codes
|
||||
see https://wiki.openoffice.org/w/index.php?title=Languages&oldid=230199
|
||||
*/
|
||||
|
||||
// Numeric identifiers for languages with language-specific handling.
// All values are written in decimal (no leading zeros, which would read as
// octal literals).
enum {
  LANG_ar = 96,
  LANG_az = 100,  // custom number
  LANG_bg = 41,
  LANG_ca = 37,
  LANG_cs = 42,
  LANG_da = 45,
  LANG_de = 49,
  LANG_el = 30,
  LANG_en = 1,
  LANG_es = 34,
  LANG_eu = 10,
  LANG_fr = 2,
  LANG_gl = 38,
  LANG_hr = 78,
  LANG_hu = 36,
  LANG_it = 39,
  LANG_la = 99,   // custom number
  LANG_lv = 101,  // custom number
  LANG_nl = 31,
  LANG_pl = 48,
  LANG_pt = 3,
  LANG_ru = 7,
  LANG_sv = 50,
  LANG_tr = 90,
  LANG_uk = 80,
  LANG_xx = 999
};
|
||||
|
||||
#endif
|
270
3rdparty/hunspell/src/hunspell/phonet.cxx
vendored
270
3rdparty/hunspell/src/hunspell/phonet.cxx
vendored
@ -1,270 +0,0 @@
|
||||
/* phonetic.c - generic replacement algorithms for phonetic transformation
|
||||
Copyright (C) 2000 Bjoern Jacke
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License version 2.1 as published by the Free Software Foundation;
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; If not, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
Changelog:
|
||||
|
||||
2000-01-05 Bjoern Jacke <bjoern at j3e.de>
|
||||
Initial Release inspired by the article about phonetic
|
||||
transformations out of c't 25/1999
|
||||
|
||||
2007-07-26 Bjoern Jacke <bjoern at j3e.de>
|
||||
Released under MPL/GPL/LGPL tri-license for Hunspell
|
||||
|
||||
2007-08-23 Laszlo Nemeth <nemeth at OOo>
|
||||
Porting from Aspell to Hunspell using C-like structs
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "csutil.hxx"
|
||||
#include "phonet.hxx"
|
||||
|
||||
// Build the first-byte lookup table for a phonetic rule set.
//
// After the call, parms.hash[b] holds the index of the first rule whose
// pattern starts with byte b, or -1 when no rule starts with that byte.
// Rules are visited two entries at a time (the matcher reads the entry after
// a pattern as its replacement); the scan stops at the first pattern whose
// first character is NUL.
void init_phonet_hash(phonetable& parms) {
  // Start with every bucket marked as "no rule".
  int slot = 0;
  while (slot < HASHSIZE)
    parms.hash[slot++] = -1;

  for (int rule = 0; parms.rules[rule][0] != '\0'; rule += 2) {
    const int bucket = (unsigned char)parms.rules[rule][0];

    // Only the first rule for a given leading byte is recorded.
    if (parms.hash[bucket] >= 0)
      continue;
    parms.hash[bucket] = rule;
  }
}
|
||||
|
||||
// Copy the NUL-terminated string at src to dest, front to back. Unlike
// strcpy, overlapping buffers are acceptable, provided dest lies before src.
static inline void strmove(char* dest, char* src) {
  for (; *src != '\0'; ++src, ++dest)
    *dest = *src;
  *dest = '\0';
}
|
||||
|
||||
// Letter test used by the rule matcher: every byte >= 128 counts as a letter
// (returns 1); lower values are classified by isalpha().
static int myisalpha(char ch) {
  const unsigned char byte = (unsigned char)ch;
  return (byte >= 128) ? 1 : isalpha(ch);
}
|
||||
|
||||
/* Do phonetic transformation. */
|
||||
/* phonetic transcription algorithm */
|
||||
/* see: http://aspell.net/man-html/Phonetic-Code.html */
|
||||
/* convert string to uppercase before this call */
|
||||
std::string phonet(const std::string& inword, phonetable& parms) {
|
||||
|
||||
int i, k = 0, p, z;
|
||||
int k0, n0, p0 = -333;
|
||||
char c;
|
||||
typedef unsigned char uchar;
|
||||
|
||||
size_t len = inword.size();
|
||||
if (len > MAXPHONETUTF8LEN)
|
||||
return std::string();
|
||||
char word[MAXPHONETUTF8LEN + 1];
|
||||
strncpy(word, inword.c_str(), MAXPHONETUTF8LEN);
|
||||
word[MAXPHONETUTF8LEN] = '\0';
|
||||
|
||||
std::string target;
|
||||
/** check word **/
|
||||
i = z = 0;
|
||||
while ((c = word[i]) != '\0') {
|
||||
int n = parms.hash[(uchar)c];
|
||||
int z0 = 0;
|
||||
|
||||
if (n >= 0 && !parms.rules[n].empty()) {
|
||||
/** check all rules for the same letter **/
|
||||
while (parms.rules[n][0] == c) {
|
||||
/** check whole string **/
|
||||
k = 1; /** number of found letters **/
|
||||
p = 5; /** default priority **/
|
||||
const char*s = parms.rules[n].c_str();
|
||||
s++; /** important for (see below) "*(s-1)" **/
|
||||
|
||||
while (*s != '\0' && word[i + k] == *s && !isdigit((unsigned char)*s) &&
|
||||
strchr("(-<^$", *s) == NULL) {
|
||||
k++;
|
||||
s++;
|
||||
}
|
||||
if (*s == '(') {
|
||||
/** check letters in "(..)" **/
|
||||
if (myisalpha(word[i + k]) // ...could be implied?
|
||||
&& strchr(s + 1, word[i + k]) != NULL) {
|
||||
k++;
|
||||
while (*s != ')')
|
||||
s++;
|
||||
s++;
|
||||
}
|
||||
}
|
||||
p0 = (int)*s;
|
||||
k0 = k;
|
||||
while (*s == '-' && k > 1) {
|
||||
k--;
|
||||
s++;
|
||||
}
|
||||
if (*s == '<')
|
||||
s++;
|
||||
if (isdigit((unsigned char)*s)) {
|
||||
/** determine priority **/
|
||||
p = *s - '0';
|
||||
s++;
|
||||
}
|
||||
if (*s == '^' && *(s + 1) == '^')
|
||||
s++;
|
||||
|
||||
if (*s == '\0' || (*s == '^' && (i == 0 || !myisalpha(word[i - 1])) &&
|
||||
(*(s + 1) != '$' || (!myisalpha(word[i + k0])))) ||
|
||||
(*s == '$' && i > 0 && myisalpha(word[i - 1]) &&
|
||||
(!myisalpha(word[i + k0])))) {
|
||||
/** search for followup rules, if: **/
|
||||
/** parms.followup and k > 1 and NO '-' in searchstring **/
|
||||
char c0 = word[i + k - 1];
|
||||
n0 = parms.hash[(uchar)c0];
|
||||
|
||||
// if (parms.followup && k > 1 && n0 >= 0
|
||||
if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0' && !parms.rules[n0].empty()) {
|
||||
/** test follow-up rule for "word[i+k]" **/
|
||||
while (parms.rules[n0][0] == c0) {
|
||||
/** check whole string **/
|
||||
k0 = k;
|
||||
p0 = 5;
|
||||
s = parms.rules[n0].c_str();
|
||||
s++;
|
||||
while (*s != '\0' && word[i + k0] == *s &&
|
||||
!isdigit((unsigned char)*s) &&
|
||||
strchr("(-<^$", *s) == NULL) {
|
||||
k0++;
|
||||
s++;
|
||||
}
|
||||
if (*s == '(') {
|
||||
/** check letters **/
|
||||
if (myisalpha(word[i + k0]) &&
|
||||
strchr(s + 1, word[i + k0]) != NULL) {
|
||||
k0++;
|
||||
while (*s != ')' && *s != '\0')
|
||||
s++;
|
||||
if (*s == ')')
|
||||
s++;
|
||||
}
|
||||
}
|
||||
while (*s == '-') {
|
||||
/** "k0" gets NOT reduced **/
|
||||
/** because "if (k0 == k)" **/
|
||||
s++;
|
||||
}
|
||||
if (*s == '<')
|
||||
s++;
|
||||
if (isdigit((unsigned char)*s)) {
|
||||
p0 = *s - '0';
|
||||
s++;
|
||||
}
|
||||
|
||||
if (*s == '\0'
|
||||
/** *s == '^' cuts **/
|
||||
|| (*s == '$' && !myisalpha(word[i + k0]))) {
|
||||
if (k0 == k) {
|
||||
/** this is just a piece of the string **/
|
||||
n0 += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (p0 < p) {
|
||||
/** priority too low **/
|
||||
n0 += 2;
|
||||
continue;
|
||||
}
|
||||
/** rule fits; stop search **/
|
||||
break;
|
||||
}
|
||||
n0 += 2;
|
||||
} /** End of "while (parms.rules[n0][0] == c0)" **/
|
||||
|
||||
if (p0 >= p && parms.rules[n0][0] == c0) {
|
||||
n += 2;
|
||||
continue;
|
||||
}
|
||||
} /** end of follow-up stuff **/
|
||||
|
||||
/** replace string **/
|
||||
s = parms.rules[n + 1].c_str();
|
||||
p0 = (!parms.rules[n].empty() &&
|
||||
strchr(parms.rules[n].c_str() + 1, '<') != NULL)
|
||||
? 1
|
||||
: 0;
|
||||
if (p0 == 1 && z == 0) {
|
||||
/** rule with '<' is used **/
|
||||
if (!target.empty() && *s != '\0' &&
|
||||
(target[target.size()-1] == c || target[target.size()-1] == *s)) {
|
||||
target.erase(target.size() - 1);
|
||||
}
|
||||
z0 = 1;
|
||||
z = 1;
|
||||
k0 = 0;
|
||||
while (*s != '\0' && word[i + k0] != '\0') {
|
||||
word[i + k0] = *s;
|
||||
k0++;
|
||||
s++;
|
||||
}
|
||||
if (k > k0)
|
||||
strmove(&word[0] + i + k0, &word[0] + i + k);
|
||||
|
||||
/** new "actual letter" **/
|
||||
c = word[i];
|
||||
} else { /** no '<' rule used **/
|
||||
i += k - 1;
|
||||
z = 0;
|
||||
while (*s != '\0' && *(s + 1) != '\0' && target.size() < len) {
|
||||
if (target.empty() || target[target.size()-1] != *s) {
|
||||
target.push_back(*s);
|
||||
}
|
||||
s++;
|
||||
}
|
||||
/** new "actual letter" **/
|
||||
c = *s;
|
||||
if (!parms.rules[n].empty() &&
|
||||
strstr(parms.rules[n].c_str() + 1, "^^") != NULL) {
|
||||
if (c != '\0') {
|
||||
target.push_back(c);
|
||||
}
|
||||
strmove(&word[0], &word[0] + i + 1);
|
||||
i = 0;
|
||||
z0 = 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
} /** end of follow-up stuff **/
|
||||
n += 2;
|
||||
} /** end of while (parms.rules[n][0] == c) **/
|
||||
} /** end of if (n >= 0) **/
|
||||
if (z0 == 0) {
|
||||
if (k && !p0 && target.size() < len && c != '\0') {
|
||||
/** condense only double letters **/
|
||||
target.push_back(c);
|
||||
/// printf("\n setting \n");
|
||||
}
|
||||
|
||||
i++;
|
||||
z = 0;
|
||||
k = 0;
|
||||
}
|
||||
} /** end of while ((c = word[i]) != '\0') **/
|
||||
|
||||
return target;
|
||||
} /** end of function "phonet" **/
|
50
3rdparty/hunspell/src/hunspell/phonet.hxx
vendored
50
3rdparty/hunspell/src/hunspell/phonet.hxx
vendored
@ -1,50 +0,0 @@
|
||||
/* phonetic.c - generic replacement algorithms for phonetic transformation
|
||||
Copyright (C) 2000 Bjoern Jacke
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License version 2.1 as published by the Free Software Foundation;
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; If not, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
Changelog:
|
||||
|
||||
2000-01-05 Bjoern Jacke <bjoern at j3e.de>
|
||||
Initial Release inspired by the article about phonetic
|
||||
transformations out of c't 25/1999
|
||||
|
||||
2007-07-26 Bjoern Jacke <bjoern at j3e.de>
|
||||
Released under MPL/GPL/LGPL tri-license for Hunspell
|
||||
|
||||
2007-08-23 Laszlo Nemeth <nemeth at OOo>
|
||||
Porting from Aspell to Hunspell using C-like structs
|
||||
*/
|
||||
|
||||
#ifndef PHONET_HXX_
|
||||
#define PHONET_HXX_
|
||||
|
||||
#define HASHSIZE 256
|
||||
#define MAXPHONETLEN 256
|
||||
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
// Rule table consumed by init_phonet_hash() and phonet().
struct phonetable {
|
||||
// NOTE(review): not read by the visible phonet code; presumably flags a UTF-8
// encoded rule set - confirm against the code that fills this struct.
char utf8;
|
||||
// Rule entries in pairs: phonet() matches against rules[n] and takes
// rules[n + 1] as the replacement. Scans stop at an entry whose first
// character is NUL.
std::vector<std::string> rules;
|
||||
// hash[b]: index of the first rule whose pattern starts with byte b,
// or -1 if there is none (filled by init_phonet_hash()).
int hash[HASHSIZE];
|
||||
};
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable& parms);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string phonet(const std::string& inword,
|
||||
phonetable& phone);
|
||||
|
||||
#endif
|
196
3rdparty/hunspell/src/hunspell/replist.cxx
vendored
196
3rdparty/hunspell/src/hunspell/replist.cxx
vendored
@ -1,196 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <limits>
|
||||
|
||||
#include "replist.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
// Create an empty list with room for up to n entries. If the allocation
// fails the capacity is recorded as 0.
RepList::RepList(int n) {
  pos = 0;
  dat = (replentry**)malloc(sizeof(replentry*) * n);
  size = (dat != 0) ? n : 0;
}
|
||||
|
||||
// Destroy the stored entries (the first `pos` slots), then release the
// pointer array itself.
RepList::~RepList() {
  int idx = 0;
  while (idx < pos) {
    delete dat[idx];
    ++idx;
  }
  free(dat);
}
|
||||
|
||||
// Return the entry stored at index n. No range check is performed.
replentry* RepList::item(int n) {
  replentry* entry = dat[n];
  return entry;
}
|
||||
|
||||
int RepList::find(const char* word) {
|
||||
int p1 = 0;
|
||||
int p2 = pos - 1;
|
||||
int ret = -1;
|
||||
while (p1 <= p2) {
|
||||
int m = ((unsigned)p1 + (unsigned)p2) >> 1;
|
||||
int c = strncmp(word, dat[m]->pattern.c_str(), dat[m]->pattern.size());
|
||||
if (c < 0)
|
||||
p2 = m - 1;
|
||||
else if (c > 0)
|
||||
p1 = m + 1;
|
||||
else { // scan in the right half for a longer match
|
||||
ret = m;
|
||||
p1 = m + 1;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string RepList::replace(const char* word, int ind, bool atstart) {
|
||||
int type = atstart ? 1 : 0;
|
||||
if (ind < 0)
|
||||
return std::string();
|
||||
if (strlen(word) == dat[ind]->pattern.size())
|
||||
type = atstart ? 3 : 2;
|
||||
while (type && dat[ind]->outstrings[type].empty())
|
||||
type = (type == 2 && !atstart) ? 0 : type - 1;
|
||||
return dat[ind]->outstrings[type];
|
||||
}
|
||||
|
||||
int RepList::add(const std::string& in_pat1, const std::string& pat2) {
|
||||
if (pos >= size || in_pat1.empty() || pat2.empty()) {
|
||||
return 1;
|
||||
}
|
||||
// analyse word context
|
||||
int type = 0;
|
||||
std::string pat1(in_pat1);
|
||||
if (pat1[0] == '_') {
|
||||
pat1.erase(0, 1);
|
||||
type = 1;
|
||||
}
|
||||
if (!pat1.empty() && pat1[pat1.size() - 1] == '_') {
|
||||
type = type + 2;
|
||||
pat1.erase(pat1.size() - 1);
|
||||
}
|
||||
mystrrep(pat1, "_", " ");
|
||||
|
||||
// find existing entry
|
||||
int m = find(pat1.c_str());
|
||||
if (m >= 0 && dat[m]->pattern == pat1) {
|
||||
// since already used
|
||||
dat[m]->outstrings[type] = pat2;
|
||||
mystrrep(dat[m]->outstrings[type], "_", " ");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// make a new entry if none exists
|
||||
replentry* r = new replentry;
|
||||
if (r == NULL)
|
||||
return 1;
|
||||
r->pattern = pat1;
|
||||
r->outstrings[type] = pat2;
|
||||
mystrrep(r->outstrings[type], "_", " ");
|
||||
dat[pos++] = r;
|
||||
// sort to the right place in the list
|
||||
int i;
|
||||
for (i = pos - 1; i > 0; i--) {
|
||||
if (strcmp(r->pattern.c_str(), dat[i - 1]->pattern.c_str()) < 0) {
|
||||
dat[i] = dat[i - 1];
|
||||
} else
|
||||
break;
|
||||
}
|
||||
dat[i] = r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool RepList::conv(const std::string& in_word, std::string& dest) {
|
||||
dest.clear();
|
||||
|
||||
size_t wordlen = in_word.size();
|
||||
const char* word = in_word.c_str();
|
||||
|
||||
bool change = false;
|
||||
for (size_t i = 0; i < wordlen; ++i) {
|
||||
int n = find(word + i);
|
||||
std::string l = replace(word + i, n, i == 0);
|
||||
if (!l.empty()) {
|
||||
dest.append(l);
|
||||
i += dat[n]->pattern.size() - 1;
|
||||
change = true;
|
||||
} else {
|
||||
dest.push_back(word[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return change;
|
||||
}
|
||||
|
100
3rdparty/hunspell/src/hunspell/replist.hxx
vendored
100
3rdparty/hunspell/src/hunspell/replist.hxx
vendored
@ -1,100 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* string replacement list class */
|
||||
#ifndef REPLIST_HXX_
|
||||
#define REPLIST_HXX_
|
||||
|
||||
#include "w_char.hxx"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Sorted list of string replacement entries (pattern plus per-context
// replacement strings).
class RepList {
|
||||
private:
|
||||
// Copy operations are declared private (presumably to forbid copying; no
// definition is visible here).
RepList(const RepList&);
|
||||
RepList& operator=(const RepList&);
|
||||
|
||||
protected:
|
||||
// Array of entry pointers allocated in the constructor; add() keeps it
// sorted by pattern.
replentry** dat;
|
||||
// Capacity of `dat` (0 if the allocation failed).
int size;
|
||||
// Number of entries currently stored.
int pos;
|
||||
|
||||
public:
|
||||
explicit RepList(int n);
|
||||
~RepList();
|
||||
|
||||
// Adds or updates the replacement for a pattern; returns 0 on success,
// 1 on failure.
int add(const std::string& pat1, const std::string& pat2);
|
||||
// Returns the entry at index n (no range check).
replentry* item(int n);
|
||||
// Returns the index of an entry whose pattern is a prefix of word, or -1.
int find(const char* word);
|
||||
// Returns the replacement of entry n that fits the match context, or an
// empty string when n is negative.
std::string replace(const char* word, int n, bool atstart);
|
||||
// Converts word into dest; returns true if anything was replaced.
bool conv(const std::string& word, std::string& dest);
|
||||
};
|
||||
#endif
|
2159
3rdparty/hunspell/src/hunspell/suggestmgr.cxx
vendored
2159
3rdparty/hunspell/src/hunspell/suggestmgr.cxx
vendored
File diff suppressed because it is too large
Load Diff
188
3rdparty/hunspell/src/hunspell/suggestmgr.hxx
vendored
188
3rdparty/hunspell/src/hunspell/suggestmgr.hxx
vendored
@ -1,188 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SUGGESTMGR_HXX_
|
||||
#define SUGGESTMGR_HXX_
|
||||
|
||||
#define MAX_ROOTS 100
|
||||
#define MAX_WORDS 100
|
||||
#define MAX_GUESS 200
|
||||
#define MAXNGRAMSUGS 4
|
||||
#define MAXPHONSUGS 2
|
||||
#define MAXCOMPOUNDSUGS 3
|
||||
|
||||
// timelimit: max ~1/4 sec (process time on Linux) for a time consuming function
|
||||
#define TIMELIMIT (CLOCKS_PER_SEC >> 2)
|
||||
#define MINTIMER 100
|
||||
#define MAXPLUSTIMER 100
|
||||
|
||||
#define NGRAM_LONGER_WORSE (1 << 0)
|
||||
#define NGRAM_ANY_MISMATCH (1 << 1)
|
||||
#define NGRAM_LOWERING (1 << 2)
|
||||
#define NGRAM_WEIGHTED (1 << 3)
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
#include "hashmgr.hxx"
|
||||
#include "langnum.hxx"
|
||||
#include <time.h>
|
||||
|
||||
enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
|
||||
|
||||
class SuggestMgr {
|
||||
private:
|
||||
SuggestMgr(const SuggestMgr&);
|
||||
SuggestMgr& operator=(const SuggestMgr&);
|
||||
|
||||
private:
|
||||
char* ckey;
|
||||
size_t ckeyl;
|
||||
std::vector<w_char> ckey_utf;
|
||||
|
||||
char* ctry;
|
||||
size_t ctryl;
|
||||
std::vector<w_char> ctry_utf;
|
||||
|
||||
AffixMgr* pAMgr;
|
||||
unsigned int maxSug;
|
||||
struct cs_info* csconv;
|
||||
int utf8;
|
||||
int langnum;
|
||||
int nosplitsugs;
|
||||
int maxngramsugs;
|
||||
int maxcpdsugs;
|
||||
int complexprefixes;
|
||||
|
||||
public:
|
||||
SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr);
|
||||
~SuggestMgr();
|
||||
|
||||
void suggest(std::vector<std::string>& slst, const char* word, int* onlycmpdsug);
|
||||
void ngsuggest(std::vector<std::string>& slst, const char* word, const std::vector<HashMgr*>& rHMgr);
|
||||
|
||||
std::string suggest_morph(const std::string& word);
|
||||
std::string suggest_gen(const std::vector<std::string>& pl, const std::string& pattern);
|
||||
|
||||
private:
|
||||
void testsug(std::vector<std::string>& wlst,
|
||||
const std::string& candidate,
|
||||
int cpdsuggest,
|
||||
int* timer,
|
||||
clock_t* timelimit);
|
||||
int checkword(const std::string& word, int, int*, clock_t*);
|
||||
int check_forbidden(const char*, int);
|
||||
|
||||
void capchars(std::vector<std::string>&, const char*, int);
|
||||
int replchars(std::vector<std::string>&, const char*, int);
|
||||
int doubletwochars(std::vector<std::string>&, const char*, int);
|
||||
int forgotchar(std::vector<std::string>&, const char*, int);
|
||||
int swapchar(std::vector<std::string>&, const char*, int);
|
||||
int longswapchar(std::vector<std::string>&, const char*, int);
|
||||
int movechar(std::vector<std::string>&, const char*, int);
|
||||
int extrachar(std::vector<std::string>&, const char*, int);
|
||||
int badcharkey(std::vector<std::string>&, const char*, int);
|
||||
int badchar(std::vector<std::string>&, const char*, int);
|
||||
int twowords(std::vector<std::string>&, const char*, int);
|
||||
|
||||
void capchars_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int doubletwochars_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int forgotchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int extrachar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int badcharkey_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int badchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int swapchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int longswapchar_utf(std::vector<std::string>&, const w_char*, int, int);
|
||||
int movechar_utf(std::vector<std::string>&, const w_char*, int, int);
|
||||
|
||||
int mapchars(std::vector<std::string>&, const char*, int);
|
||||
int map_related(const char*,
|
||||
std::string&,
|
||||
int,
|
||||
std::vector<std::string>& wlst,
|
||||
int,
|
||||
const std::vector<mapentry>&,
|
||||
int*,
|
||||
clock_t*);
|
||||
int ngram(int n, const std::vector<w_char>& su1,
|
||||
const std::vector<w_char>& su2, int opt);
|
||||
int ngram(int n, const std::string& s1, const std::string& s2, int opt);
|
||||
int mystrlen(const char* word);
|
||||
int leftcommonsubstring(const std::vector<w_char>& su1,
|
||||
const std::vector<w_char>& su2);
|
||||
int leftcommonsubstring(const char* s1, const char* s2);
|
||||
int commoncharacterpositions(const char* s1, const char* s2, int* is_swap);
|
||||
void bubblesort(char** rwd, char** rwd2, int* rsc, int n);
|
||||
void lcs(const char* s, const char* s2, int* l1, int* l2, char** result);
|
||||
int lcslen(const char* s, const char* s2);
|
||||
int lcslen(const std::string& s, const std::string& s2);
|
||||
std::string suggest_hentry_gen(hentry* rv, const char* pattern);
|
||||
};
|
||||
|
||||
#endif
|
9876
3rdparty/hunspell/src/hunspell/utf_info.cxx
vendored
9876
3rdparty/hunspell/src/hunspell/utf_info.cxx
vendored
File diff suppressed because it is too large
Load Diff
72
3rdparty/hunspell/src/hunspell/w_char.hxx
vendored
72
3rdparty/hunspell/src/hunspell/w_char.hxx
vendored
@ -1,72 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef W_CHAR_HXX_
|
||||
#define W_CHAR_HXX_
|
||||
|
||||
#include <string>
|
||||
|
||||
// A 16-bit character stored as two separate bytes: `l` is the low byte and
// `h` the high byte (ordering below compares (h << 8) + l).
// When GCC is defined the struct is declared packed.
#ifndef GCC
struct w_char {
#else
struct __attribute__((packed)) w_char {
#endif
  unsigned char l;
  unsigned char h;

  // Orders two characters by their combined 16-bit value.
  friend bool operator<(const w_char a, const w_char b) {
    const unsigned short lhs = (a.h << 8) + a.l;
    const unsigned short rhs = (b.h << 8) + b.l;
    return lhs < rhs;
  }

  // Two characters are equal when both bytes match.
  friend bool operator==(const w_char a, const w_char b) {
    return a.l == b.l && a.h == b.h;
  }

  friend bool operator!=(const w_char a, const w_char b) {
    return !(a == b);
  }
};
|
||||
|
||||
// One replacement entry: a pattern string plus four output strings.
|
||||
struct replentry {
|
||||
std::string pattern;
|
||||
// Indexed in the order med, ini, fin, isol.
// NOTE(review): presumably medial / initial / final / isolated match
// positions -- confirm against the code that fills this table.
std::string outstrings[4]; // med, ini, fin, isol
|
||||
};
|
||||
|
||||
#endif
|
@ -1,6 +0,0 @@
|
||||
BasedOnStyle: LLVM
|
||||
IndentWidth: 8
|
||||
UseTab: ForIndentation
|
||||
BreakBeforeBraces: Stroustrup
|
||||
PointerAlignment: Left
|
||||
AlwaysBreakTemplateDeclarations: true
|
378
3rdparty/hunspell/src/hunspell2/aff_manager.cxx
vendored
378
3rdparty/hunspell/src/hunspell2/aff_manager.cxx
vendored
@ -1,378 +0,0 @@
|
||||
/* Copyright 2016-2017 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Hunspell-2.
|
||||
*
|
||||
* Hunspell-2 is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Hunspell-2 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Hunspell-2. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Hunspell 2 is based on Hunspell v1 and MySpell.
|
||||
* Hunspell v1 is Copyright (C) 2002-2017 Németh László
|
||||
* MySpell is Copyright (C) 2002 Kevin Hendricks.
|
||||
*/
|
||||
|
||||
#include "aff_manager.hxx"
|
||||
|
||||
#include "string_utils.hxx"
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace hunspell {
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace {
|
||||
|
||||
// Handles one line of a counted list command.
//
// The first line seen for `command` carries the entry count; it is read from
// `in` and stored in `counts` (0 when the read fails or the value is
// negative). Each later line is parsed into a fresh element of `vec` by
// `parseLineFunc(in, element)`; the element is discarded again if the stream
// is in a failed state afterwards. The remaining count is decremented for
// every such line, parsed successfully or not. Once the count reaches zero,
// further lines only produce a warning on stderr.
template <class T, class Func>
auto parse_vector_of_T(std::istream& in, const std::string& command,
		       std::unordered_map<std::string, int>& counts,
		       std::vector<T>& vec, Func parseLineFunc) -> void
{
	auto entry = counts.find(command);

	// Header line: remember how many entries are announced.
	if (entry == counts.end()) {
		int announced = 0;
		in >> announced;
		counts[command] = (in && announced >= 0) ? announced : 0;
		return;
	}

	// All announced entries were already consumed.
	if (!entry->second) {
		std::cerr << "Hunspell warning: extra entries of " << command
			  << '\n';
		return;
	}

	// Regular entry line.
	vec.emplace_back();
	parseLineFunc(in, vec.back());
	if (in.fail()) {
		vec.pop_back();
	}
	--entry->second;
}
|
||||
|
||||
// Expects that there are flags in the stream.
|
||||
// If there are no flags in the stream (eg, stream is at eof)
|
||||
// or if the format of the flags is incorrect the stream failbit will be set.
|
||||
auto decode_flags(std::istream& in, flag_type_t t, utf8_to_ucs2_converter& cv)
|
||||
-> u16string
|
||||
{
|
||||
string s;
|
||||
u16string ret;
|
||||
// utf8 to ucs-2 converter. flags can be only in BPM
|
||||
// wstring_convert<codecvt_utf8<char16_t>,char16_t> cv;
|
||||
switch (t) {
|
||||
case single_char_flag:
|
||||
in >> s;
|
||||
ret.resize(s.size());
|
||||
transform(s.begin(), s.end(), ret.begin(),
|
||||
cast_lambda<unsigned char>());
|
||||
break;
|
||||
case double_char_flag: {
|
||||
in >> s;
|
||||
auto i = s.begin();
|
||||
auto e = s.end();
|
||||
if (s.size() & 1) {
|
||||
--e;
|
||||
}
|
||||
for (; i != e; i += 2) {
|
||||
char16_t c1 = (unsigned char)*i;
|
||||
char16_t c2 = (unsigned char)*(i + 1);
|
||||
ret.push_back((c1 << 8) | c2);
|
||||
}
|
||||
if (i != s.end()) {
|
||||
ret.push_back((unsigned char)*i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case number_flag:
|
||||
unsigned short flag;
|
||||
if (in >> flag) {
|
||||
ret.push_back(flag);
|
||||
}
|
||||
else {
|
||||
// err no flag at all
|
||||
cerr << "Hunspell error: missing flag\n";
|
||||
break;
|
||||
}
|
||||
// peek can set failbit
|
||||
while (in.good() && in.peek() == ',') {
|
||||
in.get();
|
||||
if (in >> flag) {
|
||||
ret.push_back(flag);
|
||||
}
|
||||
else {
|
||||
// err, comma and no number after that
|
||||
cerr << "Hunspell error: long flag, no number "
|
||||
"after comma\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
case utf8_flag:
|
||||
ret = cv.from_bytes(s);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Parses the remainder of one PFX/SFX line (the command word itself has
// already been consumed from `ss`).
//
// The affix flag is decoded first and its two bytes are appended to
// `command`, so `cmd_affix` is keyed per command+flag. The first line seen
// for a key is the header ("<Y|N> <count>") and records the cross-product
// setting and the number of entries to expect. Every later line is one affix
// entry appended to `vec`; the entry is dropped again if the stream has
// failed after the condition field is read. Lines beyond the announced count
// only produce a warning. A flag that decodes to 0 is treated as an error
// and the line is ignored.
auto parse_affix(istream& ss, string& command, vector<aff_data::affix>& vec,
		 unordered_map<string, pair<bool, int>>& cmd_affix,
		 utf8_to_ucs2_converter& cv, aff_data& thiss) -> void
{
	char16_t f = thiss.decode_single_flag(ss, cv);
	if (f == 0) {
		// err
		return;
	}
	char f1 = f & 0xff;
	char f2 = (f >> 8) & 0xff;
	command.push_back(f1);
	command.push_back(f2);
	auto dat = cmd_affix.find(command);
	// note: the current affix parser does not allow the same flag
	// to be used once with cross product and again without
	// one flag is tied to one cross product value
	if (dat == cmd_affix.end()) {
		// Initialised so that a failed extraction never leads to
		// reading an indeterminate value below.
		char cross_char = 'N'; // 'Y' or 'N'
		int cnt = 0;
		ss >> cross_char >> cnt;
		bool cross = cross_char == 'Y';
		if (!ss || cnt < 0) {
			cnt = 0; // err
		}
		cmd_affix[command] = make_pair(cross, cnt);
	}
	else if (dat->second.second) {
		vec.emplace_back();
		auto& elem = vec.back();
		elem.flag = f;
		elem.cross_product = dat->second.first;
		ss >> elem.stripping;
		if (read_to_slash_or_space(ss, elem.affix)) {
			elem.new_flags = thiss.decode_flags(ss, cv);
		}
		ss >> elem.condition;
		if (ss.fail()) {
			vec.pop_back();
		}
		else {
			parse_morhological_fields(ss,
						  elem.morphological_fields);
		}
		dat->second.second--;
	}
	else {
		// substr(0, 3) strips the two flag bytes appended above
		cerr << "Hunspell warning: extra entries of "
		     << command.substr(0, 3) << '\n';
	}
}
|
||||
}
|
||||
|
||||
// Decodes the next flag token of `in` using this object's `flag_type`.
// Forwards to the namespace-level decode_flags; the qualified name selects
// that free function rather than this member.
auto aff_data::decode_flags(istream& in, utf8_to_ucs2_converter& cv) const
    -> u16string
{
	u16string decoded = hunspell::decode_flags(in, flag_type, cv);
	return decoded;
}
|
||||
|
||||
// Decodes the next flag token of `in` and returns only its first flag,
// or 0 when no flag could be decoded.
auto aff_data::decode_single_flag(istream& in, utf8_to_ucs2_converter& cv) const
    -> char16_t
{
	const u16string decoded = decode_flags(in, cv);
	return decoded.empty() ? char16_t(0) : decoded.front();
}
|
||||
|
||||
auto aff_data::parse(std::istream& in) -> bool
|
||||
{
|
||||
unordered_map<string, string*> command_strings = {
|
||||
{"SET", &encoding}, {"LANG", &language_code},
|
||||
{"IGNORE", &ignore_chars},
|
||||
|
||||
{"KEY", &keyboard_layout}, {"TRY", &try_chars},
|
||||
|
||||
{"WORDCHARS", &wordchars}};
|
||||
|
||||
unordered_map<string, bool*> command_bools = {
|
||||
{"COMPLEXPREFIXES", &complex_prefixes},
|
||||
|
||||
{"ONLYMAXDIFF", &only_max_diff},
|
||||
{"NOSPLITSUGS", &no_split_suggestions},
|
||||
{"SUGSWITHDOTS", &suggest_with_dots},
|
||||
{"FORBIDWARN", &forbid_warn},
|
||||
|
||||
{"COMPOUNDMORESUFFIXES", &compound_more_suffixes},
|
||||
{"CHECKCOMPOUNDDUP", &compound_check_up},
|
||||
{"CHECKCOMPOUNDREP", &compound_check_rep},
|
||||
{"CHECKCOMPOUNDCASE", &compound_check_case},
|
||||
{"CHECKCOMPOUNDTRIPLE", &compound_check_triple},
|
||||
{"SIMPLIFIEDTRIPLE", &compound_simplified_triple},
|
||||
|
||||
{"FULLSTRIP", &fullstrip},
|
||||
{"CHECKSHARPS", &checksharps}};
|
||||
|
||||
unordered_map<string, vector<string>*> command_vec_str = {
|
||||
{"BREAK", &break_patterns},
|
||||
{"MAP", &map_related_chars}, // maybe add special parsing code
|
||||
{"COMPOUNDRULE", &compound_rules}};
|
||||
|
||||
unordered_map<string, short*> command_shorts = {
|
||||
{"MAXCPDSUGS", &max_compound_suggestions},
|
||||
{"MAXNGRAMSUGS", &max_ngram_suggestions},
|
||||
{"MAXDIFF", &max_diff_factor},
|
||||
|
||||
{"COMPOUNDMIN", &compoud_minimum},
|
||||
{"COMPOUNDWORDMAX", &compound_word_max}};
|
||||
|
||||
unordered_map<string, vector<pair<string, string>>*> command_vec_pair =
|
||||
{{"REP", &replacements},
|
||||
{"PHONE", &phonetic_replacements},
|
||||
{"ICONV", &input_conversion},
|
||||
{"OCONV", &output_conversion}};
|
||||
|
||||
unordered_map<string, char16_t*> command_flag = {
|
||||
{"NOSUGGEST", &nosuggest_flag},
|
||||
{"WARN", &warn_flag},
|
||||
|
||||
{"COMPOUNDFLAG", &compound_flag},
|
||||
{"COMPOUNDBEGIN", &compound_begin_flag},
|
||||
{"COMPOUNDLAST", &compound_last_flag},
|
||||
{"COMPOUNDMIDDLE", &compound_middle_flag},
|
||||
{"ONLYINCOMPOUND", &compound_onlyin_flag},
|
||||
{"COMPOUNDPERMITFLAG", &compound_permit_flag},
|
||||
{"COMPOUNDFORBIDFLAG", &compound_forbid_flag},
|
||||
{"COMPOUNDROOT", &compound_root_flag},
|
||||
{"FORCEUCASE", &compound_force_uppercase},
|
||||
|
||||
{"CIRCUMFIX", &circumfix_flag},
|
||||
{"FORBIDDENWORD", &forbiddenword_flag},
|
||||
{"KEEPCASE", &keepcase_flag},
|
||||
{"NEEDAFFIX", &need_affix_flag},
|
||||
{"SUBSTANDARD", &substandard_flag}};
|
||||
|
||||
// keeps count for each vector
|
||||
unordered_map<string, int> cmd_with_vec_cnt;
|
||||
unordered_map<string, pair<bool, int>> cmd_affix;
|
||||
utf8_to_ucs2_converter cv;
|
||||
string line;
|
||||
string command;
|
||||
int line_number = 0;
|
||||
flag_type = single_char_flag;
|
||||
while (getline(in, line)) {
|
||||
line_number++;
|
||||
istringstream ss(line);
|
||||
ss >> ws;
|
||||
if (ss.eof() || ss.peek() == '#') {
|
||||
continue; // skip comment or empty lines
|
||||
}
|
||||
ss >> command;
|
||||
toupper_ascii(command);
|
||||
ss >> ws;
|
||||
if (command == "PFX" || command == "SFX") {
|
||||
auto& vec = command[0] == 'P' ? prefixes : suffixes;
|
||||
parse_affix(ss, command, vec, cmd_affix, cv, *this);
|
||||
}
|
||||
else if (command_strings.count(command)) {
|
||||
auto& str = *command_strings[command];
|
||||
ss >> str;
|
||||
if (&str == &encoding) {
|
||||
toupper_ascii(str);
|
||||
}
|
||||
}
|
||||
else if (command_bools.count(command)) {
|
||||
*command_bools[command] = true;
|
||||
}
|
||||
else if (command_shorts.count(command)) {
|
||||
ss >> *command_shorts[command];
|
||||
}
|
||||
else if (command_flag.count(command)) {
|
||||
*command_flag[command] = decode_single_flag(ss, cv);
|
||||
}
|
||||
else if (command_vec_str.count(command)) {
|
||||
auto& vec = *command_vec_str[command];
|
||||
auto func = [&](istream& in, string& p) { in >> p; };
|
||||
parse_vector_of_T(ss, command, cmd_with_vec_cnt, vec,
|
||||
func);
|
||||
}
|
||||
else if (command_vec_pair.count(command)) {
|
||||
auto& vec = *command_vec_pair[command];
|
||||
auto func = [&](istream& in, pair<string, string>& p) {
|
||||
in >> p.first >> p.second;
|
||||
};
|
||||
parse_vector_of_T(ss, command, cmd_with_vec_cnt, vec,
|
||||
func);
|
||||
}
|
||||
else if (command == "FLAG") {
|
||||
string p;
|
||||
ss >> p;
|
||||
toupper_ascii(p);
|
||||
if (p == "LONG")
|
||||
flag_type = double_char_flag;
|
||||
else if (p == "NUM")
|
||||
flag_type = number_flag;
|
||||
else if (p == "UTF-8")
|
||||
flag_type = utf8_flag;
|
||||
}
|
||||
else if (command == "AF") {
|
||||
auto& vec = flag_aliases;
|
||||
auto func = [&](istream& inn, u16string& p) {
|
||||
p = decode_flags(inn, cv);
|
||||
};
|
||||
parse_vector_of_T(ss, command, cmd_with_vec_cnt, vec,
|
||||
func);
|
||||
}
|
||||
else if (command == "AM") {
|
||||
auto& vec = morphological_aliases;
|
||||
parse_vector_of_T(ss, command, cmd_with_vec_cnt, vec,
|
||||
parse_morhological_fields);
|
||||
}
|
||||
else if (command == "CHECKCOMPOUNDPATTERN") {
|
||||
auto& vec = compound_check_patterns;
|
||||
auto func = [&](istream& in,
|
||||
compound_check_pattern& p) {
|
||||
if (read_to_slash_or_space(in, p.end_chars)) {
|
||||
p.end_flag = decode_single_flag(in, cv);
|
||||
}
|
||||
if (read_to_slash_or_space(in, p.begin_chars)) {
|
||||
p.begin_flag =
|
||||
decode_single_flag(in, cv);
|
||||
}
|
||||
if (in.fail()) {
|
||||
return;
|
||||
}
|
||||
in >> p.replacement;
|
||||
reset_failbit_istream(in);
|
||||
};
|
||||
parse_vector_of_T(ss, command, cmd_with_vec_cnt, vec,
|
||||
func);
|
||||
}
|
||||
else if (command == "COMPOUNDSYLLABLE") {
|
||||
ss >> compound_syllable_max >> compound_syllable_vowels;
|
||||
}
|
||||
else if (command == "SYLLABLENUM") {
|
||||
compound_syllable_num = decode_flags(ss, cv);
|
||||
}
|
||||
if (ss.fail()) {
|
||||
cerr << "Hunspell aff error in line " << line_number
|
||||
<< ": " << line << endl;
|
||||
}
|
||||
}
|
||||
|
||||
return in.eof(); // success if we reached eof
|
||||
}
|
||||
}
|
142
3rdparty/hunspell/src/hunspell2/aff_manager.hxx
vendored
142
3rdparty/hunspell/src/hunspell2/aff_manager.hxx
vendored
@ -1,142 +0,0 @@
|
||||
/* Copyright 2016-2017 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Hunspell-2.
|
||||
*
|
||||
* Hunspell-2 is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Hunspell-2 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Hunspell-2. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Hunspell 2 is based on Hunspell v1 and MySpell.
|
||||
* Hunspell v1 is Copyright (C) 2002-2017 Németh László
|
||||
* MySpell is Copyright (C) 2002 Kevin Hendricks.
|
||||
*/
|
||||
|
||||
#ifndef HUNSPELL_AFF_MANAGER_HXX
|
||||
#define HUNSPELL_AFF_MANAGER_HXX
|
||||
|
||||
#include "string_utils.hxx"
|
||||
#include <istream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace hunspell {
|
||||
|
||||
enum flag_type_t { single_char_flag, double_char_flag, number_flag, utf8_flag };
|
||||
|
||||
struct aff_data {
|
||||
using string = std::string;
|
||||
using u16string = std::u16string;
|
||||
using istream = std::istream;
|
||||
template <class T>
|
||||
using vector = std::vector<T>;
|
||||
template <class T, class U>
|
||||
using pair = std::pair<T, U>;
|
||||
|
||||
string encoding;
|
||||
flag_type_t flag_type;
|
||||
bool complex_prefixes;
|
||||
string language_code;
|
||||
string ignore_chars;
|
||||
vector<u16string> flag_aliases;
|
||||
vector<vector<string>> morphological_aliases;
|
||||
|
||||
// suggestion options
|
||||
string keyboard_layout;
|
||||
string try_chars;
|
||||
char16_t nosuggest_flag;
|
||||
short max_compound_suggestions;
|
||||
short max_ngram_suggestions;
|
||||
short max_diff_factor;
|
||||
bool only_max_diff;
|
||||
bool no_split_suggestions;
|
||||
bool suggest_with_dots;
|
||||
vector<pair<string, string>> replacements;
|
||||
vector<string> map_related_chars;
|
||||
vector<pair<string, string>> phonetic_replacements;
|
||||
char16_t warn_flag;
|
||||
bool forbid_warn;
|
||||
|
||||
// compouding options
|
||||
vector<string> break_patterns;
|
||||
vector<string> compound_rules;
|
||||
short compoud_minimum;
|
||||
char16_t compound_flag;
|
||||
char16_t compound_begin_flag;
|
||||
char16_t compound_last_flag;
|
||||
char16_t compound_middle_flag;
|
||||
char16_t compound_onlyin_flag;
|
||||
char16_t compound_permit_flag;
|
||||
char16_t compound_forbid_flag;
|
||||
bool compound_more_suffixes;
|
||||
char16_t compound_root_flag;
|
||||
short compound_word_max;
|
||||
bool compound_check_up;
|
||||
bool compound_check_rep;
|
||||
bool compound_check_case;
|
||||
bool compound_check_triple;
|
||||
bool compound_simplified_triple;
|
||||
|
||||
struct compound_check_pattern {
|
||||
string end_chars;
|
||||
char16_t end_flag;
|
||||
string begin_chars;
|
||||
char16_t begin_flag;
|
||||
string replacement;
|
||||
};
|
||||
vector<compound_check_pattern> compound_check_patterns;
|
||||
char16_t compound_force_uppercase;
|
||||
short compound_syllable_max;
|
||||
string compound_syllable_vowels;
|
||||
u16string compound_syllable_num;
|
||||
|
||||
// affix creation
|
||||
struct affix {
|
||||
char16_t flag;
|
||||
bool cross_product;
|
||||
string stripping;
|
||||
string affix;
|
||||
u16string new_flags;
|
||||
string condition;
|
||||
vector<string> morphological_fields;
|
||||
};
|
||||
vector<affix> prefixes;
|
||||
vector<affix> suffixes;
|
||||
|
||||
// others
|
||||
char16_t circumfix_flag;
|
||||
char16_t forbiddenword_flag;
|
||||
bool fullstrip;
|
||||
char16_t keepcase_flag;
|
||||
vector<pair<string, string>> input_conversion;
|
||||
vector<pair<string, string>> output_conversion;
|
||||
char16_t need_affix_flag;
|
||||
char16_t substandard_flag;
|
||||
string wordchars;
|
||||
bool checksharps;
|
||||
|
||||
// methods
|
||||
auto parse(std::istream& in) -> bool;
|
||||
|
||||
auto decode_flags(istream& in, utf8_to_ucs2_converter& cv) const
|
||||
-> u16string;
|
||||
|
||||
// u16string decode_flags(istream& in);
|
||||
|
||||
auto decode_single_flag(istream& in, utf8_to_ucs2_converter& cv) const
|
||||
-> char16_t;
|
||||
|
||||
// char16_t decode_single_flag(istream& in);
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
@ -1 +0,0 @@
|
||||
clang-format -style=file -i *.cxx *.hxx
|
100
3rdparty/hunspell/src/hunspell2/dic_manager.cxx
vendored
100
3rdparty/hunspell/src/hunspell2/dic_manager.cxx
vendored
@ -1,100 +0,0 @@
|
||||
/* Copyright 2016-2017 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Hunspell-2.
|
||||
*
|
||||
* Hunspell-2 is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Hunspell-2 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Hunspell-2. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Hunspell 2 is based on Hunspell v1 and MySpell.
|
||||
* Hunspell v1 is Copyright (C) 2002-2017 Németh László
|
||||
* MySpell is Copyright (C) 2002 Kevin Hendricks.
|
||||
*/
|
||||
|
||||
#include "dic_manager.hxx"
|
||||
|
||||
#include "string_utils.hxx"
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
|
||||
namespace hunspell {
|
||||
|
||||
using namespace std;
|
||||
|
||||
auto dic_data::parse(std::istream& in, const aff_data& aff) -> bool
|
||||
{
|
||||
size_t approximate_size;
|
||||
if (in >> approximate_size) {
|
||||
words.reserve(approximate_size);
|
||||
in.ignore(numeric_limits<streamsize>::max(), '\n');
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
string line;
|
||||
string word;
|
||||
string morph;
|
||||
vector<string> morphs;
|
||||
u16string flags;
|
||||
istringstream ss;
|
||||
utf8_to_ucs2_converter cv;
|
||||
while (getline(in, line)) {
|
||||
ss.str(line);
|
||||
ss.clear();
|
||||
word.clear();
|
||||
morph.clear();
|
||||
flags.clear();
|
||||
morphs.clear();
|
||||
if (line.find('/') == line.npos) {
|
||||
// no slash, treat word until first space
|
||||
ss >> word;
|
||||
if (ss.fail()) {
|
||||
// probably all whitespace
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else { // slash found, word untill slash
|
||||
read_to_slash(ss, word);
|
||||
if (ss.fail() || word.empty()) {
|
||||
continue;
|
||||
}
|
||||
if (aff.flag_aliases.empty()) {
|
||||
flags = aff.decode_flags(ss, cv);
|
||||
}
|
||||
else {
|
||||
size_t flag_alias_idx;
|
||||
ss >> flag_alias_idx;
|
||||
if (ss.fail() ||
|
||||
flag_alias_idx > aff.flag_aliases.size()) {
|
||||
continue;
|
||||
}
|
||||
flags = aff.flag_aliases[flag_alias_idx - 1];
|
||||
}
|
||||
}
|
||||
parse_morhological_fields(ss, morphs);
|
||||
words[word].append(flags);
|
||||
if (morphs.size()) {
|
||||
auto& vec = morph_data[word];
|
||||
vec.insert(vec.end(), morphs.begin(), morphs.end());
|
||||
}
|
||||
}
|
||||
for (auto& wd : words) {
|
||||
// sort unique flag vectors
|
||||
auto& vec = wd.second;
|
||||
sort(vec.begin(), vec.end());
|
||||
vec.erase(unique(vec.begin(), vec.end()), vec.end());
|
||||
}
|
||||
return in.eof(); // success if we reached eof
|
||||
}
|
||||
}
|
52
3rdparty/hunspell/src/hunspell2/dic_manager.hxx
vendored
52
3rdparty/hunspell/src/hunspell2/dic_manager.hxx
vendored
@ -1,52 +0,0 @@
|
||||
/* Copyright 2016-2017 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Hunspell-2.
|
||||
*
|
||||
* Hunspell-2 is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Hunspell-2 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Hunspell-2. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Hunspell 2 is based on Hunspell v1 and MySpell.
|
||||
* Hunspell v1 is Copyright (C) 2002-2017 Németh László
|
||||
* MySpell is Copyright (C) 2002 Kevin Hendricks.
|
||||
*/
|
||||
|
||||
#ifndef HUNSPELL_DIC_MANAGER_HXX
|
||||
#define HUNSPELL_DIC_MANAGER_HXX
|
||||
|
||||
#include "aff_manager.hxx"
|
||||
#include <istream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace hunspell {
|
||||
|
||||
// Dictionary contents: the word list with its flags, plus optional
// per-word morphological data. Both maps are filled by parse().
struct dic_data {
|
||||
// Maps each word to its flags (one char16_t per flag).
|
||||
// Efficient for short flag vectors.
|
||||
// For long flag vectors, as in the Korean dictionary,
|
||||
// pointers to the strings in the affix aliases vector should be kept instead;
|
||||
// for now it is left like this.
|
||||
std::unordered_map<std::string, std::u16string> words;
|
||||
|
||||
// Maps each word to its morphological fields.
|
||||
// Kept separate from `words` because morphological data is generally absent.
|
||||
std::unordered_map<std::string, std::vector<std::string>> morph_data;
|
||||
|
||||
// methods
|
||||
// Parses the dictionary stream `in` into the two hash tables above, using
// `aff` to decode flags. Returns true when `in` was read up to end-of-file.
|
||||
auto parse(std::istream& in, const aff_data& aff) -> bool;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
395
3rdparty/hunspell/src/hunspell2/dict_finder.cxx
vendored
395
3rdparty/hunspell/src/hunspell2/dict_finder.cxx
vendored
@ -1,395 +0,0 @@
|
||||
/* Copyright 2016-2017 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Hunspell-2.
|
||||
*
|
||||
* Hunspell-2 is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Hunspell-2 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Hunspell-2. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Hunspell 2 is based on Hunspell v1 and MySpell.
|
||||
* Hunspell v1 is Copyright (C) 2002-2017 Németh László
|
||||
* MySpell is Copyright (C) 2002 Kevin Hendricks.
|
||||
*/
|
||||
|
||||
#include "dict_finder.hxx"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <iterator>
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || \
|
||||
(defined(__APPLE__) && defined(__MACH__)))
|
||||
#include <unistd.h>
|
||||
#ifdef _POSIX_VERSION
|
||||
#include <dirent.h>
|
||||
#include <glob.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
const char PATHSEP = ':';
|
||||
|
||||
#elif defined(_WIN32)
|
||||
|
||||
#ifdef __MINGW32__
|
||||
#include <dirent.h>
|
||||
//#include <glob.h> //not present in mingw-w64. present in vanilla mingw
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#endif //__MINGW32__
|
||||
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace hunspell {
|
||||
|
||||
// Tokenises `s` on the delimiter `sep` and assigns every piece through `out`.
// Pieces are produced by repeated std::getline calls, so consecutive
// delimiters yield empty pieces while a single trailing delimiter does not
// produce an extra empty piece. Returns the advanced output iterator.
template <class CharT, class OutIt>
auto split(const std::basic_string<CharT>& s, CharT sep, OutIt out) -> OutIt
{
    std::basic_istringstream<CharT> stream(s);
    for (std::basic_string<CharT> piece; std::getline(stream, piece, sep);
         ++out) {
        *out = piece;
    }
    return out;
}
|
||||
|
||||
#ifdef _WIN32
|
||||
const char PATHSEP = ';';
|
||||
#else
|
||||
const char PATHSEP = ':';
|
||||
#endif
|
||||
|
||||
template <class OutIt>
|
||||
auto get_default_search_directories(OutIt out) -> OutIt
|
||||
{
|
||||
*out++ = ".";
|
||||
char* dicpath = getenv("DICPATH");
|
||||
if (dicpath) {
|
||||
out = split(string(dicpath), PATHSEP, out);
|
||||
}
|
||||
*out++ = "/mingw64/share/hunspell";
|
||||
char* home = getenv("HOME");
|
||||
#ifdef _POSIX_VERSION
|
||||
array<string, 3> prefixes = {home ? string(home) + "/.local/" : "/",
|
||||
"/usr/local/", "/usr/"};
|
||||
array<const char*, 3> dirs = {"share/hunspell", "share/myspell",
|
||||
"share/myspell/dicts"};
|
||||
for (auto& dir : dirs) {
|
||||
for (auto& prefix : prefixes) {
|
||||
*out = prefix + dir;
|
||||
++out;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(__APPLE__) && defined(__MACH__)
|
||||
string osx = "/Library/Spelling";
|
||||
if (home) {
|
||||
*out++ = home + osx;
|
||||
}
|
||||
*out++ = osx;
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
array<char*, 2> winpaths = {getenv("LOCALAPPDATA"),
|
||||
getenv("PROGRAMDATA")};
|
||||
for (auto& p : winpaths) {
|
||||
if (p) {
|
||||
*out++ = string(p) + "/hunspell";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return out;
|
||||
}
|
||||
|
||||
// Convenience overload: collects the default search directories in a vector.
auto get_default_search_directories() -> vector<string>
{
    vector<string> dirs;
    auto sink = back_inserter(dirs);
    get_default_search_directories(sink);
    return dirs;
}
|
||||
|
||||
#ifdef _POSIX_VERSION
// RAII wrapper around POSIX glob(3): runs a pattern, exposes the matched
// paths as an iterable range and releases them with globfree() on
// destruction.
class Globber {
private:
    glob_t g;
    int ret; // result of the most recent ::glob call, 0 on success

public:
    Globber(const char* pattern)
        : g{}, ret(::glob(pattern, 0, nullptr, &g))
    {
    }
    Globber(const std::string& pattern) : Globber(pattern.c_str()) {}

    // The object owns the glob_t storage; a copy would call globfree() on
    // the same storage twice, so copying is disabled.
    Globber(const Globber&) = delete;
    auto operator=(const Globber&) -> Globber& = delete;

    // Discards the previous matches and runs `pattern`.
    // Returns true when ::glob reports success.
    auto glob(const char* pattern) -> bool
    {
        globfree(&g);
        g = glob_t{}; // do not hand stale pointers or counts to ::glob
        ret = ::glob(pattern, 0, nullptr, &g);
        return ret == 0;
    }
    auto glob(const std::string& pattern) -> bool
    {
        return glob(pattern.c_str());
    }

    // Range over the matched paths. After a failed ::glob the range is
    // empty, so callers never read glob_t fields that ::glob did not fill in.
    auto begin() -> const char* const*
    {
        return ret == 0 ? g.gl_pathv : nullptr;
    }
    auto end() -> const char* const*
    {
        return ret == 0 ? g.gl_pathv + g.gl_pathc : nullptr;
    }

    // Copies every matched path to `out`; copies nothing after a failed
    // glob. Returns the advanced output iterator.
    template <class OutIt>
    auto copy_glob_paths(OutIt out) -> OutIt
    {
        if (ret == 0) {
            out = std::copy(begin(), end(), out);
        }
        return out;
    }

    ~Globber() { globfree(&g); }
};
#else
// Globbing is not implemented on this platform: every pattern matches
// nothing.
struct Globber {
    Globber(const char*) {}
    Globber(const std::string&) {}
    auto glob(const char*) -> bool { return false; }
    auto glob(const std::string&) -> bool { return false; }
    auto begin() -> char** { return nullptr; }
    auto end() -> char** { return nullptr; }
    template <class OutIt>
    auto copy_glob_paths(OutIt out) -> OutIt
    {
        return out;
    }
};
#endif
|
||||
|
||||
template <class OutIt>
|
||||
auto get_mozilla_directories(OutIt out) -> OutIt
|
||||
{
|
||||
#ifdef _POSIX_VERSION
|
||||
// add Mozilla linux global directory
|
||||
array<const char*, 2> dirs = {"/usr/local/lib/firefox/dictionaries",
|
||||
"/usr/lib/firefox/dictionaries"};
|
||||
struct stat dir_stat;
|
||||
for (auto& dir : dirs) {
|
||||
if (lstat(dir, &dir_stat) == 0) {
|
||||
if (S_ISDIR(dir_stat.st_mode)) {
|
||||
*out++ = dir;
|
||||
}
|
||||
// if SYMLINK do not add
|
||||
}
|
||||
}
|
||||
|
||||
// add Mozilla linux user directory
|
||||
char* home = getenv("HOME");
|
||||
if (home == nullptr) {
|
||||
return out;
|
||||
}
|
||||
string moz = home;
|
||||
moz += "/.mozilla/firefox/*/extensions/*/dictionaries";
|
||||
Globber g(moz);
|
||||
out = g.copy_glob_paths(out);
|
||||
|
||||
#elif defined(_WIN32)
|
||||
// add Mozilla windows global directory
|
||||
array<char*, 2> winpaths = {getenv("PROGRAMFILES"),
|
||||
getenv("PROGRAMFILES(x86)")};
|
||||
for (auto& p : winpaths) {
|
||||
if (p) {
|
||||
*out++ = string(p) + "/Mozilla Firefox/dictionaries";
|
||||
}
|
||||
}
|
||||
// add Mozilla windows local directory
|
||||
char* home = getenv("APPDATA");
|
||||
if (home == nullptr) {
|
||||
return out;
|
||||
}
|
||||
string moz = home;
|
||||
moz += "/Mozilla/Firefox/Profiles/*/extensions/*/dictionaries";
|
||||
Globber g(moz);
|
||||
out = g.copy_glob_paths(out);
|
||||
#endif
|
||||
return out;
|
||||
}
|
||||
|
||||
// Convenience overload: appends the Mozilla dictionary directories to `out`.
auto get_mozilla_directories(vector<string>& out) -> void
{
    auto sink = back_inserter(out);
    get_mozilla_directories(sink);
}
|
||||
|
||||
template <class OutIt>
|
||||
auto get_libreoffice_directories(OutIt out) -> OutIt
|
||||
{
|
||||
string lo_user_glob;
|
||||
#ifdef _POSIX_VERSION
|
||||
// add Libreoffice linux global directories
|
||||
array<const char*, 3> prefixes = {"/usr/local/lib/libreoffice",
|
||||
"/usr/lib/libreoffice",
|
||||
"/opt/libreoffice*"};
|
||||
for (auto& p : prefixes) {
|
||||
Globber g(string(p) + "/share/extensions/dict-*");
|
||||
out = g.copy_glob_paths(out);
|
||||
}
|
||||
|
||||
// add Libreoffice linux local
|
||||
|
||||
char* home = getenv("HOME");
|
||||
if (home == nullptr) {
|
||||
return out;
|
||||
}
|
||||
lo_user_glob = home;
|
||||
lo_user_glob += "/.config/libreoffice/?/user/uno_packages/cache"
|
||||
"/uno_packages/*/*.oxt/";
|
||||
#elif defined(_WIN32)
|
||||
// add Libreoffice windows global directories
|
||||
array<char*, 2> prefixes = {getenv("PROGRAMFILES"),
|
||||
getenv("PROGRAMFILES(x86)")};
|
||||
for (auto& p : prefixes) {
|
||||
if (p == nullptr) {
|
||||
continue;
|
||||
}
|
||||
Globber g(string(p) + "Libre Office ?/share/extensions/dict-*");
|
||||
out = g.copy_glob_paths(out);
|
||||
}
|
||||
|
||||
char* home = getenv("APPDATA");
|
||||
if (home == nullptr) {
|
||||
return out;
|
||||
}
|
||||
lo_user_glob = home;
|
||||
lo_user_glob += "/libreoffice/?/user/uno_packages/cache"
|
||||
"/uno_packages/*/*.oxt/";
|
||||
#else
|
||||
return out;
|
||||
#endif
|
||||
// finish adding LO user directory dicts (linux and windows)
|
||||
Globber g(lo_user_glob + "dictionaries");
|
||||
out = g.copy_glob_paths(out);
|
||||
|
||||
g.glob(lo_user_glob + "*.aff");
|
||||
string path_str;
|
||||
for (auto& path : g) {
|
||||
path_str = path;
|
||||
path_str.erase(path_str.rfind('/'));
|
||||
*out = path_str;
|
||||
++out;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
auto get_libreoffice_directories(std::vector<std::string>& out) -> void
|
||||
{
|
||||
get_libreoffice_directories(back_inserter(out));
|
||||
}
|
||||
|
||||
#if defined(_POSIX_VERSION) || defined(__MINGW32__)
// Minimal RAII wrapper over opendir/readdir/closedir for iterating the
// entries of one directory. Typical use: open(), then call next() until it
// returns false, reading entry_name() after each successful next().
class Directory {
    DIR* dp = nullptr;
    struct dirent* ent_p = nullptr;

public:
    Directory() {}
    Directory(const Directory& d) = delete;
    void operator=(const Directory& d) = delete;

    // Closes any directory that is still open, then opens `dirname`.
    // Returns true on success.
    auto open(const std::string& dirname) -> bool
    {
        close();
        dp = opendir(dirname.c_str());
        return dp != nullptr;
    }

    // Advances to the next entry. Returns false when no directory is open,
    // at the end of the directory, or on a read error.
    // readdir() replaces the deprecated readdir_r(); each Directory owns its
    // own DIR stream.
    auto next() -> bool
    {
        if (dp == nullptr) {
            return false;
        }
        ent_p = readdir(dp);
        return ent_p != nullptr;
    }

    // Name of the current entry, or nullptr when there is none.
    auto entry_name() -> const char*
    {
        return ent_p ? ent_p->d_name : nullptr;
    }

    // Closes the directory; safe to call when nothing is open.
    auto close() -> void
    {
        if (dp != nullptr) {
            (void)closedir(dp);
            dp = nullptr;
        }
        ent_p = nullptr;
    }
    ~Directory() { close(); }
};
#else
// Directory listing is not implemented on this platform: nothing can be
// opened and no entries are ever reported.
struct Directory {
    Directory() {}
    Directory(const Directory& d) = delete;
    void operator=(const Directory& d) = delete;
    auto open(const std::string&) -> bool { return false; }
    auto next() -> bool { return false; }
    auto entry_name() -> const char* { return nullptr; }
    auto close() -> void {}
};
#endif
|
||||
|
||||
template <class OutIt>
|
||||
auto search_dir_for_dicts(const string& dir, OutIt out) -> OutIt
|
||||
{
|
||||
Directory d;
|
||||
if (d.open(dir) == false) {
|
||||
return out;
|
||||
}
|
||||
unordered_set<string> dics;
|
||||
string file_name;
|
||||
while (d.next()) {
|
||||
file_name = d.entry_name();
|
||||
auto sz = file_name.size();
|
||||
if (sz < 4) {
|
||||
continue;
|
||||
}
|
||||
if (file_name.find(".dic", sz - 4) != file_name.npos) {
|
||||
dics.insert(file_name);
|
||||
file_name.resize(sz - 4);
|
||||
file_name += ".aff";
|
||||
if (dics.count(file_name)) {
|
||||
file_name.resize(sz - 4);
|
||||
auto full_path = dir + '/' + file_name;
|
||||
*out = make_pair(file_name, full_path);
|
||||
out++;
|
||||
}
|
||||
}
|
||||
else if (file_name.find(".aff", sz - 4) != file_name.npos) {
|
||||
dics.insert(file_name);
|
||||
file_name.resize(sz - 4);
|
||||
file_name += ".dic";
|
||||
if (dics.count(file_name)) {
|
||||
file_name.resize(sz - 4);
|
||||
auto full_path = dir + '/' + file_name;
|
||||
*out = make_pair(file_name, full_path);
|
||||
out++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
auto search_dirs_for_dicts(const vector<string>& dirs)
|
||||
-> vector<pair<string, string>>
|
||||
{
|
||||
|
||||
vector<pair<string, string>> v;
|
||||
for (auto& dir : dirs) {
|
||||
search_dir_for_dicts(dir, back_inserter(v));
|
||||
}
|
||||
return v;
|
||||
}
|
||||
}
|
40
3rdparty/hunspell/src/hunspell2/dict_finder.hxx
vendored
40
3rdparty/hunspell/src/hunspell2/dict_finder.hxx
vendored
@ -1,40 +0,0 @@
|
||||
/* Copyright 2016-2017 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Hunspell-2.
|
||||
*
|
||||
* Hunspell-2 is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Hunspell-2 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Hunspell-2. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Hunspell 2 is based on Hunspell v1 and MySpell.
|
||||
* Hunspell v1 is Copyright (C) 2002-2017 Németh László
|
||||
* MySpell is Copyright (C) 2002 Kevin Hendricks.
|
||||
*/
|
||||
|
||||
#ifndef HUNSPELL_DIC_FINDER_HXX
|
||||
#define HUNSPELL_DIC_FINDER_HXX
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace hunspell {
|
||||
|
||||
// Returns the default dictionary search directories: ".", the entries of the
// DICPATH environment variable, and platform specific locations.
auto get_default_search_directories() -> std::vector<std::string>;
|
||||
|
||||
// Appends the directories where Mozilla Firefox may keep dictionaries to
// `out`.
auto get_mozilla_directories(std::vector<std::string>& out) -> void;
|
||||
|
||||
// Appends the directories where LibreOffice may keep dictionaries to `out`.
auto get_libreoffice_directories(std::vector<std::string>& out) -> void;
|
||||
|
||||
// Searches every directory in `dirs` for names that have both a .dic and a
// .aff file. Each result is the pair (name, directory + '/' + name).
auto search_dirs_for_dicts(const std::vector<std::string>& dirs)
|
||||
-> std::vector<std::pair<std::string, std::string>>;
|
||||
}
|
||||
#endif
|
117
3rdparty/hunspell/src/hunspell2/hunspell.hxx
vendored
117
3rdparty/hunspell/src/hunspell2/hunspell.hxx
vendored
@ -1,117 +0,0 @@
|
||||
// Include guard: this header defines an enum and a class, so including it
// twice in one translation unit would otherwise be a redefinition error.
#ifndef HUNSPELL_HUNSPELL_HXX
#define HUNSPELL_HUNSPELL_HXX

#include <string>

namespace hunspell {

// Result categories returned by the spell functions below.
enum spell_result {
    bad_word,
    good_word,
    affixed_good_word,
    compound_good_word
};

class hunspell {

public:
    using string = std::string;
    using u16string = std::u16string;

private:
    /* (0)
     All the major work is done here.
     (1) and (2) are the lowest level specializations.
     The rest just do some conversions and delegate to them.

     (1) will simply call this
     with ConversionIterator set to string::iterator

     (2) will call this with u8_u32 on the fly conversion iterator.
    */
    template <class ConvIter>
    auto spell(ConvIter start, ConvIter end, const string& s)
        -> spell_result;

    /**
     (1) This should be called when the input and the dictionary
     are in the same encoding and that encoding is a single byte encoding.
    */
    auto spell_singlechar_input_singlechar_dict(const string& word)
        -> spell_result;

    /**
     (2) This should be called when the input and the dictionary
     are in the same encoding and that encoding is UTF-8.
    */
    auto spell_u8_input_u8_dict(const string& word) -> spell_result;

    /*
     (3) This should be called when the input is a UTF-8 string
     and the dictionary is in a single byte encoding. A lossy conversion
     from UTF-8 to single byte should happen, and then (1) should be called.
    */
    auto spell_u8_input_singlechar_dict(const string& word) -> spell_result;

    /*
     (4) This should be called when the input is a
     single-byte narrow OR multi-byte narrow string
     and the dictionary is UTF-8.
     The input can be anything so we must use some info about the input
     encoding, a C locale or C++ locale object.

     One can do narrow -> u16 -> u8 like this:
     get old C locale,
     set C locale to loc,
     call mbrtoc16,
     revert old C locale,
     then codecvt<char16_t, char, mbstate_t>

     There is no C++ way to go mbr to u16, we're limited to mbrtoc16.

     For that reason we will do a similar conversion,
     but in a more high level public function.
     This function should be UNUSED.
    */
    // spell_result spell_narrow_input_u8_dict(const string& word);

public:
    /**
     (5) This should be called when the input and the dictionary
     are in the same encoding which can be single byte or UTF-8.
     Simply calls (1) or (2).
     This is the same as spell() in v1.
    */
    auto spell(const string& word) -> spell_result;

    /**
     (6) Unknown narrow input (single byte or multi byte).
     Use current C locale and mbrtoc16 to convert it to known.
     Do a conversion mbr -> u16 -> u8.
     Use mbrtoc16, codecvt<char16_t, char, mbstate_t>
     We can check if the current locale is already utf-8 to skip this.

     Once we know we have a u8 string, just call (7).

     This should be the recommended way to interface with the command line
     utility. Before calling this function, one should call
     setlocale(LC_ALL, "") or locale::global(locale("")).
     If we use std::cin, we should imbue it with cin.imbue(locale())
    */
    auto spell_narrow_input(const string& word) -> spell_result;

    /**
     (7) UTF-8 input. Will delegate either to (2) or (3).
    */
    auto spell_u8_input(const string& word) -> spell_result;

private:
    /** (8) */
    auto spell_u16_input_singlechar_dict(const u16string& word)
        -> spell_result;

    /** (9) */
    auto spell_u16_input_u8_dict(const u16string& word) -> spell_result;

public:
    /** (10) */
    auto spell_u16_input(const u16string& word) -> spell_result;
};
}

#endif
|
178
3rdparty/hunspell/src/hunspell2/main.cxx
vendored
178
3rdparty/hunspell/src/hunspell2/main.cxx
vendored
@ -1,178 +0,0 @@
|
||||
/* Copyright 2016-2017 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Hunspell-2.
|
||||
*
|
||||
* Hunspell-2 is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Hunspell-2 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Hunspell-2. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Hunspell 2 is based on Hunspell v1 and MySpell.
|
||||
* Hunspell v1 is Copyright (C) 2002-2017 Németh László
|
||||
* MySpell is Copyright (C) 2002 Kevin Hendricks.
|
||||
*/
|
||||
|
||||
#include "aff_manager.hxx"
|
||||
#include "dic_manager.hxx"
|
||||
#include "dict_finder.hxx"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#if defined(__MINGW32__) || defined(__unix__) || defined(__unix) || \
|
||||
(defined(__APPLE__) && defined(__MACH__))
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Result of command line parsing: `options` maps each option character to
// its argument (empty for flags), `operands` holds the remaining arguments.
struct args_t {
    std::unordered_map<char, std::string> options;
    std::vector<std::string> operands;
};

// Parses the command line with getopt().
//
// usage
//   hunspell -d dict [-l|-G]
//   hunspell [-D]
//
// A missing option argument is recorded under the key ':' and an unknown
// option under the key '?'; in both cases the offending option character is
// appended to the stored value and a message is written to stderr.
// Where getopt() is unavailable an empty result is returned.
auto parse_args(int argc, char* argv[]) -> args_t
{
#if defined(_POSIX_VERSION) || defined(__MINGW32__)
    args_t parsed;
    for (int opt; (opt = getopt(argc, argv, ":d:DGl")) != -1;) {
        const char key = static_cast<char>(opt);
        if (opt == 'd') {
            parsed.options[key] = optarg;
        }
        else if (opt == 'D' || opt == 'G' || opt == 'l') {
            parsed.options[key]; // flag: just make the key exist
        }
        else if (opt == ':') { /* -d without operand */
            parsed.options[key] += static_cast<char>(optopt);
            std::cerr << "Option -" << static_cast<char>(optopt)
                      << " requires an operand\n";
        }
        else if (opt == '?') {
            parsed.options[key] += static_cast<char>(optopt);
            std::cerr << "Unrecognized option: '-"
                      << static_cast<char>(optopt) << "'\n";
        }
    }
    parsed.operands.assign(argv + optind, argv + argc);
    return parsed;
#else
    return {};
#endif
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
auto args1 = parse_args(argc, argv);
|
||||
auto& args = args1.options;
|
||||
auto v = hunspell::get_default_search_directories();
|
||||
hunspell::get_mozilla_directories(v);
|
||||
hunspell::get_libreoffice_directories(v);
|
||||
auto dics = hunspell::search_dirs_for_dicts(v);
|
||||
if (args.empty() || args.count('D')) {
|
||||
for (auto& a : v) {
|
||||
cout << a << endl;
|
||||
}
|
||||
for (auto& a : dics) {
|
||||
cout << a.first << '\t' << a.second << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (args.count('d') == 0) {
|
||||
return 0;
|
||||
}
|
||||
string filename;
|
||||
for (auto& a : dics) {
|
||||
if (a.first == args['d']) {
|
||||
filename = a.second;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (filename.empty()) {
|
||||
return 1;
|
||||
}
|
||||
/*
|
||||
locale::global(locale(""));
|
||||
cin.imbue(locale());
|
||||
hunspell::hunspell dic(filename);
|
||||
string word;
|
||||
if (args.count('l')) {
|
||||
while (cin >> word) {
|
||||
auto res = dic.spell_narrow_input(word);
|
||||
switch (res) {
|
||||
case bad_word:
|
||||
cout << word << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (args.count('G')) {
|
||||
while (cin >> word) {
|
||||
auto res = dic.spell_narrow_input(word);
|
||||
switch (res) {
|
||||
case bad_word:
|
||||
break;
|
||||
default:
|
||||
cout << word << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
while (cin >> word) {
|
||||
auto res = dic.spell_narrow_input(word);
|
||||
switch (res) {
|
||||
case bad_word:
|
||||
case good_word:
|
||||
case affixed_good_word:
|
||||
case compound_good_word:
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
ifstream affstream(filename + ".aff");
|
||||
ifstream dicstream(filename + ".dic");
|
||||
hunspell::aff_data aff;
|
||||
aff.parse(affstream);
|
||||
hunspell::dic_data dic;
|
||||
dic.parse(dicstream, aff);
|
||||
std::cout << aff.encoding << endl;
|
||||
std::cout << aff.try_chars << endl;
|
||||
for (auto& a : aff.compound_rules) {
|
||||
cout << a << endl;
|
||||
}
|
||||
for (auto& a : aff.suffixes) {
|
||||
cout << (char)a.flag << ' ' << (a.cross_product ? 'Y' : 'N')
|
||||
<< ' ' << a.stripping << ' ' << a.affix
|
||||
<< (a.new_flags.size() ? "/ " : " ") << a.condition;
|
||||
cout << endl;
|
||||
}
|
||||
for (auto& wd : dic.words) {
|
||||
cout << wd.first;
|
||||
if (wd.second.size()) {
|
||||
cout << '/';
|
||||
for (auto& flag : wd.second) {
|
||||
cout << flag << ',';
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
106
3rdparty/hunspell/src/hunspell2/string_utils.hxx
vendored
106
3rdparty/hunspell/src/hunspell2/string_utils.hxx
vendored
@ -1,106 +0,0 @@
|
||||
/* Copyright 2016-2017 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Hunspell-2.
|
||||
*
|
||||
* Hunspell-2 is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Hunspell-2 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Hunspell-2. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Hunspell 2 is based on Hunspell v1 and MySpell.
|
||||
* Hunspell v1 is Copyright (C) 2002-2017 Németh László
|
||||
* MySpell is Copyright (C) 2002 Kevin Hendricks.
|
||||
*/
|
||||
|
||||
#ifndef HUNSPELL_STRING_UTILS_HXX
|
||||
#define HUNSPELL_STRING_UTILS_HXX
|
||||
|
||||
#include <codecvt>
|
||||
#include <istream>
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <cctype>
|
||||
|
||||
namespace hunspell {
|
||||
|
||||
// String converter between UTF-8 encoded std::string and char16_t strings,
// built on std::codecvt_utf8<char16_t>.
using utf8_to_ucs2_converter =
|
||||
std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t>;
|
||||
|
||||
// Upper-cases the ASCII letters a-z of `s` in place; every other byte,
// including bytes of multi-byte or 8-bit encoded characters, is left
// untouched. The result does not depend on the C locale, and no plain
// (possibly negative) char is passed to toupper(), which is undefined
// behaviour.
inline void toupper_ascii(std::string& s)
{
    for (auto& c : s) {
        if (c >= 'a' && c <= 'z') {
            c = static_cast<char>(c - 'a' + 'A');
        }
    }
}
|
||||
|
||||
// Clears only the failbit of `in`; eofbit and badbit keep their current
// state.
inline void reset_failbit_istream(std::istream& in)
{
    const std::ios_base::iostate kept =
        in.rdstate() & ~std::ios_base::failbit;
    in.clear(kept);
}
|
||||
|
||||
// Function object that converts its argument to `To` with static_cast;
// usable wherever a unary conversion callable is needed.
template <class To>
struct cast_lambda {
    template <class From>
    auto operator()(From& f) const -> To
    {
        return static_cast<To>(f);
    }
};
|
||||
|
||||
inline bool read_to_slash_or_space(std::istream& in, std::string& out)
|
||||
{
|
||||
in >> std::ws;
|
||||
int c;
|
||||
bool readSomething = false;
|
||||
while ((c = in.get()) != std::istream::traits_type::eof() &&
|
||||
!isspace((char)c, in.getloc()) && c != '/') {
|
||||
out.push_back(c);
|
||||
readSomething = true;
|
||||
}
|
||||
bool slash = c == '/';
|
||||
if (readSomething || slash) {
|
||||
reset_failbit_istream(in);
|
||||
}
|
||||
return slash;
|
||||
}
|
||||
|
||||
inline bool read_to_slash(std::istream& in, std::string& out)
|
||||
{
|
||||
in >> std::ws;
|
||||
int c;
|
||||
bool readSomething = false;
|
||||
while ((c = in.get()) != std::istream::traits_type::eof() && c != '/') {
|
||||
out.push_back(c);
|
||||
readSomething = true;
|
||||
}
|
||||
bool slash = c == '/';
|
||||
if (readSomething || slash) {
|
||||
reset_failbit_istream(in);
|
||||
}
|
||||
return slash;
|
||||
}
|
||||
|
||||
inline void parse_morhological_fields(std::istream& in,
|
||||
std::vector<std::string>& vecOut)
|
||||
{
|
||||
if (!in.good()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::string morph;
|
||||
while (in >> morph) {
|
||||
vecOut.push_back(morph);
|
||||
}
|
||||
reset_failbit_istream(in);
|
||||
}
|
||||
}
|
||||
#endif
|
1
3rdparty/hunspell/src/parsers/.gitignore
vendored
1
3rdparty/hunspell/src/parsers/.gitignore
vendored
@ -1 +0,0 @@
|
||||
testparser
|
65
3rdparty/hunspell/src/parsers/firstparser.cxx
vendored
65
3rdparty/hunspell/src/parsers/firstparser.cxx
vendored
@ -1,65 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "firstparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// Forwards the word character set to the TextParser base class.
FirstParser::FirstParser(const char* wordchars) : TextParser(wordchars) {}
|
||||
|
||||
// Empty body: the destructor does no work of its own.
FirstParser::~FirstParser() {}
|
||||
|
||||
bool FirstParser::next_token(std::string& t) {
|
||||
t.clear();
|
||||
const size_t tabpos = line[actual].find('\t');
|
||||
if (tabpos != std::string::npos && tabpos > token) {
|
||||
token = tabpos;
|
||||
t = line[actual].substr(0, tabpos);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
56
3rdparty/hunspell/src/parsers/firstparser.hxx
vendored
56
3rdparty/hunspell/src/parsers/firstparser.hxx
vendored
@ -1,56 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef FIRSTPARSER_HXX_
|
||||
#define FIRSTPARSER_HXX_
|
||||
|
||||
#include "textparser.hxx"
|
||||
|
||||
/*
|
||||
* Check first word of the input line
|
||||
*
|
||||
*/
|
||||
|
||||
// TextParser variant whose only token is the first tab-terminated field of
// the input line.
class FirstParser : public TextParser {
|
||||
public:
|
||||
// `wc` is passed on to the TextParser constructor.
explicit FirstParser(const char* wc);
|
||||
virtual ~FirstParser();
|
||||
|
||||
// Stores the text before the first tab of the current line in the argument
// and returns true; returns false when no such token is available.
virtual bool next_token(std::string&);
|
||||
};
|
||||
|
||||
#endif
|
84
3rdparty/hunspell/src/parsers/htmlparser.cxx
vendored
84
3rdparty/hunspell/src/parsers/htmlparser.cxx
vendored
@ -1,84 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "htmlparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// Spans that HTMLParser::next_token() hands to XMLParser::next_token() as its
// first table. Each row is {opening pattern, closing pattern}.
// NOTE(review): rows are presumably tried in order, which is why the generic
// "<" ... ">" row is last -- confirm against XMLParser before reordering.
static const char* PATTERN[][2] = {{"<script", "</script>"},
                                   {"<style", "</style>"},
                                   {"<code", "</code>"},
                                   {"<samp", "</samp>"},
                                   {"<kbd", "</kbd>"},
                                   {"<var", "</var>"},
                                   {"<listing", "</listing>"},
                                   {"<address", "</address>"},
                                   {"<pre", "</pre>"},
                                   {"<!--", "-->"},  // XML comment
                                   // NOTE(review): presumably meant for CDATA
                                   // sections; confirm how XMLParser matches it.
                                   {"<[cdata[", "]]>"},
                                   {"<", ">"}};  // generic tag

// Number of rows in PATTERN (same array-length idiom as latexparser.cxx).
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))

// Second table passed to XMLParser::next_token(). Each row is
// {tag prefix, attribute prefix}.
static const char* PATTERN2[][2] = {
    {"<img", "alt="},  // ALT and TITLE attrib handled spec.
    {"<img", "title="},
    {"<a ", "title="}};

// Number of rows in PATTERN2.
#define PATTERN_LEN2 (sizeof(PATTERN2) / sizeof(PATTERN2[0]))
|
||||
|
||||
HTMLParser::HTMLParser(const char* wordchars)
|
||||
: XMLParser(wordchars) {
|
||||
}
|
||||
|
||||
HTMLParser::HTMLParser(const w_char* wordchars, int len)
|
||||
: XMLParser(wordchars, len) {
|
||||
}
|
||||
|
||||
bool HTMLParser::next_token(std::string& t) {
|
||||
return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, t);
|
||||
}
|
||||
|
||||
HTMLParser::~HTMLParser() {}
|
56
3rdparty/hunspell/src/parsers/htmlparser.hxx
vendored
56
3rdparty/hunspell/src/parsers/htmlparser.hxx
vendored
@ -1,56 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef HTMLPARSER_HXX_
|
||||
#define HTMLPARSER_HXX_
|
||||
|
||||
#include "xmlparser.hxx"
|
||||
|
||||
/*
|
||||
* HTML Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class HTMLParser : public XMLParser {
|
||||
public:
|
||||
explicit HTMLParser(const char* wc);
|
||||
HTMLParser(const w_char* wordchars, int len);
|
||||
virtual bool next_token(std::string&);
|
||||
virtual ~HTMLParser();
|
||||
};
|
||||
|
||||
#endif
|
261
3rdparty/hunspell/src/parsers/latexparser.cxx
vendored
261
3rdparty/hunspell/src/parsers/latexparser.cxx
vendored
@ -1,261 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "latexparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// LaTeX constructs that LaTeXParser skips instead of tokenizing.
//   pat[0]  opening text, matched at the current read position
//   pat[1]  closing text of a skipped span, or NULL when the entry is a
//           command followed by brace arguments
//   arg     number of brace arguments to skip when pat[1] is NULL
// look_pattern() returns the FIRST matching row, so a row must come before
// any row that is a prefix of it (e.g. "\\includeonly" before "\\include",
// "\\begin{tabular}" before "\\begin"). Do not reorder casually.
// The table previously listed "\\vspace" twice; the second copy could never
// be selected and has been removed.
static struct {
  const char* pat[2];
  int arg;
} PATTERN[] = {{{"\\(", "\\)"}, 0},
               {{"$$", "$$"}, 0},
               {{"$", "$"}, 0},
               {{"\\begin{math}", "\\end{math}"}, 0},
               {{"\\[", "\\]"}, 0},
               {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
               {{"\\begin{equation}", "\\end{equation}"}, 0},
               {{"\\begin{equation*}", "\\end{equation*}"}, 0},
               {{"\\cite", NULL}, 1},
               {{"\\nocite", NULL}, 1},
               {{"\\index", NULL}, 1},
               {{"\\label", NULL}, 1},
               {{"\\ref", NULL}, 1},
               {{"\\pageref", NULL}, 1},
               {{"\\autoref", NULL}, 1},
               {{"\\parbox", NULL}, 1},
               {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
               {{"\\verb+", "+"}, 0},
               {{"\\verb|", "|"}, 0},
               {{"\\verb#", "#"}, 0},
               {{"\\verb*", "*"}, 0},
               {{"\\documentstyle", "\\begin{document}"}, 0},
               {{"\\documentclass", "\\begin{document}"}, 0},
               {{"\\usepackage", NULL}, 1},
               {{"\\includeonly", NULL}, 1},
               {{"\\include", NULL}, 1},
               {{"\\input", NULL}, 1},
               {{"\\vspace", NULL}, 1},
               {{"\\setlength", NULL}, 2},
               {{"\\addtolength", NULL}, 2},
               {{"\\settowidth", NULL}, 2},
               {{"\\rule", NULL}, 2},
               {{"\\hspace", NULL}, 1},
               {{"\\\\[", "]"}, 0},
               {{"\\pagebreak[", "]"}, 0},
               {{"\\nopagebreak[", "]"}, 0},
               {{"\\enlargethispage", NULL}, 1},
               {{"\\begin{tabular}", NULL}, 1},
               {{"\\addcontentsline", NULL}, 2},
               {{"\\begin{thebibliography}", NULL}, 1},
               {{"\\bibliography", NULL}, 1},
               {{"\\bibliographystyle", NULL}, 1},
               {{"\\bibitem", NULL}, 1},
               {{"\\begin", NULL}, 1},
               {{"\\end", NULL}, 1},
               {{"\\pagestyle", NULL}, 1},
               {{"\\pagenumbering", NULL}, 1},
               {{"\\thispagestyle", NULL}, 1},
               {{"\\newtheorem", NULL}, 2},
               {{"\\newcommand", NULL}, 2},
               {{"\\renewcommand", NULL}, 2},
               {{"\\setcounter", NULL}, 2},
               {{"\\addtocounter", NULL}, 1},
               {{"\\stepcounter", NULL}, 1},
               {{"\\selectlanguage", NULL}, 1},
               {{"\\inputencoding", NULL}, 1},
               {{"\\hyphenation", NULL}, 1},
               {{"\\definecolor", NULL}, 3},
               {{"\\color", NULL}, 1},
               {{"\\textcolor", NULL}, 1},
               {{"\\pagecolor", NULL}, 1},
               {{"\\colorbox", NULL}, 2},
               {{"\\fcolorbox", NULL}, 2},
               {{"\\declaregraphicsextensions", NULL}, 1},
               {{"\\psfig", NULL}, 1},
               {{"\\url", NULL}, 1},
               {{"\\eqref", NULL}, 1},
               {{"\\vskip", NULL}, 1},
               {{"\\vglue", NULL}, 1},
               {{"\'\'", NULL}, 1}};

// Number of rows in PATTERN.
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
|
||||
|
||||
// Constructs the parser from a narrow-character word-character set
// (forwarded to TextParser) and zeroes the LaTeX-specific bookkeeping.
LaTeXParser::LaTeXParser(const char* wordchars)
    : TextParser(wordchars),
      pattern_num(0),
      depth(0),
      arg(0),
      opt(0) {}

// Constructs the parser from `len` wide word characters (forwarded to
// TextParser) and zeroes the LaTeX-specific bookkeeping.
LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len),
      pattern_num(0),
      depth(0),
      arg(0),
      opt(0) {}

// Nothing to release beyond what the base class handles.
LaTeXParser::~LaTeXParser() {}
|
||||
|
||||
int LaTeXParser::look_pattern(int col) {
|
||||
for (unsigned int i = 0; i < PATTERN_LEN; i++) {
|
||||
const char* j = line[actual].c_str() + head;
|
||||
const char* k = PATTERN[i].pat[col];
|
||||
if (!k)
|
||||
continue;
|
||||
while ((*k != '\0') && (tolower(*j) == *k)) {
|
||||
j++;
|
||||
k++;
|
||||
}
|
||||
if (*k == '\0')
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* LaTeXParser
|
||||
*
|
||||
* state 0: not wordchar
|
||||
* state 1: wordchar
|
||||
* state 2: comments
|
||||
* state 3: commands
|
||||
* state 4: commands with arguments
|
||||
* state 5: % comment
|
||||
*
|
||||
*/
|
||||
|
||||
bool LaTeXParser::next_token(std::string& t) {
|
||||
t.clear();
|
||||
int i;
|
||||
int slash = 0;
|
||||
int apostrophe;
|
||||
for (;;) {
|
||||
// fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
|
||||
// %s\n",depth,state,arg,line[actual]+head);
|
||||
|
||||
switch (state) {
|
||||
case 0: // non word chars
|
||||
if ((pattern_num = look_pattern(0)) != -1) {
|
||||
if (PATTERN[pattern_num].pat[1]) {
|
||||
state = 2;
|
||||
} else {
|
||||
state = 4;
|
||||
depth = 0;
|
||||
arg = 0;
|
||||
opt = 1;
|
||||
}
|
||||
head += strlen(PATTERN[pattern_num].pat[0]) - 1;
|
||||
} else if (line[actual][head] == '%') {
|
||||
state = 5;
|
||||
} else if (is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 1;
|
||||
token = head;
|
||||
} else if (line[actual][head] == '\\') {
|
||||
if (line[actual][head + 1] == '\\' || // \\ (linebreak)
|
||||
(line[actual][head + 1] == '$') || // \$ (dollar sign)
|
||||
(line[actual][head + 1] == '%')) { // \% (percent)
|
||||
head++;
|
||||
break;
|
||||
}
|
||||
state = 3;
|
||||
}
|
||||
break;
|
||||
case 1: // wordchar
|
||||
apostrophe = 0;
|
||||
if (!is_wordchar(line[actual].c_str() + head) ||
|
||||
(line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
|
||||
++apostrophe)) {
|
||||
state = 0;
|
||||
bool ok = alloc_token(token, &head, t);
|
||||
if (apostrophe)
|
||||
head += 2;
|
||||
if (ok)
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case 2: // comment, labels, etc
|
||||
if (((i = look_pattern(1)) != -1) &&
|
||||
(strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
|
||||
state = 0;
|
||||
head += strlen(PATTERN[pattern_num].pat[1]) - 1;
|
||||
}
|
||||
break;
|
||||
case 3: // command
|
||||
if ((tolower(line[actual][head]) < 'a') ||
|
||||
(tolower(line[actual][head]) > 'z')) {
|
||||
state = 0;
|
||||
head--;
|
||||
}
|
||||
break;
|
||||
case 4: // command with arguments
|
||||
if (slash && (line[actual][head] != '\0')) {
|
||||
slash = 0;
|
||||
head++;
|
||||
break;
|
||||
} else if (line[actual][head] == '\\') {
|
||||
slash = 1;
|
||||
} else if ((line[actual][head] == '{') ||
|
||||
((opt) && (line[actual][head] == '['))) {
|
||||
depth++;
|
||||
opt = 0;
|
||||
} else if (line[actual][head] == '}') {
|
||||
depth--;
|
||||
if (depth == 0) {
|
||||
opt = 1;
|
||||
arg++;
|
||||
}
|
||||
if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
|
||||
(depth < 0)) {
|
||||
state = 0; // XXX not handles the last optional arg.
|
||||
}
|
||||
} else if (line[actual][head] == ']')
|
||||
depth--;
|
||||
} // case
|
||||
if (next_char(line[actual].c_str(), &head)) {
|
||||
if (state == 5)
|
||||
state = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
65
3rdparty/hunspell/src/parsers/latexparser.hxx
vendored
65
3rdparty/hunspell/src/parsers/latexparser.hxx
vendored
@ -1,65 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef LATEXPARSER_HXX_
|
||||
#define LATEXPARSER_HXX_
|
||||
|
||||
#include "textparser.hxx"
|
||||
|
||||
/*
|
||||
* LaTeX Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class LaTeXParser : public TextParser {
|
||||
int pattern_num; // number of comment
|
||||
int depth; // depth of blocks
|
||||
int arg; // arguments's number
|
||||
int opt; // optional argument attrib.
|
||||
|
||||
public:
|
||||
explicit LaTeXParser(const char* wc);
|
||||
LaTeXParser(const w_char* wordchars, int len);
|
||||
virtual ~LaTeXParser();
|
||||
|
||||
virtual bool next_token(std::string&);
|
||||
|
||||
private:
|
||||
int look_pattern(int col);
|
||||
};
|
||||
|
||||
#endif
|
98
3rdparty/hunspell/src/parsers/manparser.cxx
vendored
98
3rdparty/hunspell/src/parsers/manparser.cxx
vendored
@ -1,98 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "manparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// Constructs the parser from a narrow-character word-character set; the
// argument is forwarded unchanged to the TextParser base.
ManParser::ManParser(const char* wordchars) : TextParser(wordchars) {}

// Constructs the parser from `len` wide word characters; both arguments are
// forwarded unchanged to the TextParser base.
ManParser::ManParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len) {}

// Nothing to release beyond what the base class handles.
ManParser::~ManParser() {}
|
||||
|
||||
bool ManParser::next_token(std::string& t) {
|
||||
for (;;) {
|
||||
switch (state) {
|
||||
case 1: // command arguments
|
||||
if (line[actual][head] == ' ')
|
||||
state = 2;
|
||||
break;
|
||||
case 0: // dot in begin of line
|
||||
if (line[actual][0] == '.') {
|
||||
state = 1;
|
||||
break;
|
||||
} else {
|
||||
state = 2;
|
||||
}
|
||||
// no break
|
||||
case 2: // non word chars
|
||||
if (is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 3;
|
||||
token = head;
|
||||
} else if ((line[actual][head] == '\\') &&
|
||||
(line[actual][head + 1] == 'f') &&
|
||||
(line[actual][head + 2] != '\0')) {
|
||||
head += 2;
|
||||
}
|
||||
break;
|
||||
case 3: // wordchar
|
||||
if (!is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 2;
|
||||
if (alloc_token(token, &head, t))
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (next_char(line[actual].c_str(), &head)) {
|
||||
state = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
58
3rdparty/hunspell/src/parsers/manparser.hxx
vendored
58
3rdparty/hunspell/src/parsers/manparser.hxx
vendored
@ -1,58 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef MANPARSER_HXX_
|
||||
#define MANPARSER_HXX_
|
||||
|
||||
#include "textparser.hxx"
|
||||
|
||||
/*
|
||||
* Man page Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class ManParser : public TextParser {
|
||||
protected:
|
||||
public:
|
||||
explicit ManParser(const char* wc);
|
||||
ManParser(const w_char* wordchars, int len);
|
||||
virtual ~ManParser();
|
||||
|
||||
virtual bool next_token(std::string&);
|
||||
};
|
||||
|
||||
#endif
|
76
3rdparty/hunspell/src/parsers/odfparser.cxx
vendored
76
3rdparty/hunspell/src/parsers/odfparser.cxx
vendored
@ -1,76 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "odfparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// Spans that ODFParser::next_token() hands to XMLParser::next_token() as its
// first table. Each row is {opening pattern, closing pattern}.
// NOTE(review): rows are presumably tried in order, which is why the generic
// "<" ... ">" row is last -- confirm against XMLParser before reordering.
static const char* PATTERN[][2] = {
    {"<office:meta>", "</office:meta>"},
    {"<office:settings>", "</office:settings>"},
    {"<office:binary-data>", "</office:binary-data>"},
    {"<!--", "-->"},  // XML comment
    // NOTE(review): presumably meant for CDATA sections; confirm how
    // XMLParser matches it.
    {"<[cdata[", "]]>"},
    {"<", ">"}};  // generic tag

// Number of rows in PATTERN (same array-length idiom as latexparser.cxx).
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))

// ODF has no second table: a null table with zero rows is passed instead.
static const char* (*PATTERN2)[2] = NULL;

#define PATTERN_LEN2 0
|
||||
|
||||
ODFParser::ODFParser(const char* wordchars)
|
||||
: XMLParser(wordchars) {
|
||||
}
|
||||
|
||||
ODFParser::ODFParser(const w_char* wordchars, int len)
|
||||
: XMLParser(wordchars, len) {
|
||||
}
|
||||
|
||||
bool ODFParser::next_token(std::string& t) {
|
||||
return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, t);
|
||||
}
|
||||
|
||||
ODFParser::~ODFParser() {}
|
56
3rdparty/hunspell/src/parsers/odfparser.hxx
vendored
56
3rdparty/hunspell/src/parsers/odfparser.hxx
vendored
@ -1,56 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef ODFPARSER_HXX_
|
||||
#define ODFPARSER_HXX_
|
||||
|
||||
#include "xmlparser.hxx"
|
||||
|
||||
/*
|
||||
* ODF Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class ODFParser : public XMLParser {
|
||||
public:
|
||||
explicit ODFParser(const char* wc);
|
||||
ODFParser(const w_char* wordchars, int len);
|
||||
virtual bool next_token(std::string&);
|
||||
virtual ~ODFParser();
|
||||
};
|
||||
|
||||
#endif
|
86
3rdparty/hunspell/src/parsers/testparser.cxx
vendored
86
3rdparty/hunspell/src/parsers/testparser.cxx
vendored
@ -1,86 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
|
||||
#include "textparser.hxx"
|
||||
#include "htmlparser.hxx"
|
||||
#include "latexparser.hxx"
|
||||
#include "xmlparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
FILE* f;
|
||||
/* first parse the command line options */
|
||||
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "correct syntax is:\n");
|
||||
fprintf(stderr, "testparser file\n");
|
||||
fprintf(stderr, "example: testparser /dev/stdin\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* open the words to check list */
|
||||
f = fopen(argv[1], "r");
|
||||
if (!f) {
|
||||
fprintf(stderr, "Error - could not open file of words to check\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
TextParser* p = new TextParser(
|
||||
"qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM");
|
||||
|
||||
char buf[MAXLNLEN];
|
||||
|
||||
while (fgets(buf, MAXLNLEN, f)) {
|
||||
p->put_line(buf);
|
||||
p->set_url_checking(1);
|
||||
std::string next;
|
||||
while (p->next_token(next)) {
|
||||
fprintf(stdout, "token: %s\n", next.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
delete p;
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
298
3rdparty/hunspell/src/parsers/textparser.cxx
vendored
298
3rdparty/hunspell/src/parsers/textparser.cxx
vendored
@ -1,298 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "textparser.hxx"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// ISO-8859-1 HTML character entities
|
||||
|
||||
// Named HTML character entities for ISO-8859-1 letters that get_latin1()
// recognizes. Each entry must keep its "&...;" spelling: get_latin1() only
// scans this table when the input starts with '&', so raw (decoded)
// characters here could never match.
static const char* LATIN1[] = {
    "&Agrave;", "&Atilde;", "&Aring;",  "&AElig;",  "&Egrave;", "&Ecirc;",
    "&Igrave;", "&Iuml;",   "&ETH;",    "&Ntilde;", "&Ograve;", "&Oslash;",
    "&Ugrave;", "&THORN;",  "&agrave;", "&atilde;", "&aring;",  "&aelig;",
    "&egrave;", "&ecirc;",  "&igrave;", "&iuml;",   "&eth;",    "&ntilde;",
    "&ograve;", "&oslash;", "&ugrave;", "&thorn;",  "&yuml;"};

// Number of entries in LATIN1.
#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char*))

// Apostrophe spellings: XML/HTML entity, UTF-8 encoded U+2019, plain ASCII.
#define ENTITY_APOS "&apos;"
#define UTF8_APOS "\xe2\x80\x99"
#define APOSTROPHE "'"
|
||||
|
||||
// 8-bit constructor: delegates to init(const char*), which builds the
// per-byte word-character table.
TextParser::TextParser(const char* wordchars) {
  init(wordchars);
}

// UTF-8 constructor: delegates to init(const w_char*, int), which stores the
// caller's word-character array pointer and length.
TextParser::TextParser(const w_char* wordchars, int len) {
  init(wordchars, len);
}

// Nothing to release explicitly.
TextParser::~TextParser() {
}
|
||||
|
||||
int TextParser::is_wordchar(const char* w) {
|
||||
if (*w == '\0')
|
||||
return 0;
|
||||
if (utf8) {
|
||||
std::vector<w_char> wc;
|
||||
unsigned short idx;
|
||||
u8_u16(wc, w);
|
||||
if (wc.empty())
|
||||
return 0;
|
||||
idx = (wc[0].h << 8) + wc[0].l;
|
||||
return (unicodeisalpha(idx) ||
|
||||
(wordchars_utf16 &&
|
||||
std::binary_search(wordchars_utf16, wordchars_utf16 + wclen, wc[0])));
|
||||
} else {
|
||||
return wordcharacters[(*w + 256) % 256];
|
||||
}
|
||||
}
|
||||
|
||||
const char* TextParser::get_latin1(const char* s) {
|
||||
if (s[0] == '&') {
|
||||
unsigned int i = 0;
|
||||
while ((i < LATIN1_LEN) && strncmp(LATIN1[i], s, strlen(LATIN1[i])))
|
||||
i++;
|
||||
if (i != LATIN1_LEN)
|
||||
return LATIN1[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Resets the parser for 8-bit input and marks every byte of `wordchars`
// as a word character. A NULL `wordchars` selects the built-in ASCII
// letter set.
void TextParser::init(const char* wordchars) {
  // 8-bit mode: no UTF-16 word-character array.
  utf8 = 0;
  wordchars_utf16 = NULL;
  wclen = 0;

  // Position/state bookkeeping starts from scratch.
  actual = 0;
  head = 0;
  token = 0;
  state = 0;
  checkurl = 0;

  wordcharacters.resize(256, 0);
  if (!wordchars)
    wordchars = "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM";

  // "(+ 256) % 256" maps a possibly negative char onto 0..255.
  for (const char* c = wordchars; *c != '\0'; ++c)
    wordcharacters[(*c + 256) % 256] = 1;
}
|
||||
|
||||
// Resets the parser for UTF-8 input. The `wc` pointer is stored, not copied,
// and is later searched with std::binary_search by is_wordchar().
void TextParser::init(const w_char* wc, int len) {
  // UTF-8 mode with the caller's extra word characters.
  utf8 = 1;
  wordchars_utf16 = wc;
  wclen = len;

  // Position/state bookkeeping starts from scratch.
  actual = 0;
  head = 0;
  token = 0;
  state = 0;
  checkurl = 0;
}
|
||||
|
||||
int TextParser::next_char(const char* ln, size_t* pos) {
|
||||
if (*(ln + *pos) == '\0')
|
||||
return 1;
|
||||
if (utf8) {
|
||||
if (*(ln + *pos) >> 7) {
|
||||
// jump to next UTF-8 character
|
||||
for ((*pos)++; (*(ln + *pos) & 0xc0) == 0x80; (*pos)++)
|
||||
;
|
||||
} else {
|
||||
(*pos)++;
|
||||
}
|
||||
} else
|
||||
(*pos)++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Stores `word` as the new current line in the ring of MAXPREVLINE lines,
// rewinds the token/head positions and recomputes the URL mask.
void TextParser::put_line(const char* word) {
  const int slot = (actual + 1) % MAXPREVLINE;
  actual = slot;
  line[slot].assign(word);

  head = 0;
  token = 0;

  check_urls();
}
|
||||
|
||||
std::string TextParser::get_prevline(int n) const {
|
||||
return line[(actual + MAXPREVLINE - n) % MAXPREVLINE];
|
||||
}
|
||||
|
||||
std::string TextParser::get_line() const {
|
||||
return get_prevline(0);
|
||||
}
|
||||
|
||||
bool TextParser::next_token(std::string &t) {
|
||||
const char* latin1;
|
||||
|
||||
for (;;) {
|
||||
switch (state) {
|
||||
case 0: // non word chars
|
||||
if (is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 1;
|
||||
token = head;
|
||||
} else if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
||||
state = 1;
|
||||
token = head;
|
||||
head += strlen(latin1);
|
||||
}
|
||||
break;
|
||||
case 1: // wordchar
|
||||
if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
||||
head += strlen(latin1);
|
||||
} else if ((is_wordchar((char*)APOSTROPHE) ||
|
||||
(is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
|
||||
!line[actual].empty() && line[actual][head] == '\'' &&
|
||||
is_wordchar(line[actual].c_str() + head + 1)) {
|
||||
head++;
|
||||
} else if (is_utf8() &&
|
||||
is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
|
||||
// to the WORDCHARS, if
|
||||
// needed
|
||||
strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
|
||||
0 &&
|
||||
is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
|
||||
head += strlen(UTF8_APOS) - 1;
|
||||
} else if (!is_wordchar(line[actual].c_str() + head)) {
|
||||
state = 0;
|
||||
if (alloc_token(token, &head, t))
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (next_char(line[actual].c_str(), &head))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
size_t TextParser::get_tokenpos() {
|
||||
return token;
|
||||
}
|
||||
|
||||
int TextParser::change_token(const char* word) {
|
||||
if (word) {
|
||||
std::string remainder(line[actual].substr(head));
|
||||
line[actual].resize(token);
|
||||
line[actual].append(word);
|
||||
line[actual].append(remainder);
|
||||
head = token;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void TextParser::check_urls() {
|
||||
urlline.resize(line[actual].size() + 1);
|
||||
int url_state = 0;
|
||||
size_t url_head = 0;
|
||||
size_t url_token = 0;
|
||||
int url = 0;
|
||||
for (;;) {
|
||||
switch (url_state) {
|
||||
case 0: // non word chars
|
||||
if (is_wordchar(line[actual].c_str() + url_head)) {
|
||||
url_state = 1;
|
||||
url_token = url_head;
|
||||
// Unix path
|
||||
} else if (line[actual][url_head] == '/') {
|
||||
url_state = 1;
|
||||
url_token = url_head;
|
||||
url = 1;
|
||||
}
|
||||
break;
|
||||
case 1: // wordchar
|
||||
char ch = line[actual][url_head];
|
||||
// e-mail address
|
||||
if ((ch == '@') ||
|
||||
// MS-DOS, Windows path
|
||||
(strncmp(line[actual].c_str() + url_head, ":\\", 2) == 0) ||
|
||||
// URL
|
||||
(strncmp(line[actual].c_str() + url_head, "://", 3) == 0)) {
|
||||
url = 1;
|
||||
} else if (!(is_wordchar(line[actual].c_str() + url_head) || (ch == '-') ||
|
||||
(ch == '_') || (ch == '\\') || (ch == '.') ||
|
||||
(ch == ':') || (ch == '/') || (ch == '~') || (ch == '%') ||
|
||||
(ch == '*') || (ch == '$') || (ch == '[') || (ch == ']') ||
|
||||
(ch == '?') || (ch == '!') ||
|
||||
((ch >= '0') && (ch <= '9')))) {
|
||||
url_state = 0;
|
||||
if (url == 1) {
|
||||
for (size_t i = url_token; i < url_head; ++i) {
|
||||
urlline[i] = true;
|
||||
}
|
||||
}
|
||||
url = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
urlline[url_head] = false;
|
||||
if (next_char(line[actual].c_str(), &url_head))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
int TextParser::get_url(size_t token_pos, size_t* hd) {
|
||||
for (size_t i = *hd; i < line[actual].size() && urlline[i]; i++, (*hd)++)
|
||||
;
|
||||
return checkurl ? 0 : urlline[token_pos];
|
||||
}
|
||||
|
||||
void TextParser::set_url_checking(int check) {
|
||||
checkurl = check;
|
||||
}
|
||||
|
||||
bool TextParser::alloc_token(size_t tokn, size_t* hd, std::string& t) {
|
||||
size_t url_head = *hd;
|
||||
if (get_url(tokn, &url_head))
|
||||
return false;
|
||||
t = line[actual].substr(tokn, *hd - tokn);
|
||||
// remove colon for Finnish and Swedish language
|
||||
if (!t.empty() && t[t.size() - 1] == ':') {
|
||||
t.resize(t.size() - 1);
|
||||
if (t.empty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
98
3rdparty/hunspell/src/parsers/textparser.hxx
vendored
98
3rdparty/hunspell/src/parsers/textparser.hxx
vendored
@ -1,98 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef TEXTPARSER_HXX_
|
||||
#define TEXTPARSER_HXX_
|
||||
|
||||
// set sum of actual and previous lines
|
||||
#define MAXPREVLINE 4
|
||||
|
||||
#ifndef MAXLNLEN
|
||||
#define MAXLNLEN 8192
|
||||
#endif
|
||||
|
||||
#include "../hunspell/w_char.hxx"
|
||||
|
||||
#include <vector>
|
||||
|
||||
/*
|
||||
* Base Text Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class TextParser {
|
||||
protected:
|
||||
std::vector<int> wordcharacters;// for detection of the word boundaries
|
||||
std::string line[MAXPREVLINE]; // parsed and previous lines
|
||||
std::vector<bool> urlline; // mask for url detection
|
||||
int checkurl;
|
||||
int actual; // actual line
|
||||
size_t head; // head position
|
||||
size_t token;// begin of token
|
||||
int state; // state of automata
|
||||
int utf8; // UTF-8 character encoding
|
||||
int next_char(const char* line, size_t* pos);
|
||||
const w_char* wordchars_utf16;
|
||||
int wclen;
|
||||
|
||||
public:
|
||||
TextParser(const w_char* wordchars, int len);
|
||||
explicit TextParser(const char* wc);
|
||||
virtual ~TextParser();
|
||||
|
||||
void put_line(const char* line);
|
||||
std::string get_line() const;
|
||||
std::string get_prevline(int n) const;
|
||||
virtual bool next_token(std::string&);
|
||||
virtual int change_token(const char* word);
|
||||
void set_url_checking(int check);
|
||||
|
||||
size_t get_tokenpos();
|
||||
int is_wordchar(const char* w);
|
||||
inline int is_utf8() { return utf8; }
|
||||
const char* get_latin1(const char* s);
|
||||
char* next_char();
|
||||
int tokenize_urls();
|
||||
void check_urls();
|
||||
int get_url(size_t token_pos, size_t* head);
|
||||
bool alloc_token(size_t token, size_t* head, std::string& out);
|
||||
private:
|
||||
void init(const char*);
|
||||
void init(const w_char* wordchars, int len);
|
||||
};
|
||||
|
||||
#endif
|
213
3rdparty/hunspell/src/parsers/xmlparser.cxx
vendored
213
3rdparty/hunspell/src/parsers/xmlparser.cxx
vendored
@ -1,213 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "../hunspell/csutil.hxx"
|
||||
#include "xmlparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
// States of the XMLParser::next_token automaton.
enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB };

// Default {opening, closing} delimiter pairs whose content is skipped.
// look_pattern() lower-cases the input before comparing, so the patterns
// are spelled in lower case.
static const char* __PATTERN__[][2] = {{"<!--", "-->"},  // XML comment
                                       {"<[cdata[", "]]>"},
                                       {"<", ">"}};

#define __PATTERN_LEN__ (sizeof(__PATTERN__) / (sizeof(char*) * 2))

// Default secondary pattern table: none.
static const char* (*__PATTERN2__)[2] = NULL;

#define __PATTERN_LEN2__ 0

// Apostrophe spellings: XML entity, UTF-8 encoded U+2019, plain ASCII.
// ENTITY_APOS must stay the "&apos;" entity: change_token() escapes
// APOSTROPHE to it and next_token() recognizes it inside words.
#define ENTITY_APOS "&apos;"
#define UTF8_APOS "\xe2\x80\x99"
#define APOSTROPHE "'"
|
||||
|
||||
// 8-bit constructor: forwards `wordchars` to TextParser and zeroes the
// XML-specific automaton fields.
XMLParser::XMLParser(const char* wordchars)
    : TextParser(wordchars),
      pattern_num(0),
      pattern2_num(0),
      prevstate(0),
      checkattr(0),
      quotmark(0) {
}

// UTF-8 constructor: same as above for the w_char word-character array.
XMLParser::XMLParser(const w_char* wordchars, int len)
    : TextParser(wordchars, len),
      pattern_num(0),
      pattern2_num(0),
      prevstate(0),
      checkattr(0),
      quotmark(0) {
}

// Nothing to release explicitly.
XMLParser::~XMLParser() {
}
|
||||
|
||||
int XMLParser::look_pattern(const char* p[][2], unsigned int len, int column) {
|
||||
for (unsigned int i = 0; i < len; i++) {
|
||||
const char* j = line[actual].c_str() + head;
|
||||
const char* k = p[i][column];
|
||||
while ((*k != '\0') && (tolower(*j) == *k)) {
|
||||
j++;
|
||||
k++;
|
||||
}
|
||||
if (*k == '\0')
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* XML parser
|
||||
*
|
||||
*/
|
||||
|
||||
bool XMLParser::next_token(const char* PATTERN[][2],
|
||||
unsigned int PATTERN_LEN,
|
||||
const char* PATTERN2[][2],
|
||||
unsigned int PATTERN_LEN2,
|
||||
std::string& t) {
|
||||
t.clear();
|
||||
const char* latin1;
|
||||
|
||||
for (;;) {
|
||||
switch (state) {
|
||||
case ST_NON_WORD: // non word chars
|
||||
prevstate = ST_NON_WORD;
|
||||
if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) {
|
||||
checkattr = 0;
|
||||
if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) {
|
||||
checkattr = 1;
|
||||
}
|
||||
state = ST_TAG;
|
||||
} else if (is_wordchar(line[actual].c_str() + head)) {
|
||||
state = ST_WORD;
|
||||
token = head;
|
||||
} else if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
||||
state = ST_WORD;
|
||||
token = head;
|
||||
head += strlen(latin1);
|
||||
} else if (line[actual][head] == '&') {
|
||||
state = ST_CHAR_ENTITY;
|
||||
}
|
||||
break;
|
||||
case ST_WORD: // wordchar
|
||||
if ((latin1 = get_latin1(line[actual].c_str() + head))) {
|
||||
head += strlen(latin1);
|
||||
} else if ((is_wordchar((char*)APOSTROPHE) ||
|
||||
(is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
|
||||
strncmp(line[actual].c_str() + head, ENTITY_APOS,
|
||||
strlen(ENTITY_APOS)) == 0 &&
|
||||
is_wordchar(line[actual].c_str() + head + strlen(ENTITY_APOS))) {
|
||||
head += strlen(ENTITY_APOS) - 1;
|
||||
} else if (is_utf8() &&
|
||||
is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
|
||||
// to the WORDCHARS, if
|
||||
// needed
|
||||
strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
|
||||
0 &&
|
||||
is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
|
||||
head += strlen(UTF8_APOS) - 1;
|
||||
} else if (!is_wordchar(line[actual].c_str() + head)) {
|
||||
state = prevstate;
|
||||
if (alloc_token(token, &head, t))
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case ST_TAG: // comment, labels, etc
|
||||
int i;
|
||||
if ((checkattr == 1) &&
|
||||
((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1) &&
|
||||
(strcmp(PATTERN2[i][0], PATTERN2[pattern2_num][0]) == 0)) {
|
||||
checkattr = 2;
|
||||
} else if ((checkattr > 0) && (line[actual][head] == '>')) {
|
||||
state = ST_NON_WORD;
|
||||
} else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) &&
|
||||
(strcmp(PATTERN[i][1], PATTERN[pattern_num][1]) == 0)) {
|
||||
state = ST_NON_WORD;
|
||||
head += strlen(PATTERN[pattern_num][1]) - 1;
|
||||
} else if ((strcmp(PATTERN[pattern_num][0], "<") == 0) &&
|
||||
((line[actual][head] == '"') ||
|
||||
(line[actual][head] == '\''))) {
|
||||
quotmark = line[actual][head];
|
||||
state = ST_ATTRIB;
|
||||
}
|
||||
break;
|
||||
case ST_ATTRIB: // non word chars
|
||||
prevstate = ST_ATTRIB;
|
||||
if (line[actual][head] == quotmark) {
|
||||
state = ST_TAG;
|
||||
if (checkattr == 2)
|
||||
checkattr = 1;
|
||||
// for IMG ALT
|
||||
} else if (is_wordchar(line[actual].c_str() + head) && (checkattr == 2)) {
|
||||
state = ST_WORD;
|
||||
token = head;
|
||||
} else if (line[actual][head] == '&') {
|
||||
state = ST_CHAR_ENTITY;
|
||||
}
|
||||
break;
|
||||
case ST_CHAR_ENTITY: // SGML element
|
||||
if ((tolower(line[actual][head]) == ';')) {
|
||||
state = prevstate;
|
||||
head--;
|
||||
}
|
||||
}
|
||||
if (next_char(line[actual].c_str(), &head))
|
||||
return false;
|
||||
}
|
||||
//FIXME No return, in function returning non-void
|
||||
}
|
||||
|
||||
bool XMLParser::next_token(std::string& t) {
|
||||
return next_token(__PATTERN__, __PATTERN_LEN__, __PATTERN2__,
|
||||
__PATTERN_LEN2__, t);
|
||||
}
|
||||
|
||||
int XMLParser::change_token(const char* word) {
|
||||
if (strstr(word, APOSTROPHE) != NULL || strchr(word, '"') != NULL ||
|
||||
strchr(word, '&') != NULL || strchr(word, '<') != NULL ||
|
||||
strchr(word, '>') != NULL) {
|
||||
std::string r(word);
|
||||
mystrrep(r, "&", "__namp;__");
|
||||
mystrrep(r, "__namp;__", "&");
|
||||
mystrrep(r, APOSTROPHE, ENTITY_APOS);
|
||||
mystrrep(r, "\"", """);
|
||||
mystrrep(r, ">", ">");
|
||||
mystrrep(r, "<", "<");
|
||||
return TextParser::change_token(r.c_str());
|
||||
}
|
||||
return TextParser::change_token(word);
|
||||
}
|
70
3rdparty/hunspell/src/parsers/xmlparser.hxx
vendored
70
3rdparty/hunspell/src/parsers/xmlparser.hxx
vendored
@ -1,70 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef XMLPARSER_HXX_
|
||||
#define XMLPARSER_HXX_
|
||||
|
||||
#include "textparser.hxx"
|
||||
|
||||
/*
|
||||
* XML Parser
|
||||
*
|
||||
*/
|
||||
|
||||
class XMLParser : public TextParser {
|
||||
public:
|
||||
explicit XMLParser(const char* wc);
|
||||
XMLParser(const w_char* wordchars, int len);
|
||||
bool next_token(const char* p[][2],
|
||||
unsigned int len,
|
||||
const char* p2[][2],
|
||||
unsigned int len2,
|
||||
std::string&);
|
||||
virtual bool next_token(std::string&);
|
||||
int change_token(const char* word);
|
||||
virtual ~XMLParser();
|
||||
|
||||
private:
|
||||
int look_pattern(const char* p[][2], unsigned int len, int column);
|
||||
int pattern_num;
|
||||
int pattern2_num;
|
||||
int prevstate;
|
||||
int checkattr;
|
||||
char quotmark;
|
||||
};
|
||||
|
||||
#endif
|
9
3rdparty/hunspell/src/tools/.gitignore
vendored
9
3rdparty/hunspell/src/tools/.gitignore
vendored
@ -1,9 +0,0 @@
|
||||
analyze
|
||||
bulkcheck
|
||||
chmorph
|
||||
example
|
||||
hunspell
|
||||
hunzip
|
||||
hzip
|
||||
munch
|
||||
unmunch
|
195
3rdparty/hunspell/src/tools/affixcompress
vendored
195
3rdparty/hunspell/src/tools/affixcompress
vendored
@ -1,195 +0,0 @@
|
||||
#!/bin/sh
# affix compressor utility for Hunspell
# 2008 (c) László Németh, version 0.3
# usage: affixcompress sorted_word_list_file [max_affix_rules]
#
# Writes <word_list>.aff and <word_list>.dic next to the input file.
# Intermediate data goes to affixcompress[0-3].tmp in the current directory.
# NOTE(review): the second awk program also writes files named "word" and
# "word2" into the current directory; this looks like leftover debug output.
case $# in
0) echo \
"affixcompress - compress a huge sorted word list to Hunspell format
Usage:

LC_ALL=C sort word_list >sorted_word_list
affixcompress sorted_word_list [max_affix_rules]

Default value of max_affix_rules = 5000

Note: output may need manually added affix parameters (SET character_encoding,
TRY suggestion_characters etc., see man(4) hunspell)"
   exit 0;;
esac

MAXAFFIX=${2:-5000}

# profiling
#AWK="pgawk --profile"
# $AWK is expanded unquoted on purpose so that it may carry options.
AWK="awk"
# Prefer gawk when available; command -v is the POSIX lookup and, unlike a
# bare "which", prints nothing here.
if command -v gawk >/dev/null 2>&1; then
  AWK="gawk"
fi

# Start from scratch: the .dic file is appended to at the end.
rm -f "$1.aff" "$1.dic"

# Pass 1: count candidate suffixes.
cat "$1" | $AWK '
{
  # calculate frequent suffixes
  A[$1] = 1
  len = length($1)
  if (len > 2) {
    # print $1, substr($1, 1, len - 1), substr($1, len, 1) >"/dev/stderr"
    B[substr($1, 1, len - 1)] = substr($1, len, 1);
  }
  for(i = 2; i < len; i++) {
    r = substr($1, 1, i)
    if (i == 2) {
      if (prev != r) {
        delete A
        delete B
        print "Deleted roots: ", prev > "/dev/stderr"
        A[$1] = 1
      }
      prev = r
    }
    if (A[r]) {
      # print $1 ": " r " és "substr($1, i + 1, len - i + 1) >"/dev/stderr"
      sfx[substr($1, i + 1, len - i + 1)]++
    } else if (B[r] && B[r] != substr($1, i + 1, 1)) {
      r2 = substr($1, i + 1, len - i + 1)
      sfy[r2,B[r]]++
    }
  }
}
END {
  for (i in sfx) print i, 0, sfx[i]
  for (i in sfy) print i, sfy[i]
}
' | tr '\034' ' ' >affixcompress0.tmp

# Keep the MAXAFFIX most frequent suffixes.
sort -rnk 3 affixcompress0.tmp | $AWK '$3 >= 1{print $0}' |
head -n "$MAXAFFIX" >affixcompress1.tmp

# Pass 2: read the suffix table from stdin ("-"), then the word list, and
# emit "root [flag]" lines; the END rule writes the .aff file.
cat affixcompress1.tmp |
$AWK '
function potential_roots() {
  # potential roots with most frequent suffixes
  for(word in W) if (W[word]==1) {
    print word >"word"
    len = length(word);
    for(i = 2; i < len; i++) {
      root = substr(word, 1, i)
      suff = substr(word, i + 1, len - i + 1)
      if ((W[root]!="") && (sfxfr[suff] > 100)) C[root]++
      if (sfz[suff]) {
        l = split(sfz[suff], a)
        for (k=1; k <= l; k++) if ((W[root a[k]]!="") && (sfyfr[root a[k]] > 100)) {
          C[root a[k]]++
        }
      }
    }
  }

  # calculate roots
  for(word in W) if (W[word]==1) {
    print word >"word2"
    len = length(word);
    z = 0
    # choose most frequent root (maybe the original word)
    max = C[word]
    maxword = word
    maxsuff = 0
    for(i = 2; i < len; i++) {
      root = substr(word, 1, i)
      suff = substr(word, i + 1, len - i + 1)
      if ((sfx[suff] != "") && (C[root] > max)) {
        max = C[root]
        maxword = root
        maxsuff = sfx[suff]
      }
      if (sfz[suff] != "") {
        l = split(sfz[suff], a)
        for (k=1; k <= l; k++) if (C[root a[k]] > max) {
          max = C[root a[k]]
          maxword = root a[k]
          maxsuff = sfy[suff,a[k]]
        }
      }
    }
    if (max > 0) {
      if (maxsuff > 0) print maxword, maxsuff; else print maxword
      A[maxword]++
      z=1
    } else {
      for(i = 2; i < len; i++) {
        root = substr(word, 1, i)
        suff = substr(word, i + 1, len - i + 1)
        if ((A[root] > 0) && sfx[suff]!="") {
          print root, sfx[suff]
          z = 1
          break
        }
        if (sfz[suff]) {
          l = split(sfz[suff], a)
          for (k=1; k <= l; k++) if (A[root a[k]]!="") {
            print root a[k], sfy[suff,a[k]]
            z = 1
            break
          }
        }
      }
    }
    if (z == 0) {
      print word
      A[word]++
    }
  }
  delete A
  delete C
}
FILENAME == "-" {
  if ($2 == 0) {
    sfx[$1] = NR
    sfxfr[$1] = $3
  } else {
    sfy[$1,$2] = NR
    sfyfr[$1,$2] = $3
    sfz[$1] = sfz[$1] " " $2
  }
  maxsuf = NR
  next
}
{
  cap = substr($1, 1, 3)
  if (cap != prev) {
    potential_roots()
    delete W
    print "Deleted class:", prev > "/dev/stderr"
  }
  prev = cap
  W[$1] = 1
}
END {
  potential_roots()
  # write out frequent suffixes
  out=FILENAME ".aff"
  print "FLAG num" >out
  for (i in sfx) if (sfx[i] > 0) {
    print "SFX", sfx[i], "Y 1" >out
    print "SFX", sfx[i], "0", i, "." >out
  }
  for (i in sfy) if (sfy[i] > 0) {
    print "SFX", sfy[i], "Y 1" >out
    split(i, c, "\034");
    print "SFX", sfy[i], c[2], c[1], c[2] >out
  }
}
' - "$1" >affixcompress2.tmp

# Pass 3: merge the flags of identical roots and write the dictionary
# (entry count first, then the sorted entries).
sort -nk 2 affixcompress2.tmp >affixcompress3.tmp
cat affixcompress3.tmp | $AWK -v out="$1.dic" '
{
  if (A[$1]=="") A[$1]=$2;
  else if ($2!="") A[$1] = A[$1] "," $2
}
END {
  for (i in A) n++
  print n >out
  for (i in A) {
    if (A[i]=="") print i
    else print i "/" A[i]
  }
}
' | sort >>"$1.dic"
|
103
3rdparty/hunspell/src/tools/analyze.cxx
vendored
103
3rdparty/hunspell/src/tools/analyze.cxx
vendored
@ -1,103 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
|
||||
#include "hunspell.hxx"
|
||||
|
||||
#ifndef WIN32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
/* Demo driver for morphological analysis, stemming and generation.
 *
 * Usage: analyze affix_file dictionary_file file_of_words_to_check
 *
 * Every non-empty line of the word file is processed:
 *  - "word1 word2" (two words separated by a space) runs generate(word1, word2);
 *  - a single word is spell-checked and, when accepted, analyzed and stemmed.
 *
 * Returns 0 on success; exits with status 1 on a usage error or when the
 * word file cannot be opened.
 */
int main(int, char** argv) {
  /* first parse the command line options */

  /* argv is NULL-terminated, so a missing argument shows up as a NULL entry.
   * All three arguments are required: argv[3] is handed to fopen() below. */
  for (int i = 1; i <= 3; ++i)
    if (!argv[i]) {
      fprintf(stderr, "correct syntax is:\nanalyze affix_file");
      fprintf(stderr, " dictionary_file file_of_words_to_check\n");
      fprintf(stderr, "use two words per line for morphological generation\n");
      exit(1);
    }

  /* open the words to check list */

  FILE* wtclst = fopen(argv[3], "r");
  if (!wtclst) {
    fprintf(stderr, "Error - could not open file to check\n");
    exit(1);
  }

  Hunspell* pMS = new Hunspell(argv[1], argv[2]);

  /* NOTE: lines longer than sizeof(buf) - 1 characters are read (and therefore
   * processed) in several pieces. */
  char buf[100];
  while (fgets(buf, sizeof(buf), wtclst)) {
    /* strip the trailing newline, if any */
    buf[strcspn(buf, "\n")] = 0;
    if (*buf == '\0')
      continue;

    // morphgen demo
    char* s = strchr(buf, ' ');
    if (s) {
      /* split "word1 word2" in place: buf is word1, s + 1 is word2 */
      *s = '\0';
      std::vector<std::string> result = pMS->generate(buf, s + 1);
      for (size_t i = 0; i < result.size(); ++i) {
        fprintf(stdout, "generate(%s, %s) = %s\n", buf, s + 1, result[i].c_str());
      }
      if (result.empty())
        fprintf(stdout, "generate(%s, %s) = NO DATA\n", buf, s + 1);
    } else {
      int dp = pMS->spell(std::string(buf));
      fprintf(stdout, "> %s\n", buf);
      if (dp) {
        std::vector<std::string> result = pMS->analyze(buf);
        for (size_t i = 0; i < result.size(); ++i) {
          fprintf(stdout, "analyze(%s) = %s\n", buf, result[i].c_str());
        }
        result = pMS->stem(buf);
        for (size_t i = 0; i < result.size(); ++i) {
          fprintf(stdout, "stem(%s) = %s\n", buf, result[i].c_str());
        }
      } else {
        fprintf(stdout, "Unknown word.\n");
      }
    }
  }

  delete pMS;
  fclose(wtclst);
  return 0;
}
|
196
3rdparty/hunspell/src/tools/bulkcheck.cxx
vendored
196
3rdparty/hunspell/src/tools/bulkcheck.cxx
vendored
@ -1,196 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László for original code example.cxx
|
||||
* Copyright (C) 2017 Pander for new code bulkcheck.cxx
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#include "config.h" // for macro VERSION
|
||||
#include "hunspell.hxx"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
/* first parse the command line options */
|
||||
|
||||
if (argc < 4) {
|
||||
//TODO refactor to use a library for this
|
||||
fprintf(stderr, "bulkcheck (now it works with more dictionary files):\n");
|
||||
fprintf(stderr,
|
||||
"bulkcheck affix_file dictionary_file(s) file_of_words_to_check result_file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* open the words to check list, word is expected on each line */
|
||||
ifstream input_file(argv[argc - 1], ios_base::in);
|
||||
if (!input_file.is_open()) {
|
||||
fprintf(stderr, "Error - could not open file of words to check %s\n", argv[argc - 1]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Hunspell* hunspell = new Hunspell(argv[1], argv[2]);
|
||||
|
||||
// load extra dictionaries, such as medical dictionaries or personal dictionaries that do not have affix file
|
||||
//TODO This should go into the documentation
|
||||
if (argc > 3)
|
||||
for (int k = 3; k < argc - 1; ++k)
|
||||
hunspell->add_dic(argv[k]);
|
||||
|
||||
// /* open output file */
|
||||
// string filename = string(argv[argc - 1]) + "-bulkcheck.tsv";
|
||||
// ofstream output_file(filename, ios_base::out);
|
||||
// if (!output_file.is_open()) {
|
||||
// fprintf(stderr, "Error - could not open result file\n");
|
||||
// exit(1);
|
||||
// }
|
||||
|
||||
/* declare variables for iteration */
|
||||
string word;
|
||||
int num = 0;
|
||||
int num_space = 0;
|
||||
int num_correct = 0;
|
||||
int num_correct_space = 0;
|
||||
|
||||
/* iterate all lines in input file */
|
||||
while (getline(input_file, word)) {
|
||||
/* count number of words and words with space */
|
||||
num++;
|
||||
bool has_space = false;
|
||||
if (count(word.begin(), word.end(), ' ') > 0 ) {
|
||||
has_space = true;
|
||||
num_space++;
|
||||
}
|
||||
|
||||
bool is_correct = hunspell->spell(word);
|
||||
// output_file << is_success << "\t" << is_correct << "\t" << has_space << "\t" << has_suggest
|
||||
// << "\t" << word.c_str() << "\t" << expect.c_str() << "\t";
|
||||
cout << is_correct << "\t" << word.c_str() << "\t";
|
||||
|
||||
bool is_first = true;
|
||||
if (is_correct) {
|
||||
num_correct++;
|
||||
if (has_space) {
|
||||
num_correct_space++;
|
||||
}
|
||||
} else {
|
||||
vector<string> suggestions = hunspell->suggest(word.c_str());
|
||||
for (size_t i = 0; i < suggestions.size(); ++i) {
|
||||
if (is_first) {
|
||||
cout << suggestions[i];
|
||||
is_first = false;
|
||||
} else {
|
||||
cout << ";" << suggestions[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
input_file.close();
|
||||
|
||||
if (num == 0) {
|
||||
fprintf(stderr, "ERROR: No words to check in file %s:\n", argv[argc - 2]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int num_nospace = num - num_space;
|
||||
int num_incorrect = num - num_correct;
|
||||
int num_incorrect_space = num_space - num_correct_space;
|
||||
int num_correct_nospace = num_correct - num_correct_space;
|
||||
int num_incorrect_nospace = num_nospace - num_correct_nospace;
|
||||
|
||||
float per_nospace = 100.0 * num_nospace / num;
|
||||
float per_correct = 100.0 * num_correct / num;
|
||||
float per_correct_space = 100.0;
|
||||
if (num_space != 0) {
|
||||
per_correct_space = 100.0 * num_correct_space / num_space;
|
||||
}
|
||||
float per_correct_nospace = 100.0;
|
||||
if (num_nospace != 0) {
|
||||
per_correct_nospace = 100.0 * num_correct_nospace / num_nospace;
|
||||
}
|
||||
|
||||
float per_space = 100.0 - per_nospace;
|
||||
float per_incorrect = 100.0 - per_correct;
|
||||
float per_incorrect_space = 100.0 - per_correct_space;
|
||||
float per_incorrect_nospace = 100.0 - per_correct_nospace;
|
||||
|
||||
cerr << "Hunspell version\t" << VERSION << endl;
|
||||
cerr << "Hunspell affix\t" << argv[1] << endl;
|
||||
cerr << "Hunspell dict\t" << argv[2] << endl;
|
||||
|
||||
cerr << "wordlist\t" << argv[argc - 1] << endl;
|
||||
cerr << "percentage of words without space\t" << fixed << setw(6)
|
||||
<< setprecision(2) << setfill('0') << per_nospace << endl;
|
||||
cerr << "percentage of words with space\t" << fixed << setw(6)
|
||||
<< setprecision(2) << setfill('0') << per_space << endl;
|
||||
|
||||
cerr << "number of words\t" << num << endl;
|
||||
cerr << "number of correct words\t" << num_correct << endl;
|
||||
cerr << "number of incorect words\t" << num_incorrect << endl;
|
||||
cerr << "percentage of correct words\t" << fixed << setw(6)
|
||||
<< setprecision(2) << setfill('0') << per_correct << endl;
|
||||
cerr << "percentage of incorrect words\t" << fixed << setw(6)
|
||||
<< setprecision(2) << setfill('0') << per_incorrect << endl;
|
||||
|
||||
cerr << "number of words without space\t" << num_nospace << endl;
|
||||
cerr << "number of correct words without space\t" << num_correct_nospace << endl;
|
||||
cerr << "number of incorrect words without space\t" << num_incorrect_nospace << endl;
|
||||
cerr << "percentage of correct words without space\t" << fixed << setw(6)
|
||||
<< setprecision(2) << setfill('0') << per_correct_nospace << endl;
|
||||
cerr << "percentage of incorrect words without space\t" << fixed << setw(6)
|
||||
<< setprecision(2) << setfill('0') << per_incorrect_nospace << endl;
|
||||
|
||||
cerr << "number of words with space\t" << num_space << endl;
|
||||
cerr << "number of correct words with space\t" << num_correct_space << endl;
|
||||
cerr << "number of incorrect words with space\t" << num_incorrect_space << endl;
|
||||
cerr << "percentage of correct words with space\t" << fixed << setw(6)
|
||||
<< setprecision(2) << setfill('0') << per_correct_space << endl;
|
||||
cerr << "percentage of incorrect words with space\t" << fixed << setw(6)
|
||||
<< setprecision(2) << setfill('0') << per_incorrect_space << endl;
|
||||
|
||||
// output_file.close();
|
||||
|
||||
delete hunspell;
|
||||
|
||||
return 0;
|
||||
}
|
115
3rdparty/hunspell/src/tools/chmorph.cxx
vendored
115
3rdparty/hunspell/src/tools/chmorph.cxx
vendored
@ -1,115 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
|
||||
#include "hunspell.hxx"
|
||||
#include "textparser.hxx"
|
||||
|
||||
#ifndef W32
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
int main(int, char** argv) {
|
||||
FILE* f;
|
||||
|
||||
/* first parse the command line options */
|
||||
|
||||
for (int i = 1; i < 6; i++)
|
||||
if (!argv[i]) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"chmorph - change affixes by morphological analysis and generation\n"
|
||||
"correct syntax is:\nchmorph affix_file "
|
||||
"dictionary_file file_to_convert STRING1 STRING2\n"
|
||||
"STRINGS may be arbitrary parts of the morphological descriptions\n"
|
||||
"example: chmorph hu.aff hu.dic hu.txt SG_2 SG_3 "
|
||||
" (convert informal Hungarian second person texts to formal third "
|
||||
"person texts)\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* open the words to check list */
|
||||
|
||||
f = fopen(argv[3], "r");
|
||||
if (!f) {
|
||||
fprintf(stderr, "Error - could not open file to check\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Hunspell* pMS = new Hunspell(argv[1], argv[2]);
|
||||
TextParser* p = new TextParser(
|
||||
"qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM");
|
||||
|
||||
char buf[MAXLNLEN];
|
||||
|
||||
while (fgets(buf, MAXLNLEN, f)) {
|
||||
p->put_line(buf);
|
||||
std::string next;
|
||||
while (p->next_token(next)) {
|
||||
std::vector<std::string> pl = pMS->analyze(next);
|
||||
if (!pl.empty()) {
|
||||
int gen = 0;
|
||||
for (size_t i = 0; i < pl.size(); ++i) {
|
||||
const char* pos = strstr(pl[i].c_str(), argv[4]);
|
||||
if (pos) {
|
||||
std::string r(pl[i], pos - pl[i].c_str());
|
||||
r.append(argv[5]);
|
||||
r.append(pos + strlen(argv[4]));
|
||||
pl[i] = r;
|
||||
gen = 1;
|
||||
}
|
||||
}
|
||||
if (gen) {
|
||||
std::vector<std::string> pl2 = pMS->generate(next, pl);
|
||||
if (!pl2.empty()) {
|
||||
p->change_token(pl2[0].c_str());
|
||||
// jump over the (possibly un)modified word
|
||||
(void)p->next_token(next);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(stdout, "%s\n", p->get_line().c_str());
|
||||
}
|
||||
|
||||
delete p;
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
93
3rdparty/hunspell/src/tools/example.cxx
vendored
93
3rdparty/hunspell/src/tools/example.cxx
vendored
@ -1,93 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
|
||||
#include "hunspell.hxx"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
/* first parse the command line options */
|
||||
|
||||
if (argc < 4) {
|
||||
fprintf(stderr, "example (now it works with more dictionary files):\n");
|
||||
fprintf(stderr,
|
||||
"example affix_file dictionary_file(s) file_of_words_to_check\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* open the words to check list */
|
||||
std::ifstream wtclst(argv[argc - 1], std::ios_base::in);
|
||||
if (!wtclst.is_open()) {
|
||||
fprintf(stderr, "Error - could not open file of words to check\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Hunspell* pMS = new Hunspell(argv[1], argv[2]);
|
||||
|
||||
// load extra dictionaries
|
||||
if (argc > 4)
|
||||
for (int k = 3; k < argc - 1; ++k)
|
||||
pMS->add_dic(argv[k]);
|
||||
|
||||
std::string buf;
|
||||
while (std::getline(wtclst, buf)) {
|
||||
int dp = pMS->spell(buf);
|
||||
if (dp) {
|
||||
fprintf(stdout, "\"%s\" is okay\n", buf.c_str());
|
||||
fprintf(stdout, "\n");
|
||||
} else {
|
||||
fprintf(stdout, "\"%s\" is incorrect!\n", buf.c_str());
|
||||
fprintf(stdout, " suggestions:\n");
|
||||
std::vector<std::string> wlst = pMS->suggest(buf.c_str());
|
||||
for (size_t i = 0; i < wlst.size(); ++i) {
|
||||
fprintf(stdout, " ...\"%s\"\n", wlst[i].c_str());
|
||||
}
|
||||
fprintf(stdout, "\n");
|
||||
}
|
||||
// for the same of testing this code path
|
||||
// do an analysis here and throw away the results
|
||||
pMS->analyze(buf);
|
||||
}
|
||||
|
||||
delete pMS;
|
||||
return 0;
|
||||
}
|
78
3rdparty/hunspell/src/tools/fuzzer.cxx
vendored
78
3rdparty/hunspell/src/tools/fuzzer.cxx
vendored
@ -1,78 +0,0 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <hunspell/hunspell.hxx>
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include <string.h>
|
||||
#include <libgen.h>
|
||||
|
||||
std::vector<Hunspell*> dictionaries;
|
||||
|
||||
// Returns true when `str` ends with `suffix` (an empty suffix always matches).
bool endswith(const std::string &str, const std::string &suffix)
{
    const std::string::size_type tail = suffix.size();
    if (str.size() < tail)
        return false;
    // compare the last `tail` characters of str against the whole suffix
    return str.compare(str.size() - tail, tail, suffix) == 0;
}
|
||||
|
||||
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
|
||||
{
|
||||
char* exe_path = (*argv)[0];
|
||||
// dirname() can modify its argument.
|
||||
char* exe_path_copy = strdup(exe_path);
|
||||
char* dir = dirname(exe_path_copy);
|
||||
DIR* d = opendir(dir);
|
||||
struct dirent *direntry;
|
||||
while ((direntry = readdir(d)) != NULL)
|
||||
{
|
||||
std::string entry(direntry->d_name);
|
||||
if (endswith(entry, ".aff"))
|
||||
{
|
||||
std::string dic = entry.substr(0, entry.size() - 4) + ".dic";
|
||||
dictionaries.push_back(new Hunspell(entry.c_str(), dic.c_str()));
|
||||
}
|
||||
}
|
||||
closedir(d);
|
||||
free(exe_path_copy);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const char* data, size_t size)
|
||||
{
|
||||
std::string word(data, size);
|
||||
for (std::vector<Hunspell*>::const_iterator it = dictionaries.begin(); it != dictionaries.end(); ++it)
|
||||
{
|
||||
Hunspell *dict = *it;
|
||||
if (!dict->spell(word))
|
||||
dict->suggest(word);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|
2215
3rdparty/hunspell/src/tools/hunspell.cxx
vendored
2215
3rdparty/hunspell/src/tools/hunspell.cxx
vendored
File diff suppressed because it is too large
Load Diff
60
3rdparty/hunspell/src/tools/hunzip.cxx
vendored
60
3rdparty/hunspell/src/tools/hunzip.cxx
vendored
@ -1,60 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "hunzip.hxx"
|
||||
|
||||
#define DESC \
|
||||
"hunzip - decompress a hzip file to the standard output\n" \
|
||||
"Usage: hunzip file.hz [password]\n"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if (argc == 1 || strcmp(argv[1], "-h") == 0) {
|
||||
fprintf(stderr, DESC);
|
||||
return 1;
|
||||
}
|
||||
Hunzip h(argv[1], (argc > 2) ? argv[2] : NULL);
|
||||
if (!h.is_open())
|
||||
return 0;
|
||||
std::string s;
|
||||
while (h.getline(s))
|
||||
printf("%s", s.c_str());
|
||||
return 0;
|
||||
}
|
419
3rdparty/hunspell/src/tools/hzip.cxx
vendored
419
3rdparty/hunspell/src/tools/hzip.cxx
vendored
@ -1,419 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
/* hzip: file compression for sorted dictionaries with optional encryption,
|
||||
* algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <string>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#define CODELEN 65536
|
||||
#define BUFSIZE 65536
|
||||
#define EXTENSION ".hz"
|
||||
|
||||
#define ESCAPE 31
|
||||
#define MAGIC "hz0"
|
||||
#define MAGIC_ENCRYPTED "hz1"
|
||||
|
||||
#define DESC \
|
||||
"hzip - dictionary compression utility\n" \
|
||||
"Usage: hzip [-h | -P password ] [file1 file2 ..]\n" \
|
||||
" -P password encrypted compression\n" \
|
||||
" -h display this help and exit\n"
|
||||
|
||||
/* node kinds stored in item::type */
enum {
  code_LEAF = 0,
  code_TERM = 1,
  code_NODE = 2
};

/* one node of the code tree built by newitem()/get_codetable() */
struct item {
  unsigned short word; /* 16-bit symbol carried by the node */
  int count;           /* occurrence count used when merging nodes */
  char type;           /* one of code_LEAF, code_TERM, code_NODE */
  item* left;          /* child followed for a '1' code character */
  item* right;         /* child followed for a '0' code character */
};
|
||||
|
||||
/* Report an error on stderr and return 1.
 * `err` is used as the printf-style format string, `par` as its single
 * argument. */
int fail(const char* err, const char* par) {
  const int failure = 1;
  (void)fprintf(stderr, err, par);
  return failure;
}
|
||||
|
||||
void code2table(struct item* tree, char** table, char* code, int deep) {
|
||||
int first = 0;
|
||||
if (!code) {
|
||||
first = 1;
|
||||
code = (char*)malloc(CODELEN);
|
||||
}
|
||||
code[deep] = '1';
|
||||
if (tree->left)
|
||||
code2table(tree->left, table, code, deep + 1);
|
||||
if (tree->type != code_NODE) {
|
||||
int i = tree->word;
|
||||
code[deep] = '\0';
|
||||
if (tree->type == code_TERM)
|
||||
i = CODELEN; /* terminal code */
|
||||
table[i] = (char*)malloc(deep + 1);
|
||||
strcpy(table[i], code);
|
||||
}
|
||||
code[deep] = '0';
|
||||
if (tree->right)
|
||||
code2table(tree->right, table, code, deep + 1);
|
||||
if (first)
|
||||
free(code);
|
||||
}
|
||||
|
||||
struct item* newitem(int c, struct item* l, struct item* r, int t) {
|
||||
struct item* ni = (struct item*)malloc(sizeof(struct item));
|
||||
ni->type = t;
|
||||
ni->word = 0;
|
||||
ni->count = c;
|
||||
ni->left = l;
|
||||
ni->right = r;
|
||||
return ni;
|
||||
}
|
||||
|
||||
/* return length of the freq array */
|
||||
int get_freqdata(struct item*** dest, FILE* f, unsigned short* termword) {
|
||||
int freq[CODELEN];
|
||||
int i, j, k, n;
|
||||
union {
|
||||
char c[2];
|
||||
unsigned short word;
|
||||
} u;
|
||||
for (i = 0; i < CODELEN; i++)
|
||||
freq[i] = 0;
|
||||
while ((j = getc(f)) != -1 && (k = getc(f)) != -1) {
|
||||
u.c[0] = j;
|
||||
u.c[1] = k;
|
||||
freq[u.word]++;
|
||||
}
|
||||
if (j != -1) {
|
||||
u.c[0] = 1;
|
||||
u.c[1] = j;
|
||||
} else {
|
||||
u.c[0] = 0;
|
||||
u.c[1] = 0;
|
||||
}
|
||||
|
||||
*dest = (struct item**)malloc((CODELEN + 1) * sizeof(struct item*));
|
||||
if (!*dest)
|
||||
return -1;
|
||||
for (i = 0, n = 0; i < CODELEN; i++)
|
||||
if (freq[i]) {
|
||||
(*dest)[n] = newitem(freq[i], NULL, NULL, code_LEAF);
|
||||
(*dest)[n]->word = i;
|
||||
n++;
|
||||
}
|
||||
/* terminal sequence (also contains the last odd byte of the file) */
|
||||
(*dest)[n] = newitem(1, NULL, NULL, code_TERM);
|
||||
*termword = u.word;
|
||||
return n + 1;
|
||||
}
|
||||
|
||||
void get_codetable(struct item** l, int n, char** table) {
|
||||
int i;
|
||||
while (n > 1) {
|
||||
int min = 0;
|
||||
int mi2 = 1;
|
||||
for (i = 1; i < n; i++) {
|
||||
if (l[i]->count < l[min]->count) {
|
||||
mi2 = min;
|
||||
min = i;
|
||||
} else if (l[i]->count < l[mi2]->count)
|
||||
mi2 = i;
|
||||
}
|
||||
l[min] = newitem(l[min]->count + l[mi2]->count, l[min], l[mi2], code_NODE);
|
||||
for (i = mi2 + 1; i < n; i++)
|
||||
l[i - 1] = l[i];
|
||||
n--;
|
||||
}
|
||||
code2table(l[0], table, NULL, 0);
|
||||
}
|
||||
|
||||
int write_bits(FILE* f, char* bitbuf, int* bits, char* code) {
|
||||
while (*code) {
|
||||
int b = (*bits) % 8;
|
||||
if (!b)
|
||||
bitbuf[(*bits) / 8] = ((*code) - '0') << 7;
|
||||
else
|
||||
bitbuf[(*bits) / 8] |= (((*code) - '0') << (7 - b));
|
||||
(*bits)++;
|
||||
code++;
|
||||
if (*bits == BUFSIZE * 8) {
|
||||
if (BUFSIZE != fwrite(bitbuf, 1, BUFSIZE, f))
|
||||
return 1;
|
||||
*bits = 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int encode_file(char** table,
|
||||
int n,
|
||||
FILE* f,
|
||||
FILE* f2,
|
||||
unsigned short tw,
|
||||
char* key) {
|
||||
char bitbuf[BUFSIZE];
|
||||
int i, bits = 0;
|
||||
unsigned char cl, ch;
|
||||
int cx[2];
|
||||
union {
|
||||
char c[2];
|
||||
unsigned short word;
|
||||
} u;
|
||||
char* enc = key;
|
||||
|
||||
/* header and codes */
|
||||
fprintf(f2, "%s", (key ? MAGIC_ENCRYPTED : MAGIC)); /* 3-byte HEADER */
|
||||
cl = (unsigned char)(n & 0x00ff);
|
||||
ch = (unsigned char)(n >> 8);
|
||||
if (key) {
|
||||
unsigned char cs;
|
||||
for (cs = 0; *enc; enc++)
|
||||
cs ^= *enc;
|
||||
fprintf(f2, "%c", cs); /* 1-byte check sum */
|
||||
enc = key;
|
||||
ch ^= *enc;
|
||||
if ((*(++enc)) == '\0')
|
||||
enc = key;
|
||||
cl ^= *enc;
|
||||
}
|
||||
fprintf(f2, "%c%c", ch, cl); /* upper and lower byte of record count */
|
||||
for (i = 0; i < BUFSIZE; i++)
|
||||
bitbuf[i] = '\0';
|
||||
for (i = 0; i < CODELEN + 1; i++)
|
||||
if (table[i]) {
|
||||
size_t nmemb;
|
||||
u.word = (unsigned short)i;
|
||||
if (i == CODELEN)
|
||||
u.word = tw;
|
||||
if (key) {
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
u.c[0] ^= *enc;
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
u.c[1] ^= *enc;
|
||||
}
|
||||
fprintf(f2, "%c%c", u.c[0], u.c[1]); /* 2-character code id */
|
||||
bits = 0;
|
||||
if (write_bits(f2, bitbuf, &bits, table[i]) != 0)
|
||||
return 1;
|
||||
if (key) {
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
fprintf(f2, "%c", ((unsigned char)bits) ^ *enc);
|
||||
for (cl = 0; cl <= bits / 8; cl++) {
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
bitbuf[cl] ^= *enc;
|
||||
}
|
||||
} else
|
||||
fprintf(f2, "%c", (unsigned char)bits); /* 1-byte code length */
|
||||
nmemb = bits / 8 + 1;
|
||||
if (fwrite(bitbuf, 1, bits / 8 + 1, f2) != nmemb) /* x-byte code */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* file encoding */
|
||||
bits = 0;
|
||||
while ((cx[0] = getc(f)) != -1 && (cx[1] = getc(f)) != -1) {
|
||||
u.c[0] = cx[0];
|
||||
u.c[1] = cx[1];
|
||||
if (write_bits(f2, bitbuf, &bits, table[u.word]) != 0)
|
||||
return 1;
|
||||
}
|
||||
/* terminal suffixes */
|
||||
if (write_bits(f2, bitbuf, &bits, table[CODELEN]) != 0)
|
||||
return 1;
|
||||
if (bits > 0) {
|
||||
size_t nmemb = bits / 8 + 1;
|
||||
if (fwrite(bitbuf, 1, nmemb, f2) != nmemb)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int prefixcompress(FILE* f, FILE* tempfile) {
|
||||
char buf[BUFSIZE];
|
||||
char buf2[BUFSIZE * 2];
|
||||
char prev[BUFSIZE];
|
||||
int prevlen = 0;
|
||||
while (fgets(buf, BUFSIZE, f)) {
|
||||
int i, j, k, m, c = 0;
|
||||
int pfx = prevlen;
|
||||
char* p = buf2;
|
||||
m = j = 0;
|
||||
for (i = 0; buf[i]; i++) {
|
||||
if ((pfx > 0) && (buf[i] == prev[i])) {
|
||||
j++;
|
||||
} else
|
||||
pfx = 0;
|
||||
}
|
||||
if (i > 0 && buf[i - 1] == '\n') {
|
||||
if (j == i)
|
||||
j--; /* line duplicate */
|
||||
if (j > 29)
|
||||
j = 29;
|
||||
c = j;
|
||||
if (c == '\t')
|
||||
c = 30;
|
||||
/* common suffix */
|
||||
for (; (m < i - j - 1) && (m < 15) && (prevlen - m - 2 >= 0) &&
|
||||
buf[i - m - 2] == prev[prevlen - m - 2];
|
||||
m++)
|
||||
;
|
||||
if (m == 1)
|
||||
m = 0;
|
||||
} else {
|
||||
j = 0;
|
||||
m = -1;
|
||||
}
|
||||
for (k = j; k < i - m - 1; k++, p++) {
|
||||
if (((unsigned char)buf[k]) < 47 && buf[k] != '\t' && buf[k] != ' ') {
|
||||
*p = ESCAPE;
|
||||
p++;
|
||||
}
|
||||
*p = buf[k];
|
||||
}
|
||||
if (m > 0) {
|
||||
*p = m + 31; /* 33-46 */
|
||||
p++;
|
||||
}
|
||||
if (i > 0 && buf[i - 1] == '\n') {
|
||||
size_t nmemb = p - buf2 + 1;
|
||||
*p = c;
|
||||
if (fwrite(buf2, 1, nmemb, tempfile) != nmemb)
|
||||
return 1;
|
||||
} else {
|
||||
size_t nmemb = p - buf2;
|
||||
if (fwrite(buf2, 1, nmemb, tempfile) != nmemb)
|
||||
return 1;
|
||||
}
|
||||
memcpy(prev, buf, i);
|
||||
prevlen = i;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Compress filename into filename + EXTENSION.
 *
 * The input is first prefix-compressed into a temporary file, the word
 * frequencies of that file are turned into a code table, and the temporary
 * file is then encoded into the output file.  When key is non-NULL it is
 * passed to encode_file() as the password.
 *
 * Returns 0 on success; on failure a message is printed through fail() and
 * its result is returned.
 */
int hzip(const char* filename, char* key) {
  struct item** list;
  char* table[CODELEN + 1];
  int n;
  unsigned short termword;

  FILE* f = fopen(filename, "r");
  if (!f)
    return fail("hzip: %s: Permission denied\n", filename);

  char tmpfiletemplate[] = "/tmp/hunspellXXXXXX";
  /* create the temporary file readable/writable by the owner only */
  mode_t mask = umask(S_IXUSR | S_IRWXG | S_IRWXO);
  int tempfileno = mkstemp(tmpfiletemplate);
  umask(mask);
  if (tempfileno == -1) {
    fclose(f);
    return fail("hzip: cannot create temporary file\n", NULL);
  }

  /* The stream is written by prefixcompress() and read back afterwards, so
     it needs an update mode; "rw" is not a valid stdio mode string. */
  FILE* tempfile = fdopen(tempfileno, "w+");
  if (!tempfile) {
    close(tempfileno);
    unlink(tmpfiletemplate);
    fclose(f);
    return fail("hzip: cannot create temporary file\n", NULL);
  }

  std::string out(filename);
  out.append(EXTENSION);
  FILE* f2 = fopen(out.c_str(), "wb");
  if (!f2) {
    fclose(tempfile);
    fclose(f);
    unlink(tmpfiletemplate);
    return fail("hzip: %s: Permission denied\n", out.c_str());
  }

  /* clear every slot, including table[CODELEN] which encode_file() reads */
  for (n = 0; n <= CODELEN; n++)
    table[n] = NULL;

  if (prefixcompress(f, tempfile) != 0) {
    fclose(f2);
    fclose(tempfile);
    fclose(f);
    unlink(tmpfiletemplate);
    return fail("hzip: cannot write file\n", NULL);
  }

  rewind(tempfile);
  n = get_freqdata(&list, tempfile, &termword);
  if (n < 0) {
    /* get_freqdata() could not allocate the item list; nothing to free */
    fclose(f2);
    fclose(tempfile);
    fclose(f);
    unlink(tmpfiletemplate);
    return fail("hzip: cannot build code table\n", NULL);
  }
  get_codetable(list, n, table);

  rewind(tempfile);
  n = encode_file(table, n, tempfile, f2, termword, key);

  /* NOTE(review): only the pointer array is released here; the items and
     the code strings are presumably heap-allocated by newitem() and
     code2table() and live until the process exits -- confirm. */
  free(list);
  fclose(f2);
  fclose(tempfile);
  fclose(f);
  unlink(tmpfiletemplate);

  if (n != 0)
    return fail("hzip: cannot write file\n", NULL);
  return n;
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int i, j = 0;
|
||||
char* key = NULL;
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (*(argv[i]) == '-') {
|
||||
if (*(argv[i] + 1) == 'h')
|
||||
return fail(DESC, NULL);
|
||||
if (*(argv[i] + 1) == 'P') {
|
||||
if (i + 1 == argc)
|
||||
return fail("hzip: missing password\n", NULL);
|
||||
key = argv[i + 1];
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
return fail("hzip: no such option: %s\n", argv[i]);
|
||||
} else if (hzip(argv[i], key) != 0)
|
||||
return 1;
|
||||
else
|
||||
j = 1;
|
||||
}
|
||||
if (j == 0)
|
||||
return fail("hzip: need a filename parameter\n", NULL);
|
||||
return 0;
|
||||
}
|
471
3rdparty/hunspell/src/tools/ispellaff2myspell
vendored
471
3rdparty/hunspell/src/tools/ispellaff2myspell
vendored
@ -1,471 +0,0 @@
|
||||
#!/usr/bin/perl -w
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
# $Id$
|
||||
#
|
||||
# (C) 2002-2005 Agustin Martin Domingo <agustin.martin@hispalinux.es>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
# Print the command line help on standard output and terminate the program.
# The list of charsets is kept in sync with the tables in mylc().
sub usage {
    print "ispellaff2myspell: A program to convert ispell affix tables to myspell format
(C) 2002-2005 Agustin Martin Domingo <agustin.martin\@hispalinux.es> License: GPL

Usage:
ispellaff2myspell [options] <affixfile>

Options:
--affixfile=s Affix file
--bylocale Use current locale setup for upper/lowercase
conversion
--charset=s Use specified charset for upper/lowercase
conversion (defaults to latin1)
--debug Print debugging info
--extraflags[=s] Allow some non alphabetic flags
--lowercase=s Lowercase string
--myheader=s Header file
--printcomments Print commented lines in output
--replacements=s Replacements file
--split=i Split flags with more than i entries
--uppercase=s Uppercase string
--wordlist=s Still unused

Currently allowed values for charset are: latin0, latin1, latin2, latin3

This script does not create the dict file. Something like

( echo `cat mydict.words+ | wc -l`; cat mydict.words+ ) > mydict.dict

should do the work, with mydict.words+ being the ispell munched wordlist

";
    exit;
}
|
||||
|
||||
# Write the arguments (joined with spaces) to STDERR when --debug is set.
sub debugprint {
    print STDERR "@_" if $debug;
}
|
||||
|
||||
# Print the affix entries collected in @flag_array for the current flag and
# reset the per-flag state (@flag_array, $flagname, $flagcombine).
#
# With --split the entries are printed in chunks of $split lines; the header
# of every chunk but the last one carries a trailing " S".
sub shipoutflag{
    if ( @flag_array ){
	# without --split everything goes out as one chunk
	my $chunk = $split ? $split : scalar @flag_array;

	while ( @flag_array ){
	    my @entries = splice(@flag_array,0,$chunk);
	    my $count   = scalar @entries;
	    my $more    = ( scalar @flag_array ) ? " S" : "";
	    print "$myaffix $flagname $flagcombine $count$more\n";
	    print join("\n",@entries);
	    print "\n\n";
	}
    }
    @flag_array=();
    $flagname='';
    $flagcombine='';
}
|
||||
|
||||
# Return the lowercase form of the given string.
#
# With --bylocale the conversion is done by lc under "use locale".
# Otherwise a tr/// is built from $uppercase and $lowercase; for a known
# $charset those two globals are (re)loaded from the table below, for any
# other charset they must have been given on the command line.
sub mylc{
    my $inputstring=shift;

    if ( $bylocale ){
	use locale;
	return lc $inputstring;
    }

    # NOTE(review): the non-ASCII characters in these lists look mis-decoded
    # (the script declares iso-8859-1) -- verify against the original file.
    my %charset_tables = (
	"latin0" => [ 'a-z珀矣粤肄蓍裨跋鈿韵鴦<E99FB5><E9B4A6>巐鄕<E5B790>秀<EFBFBD>',
		      'A-Z請唾津毒班碧麺力佰厶壞嶷掣桀毳痔<E6AFB3>' ],
	"latin1" => [ 'a-z珀矣粤肄蓍裨跋鈿韵鴦<E99FB5><E9B4A6>巐鄕<E5B790>',
		      'A-Z請唾津毒班碧麺力佰厶壞嶷掣桀毳' ],
	"latin2" => [ 'a-z嘘偽杭纂梢珀矣粤肄蓍裨跋鈿韵鴦<E99FB5><E9B4A6>巐鄕<E5B790>',
		      'A-Z。ウ<E38082><E382A6><EFBFBD>請唾津毒班碧麺力佰厶壞嶷掣桀毳' ],
	"latin3" => [ 'a-z蔚杭纂逗痰粤肄蓍裨跋鈿髓齡<E9AB93><E9BDA1>巐鄕<E5B790>',
		      'A-Z・<5A><E383BB><EFBFBD>疎津毒班碧麺力冫嘖孛忤掣桀毳' ],
    );

    if ( exists $charset_tables{$charset} ){
	($lowercase, $uppercase) = @{ $charset_tables{$charset} };
    } elsif ( not $lowercase and not $uppercase ){
	die "Unsupported charset [$charset]

Explicitly use --lowercase=string and --uppercase=string
options. Remember that both string must match exactly, but
case changed.
";
    }

    # tr/// does not interpolate variables, hence the string eval
    my $outputstring=$inputstring;
    eval "\$outputstring=~tr/$uppercase/$lowercase/";
    return $outputstring;
}
|
||||
|
||||
# Check a flag name taken from the ispell affix file.
#
# Returns the flag unchanged when it contains at least one ASCII letter.
# Otherwise, when --extraflags is active, the first prefix from
# %theextraflags that matches the start of the flag is stripped and the
# remainder is returned.  Returns '' when the flag is not acceptable.
#
# The sub takes one argument, so it is declared without the empty prototype
# the original carried (that prototype only worked because the caller uses
# the &-form, which bypasses prototypes).
sub validate_flag {
    my $flag = shift;

    return $flag if $flag =~ m/[a-zA-Z]+/;

    if ( $hasextraflags ){
	foreach my $prefix ( keys %theextraflags ){
	    # the prefix is used as a regular expression, as given by the user
	    if ( $flag =~ m/^$prefix/ ){
		$flag =~ s/^$prefix//;
		return $flag;
	    }
	}
    }
    return '';
}
|
||||
|
||||
# Print a myspell REP table built from the given replacements file.
#
# Only lines starting with REP are used; lines of the form "REP <number>"
# (an existing count header) are skipped.  Line ends (CRLF, CR or LF) are
# removed.  The output is "REP <count>" followed by the collected lines.
# Dies when the file cannot be opened.
sub process_replacements{
    my $file = shift;
    my @replaces = ();

    # three-argument open: the file name is never parsed for mode characters
    open (my $replace_fh, '<', $file) ||
	die "Error: Could not open replacements file: $file\n";
    while (<$replace_fh>){
	next unless m/^REP[\s\t]*\D.*/;
	next if m/^REP\s+[0-9]+/;
	s/\015\012//;
	s/\015//;
	chomp;
	push @replaces, $_;
    }
    close $replace_fh;

    my $number = scalar @replaces;
    print "REP $number\n";
    foreach ( @replaces ){
	print $_ . "\n";
    }
}
|
||||
|
||||
# -----------------------------------------------------------
|
||||
# Now the program starts, after the functions are defined
|
||||
# -----------------------------------------------------------
|
||||
|
||||
use Getopt::Long;
|
||||
|
||||
# Initializing option values
$affixfile = '';
$bylocale = '';
$charset = '';
$debug = '';
$lowercase = '';
$myheader = '';
$printcomments = '';
$replacements = '';
$split = '';
$uppercase = '';
$wordlist = '';
$hasextraflags = '';
@flag_array = ();
%theextraflags = ();
# Initializing root values
$rootremove = "0";
$rootname = '';
$addtoroot = '';
$comment = '';
# Initializing flag values
$flagname = '';
$flagcombine = '';
$inflags = '';

GetOptions ('affixfile=s' => \$affixfile,
	    'bylocale' => \$bylocale,
	    'charset=s' => \$charset,
	    'debug' => \$debug,
	    # --extraflags may be repeated; every non-empty value is a prefix
	    'extraflags:s' => sub {
		$hasextraflags = 1;
		shift;
		$theflag = shift;
		$theextraflags{$theflag}++ if $theflag},
	    'lowercase=s' => \$lowercase,
	    'myheader=s' => \$myheader,
	    'printcomments' => \$printcomments,
	    'replacements=s'=> \$replacements,
	    'split=i' => \$split,
	    'uppercase=s' => \$uppercase,
	    'wordlist=s' => \$wordlist) or usage;

# The affix file may also be given as the first remaining argument
if ( not $affixfile ){
    $affixfile=shift or usage;
}

if ( $charset and ( $lowercase or $uppercase )){
    die "Error: charset and lowercase/uppercase options
are incompatible. Use either charset or lowercase/uppercase options to
specify the patterns
"
} elsif ( not $lowercase and not $uppercase and not $charset ){
    $charset="latin1";
}

# --extraflags without a value: accept flags escaped with a backslash
if ( scalar(keys %theextraflags) == 0 && $hasextraflags ){
    $theextraflags{"\\\\"}++;
}

debugprint "$affixfile $charset";

# three-argument open: the file name is never parsed for mode characters
open (AFFIXFILE, '<', $affixfile) ||
    die "Error: Could not open affix file: $affixfile";

if ( $myheader ){
    # Read the header with perl itself instead of running `cat` through the
    # shell, so names with spaces or shell metacharacters work.  An
    # unreadable header is reported and treated as empty.
    my $myspell_header = '';
    if ( open (my $header_fh, '<', $myheader) ){
	local $/;
	$myspell_header = <$header_fh>;
	$myspell_header = '' unless defined $myspell_header;
	close $header_fh;
    } else {
	print STDERR "Warning: Could not open header file: $myheader\n";
    }
    print $myspell_header . "\n";
}

while (<AFFIXFILE>){
    chomp;
    if (/^\s*\#.*/){
	debugprint "Ignoring line $.\n";
	print "$_\n" if $printcomments;
    } elsif (/^\s*$/){
	debugprint "Ignoring line $.\n";
    } elsif (/^\s*prefixes/){
	debugprint "Prefixes starting in line $.\n";
	$affix="PFX";
    } elsif (/^\s*suffixes/){
	debugprint "Suffixes starting in line $.\n";
	$affix="SFX";
    } elsif (/^[\s\t]*flag.*/){
	next if not $affix; # In case we are still in the preamble
	shipoutflag if $inflags;
	$inflags="yes";
	s/^[\s\t]*flag[\s\t]*//;
	s/[\s\t]*:.*$//;
	debugprint "Found flag $_ in line $.\n";

	# a '*' in the flag definition is translated to combine=Y
	if (/\*/){
	    s/[\*\s]//g;
	    $flagcombine="Y";
	    debugprint "Flag renamed to $_ with combine=$flagcombine\n";
	} else {
	    $flagcombine="N";
	}

	if ( $flagname = &validate_flag($_) ){
	    $myaffix = $affix;
	} else {
	    # invalid flags are still printed, but commented out
	    $myaffix = "\# $affix";
	    $flagname = $_;
	    print STDERR "Ignoring invalid flag $flagname in line $.\n";
	}
    } elsif ( $affix and $inflags ) {
	($rootname,@comments) = split('#',$_);
	$comment = '# ' . join('#',@comments);

	$rootname =~ s/\s*//g;
	$rootname = mylc $rootname;
	($rootname,$addtoroot) = split('>',$rootname);
	# A rule without '>' leaves these undefined; use empty strings so the
	# output stays the same without "uninitialized value" warnings.
	$rootname = '' unless defined $rootname;
	$addtoroot = '' unless defined $addtoroot;

	if ( $addtoroot =~ s/^\-//g ){
	    ($rootremove,$addtoroot) = split(',',$addtoroot);
	    $rootremove = '' unless defined $rootremove;
	    $addtoroot = "0" unless $addtoroot;
	    $addtoroot = "0" if ( $addtoroot eq "-");
	} else {
	    $rootremove = "0";
	}
	$addtoroot =~ s/\\\-/\-/g; # prefix ANTI\- to anti-

	if ( $rootname eq '.' && $rootremove ne "0" ){
	    $rootname = $rootremove;
	}

	debugprint "$rootname, $addtoroot, $rootremove\n";
	if ( $printcomments ){
	    $affix_line=sprintf("%s %s %-5s %-11s %-24s %s",
				$myaffix, $flagname, $rootremove,
				$addtoroot, $rootname, $comment);
	} else {
	    $affix_line=sprintf("%s %s %-5s %-11s %s",
				$myaffix, $flagname, $rootremove,
				$addtoroot, $rootname);
	}
	$rootremove = "0";
	$rootname = '';
	$addtoroot = '';
	$comment = '';
	@comments = ();
	push @flag_array,$affix_line;
	debugprint "$affix_line\n";
    } else {
	# anything before the first prefixes/suffixes/flag line is ignored
    }
}
shipoutflag;

close AFFIXFILE;

if ( $replacements ){
    &process_replacements($replacements);
}
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
B<ispellaff2myspell> - A program to convert ispell affix tables to myspell format.
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
ispellaff2myspell [options] <affixfile> --myheader your_header
|
||||
|
||||
Options:
|
||||
|
||||
--affixfile=s Affix file
|
||||
--bylocale Use current locale setup for upper/lowercase
|
||||
conversion
|
||||
--charset=s Use specified charset for upper/lowercase
|
||||
conversion (defaults to latin1)
|
||||
--debug Print debugging info
|
||||
--extraflags=s Allow some non alphabetic flags
|
||||
--lowercase=s Lowercase string
|
||||
--myheader=s Header file
|
||||
--printcomments Print commented lines in output
|
||||
--replacements=s Replacements file
|
||||
--split=i Split flags with more than i entries
|
||||
--uppercase=s Uppercase string
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
B<ispellaff2myspell> is a script that will convert ispell affix tables
|
||||
to myspell format in a more or less successful way.
|
||||
|
||||
This script does not create the dict file. Something like
|
||||
|
||||
( echo `cat mydict.words+ | wc -l`; cat mydict.words+ ) > mydict.dict
|
||||
|
||||
should do the work, with mydict.words+ being the munched wordlist
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 8
|
||||
|
||||
=item B<--affixfile=s>
|
||||
|
||||
Affix file. You can put it directly in the command line.
|
||||
|
||||
=item B<--bylocale>
|
||||
|
||||
Use current locale setup for upper/lowercase conversion. Make sure
|
||||
that the selected locale match the dictionary one, or you might get
|
||||
into trouble.
|
||||
|
||||
=item B<--charset=s>
|
||||
|
||||
Use specified charset for upper/lowercase conversion (defaults to latin1).
|
||||
Currently allowed values for charset are: latin0, latin1, latin2, latin3.
|
||||
|
||||
=item B<--debug>
|
||||
|
||||
Print some debugging info.
|
||||
|
||||
=item B<--extraflags:s>
|
||||
|
||||
Allows some non alphabetic flags.
|
||||
|
||||
When invoked with no value the supported flags are currently those
|
||||
corresponding to chars represented with the escape char B<\> as
|
||||
first char. B<\> will be stripped.
|
||||
|
||||
When given with the flag prefix will allow that flag and strip the
|
||||
given prefix. Be careful when giving the prefix to properly escape chars,
|
||||
e.g. you will need B<-e "\\\\"> or B<-e '\\'> for flags like B<\[> to be stripped to
|
||||
B<[>. Otherwise you might even get errors. Use B<-e "^"> to allow all
|
||||
flags and pass them unmodified.
|
||||
|
||||
You will need a call to -e for each flag type, e.g.,
|
||||
B<-e "\\\\" -e "~\\\\"> (or B<-e '\\' -e '~\\'>).
|
||||
|
||||
When a prefix is explicitly set, the default value (anything starting with B<\>)
|
||||
is disabled and you need to enable it explicitly as in the previous example.
|
||||
|
||||
=item B<--lowercase=s>
|
||||
|
||||
Lowercase string. Manually set the string of lowercase chars. This
|
||||
requires B<--uppercase> having exactly that string but uppercase.
|
||||
|
||||
=item B<--myheader=s>
|
||||
|
||||
Header file. The myspell aff header. You need to write it
|
||||
manually. This can contain everything you want to be before the affix table
|
||||
|
||||
=item B<--printcomments>
|
||||
|
||||
Print commented lines in output.
|
||||
|
||||
=item B<--replacements=file>
|
||||
|
||||
Add a pre-defined replacements table taken from 'file' to the .aff file.
|
||||
Will skip lines not beginning with REP, and set the replacements number
|
||||
appropriately.
|
||||
|
||||
=item B<--split=i>
|
||||
|
||||
Split flags with more than i entries. This can be of interest for flags
|
||||
having a lot of entries. Will split the flag in chunks containing B<i>
|
||||
entries.
|
||||
|
||||
=item B<--uppercase=s>
|
||||
|
||||
Uppercase string. Manually set the string of uppercase chars. This
|
||||
requires B<--lowercase> having exactly that string but lowercase.
|
||||
|
||||
=back
|
||||
|
||||
If your encoding is currently unsupported you can send me a file with
|
||||
the two strings of lower and uppercase chars. Note that they must match
|
||||
exactly but case changed. It will look something like
|
||||
|
||||
$lowercase='a-zàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ';
|
||||
$uppercase='A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ';
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
The OpenOffice.org Lingucomponent Project home page
|
||||
|
||||
L<http://lingucomponent.openoffice.org/index.html>
|
||||
|
||||
and the document
|
||||
|
||||
L<http://lingucomponent.openoffice.org/affix.readme>
|
||||
|
||||
that provides information about the basics of the myspell affix file format.
|
||||
|
||||
You can also take a look at
|
||||
|
||||
/usr/share/doc/libmyspell-dev/affix.readme.gz
|
||||
/usr/share/doc/libmyspell-dev/README.compoundwords
|
||||
/usr/share/doc/libmyspell-dev/README.replacetable
|
||||
|
||||
in your Debian system.
|
||||
|
||||
=head1 AUTHORS
|
||||
|
||||
Agustin Martin <agustin.martin@hispalinux.es>
|
||||
|
||||
=cut
|
115
3rdparty/hunspell/src/tools/makealias
vendored
115
3rdparty/hunspell/src/tools/makealias
vendored
@ -1,115 +0,0 @@
|
||||
#!/bin/sh
# makealias: make alias compressed dic and aff files
# Usage: makealias file.dic file.aff (not makealias file.aff file.dic!)
# Version: 2007-10-26
#
# Writes <dic>_alias.dic and <aff>_alias.aff into the current directory.
# Flag sets become AF aliases and morphological fields become AM aliases.

case $# in
0|1)
    echo 'makealias: make alias compressed dic and aff files
Usage: makealias file.dic file.aff (not makealias file.aff file.dic!)' >&2
    exit 1;;
esac

DIC=`basename "$1" .dic`
AFF=`basename "$2" .aff`
ALIAS_DIC="${DIC}_alias.dic"
ALIAS_AFF="${AFF}_alias.aff"
# scratch file for the rewritten affix lines
TMP_AFF="${AFF}_alias.$$"

# FLAG type definition must be before alias definitions
grep '^FLAG' "$2" >"$ALIAS_AFF"

# The awk program prints the rewritten dictionary on stdout and the rewritten
# affix lines on stderr; the alias tables are appended to the file named by
# aliasaff in the END block.
awk -v aliasaff="$ALIAS_AFF" 'BEGIN{n=1;m=1}
function cutslash(st) {
    if (split(st,t,"/") > 1) return t[1]
    return st
}
function ltrim(st) {
    sub(/^ +/,"",st)
    return st
}
FILENAME ~ /.dic$/ && $1 ~ "/[^ \t]" {
    split($1,t,"/")
    if(!a[t[2]]){
        a[t[2]]=n
        b[n]=t[2]
        n++
    }
    if (NF > 1) {
        $1 = ""
        if(!a2[$0]){
            a2[$0]=m
            c[m]=$0
            m++
        }
        print t[1]"/"a[t[2]] "\t" a2[$0]
    } else {
        print t[1]"/"a[t[2]]
    }
    next
}
FILENAME ~ /.dic$/ && NF > 1 {
    x = $1
    $1 = ""
    if(!a2[$0]){
        a2[$0]=m
        c[m]=$0
        m++
    }
    print cutslash(x) "\t" a2[$0]
    next
}
FILENAME ~ /.dic$/ { print cutslash($1) }
FILENAME ~ /.aff$/ && /^[PS]FX/ && ($4 ~ /\/[^ ]/) && NF > 4 {
    split($4,t,"/")
    if(!a[t[2]]){
        a[t[2]]=n
        b[n]=t[2]
        n++
    }
    begin = $1 " " $2 " " $3 " " (t[1]"/"a[t[2]]) " " $5
    if ($6!="") ok = 1; else ok = 0;
    $1 = ""
    $2 = ""
    $3 = ""
    $4 = ""
    $5 = ""
    if(ok){
        if(!a2[$0]){
            a2[$0]=m
            c[m]=$0
            m++
        }
        print begin " " a2[$0] >>"/dev/stderr"
    } else print begin >>"/dev/stderr"
    next
}
FILENAME ~ /.aff$/ && /^[PS]FX/ && NF > 4 {
    begin = $1 " " $2 " " $3 " " cutslash($4) " " $5
    if ($6!="") ok = 1; else ok = 0;
    $1 = ""
    $2 = ""
    $3 = ""
    $4 = ""
    $5 = ""
    if(ok) {
        if (!a2[$0]){
            a2[$0]=m
            c[m]=$0
            m++
        }
        print begin " " a2[$0] >>"/dev/stderr"
    } else print begin >>"/dev/stderr"
    next
}
FILENAME ~ /.aff$/ { print $0 >>"/dev/stderr" }
END{
    if (n>1) {
        print "AF", n-1 >>aliasaff
        for(i=1;i<n;i++) print "AF", b[i],"#",i >>aliasaff
    }
    if (m>1) {
        print "AM", m-1 >>aliasaff
        for(i=1;i<m;i++) print "AM " ltrim(c[i]) >>aliasaff
    }
}' "$1" "$2" >"$ALIAS_DIC" 2>"$TMP_AFF"
grep -v '^FLAG' "$TMP_AFF" >>"$ALIAS_AFF"
echo "output: ${DIC}_alias.dic, ${AFF}_alias.aff"
rm "$TMP_AFF"
|
868
3rdparty/hunspell/src/tools/munch.cxx
vendored
868
3rdparty/hunspell/src/tools/munch.cxx
vendored
@ -1,868 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
/* Munch a word list and generate a smaller root word list with affixes*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits>
|
||||
|
||||
#include "munch.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int i, j, k, n;
|
||||
int rl, p, nwl;
|
||||
int al;
|
||||
|
||||
FILE* wrdlst;
|
||||
FILE* afflst;
|
||||
|
||||
char *nword, *wf, *af;
|
||||
char as[(MAX_PREFIXES + MAX_SUFFIXES)];
|
||||
char* ap;
|
||||
|
||||
struct hentry* ep;
|
||||
struct hentry* ep1;
|
||||
struct affent* pfxp;
|
||||
struct affent* sfxp;
|
||||
|
||||
(void)argc;
|
||||
|
||||
/* first parse the command line options */
|
||||
/* arg1 - wordlist, arg2 - affix file */
|
||||
|
||||
if (argv[1]) {
|
||||
wf = mystrdup(argv[1]);
|
||||
} else {
|
||||
fprintf(stderr, "correct syntax is:\n");
|
||||
fprintf(stderr, "munch word_list_file affix_file\n");
|
||||
exit(1);
|
||||
}
|
||||
if (argv[2]) {
|
||||
af = mystrdup(argv[2]);
|
||||
} else {
|
||||
fprintf(stderr, "correct syntax is:\n");
|
||||
fprintf(stderr, "munch word_list_file affix_file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* open the affix file */
|
||||
afflst = fopen(af, "r");
|
||||
if (!afflst) {
|
||||
fprintf(stderr, "Error - could not open affix description file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* step one is to parse the affix file building up the internal
|
||||
affix data structures */
|
||||
|
||||
numpfx = 0;
|
||||
numsfx = 0;
|
||||
|
||||
if (parse_aff_file(afflst)) {
|
||||
fprintf(stderr, "Error - in affix file loading\n");
|
||||
exit(1);
|
||||
}
|
||||
fclose(afflst);
|
||||
|
||||
fprintf(stderr, "parsed in %d prefixes and %d suffixes\n", numpfx, numsfx);
|
||||
|
||||
/* affix file is now parsed so create hash table of wordlist on the fly */
|
||||
|
||||
/* open the wordlist */
|
||||
wrdlst = fopen(wf, "r");
|
||||
if (!wrdlst) {
|
||||
fprintf(stderr, "Error - could not open word list file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (load_tables(wrdlst)) {
|
||||
fprintf(stderr, "Error building hash tables\n");
|
||||
exit(1);
|
||||
}
|
||||
fclose(wrdlst);
|
||||
|
||||
for (i = 0; i < tablesize; i++) {
|
||||
ep = &tableptr[i];
|
||||
if (ep->word == NULL)
|
||||
continue;
|
||||
for (; ep != NULL; ep = ep->next) {
|
||||
numroots = 0;
|
||||
aff_chk(ep->word, strlen(ep->word));
|
||||
if (numroots) {
|
||||
/* now there might be a number of combinations */
|
||||
/* of prefixes and suffixes that might match this */
|
||||
/* word. So how to choose? As a first shot look */
|
||||
/* for the shortest remaining root word to */
|
||||
/* to maximize the combinatorial power */
|
||||
|
||||
/* but be careful, do not REQUIRE a specific combination */
|
||||
/* of a prefix and a suffix to generate the word since */
|
||||
/* that violates the rule that the root word with just */
|
||||
/* the prefix or just the suffix must also exist in the */
|
||||
/* wordlist as well */
|
||||
|
||||
/* in fact because of the cross product issue, this not a */
|
||||
/* simple choice since some combinations of previous */
|
||||
/* prefixes and new suffixes may not be valid. */
|
||||
/* The only way to know is to simply try them all */
|
||||
|
||||
rl = 1000;
|
||||
p = -1;
|
||||
|
||||
for (j = 0; j < numroots; j++) {
|
||||
/* first collect the root word info and build up */
|
||||
/* the potential new affix string */
|
||||
nword = (roots[j].hashent)->word;
|
||||
nwl = strlen(nword);
|
||||
*as = '\0';
|
||||
ap = as;
|
||||
if (roots[j].prefix)
|
||||
*ap++ = (roots[j].prefix)->achar;
|
||||
if (roots[j].suffix)
|
||||
*ap++ = (roots[j].suffix)->achar;
|
||||
if ((roots[j].hashent)->affstr) {
|
||||
strcpy(ap, (roots[j].hashent)->affstr);
|
||||
} else {
|
||||
*ap = '\0';
|
||||
}
|
||||
al = strlen(as);
|
||||
|
||||
/* now expand the potential affix string to generate */
|
||||
/* all legal words and make sure they all exist in the */
|
||||
/* word list */
|
||||
numwords = 0;
|
||||
wlist[numwords].word = mystrdup(nword);
|
||||
wlist[numwords].pallow = 0;
|
||||
numwords++;
|
||||
n = 0;
|
||||
if (al)
|
||||
expand_rootword(nword, nwl, as);
|
||||
for (k = 0; k < numwords; k++) {
|
||||
if (lookup(wlist[k].word))
|
||||
n++;
|
||||
free(wlist[k].word);
|
||||
wlist[k].word = NULL;
|
||||
wlist[k].pallow = 0;
|
||||
}
|
||||
|
||||
/* if all exist in word list then okay */
|
||||
if (n == numwords) {
|
||||
if (nwl < rl) {
|
||||
rl = nwl;
|
||||
p = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (p != -1) {
|
||||
ep1 = roots[p].hashent;
|
||||
pfxp = roots[p].prefix;
|
||||
sfxp = roots[p].suffix;
|
||||
ep1->keep = 1;
|
||||
if (pfxp != NULL)
|
||||
add_affix_char(ep1, pfxp->achar);
|
||||
if (sfxp != NULL)
|
||||
add_affix_char(ep1, sfxp->achar);
|
||||
} else {
|
||||
ep->keep = 1;
|
||||
}
|
||||
} else {
|
||||
ep->keep = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* now output only the words to keep along with affixes info */
|
||||
/* first count how many words that is */
|
||||
k = 0;
|
||||
for (i = 0; i < tablesize; i++) {
|
||||
ep = &tableptr[i];
|
||||
if (ep->word == NULL)
|
||||
continue;
|
||||
for (; ep != NULL; ep = ep->next) {
|
||||
if (ep->keep > 0)
|
||||
k++;
|
||||
}
|
||||
}
|
||||
fprintf(stdout, "%d\n", k);
|
||||
|
||||
for (i = 0; i < tablesize; i++) {
|
||||
ep = &tableptr[i];
|
||||
if (ep->word == NULL)
|
||||
continue;
|
||||
for (; ep != NULL; ep = ep->next) {
|
||||
if (ep->keep > 0) {
|
||||
if (ep->affstr != NULL) {
|
||||
fprintf(stdout, "%s/%s\n", ep->word, ep->affstr);
|
||||
} else {
|
||||
fprintf(stdout, "%s\n", ep->word);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int parse_aff_file(FILE* afflst) {
|
||||
int i, j;
|
||||
int numents = 0;
|
||||
char achar = '\0';
|
||||
short ff = 0;
|
||||
struct affent* ptr = NULL;
|
||||
struct affent* nptr = NULL;
|
||||
char* line = (char*)malloc(MAX_LN_LEN);
|
||||
|
||||
while (fgets(line, MAX_LN_LEN, afflst)) {
|
||||
mychomp(line);
|
||||
char ft = ' ';
|
||||
fprintf(stderr, "parsing line: %s\n", line);
|
||||
if (strncmp(line, "PFX", 3) == 0)
|
||||
ft = 'P';
|
||||
if (strncmp(line, "SFX", 3) == 0)
|
||||
ft = 'S';
|
||||
if (ft != ' ') {
|
||||
char* tp = line;
|
||||
char* piece;
|
||||
i = 0;
|
||||
ff = 0;
|
||||
while ((piece = mystrsep(&tp, ' '))) {
|
||||
if (*piece != '\0') {
|
||||
switch (i) {
|
||||
case 0:
|
||||
break;
|
||||
case 1: {
|
||||
achar = *piece;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
if (*piece == 'Y')
|
||||
ff = XPRODUCT;
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
numents = atoi(piece);
|
||||
if ((numents <= 0) || ((std::numeric_limits<size_t>::max() /
|
||||
sizeof(struct affent)) < static_cast<size_t>(numents))) {
|
||||
fprintf(stderr, "Error: too many entries: %d\n", numents);
|
||||
numents = 0;
|
||||
} else {
|
||||
ptr = (struct affent*)malloc(numents * sizeof(struct affent));
|
||||
ptr->achar = achar;
|
||||
ptr->xpflg = ff;
|
||||
fprintf(stderr, "parsing %c entries %d\n", achar, numents);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
free(piece);
|
||||
}
|
||||
/* now parse all of the sub entries*/
|
||||
nptr = ptr;
|
||||
for (j = 0; j < numents; j++) {
|
||||
if (!fgets(line, MAX_LN_LEN, afflst))
|
||||
return 1;
|
||||
mychomp(line);
|
||||
tp = line;
|
||||
i = 0;
|
||||
while ((piece = mystrsep(&tp, ' '))) {
|
||||
if (*piece != '\0') {
|
||||
switch (i) {
|
||||
case 0: {
|
||||
if (nptr != ptr) {
|
||||
nptr->achar = ptr->achar;
|
||||
nptr->xpflg = ptr->xpflg;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
break;
|
||||
case 2: {
|
||||
nptr->strip = mystrdup(piece);
|
||||
nptr->stripl = strlen(nptr->strip);
|
||||
if (strcmp(nptr->strip, "0") == 0) {
|
||||
free(nptr->strip);
|
||||
nptr->strip = mystrdup("");
|
||||
nptr->stripl = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
nptr->appnd = mystrdup(piece);
|
||||
nptr->appndl = strlen(nptr->appnd);
|
||||
if (strcmp(nptr->appnd, "0") == 0) {
|
||||
free(nptr->appnd);
|
||||
nptr->appnd = mystrdup("");
|
||||
nptr->appndl = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
encodeit(nptr, piece);
|
||||
}
|
||||
fprintf(stderr, " affix: %s %d, strip: %s %d\n", nptr->appnd,
|
||||
nptr->appndl, nptr->strip, nptr->stripl);
|
||||
// no break
|
||||
default:
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
free(piece);
|
||||
}
|
||||
nptr++;
|
||||
}
|
||||
if (ft == 'P') {
|
||||
if (numpfx < MAX_PREFIXES) {
|
||||
ptable[numpfx].aep = ptr;
|
||||
ptable[numpfx].num = numents;
|
||||
fprintf(stderr, "ptable %d num is %d\n", numpfx, ptable[numpfx].num);
|
||||
numpfx++;
|
||||
} else {
|
||||
fprintf(stderr, "prefix buffer ptable is full\n");
|
||||
}
|
||||
} else {
|
||||
if (numsfx < MAX_SUFFIXES) {
|
||||
stable[numsfx].aep = ptr;
|
||||
stable[numsfx].num = numents;
|
||||
fprintf(stderr, "stable %d num is %d\n", numsfx, stable[numsfx].num);
|
||||
numsfx++;
|
||||
} else {
|
||||
fprintf(stderr, "suffix buffer stable is full\n");
|
||||
}
|
||||
}
|
||||
ptr = NULL;
|
||||
nptr = NULL;
|
||||
numents = 0;
|
||||
achar = '\0';
|
||||
}
|
||||
}
|
||||
free(line);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void encodeit(struct affent* ptr, char* cs) {
|
||||
int nc;
|
||||
int neg;
|
||||
int grp;
|
||||
int n;
|
||||
int ec;
|
||||
int nm;
|
||||
int i, j, k;
|
||||
unsigned char mbr[MAX_WD_LEN];
|
||||
|
||||
/* now clear the conditions array */
|
||||
for (i = 0; i < SET_SIZE; i++)
|
||||
ptr->conds[i] = (unsigned char)0;
|
||||
|
||||
/* now parse the string to create the conds array */
|
||||
nc = strlen(cs);
|
||||
neg = 0; /* complement indicator */
|
||||
grp = 0; /* group indicator */
|
||||
n = 0; /* number of conditions */
|
||||
ec = 0; /* end condition indicator */
|
||||
nm = 0; /* number of member in group */
|
||||
i = 0;
|
||||
if (strcmp(cs, ".") == 0) {
|
||||
ptr->numconds = 0;
|
||||
return;
|
||||
}
|
||||
while (i < nc) {
|
||||
unsigned char c = *((unsigned char*)(cs + i));
|
||||
if (c == '[') {
|
||||
grp = 1;
|
||||
c = 0;
|
||||
}
|
||||
if ((grp == 1) && (c == '^')) {
|
||||
neg = 1;
|
||||
c = 0;
|
||||
}
|
||||
if (c == ']') {
|
||||
ec = 1;
|
||||
c = 0;
|
||||
}
|
||||
if ((grp == 1) && (c != 0)) {
|
||||
*(mbr + nm) = c;
|
||||
nm++;
|
||||
c = 0;
|
||||
}
|
||||
if (c != 0) {
|
||||
ec = 1;
|
||||
}
|
||||
if (ec) {
|
||||
if (grp == 1) {
|
||||
if (neg == 0) {
|
||||
for (j = 0; j < nm; j++) {
|
||||
k = (unsigned int)mbr[j];
|
||||
ptr->conds[k] = ptr->conds[k] | (1 << n);
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < SET_SIZE; j++)
|
||||
ptr->conds[j] = ptr->conds[j] | (1 << n);
|
||||
for (j = 0; j < nm; j++) {
|
||||
k = (unsigned int)mbr[j];
|
||||
ptr->conds[k] = ptr->conds[k] & ~(1 << n);
|
||||
}
|
||||
}
|
||||
neg = 0;
|
||||
grp = 0;
|
||||
nm = 0;
|
||||
} else {
|
||||
/* not a group so just set the proper bit for this char */
|
||||
/* but first handle special case of . inside condition */
|
||||
if (c == '.') {
|
||||
/* wild card character so set them all */
|
||||
for (j = 0; j < SET_SIZE; j++)
|
||||
ptr->conds[j] = ptr->conds[j] | (1 << n);
|
||||
} else {
|
||||
ptr->conds[(unsigned int)c] = ptr->conds[(unsigned int)c] | (1 << n);
|
||||
}
|
||||
}
|
||||
n++;
|
||||
ec = 0;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
ptr->numconds = n;
|
||||
return;
|
||||
}
|
||||
|
||||
/* search for a prefix */
|
||||
void pfx_chk(const char* word, int len, struct affent* ep, int num) {
|
||||
struct affent* aent;
|
||||
int cond;
|
||||
struct hentry* hent;
|
||||
int i;
|
||||
|
||||
for (aent = ep, i = num; i > 0; aent++, i--) {
|
||||
int tlen = len - aent->appndl;
|
||||
|
||||
if (tlen > 0 &&
|
||||
(aent->appndl == 0 || strncmp(aent->appnd, word, aent->appndl) == 0) &&
|
||||
tlen + aent->stripl >= aent->numconds) {
|
||||
std::string tword(aent->strip);
|
||||
tword.append(word + aent->appndl);
|
||||
|
||||
/* now go through the conds and make sure they all match */
|
||||
unsigned char* cp = (unsigned char*)tword.c_str();
|
||||
for (cond = 0; cond < aent->numconds; cond++) {
|
||||
if ((aent->conds[*cp++] & (1 << cond)) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (cond >= aent->numconds) {
|
||||
if ((hent = lookup(tword.c_str())) != NULL) {
|
||||
if (numroots < MAX_ROOTS) {
|
||||
roots[numroots].hashent = hent;
|
||||
roots[numroots].prefix = aent;
|
||||
roots[numroots].suffix = NULL;
|
||||
numroots++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void suf_chk(const char* word,
|
||||
int len,
|
||||
struct affent* ep,
|
||||
int num,
|
||||
struct affent* pfxent,
|
||||
int cpflag) {
|
||||
struct affent* aent;
|
||||
int cond;
|
||||
struct hentry* hent;
|
||||
int i;
|
||||
|
||||
for (aent = ep, i = num; i > 0; aent++, i--) {
|
||||
if ((cpflag & XPRODUCT) != 0 && (aent->xpflg & XPRODUCT) == 0)
|
||||
continue;
|
||||
|
||||
int tlen = len - aent->appndl;
|
||||
if (tlen > 0 &&
|
||||
(aent->appndl == 0 || strcmp(aent->appnd, (word + tlen)) == 0) &&
|
||||
tlen + aent->stripl >= aent->numconds) {
|
||||
std::string tword(word);
|
||||
tword.resize(tlen);
|
||||
tword.append(aent->strip);
|
||||
unsigned char* cp = (unsigned char*)(tword.c_str() + tword.size());
|
||||
|
||||
for (cond = aent->numconds; --cond >= 0;) {
|
||||
if ((aent->conds[*--cp] & (1 << cond)) == 0)
|
||||
break;
|
||||
}
|
||||
if (cond < 0) {
|
||||
if ((hent = lookup(tword.c_str())) != NULL) {
|
||||
if (numroots < MAX_ROOTS) {
|
||||
roots[numroots].hashent = hent;
|
||||
roots[numroots].prefix = pfxent;
|
||||
roots[numroots].suffix = aent;
|
||||
numroots++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void aff_chk(const char* word, int len) {
|
||||
int i;
|
||||
int nh = 0;
|
||||
|
||||
if (len < 4)
|
||||
return;
|
||||
|
||||
for (i = 0; i < numpfx; i++) {
|
||||
pfx_chk(word, len, ptable[i].aep, ptable[i].num);
|
||||
}
|
||||
|
||||
nh = numroots;
|
||||
|
||||
if (nh > 0) {
|
||||
for (int j = 0; j < nh; j++) {
|
||||
if (roots[j].prefix->xpflg & XPRODUCT) {
|
||||
char* nword = mystrdup((roots[j].hashent)->word);
|
||||
int nwl = strlen(nword);
|
||||
for (i = 0; i < numsfx; i++) {
|
||||
suf_chk(nword, nwl, stable[i].aep, stable[i].num, roots[j].prefix,
|
||||
XPRODUCT);
|
||||
}
|
||||
free(nword);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < numsfx; i++) {
|
||||
suf_chk(word, len, stable[i].aep, stable[i].num, NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* lookup a root word in the hashtable */
|
||||
|
||||
struct hentry* lookup(const char* word) {
|
||||
struct hentry* dp;
|
||||
dp = &tableptr[hash(word)];
|
||||
if (dp->word == NULL)
|
||||
return NULL;
|
||||
for (; dp != NULL; dp = dp->next) {
|
||||
if (strcmp(word, dp->word) == 0)
|
||||
return dp;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* add a word to the hash table */
|
||||
|
||||
int add_word(char* word) {
|
||||
int i;
|
||||
struct hentry* dp;
|
||||
struct hentry* hp = (struct hentry*)malloc(sizeof(struct hentry));
|
||||
|
||||
hp->word = word;
|
||||
hp->affstr = NULL;
|
||||
hp->keep = 0;
|
||||
hp->next = NULL;
|
||||
|
||||
i = hash(word);
|
||||
dp = &tableptr[i];
|
||||
|
||||
if (dp->word == NULL) {
|
||||
*dp = *hp;
|
||||
free(hp);
|
||||
} else {
|
||||
while (dp->next != NULL)
|
||||
dp = dp->next;
|
||||
dp->next = hp;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* load a word list and build a hash table on the fly */
|
||||
|
||||
int load_tables(FILE* wdlst) {
|
||||
char ts[MAX_LN_LEN];
|
||||
int nExtra = 5;
|
||||
|
||||
/* first read the first line of file to get hash table size */
|
||||
if (!fgets(ts, MAX_LN_LEN - 1, wdlst))
|
||||
return 2;
|
||||
mychomp(ts);
|
||||
tablesize = atoi(ts);
|
||||
|
||||
if (tablesize <= 0 ||
|
||||
(tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / (int)sizeof(struct hentry*))) {
|
||||
return 3;
|
||||
}
|
||||
|
||||
tablesize += nExtra;
|
||||
if ((tablesize % 2) == 0)
|
||||
tablesize++;
|
||||
|
||||
/* allocate the hash table */
|
||||
tableptr = (struct hentry*)calloc(tablesize, sizeof(struct hentry));
|
||||
if (!tableptr)
|
||||
return 3;
|
||||
|
||||
/* loop thorugh all words on much list and add to hash
|
||||
* table and store away word and affix strings in tmpfile
|
||||
*/
|
||||
|
||||
while (fgets(ts, MAX_LN_LEN - 1, wdlst)) {
|
||||
mychomp(ts);
|
||||
char* ap = mystrdup(ts);
|
||||
add_word(ap);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* the hash function is a simple load and rotate
|
||||
* algorithm borrowed
|
||||
*/
|
||||
|
||||
int hash(const char* word) {
|
||||
int i;
|
||||
long hv = 0;
|
||||
for (i = 0; i < 4 && *word != 0; i++)
|
||||
hv = (hv << 8) | (*word++);
|
||||
while (*word != 0) {
|
||||
ROTATE(hv, ROTATE_LEN);
|
||||
hv ^= (*word++);
|
||||
}
|
||||
return (unsigned long)hv % tablesize;
|
||||
}
|
||||
|
||||
/*
 * Add the affix flag ac to the affix string of ep unless it is already
 * part of it.  An entry without an affix string gets a new one-character
 * string.  If memory cannot be obtained the entry is left as it was.
 */
void add_affix_char(struct hentry* ep, char ac) {
  if (ep->affstr == NULL) {
    char* first = (char*)malloc(2);
    if (first == NULL)
      return;
    first[0] = ac;
    first[1] = '\0';
    ep->affstr = first;
    return;
  }

  const size_t al = strlen(ep->affstr);

  /* flag already recorded? (the terminator itself is not searched) */
  if (memchr(ep->affstr, ac, al) != NULL)
    return;

  /* room for the new flag plus the terminator; on failure realloc leaves
   * the old string untouched and still owned by ep */
  char* grown = (char*)realloc(ep->affstr, al + 2);
  if (grown == NULL)
    return;
  grown[al] = ac;
  grown[al + 1] = '\0';
  ep->affstr = grown;
}
|
||||
|
||||
/* add a prefix to word */
|
||||
void pfx_add(const char* word, int len, struct affent* ep, int num) {
|
||||
struct affent* aent;
|
||||
int cond;
|
||||
unsigned char* cp;
|
||||
int i;
|
||||
char* pp;
|
||||
char tword[MAX_WD_LEN];
|
||||
|
||||
for (aent = ep, i = num; i > 0; aent++, i--) {
|
||||
/* now make sure all conditions match */
|
||||
if ((len > aent->stripl) && (len >= aent->numconds)) {
|
||||
cp = (unsigned char*)word;
|
||||
for (cond = 0; cond < aent->numconds; cond++) {
|
||||
if ((aent->conds[*cp++] & (1 << cond)) == 0)
|
||||
break;
|
||||
}
|
||||
if (cond >= aent->numconds) {
|
||||
/* we have a match so add prefix */
|
||||
int tlen = 0;
|
||||
if (aent->appndl) {
|
||||
strncpy(tword, aent->appnd, MAX_WD_LEN - 1);
|
||||
tword[MAX_WD_LEN - 1] = '\0';
|
||||
tlen += aent->appndl;
|
||||
}
|
||||
pp = tword + tlen;
|
||||
strcpy(pp, (word + aent->stripl));
|
||||
|
||||
if (numwords < MAX_WORDS) {
|
||||
wlist[numwords].word = mystrdup(tword);
|
||||
wlist[numwords].pallow = 0;
|
||||
numwords++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* add a suffix to a word */
|
||||
void suf_add(const char* word, int len, struct affent* ep, int num) {
|
||||
struct affent* aent;
|
||||
int cond;
|
||||
unsigned char* cp;
|
||||
int i;
|
||||
char tword[MAX_WD_LEN];
|
||||
char* pp;
|
||||
|
||||
for (aent = ep, i = num; i > 0; aent++, i--) {
|
||||
/* if conditions hold on root word
|
||||
* then strip off strip string and add suffix
|
||||
*/
|
||||
|
||||
if ((len > aent->stripl) && (len >= aent->numconds)) {
|
||||
cp = (unsigned char*)(word + len);
|
||||
for (cond = aent->numconds; --cond >= 0;) {
|
||||
if ((aent->conds[*--cp] & (1 << cond)) == 0)
|
||||
break;
|
||||
}
|
||||
if (cond < 0) {
|
||||
/* we have a matching condition */
|
||||
int tlen = len;
|
||||
strncpy(tword, word, MAX_WD_LEN - 1);
|
||||
tword[MAX_WD_LEN - 1] = '\0';
|
||||
if (aent->stripl) {
|
||||
tlen -= aent->stripl;
|
||||
}
|
||||
pp = (tword + tlen);
|
||||
if (aent->appndl) {
|
||||
strcpy(pp, aent->appnd);
|
||||
} else
|
||||
*pp = '\0';
|
||||
|
||||
if (numwords < MAX_WORDS) {
|
||||
wlist[numwords].word = mystrdup(tword);
|
||||
wlist[numwords].pallow = (aent->xpflg & XPRODUCT);
|
||||
numwords++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int expand_rootword(const char* ts, int wl, const char* ap) {
|
||||
int i;
|
||||
int nh = 0;
|
||||
|
||||
for (i = 0; i < numsfx; i++) {
|
||||
if (strchr(ap, (stable[i].aep)->achar)) {
|
||||
suf_add(ts, wl, stable[i].aep, stable[i].num);
|
||||
}
|
||||
}
|
||||
|
||||
nh = numwords;
|
||||
|
||||
if (nh > 1) {
|
||||
for (int j = 1; j < nh; j++) {
|
||||
if (wlist[j].pallow) {
|
||||
for (i = 0; i < numpfx; i++) {
|
||||
if (strchr(ap, (ptable[i].aep)->achar)) {
|
||||
if ((ptable[i].aep)->xpflg & XPRODUCT) {
|
||||
int nwl = strlen(wlist[j].word);
|
||||
pfx_add(wlist[j].word, nwl, ptable[i].aep, ptable[i].num);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < numpfx; i++) {
|
||||
if (strchr(ap, (ptable[i].aep)->achar)) {
|
||||
pfx_add(ts, wl, ptable[i].aep, ptable[i].num);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* strip strings into token based on single char delimiter
|
||||
* acts like strsep() but only uses a delim char and not
|
||||
* a delim string
|
||||
*/
|
||||
/*
 * Return a freshly malloc'ed copy of the next token of *stringp and move
 * *stringp past it.
 *
 * A token ends at the next occurrence of delim (which is skipped) or at
 * the end of the string.  Two delimiters in a row yield an empty token.
 * NULL is returned when nothing is left or when memory cannot be obtained;
 * the caller frees every non-NULL result.
 */
char* mystrsep(char** stringp, const char delim) {
  char* start = *stringp;
  const int remaining = strlen(start);

  if (remaining <= 0)
    return NULL;

  char* hit = (char*)memchr(start, (int)((unsigned char)delim), remaining);

  if (hit != NULL) {
    /* token stops in front of the delimiter */
    const ptrdiff_t toklen = hit - start;
    *stringp = hit + 1;
    char* token = (char*)malloc(toklen + 1);
    if (token != NULL) {
      memcpy(token, start, toklen);
      token[toklen] = '\0';
    }
    return token;
  }

  /* no delimiter left: the rest of the string is the last token */
  char* token = (char*)malloc(remaining + 1);
  if (token != NULL) {
    memcpy(token, start, remaining);
    token[remaining] = '\0';
    *stringp = start + remaining;
  }
  return token;
}
|
||||
|
||||
/*
 * Return a malloc'ed copy of s, or NULL when s is NULL or memory cannot
 * be obtained.  The caller frees the copy.
 */
char* mystrdup(const char* s) {
  if (s == NULL)
    return NULL;

  const int bytes = strlen(s) + 1; /* including the terminator */
  char* copy = (char*)malloc(bytes);
  if (copy != NULL)
    memcpy(copy, s, bytes);
  return copy;
}
|
||||
|
||||
/*
 * Remove one trailing line ending ("\n", "\r\n" or a lone "\r") from s in
 * place.  A string that does not end in a line ending - for instance the
 * last line of a file without a final newline - is left untouched.
 */
void mychomp(char* s) {
  size_t k = strlen(s);
  if ((k > 0) && (s[k - 1] == '\n'))
    s[--k] = '\0';
  if ((k > 0) && (s[k - 1] == '\r'))
    s[--k] = '\0';
}
|
156
3rdparty/hunspell/src/tools/munch.h
vendored
156
3rdparty/hunspell/src/tools/munch.h
vendored
@ -1,156 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
/* munch header file */

#ifndef MUNCH_H_
#define MUNCH_H_

/* NOTE: this header defines (not merely declares) the global tables below
 * and uses FILE without including <stdio.h> itself. */

#define MAX_LN_LEN 200    /* size of the line buffers */
#define MAX_WD_LEN 200    /* size of the word work buffers */
#define MAX_PREFIXES 2048 /* capacity of ptable */
#define MAX_SUFFIXES 2048 /* capacity of stable */
#define MAX_ROOTS 20      /* capacity of roots */
#define MAX_WORDS 5000    /* capacity of wlist */

#define ROTATE_LEN 5

/* Shift v left by q bits and bring the q bits starting at bit (32 - q)
 * back in at the bottom; the result is stored in v.  The expansion already
 * ends in a semicolon. */
#define ROTATE(v, q) \
  (v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q)) - 1));

#define SET_SIZE 256 /* one conds slot per byte value */

#define XPRODUCT (1 << 0) /* affix may be combined with an affix of the other kind */

/* the affix table entry */

struct affent {
  char* appnd;          /* string the affix adds */
  char* strip;          /* string removed from the root before adding */
  short appndl;         /* length of appnd */
  short stripl;         /* length of strip */
  char achar;           /* flag character of the affix group */
  char xpflg;           /* XPRODUCT or 0 */
  short numconds;       /* number of conditions */
  char conds[SET_SIZE]; /* bit n of conds[c]: byte c satisfies condition n */
};

struct affixptr {
  struct affent* aep; /* first entry of the group */
  int num;            /* number of entries in the group */
};

/* the prefix and suffix table */
int numpfx; /* Number of prefixes in table */
int numsfx; /* Number of suffixes in table */

/* the prefix table */
struct affixptr ptable[MAX_PREFIXES];

/* the suffix table */
struct affixptr stable[MAX_SUFFIXES];

/* data structure to store results of lookups */
struct matches {
  struct hentry* hashent; /* hash table entry */
  struct affent* prefix;  /* Prefix used, or NULL */
  struct affent* suffix;  /* Suffix used, or NULL */
};

int numroots;                    /* number of root words found */
struct matches roots[MAX_ROOTS]; /* list of root words found */

/* hashing stuff */

struct hentry {
  char* word;          /* the word; NULL marks an unused table slot */
  char* affstr;        /* affix flags collected for the word, or NULL */
  struct hentry* next; /* next entry of the same bucket */
  int keep;            /* greater than 0: word is written to the output */
};

int tablesize;
struct hentry* tableptr;

/* unmunch stuff */

int numwords; /* number of words found */
struct dwords {
  char* word;
  int pallow; /* non-zero: prefixes may be added (cross product) */
};

struct dwords wlist[MAX_WORDS]; /* list words found */

/* the routines */

int parse_aff_file(FILE* afflst);

void encodeit(struct affent* ptr, char* cs);

int load_tables(FILE* wrdlst);

int hash(const char*);

int add_word(char*);

struct hentry* lookup(const char*);

void aff_chk(const char* word, int len);

void pfx_chk(const char* word, int len, struct affent* ep, int num);

void suf_chk(const char* word,
             int len,
             struct affent* ep,
             int num,
             struct affent* pfxent,
             int cpflag);

void add_affix_char(struct hentry* hent, char ac);

int expand_rootword(const char*, int, const char*);

void pfx_add(const char* word, int len, struct affent* ep, int num);

void suf_add(const char* word, int len, struct affent* ep, int num);

char* mystrsep(char** stringp, const char delim);

char* mystrdup(const char* s);

void mychomp(char* s);

#endif /* MUNCH_H_ */
|
@ -1,3 +0,0 @@
|
||||
./usr
|
||||
./var
|
||||
./debs
|
213
3rdparty/hunspell/src/tools/optionusage/analyse.py
vendored
213
3rdparty/hunspell/src/tools/optionusage/analyse.py
vendored
@ -1,213 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from os import listdir, path
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def report(output, desc, dikt_has, options, doc, option_count, all_dikts=False):
    """Write one Markdown section (summary sentence plus usage table).

    output       -- writable text stream receiving the Markdown
    desc         -- category name used in the heading and the sentences
    dikt_has     -- dictionaries using at least one option of the category
    options      -- option names of the category, in table row order
    doc          -- mapping dictionary -> {option: number of occurrences}
    option_count -- mapping of every option seen to its total count
    all_dikts    -- table gets a column for every dictionary in doc instead
                    of only those in dikt_has
    """
    # header
    output.write('## {} Options\n\n'.format(desc))
    difopt = sum(1 for option in options if option in option_count)
    summary = 'A total of {} {} different options are recognised by Hunspell.'.format(
        len(options), desc.lower())
    if difopt == 0:
        summary += ' None of these options are used'
    elif difopt == len(options):
        summary += ' All of these options are used'
    elif difopt == 1:
        summary += ' Of these, only 1 option is used'
    else:
        summary += ' Of these, only {} different options are used'.format(difopt)
    output.write(summary)

    if len(dikt_has) == 0:
        output.write('\n\n')
    else:
        scope = 'all' if len(dikt_has) == len(doc) else 'only'
        noun = 'dictionary' if len(dikt_has) == 1 else 'dictionaries'
        output.write(' in {} {} {}.\n\n'.format(scope, len(dikt_has), noun))

    # the same columns are used for the heading, the alignment row and the
    # content rows
    dikts = sorted(doc) if all_dikts else sorted(dikt_has)

    output.write('| {} vs. Dictionary'.format(desc))
    for dikt in dikts:
        # underscores are escaped so Markdown does not read them as emphasis
        output.write(' | {}'.format(dikt.replace('_', '\\_')))
    output.write(' |\n')

    # format
    output.write('|---' + '|--:' * len(dikts) + '|\n')

    # content
    for option in options:
        output.write('| {}'.format(option))
        for dikt in dikts:
            oc = doc[dikt]
            if option in oc:
                output.write(' | {}'.format(oc[option]))
            else:
                output.write(' |')
        output.write(' |\n')
    output.write('\n\n')
|
||||
|
||||
# The following lists were obtained manually from $ man -K 5 hunspell
options_general = (
    'SET', 'FLAG', 'COMPLEXPREFIXES', 'LANG', 'IGNORE', 'AF', 'AM',
)
options_suggest = (
    'KEY', 'TRY', 'NOSUGGEST', 'MAXCPDSUGS', 'MAXNGRAMSUGS', 'MAXDIFF',
    'ONLYMAXDIFF', 'NOSPLITSUGS', 'SUGSWITHDOTS', 'REP', 'MAP', 'PHONE',
    'WARN', 'FORBIDWARN',
)
options_compounding = (
    'BREAK', 'COMPOUNDRULE', 'COMPOUNDMIN', 'COMPOUNDFLAG', 'COMPOUNDBEGIN',
    'COMPOUNDLAST', 'COMPOUNDMIDDLE', 'ONLYINCOMPOUND', 'COMPOUNDPERMITFLAG',
    'COMPOUNDFORBIDFLAG', 'COMPOUNDMORESUFFIXES', 'COMPOUNDROOT',
    'COMPOUNDWORDMAX', 'CHECKCOMPOUNDDUP', 'CHECKCOMPOUNDREP',
    'CHECKCOMPOUNDCASE', 'CHECKCOMPOUNDTRIPLE', 'SIMPLIFIEDTRIPLE',
    'CHECKCOMPOUNDPATTERN', 'FORCEUCASE', 'COMPOUNDSYLLABLE', 'SYLLABLENUM',
)  # 'COMPOUND',
options_affix = (
    'PFX', 'SFX', 'CIRCUMFIX', 'FORBIDDENWORD', 'FULLSTRIP', 'KEEPCASE',
    'ICONV', 'OCONV', 'NEEDAFFIX', 'SUBSTANDARD', 'WORDCHARS', 'CHECKSHARPS',
)
options_deprecated = (
    'LEMMA_PRESENT', 'PSEUDOROOT',
)
|
||||
|
||||
# self-check: an option name may appear in one category only
_option_groups = (
    ('general', options_general),
    ('suggest', options_suggest),
    ('compounding', options_compounding),
    ('affix', options_affix),
    ('deprecated', options_deprecated),
)
for _pos, (_name_a, _group_a) in enumerate(_option_groups):
    # compare each category with every category listed after it
    for _name_b, _group_b in _option_groups[_pos + 1:]:
        if set(_group_a) & set(_group_b):
            print('ERROR: Overlap {} and {}'.format(_name_a, _name_b))
            # same exit status as before, without relying on the exit()
            # helper that is only installed by the site module
            raise SystemExit(1)
|
||||
|
||||
options_found = []
options_undocumented = []
option_count = {}  # option / count

doc = {}  # dictionary / option / count
options = []
dikt_has_general = []
dikt_has_suggest = []
dikt_has_compounding = []
dikt_has_affix = []
dikt_has_deprecated = []
dikt_has_undocumented = []

# affix files that are opened as ISO-8859-1; all others are opened with the
# default encoding
_latin1_affs = ('de_AT_frami.aff', 'de_CH_frami.aff', 'de_DE_frami.aff', 'de_DE.aff', 'en_US.aff', 'pt_BR.aff', 'sl_SI.aff', 'th_TH.aff', 'ru_RU.aff', 'nn_NO.aff', 'an_ES.aff', 'af_ZA.aff', 'el_GR.aff', 'bg_BG.aff', 'de_CH.aff', 'it_IT.aff', 'hu_HU.aff', 'pl_PL.aff', 'cs_CZ.aff', 'eu.aff', 'lt_LT.aff', 'nb_NO.aff', 'oc_FR.aff', 'bs_BA.aff', 'de_AT.aff', )

# documented categories and the list recording which dictionaries use them
_categories = (
    (options_general, dikt_has_general),
    (options_suggest, dikt_has_suggest),
    (options_compounding, dikt_has_compounding),
    (options_affix, dikt_has_affix),
    (options_deprecated, dikt_has_deprecated),
)

directory = 'usr/share/hunspell/'
for filename in listdir(directory):
    filepath = directory + filename
    if filename.endswith('.aff') and path.islink(filepath):
        print('XX', filename)
    if filename.endswith('.dic') and not path.islink(filepath):
        print('YY', filename)
    if not filename.endswith('.aff') or path.islink(filepath) or filename in ('kk_KZ.aff', ):  # FIXME kk_KZ.aff has invalid first character
        continue
    print(filename)
    encoding = 'ISO-8859-1' if filename in _latin1_affs else None
    dikt = filename.replace('.aff', '')
    doc[dikt] = {}
    oc = doc[dikt]
    # the with-statement closes each affix file once it has been read
    with open(filepath, 'r', encoding=encoding) as aff_file:
        for line in aff_file:
            if dikt == 'kk_KZ':
                # NOTE(review): unreachable while kk_KZ.aff is skipped above.
                # The former line.replace('', '') here discarded its result
                # and so had no effect; presumably it was meant to drop the
                # invalid first character - confirm before re-enabling.
                print(line)
            line = line.strip()
            if line == '' or line.startswith('#'):
                continue
            # the option name is everything before the first space or tab
            option = line.replace('\t', ' ').split(' ', 1)[0]
            if option not in options_found:
                options_found.append(option)
            oc[option] = oc.get(option, 0) + 1
            for known, dikts in _categories:
                if option in known:
                    if dikt not in dikts:
                        dikts.append(dikt)
                    break
            else:
                # not part of any documented category
                if dikt not in dikt_has_undocumented:
                    dikt_has_undocumented.append(dikt)
                if option not in options_undocumented:
                    options_undocumented.append(option)
            option_count[option] = option_count.get(option, 0) + 1
|
||||
|
||||
# Write the Markdown page; the with-statement makes sure the file is flushed
# and closed even if one of the reports fails.
with open('option-usage.md', 'w') as output:
    output.write('# Hunspell Option Usage per Dictionary\n\n')
    output.write('This page has been generated at {}. Do not edit this page manually.\n\n'.format(datetime.now().strftime('%Y-%m-%d at %H:%M:%S')))

    print('dictionaries found', len(doc))
    print('options found', len(options_found))

    # the general table lists every dictionary, the others only the
    # dictionaries that use an option of the category
    report(output, 'General', dikt_has_general, options_general, doc, option_count, all_dikts=True)
    report(output, 'Suggest', dikt_has_suggest, options_suggest, doc, option_count)
    report(output, 'Compounding', dikt_has_compounding, options_compounding, doc, option_count)
    report(output, 'Affix', dikt_has_affix, options_affix, doc, option_count)
    report(output, 'Deprecated', dikt_has_deprecated, options_deprecated, doc, option_count)
    report(output, 'Undocumented', dikt_has_undocumented, options_undocumented, doc, option_count)
|
@ -1,44 +0,0 @@
|
||||
#!/usr/bin/env bash

# Download every hunspell dictionary package known to apt and unpack the
# packages into the current directory, keeping only what ends up below usr/
# (minus usr/share/myspell and usr/share/doc).

PACKAGES=$(apt-cache search hunspell|grep ^hunspell|grep dict|awk '{print $1}'|tr '\n' ' ')
# unquoted on purpose: word splitting drops the trailing blank
echo $PACKAGES|sed 's/ /\n/g'

# start from a clean slate
rm -rf usr var debs

mkdir debs || exit 1
# Stop here if the directory cannot be entered; otherwise the "cd .." and
# the clean-up below would run one level too high.
cd debs || exit 1
# unquoted on purpose: one argument per package name
apt-get download $PACKAGES
for i in *.deb
do
	# the pattern stays literal when nothing was downloaded
	[ -e "$i" ] || continue
	dpkg -x "$i" ..
done
cd ..

# remove the downloads and everything that is not dictionary data
rm -rf debs var usr/share/myspell usr/share/doc
|
@ -1,121 +0,0 @@
|
||||
# Hunspell Option Usage per Dictionary
|
||||
|
||||
This page was generated on 2017-04-25 at 23:17:56. Do not edit this page manually.
|
||||
|
||||
## General Options
|
||||
|
||||
A total of 7 different general options are recognised by Hunspell. Of these, only 6 are used, across all 63 dictionaries.
|
||||
|
||||
| General vs. Dictionary | af\_ZA | an\_ES | ar | be\_BY | bg\_BG | bn\_BD | bo | br\_FR | bs\_BA | ca | ca\_ES-valencia | cs\_CZ | da\_DK | de\_AT\_frami | de\_CH\_frami | de\_DE\_frami | el\_GR | en\_AU | en\_CA | en\_GB | en\_US | en\_ZA | es\_ES | eu | fr | gd\_GB | gl\_ES | gu\_IN | he\_IL | hi\_IN | hr\_HR | hu\_HU | is\_IS | it\_IT | kmr\_Latn | ko | lo\_LA | lt\_LT | ml\_IN | nb\_NO | ne\_NP | nl\_NL | nn\_NO | oc\_FR | pl\_PL | pt\_BR | pt\_PT | ro\_RO | ru\_RU | se | si\_LK | sk\_SK | sl\_SI | sr\_Latn\_RS | sr\_RS | sv\_FI | sv\_SE | sw\_TZ | te\_IN | th\_TH | uk\_UA | uz\_UZ | vi\_VN |
|
||||
|---|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
|
||||
| SET | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
|
||||
| FLAG | | | 1 | | | | | 1 | | 1 | 1 | | 1 | | | | | | | | | | | 1 | 1 | | 1 | | | | 1 | | 1 | | | 1 | | | | | 1 | 1 | | | | | | | | 1 | 1 | | | | | | | | | | | | |
|
||||
| COMPLEXPREFIXES | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
| LANG | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1 | | 1 | | 1 | | | | | | | | | | | 1 | | | | | | | 1 | 1 | | | | | | | | |
|
||||
| IGNORE | | | 1 | | | | 1 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1 | | |
|
||||
| AF | | | 512 | | | | | | | | | | | | | | | | | | | | | | 277 | | | | | | 99 | 1307 | | | | 56 | | | | | | | | | | | | | | 3501 | | | | | | | | | | | | | |
|
||||
| AM | | | 23625 | | | | | | | | | | | | | | | | | | | | | | 485 | | | | | | | 23051 | 701 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
|
||||
|
||||
## Suggest Options
|
||||
|
||||
A total of 14 different suggest options are recognised by Hunspell. Of these, only 12 are used, in only 60 dictionaries.
|
||||
|
||||
| Suggest vs. Dictionary | af\_ZA | an\_ES | ar | be\_BY | bg\_BG | bn\_BD | bo | br\_FR | bs\_BA | ca | ca\_ES-valencia | cs\_CZ | da\_DK | de\_AT\_frami | de\_CH\_frami | de\_DE\_frami | el\_GR | en\_AU | en\_CA | en\_GB | en\_US | en\_ZA | es\_ES | eu | fr | gd\_GB | gl\_ES | gu\_IN | he\_IL | hi\_IN | hr\_HR | hu\_HU | is\_IS | it\_IT | kmr\_Latn | ko | lt\_LT | nb\_NO | ne\_NP | nl\_NL | nn\_NO | oc\_FR | pl\_PL | pt\_BR | pt\_PT | ro\_RO | ru\_RU | se | si\_LK | sk\_SK | sl\_SI | sr\_Latn\_RS | sr\_RS | sv\_FI | sv\_SE | sw\_TZ | te\_IN | th\_TH | uk\_UA | vi\_VN |
|
||||
|---|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
|
||||
| KEY | | | 1 | | | | | | | 1 | 1 | | 1 | | | | | | | | 1 | | | | 1 | | 1 | | | | | 1 | 1 | | | | | | | 1 | | | | | 1 | 1 | | | | | | 1 | 1 | | | | | | | |
|
||||
| TRY | 1 | 1 | 1 | 1 | 1 | 1 | | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | | 1 | 1 |
|
||||
| NOSUGGEST | | | | | | | | | | | | | 1 | 1 | 1 | 1 | | 1 | 1 | 1 | 1 | 1 | | | 1 | | | | | | 1 | 1 | | | | | | | | 1 | | | | 1 | | | | | | | | | | 1 | 1 | | | | | |
|
||||
| MAXCPDSUGS | | | | | | | | | | | | | 1 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1 | 1 | | | | | |
|
||||
| MAXNGRAMSUGS | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1 | | | | | | | | | | | | | | | | |
|
||||
| MAXDIFF | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1 | | | | | | | | | | 1 | 1 | | | | | |
|
||||
| ONLYMAXDIFF | | | | | | | | | | | | | 1 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1 | | | | | | | | | | 1 | 1 | | | | | |
|
||||
| NOSPLITSUGS | 1 | | | | | | 1 | | | | | | 1 | | | | | | | | | | | 1 | | | 1 | | | | | | 1 | | | | | | | 1 | | | | | | | | | | | | | | 1 | 1 | | | | | |
|
||||
| SUGSWITHDOTS | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
| REP | 28 | 114 | 81 | 36 | | | 7 | | | 49 | 49 | | 113 | 29 | 29 | 29 | 523 | 115 | 91 | 28 | 98 | 37 | 21 | 37 | 83 | 49 | 3737 | | | | 100 | 125 | 58 | 5 | 37 | 60 | | | 23 | 488 | | | 65 | 1056 | 26 | 5 | | 74 | 26 | 53 | | 2 | 9 | 60 | 60 | | | 5 | 6 | 19 |
|
||||
| MAP | 7 | 37 | 17 | 21 | 27 | | | | | 25 | 25 | | | | | | 6 | | | | | | 6 | | 26 | 6 | 22 | | 11 | | | 6 | | 6 | | 13 | | | | 6 | | 6 | 9 | 7 | 12 | 9 | | | | | | 5 | 5 | 3 | 3 | | | | | 19 |
|
||||
| PHONE | | | | | | | | | | | | | | | | | | 105 | | | 106 | 105 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
| WARN | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1 | | | | | | | | | | | | | 1 | | | | | | | | | | | | | | | | |
|
||||
| FORBIDWARN | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
|
||||
|
||||
## Compounding Options
|
||||
|
||||
A total of 22 different compounding options are recognised by Hunspell. Of these, only 21 are used, in only 25 dictionaries.
|
||||
|
||||
| Compounding vs. Dictionary | be\_BY | da\_DK | de\_AT\_frami | de\_CH\_frami | de\_DE\_frami | en\_AU | en\_CA | en\_GB | en\_US | en\_ZA | eu | fr | gd\_GB | he\_IL | hr\_HR | hu\_HU | ko | nb\_NO | nl\_NL | nn\_NO | pt\_BR | se | sv\_FI | sv\_SE | uk\_UA |
|
||||
|---|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
|
||||
| BREAK | 3 | | 3 | 3 | 3 | 4 | | | | 4 | 2 | 8 | 2 | 4 | 3 | 5 | | | 2 | | 65 | 2 | 4 | 4 | 2 |
|
||||
| COMPOUNDRULE | | | | | | 3 | 3 | 3 | 3 | | | | | | 8 | 3 | 7 | | | | | | 13 | 13 | |
|
||||
| COMPOUNDMIN | | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | | | | | | 1 | 1 | 1 | 1 | 1 | 1 | | | 1 | 1 | |
|
||||
| COMPOUNDFLAG | | | | | | | | | | | | | | | 1 | 1 | | 1 | | 1 | | | | | |
|
||||
| COMPOUNDBEGIN | | 1 | 1 | 1 | 1 | | | | | | | | | | 1 | 1 | | | 1 | | | 1 | 1 | 1 | |
|
||||
| COMPOUNDLAST | | | | | | | | | | | | | | | | 1 | | | | | | | | | |
|
||||
| COMPOUNDMIDDLE | | 1 | 1 | 1 | 1 | | | | | | | | | | 1 | | | | 1 | | | 1 | 1 | 1 | |
|
||||
| ONLYINCOMPOUND | | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | | | | | | 1 | 1 | | | 1 | | | 1 | 1 | 1 | |
|
||||
| COMPOUNDPERMITFLAG | | 1 | 1 | 1 | 1 | | | | | | | | | | 1 | 1 | | | 1 | | | 1 | 1 | 1 | |
|
||||
| COMPOUNDFORBIDFLAG | | | | | | | | | | | | | | | 1 | 1 | | | | | | 1 | | | |
|
||||
| COMPOUNDMORESUFFIXES | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
| COMPOUNDROOT | | | | | | | | | | | | | | | | 1 | | | | | | | | | |
|
||||
| COMPOUNDWORDMAX | | 1 | | | | | | | | | | | | | | 1 | | | | | | | | | |
|
||||
| CHECKCOMPOUNDDUP | | | | | | | | | | | | | | | | 1 | | | 1 | | | | 1 | 1 | |
|
||||
| CHECKCOMPOUNDREP | | | | | | | | | | | | | | | | 1 | | | | | | | 1 | 1 | |
|
||||
| CHECKCOMPOUNDCASE | | | | | | | | | | | | | | | | 1 | | | 1 | | | | | | |
|
||||
| CHECKCOMPOUNDTRIPLE | | | | | | | | | | | | | | | | 1 | | 1 | | | | | 1 | 1 | |
|
||||
| SIMPLIFIEDTRIPLE | | | | | | | | | | | | | | | | | | 1 | | | | | 1 | 1 | |
|
||||
| CHECKCOMPOUNDPATTERN | | | | | | | | | | | | | | | | 8 | | | 43 | | | | | | |
|
||||
| FORCEUCASE | | | | | | | | | | | | | | | | | | | | | | | 1 | 1 | |
|
||||
| COMPOUNDSYLLABLE | | | | | | | | | | | | | | | | 1 | | | | | | | | | |
|
||||
| SYLLABLENUM | | | | | | | | | | | | | | | | 1 | | | | | | | | | |
|
||||
|
||||
|
||||
## Affix Options
|
||||
|
||||
A total of 12 different affix options are recognised by Hunspell. All of these options are used, in only 54 dictionaries.
|
||||
|
||||
| Affix vs. Dictionary | af\_ZA | an\_ES | ar | be\_BY | bg\_BG | bo | br\_FR | bs\_BA | ca | ca\_ES-valencia | cs\_CZ | da\_DK | de\_AT\_frami | de\_CH\_frami | de\_DE\_frami | el\_GR | en\_AU | en\_CA | en\_GB | en\_US | en\_ZA | es\_ES | eu | fr | gd\_GB | gl\_ES | he\_IL | hr\_HR | hu\_HU | is\_IS | it\_IT | kmr\_Latn | ko | lt\_LT | nb\_NO | ne\_NP | nl\_NL | nn\_NO | oc\_FR | pl\_PL | pt\_BR | pt\_PT | ro\_RO | ru\_RU | se | si\_LK | sk\_SK | sl\_SI | sr\_Latn\_RS | sr\_RS | sv\_FI | sv\_SE | sw\_TZ | uk\_UA |
|
||||
|---|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
|
||||
| PFX | 20 | 58 | 194 | 10 | | | 27 | | 132 | 132 | 19 | 264 | 68 | 68 | 68 | 10 | 38 | 14 | 36 | 14 | 36 | 78 | 12 | 216 | 34 | | 3335 | | 366 | | 489 | 36 | | 90 | 41 | 2 | 154 | 41 | 16 | 2 | 162 | 38 | 26 | | | | 4 | 10 | | | | | 5 | 6 |
|
||||
| SFX | 75 | 996 | 1609 | 879 | 1652 | 40 | 355 | 998 | 13318 | 13368 | 2551 | 1034 | 437 | 437 | 437 | 179 | 1078 | 59 | 1078 | 59 | 1078 | 6767 | 94118 | 9735 | 7 | 8586 | | 534 | 24052 | 13282 | 2744 | 80 | 55722 | 2586 | 495 | 525 | 447 | 383 | 708 | 7102 | 25770 | 1302 | 1624 | 1606 | 371599 | 10000 | 2443 | 526 | | | 492 | 492 | | 4477 |
|
||||
| CIRCUMFIX | | | | | | | | | | | | | 1 | 1 | 1 | | | | | | | | | 1 | | | | 1 | | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
| FORBIDDENWORD | | | | | | | | | 1 | 1 | | 1 | 1 | 1 | 1 | | | | | | | | | 1 | | | | 1 | 1 | | | | 1 | | | | 1 | | | | 1 | | | | | | | | | | 1 | 1 | | |
|
||||
| FULLSTRIP | | | | | | | | | 1 | 1 | | | | | | | | | | | | | | 1 | | | | | | 1 | | | | | | | | | | | | | | | | | | | | | 1 | 1 | | |
|
||||
| KEEPCASE | | | | | | | | | | | | | 1 | 1 | 1 | | | | | | | | | 1 | | 1 | | 1 | 1 | | | | | | | | 1 | | | | | | | | | | | | | | | | | |
|
||||
| ICONV | | | 10 | | | | | | | | | | | | | | 7 | 2 | | | 7 | | | 42 | 2 | | | 29 | 6 | | | | 11173 | | | | 10 | | | | | | | | | | | | 5 | 4 | | | | |
|
||||
| OCONV | | | | | | | | | | | | | | | | | 2 | | | | 2 | | | 2 | | | | | | | | | 11173 | | | | 3 | | | | | | | | | | | | | | | | | |
|
||||
| NEEDAFFIX | | | | | | 1 | | | | | | 1 | 1 | 1 | 1 | | | | | | | | | 1 | | 1 | 1 | 1 | 1 | | | | | | | | | | | | | | | | 1 | | | | | | 1 | 1 | | |
|
||||
| SUBSTANDARD | | | | | | | | | | | | | | | | | | | | | | | | | | | | 1 | 1 | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
| WORDCHARS | | | | 1 | | | 1 | | 1 | 1 | | 1 | 1 | 1 | 1 | | 1 | 1 | 1 | 1 | | | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | | | 1 | | | | 1 | | | 1 | | 1 | | | 1 | | | | | | 1 | 1 | | 1 |
|
||||
| CHECKSHARPS | | | | | | | | | | | | | 1 | 1 | 1 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
|
||||
|
||||
## Deprecated Options
|
||||
|
||||
A total of 2 different deprecated options are recognised by Hunspell. Of these, only 1 option is used, in only 1 dictionary.
|
||||
|
||||
| Deprecated vs. Dictionary | hu\_HU |
|
||||
|---|--:|
|
||||
| LEMMA_PRESENT | 1 |
|
||||
| PSEUDOROOT | |
|
||||
|
||||
|
||||
## Undocumented Options
|
||||
|
||||
A total of 12 different undocumented options are recognised by Hunspell. All of these options are used, in only 15 dictionaries.
|
||||
|
||||
| Undocumented vs. Dictionary | bo | da\_DK | de\_AT\_frami | de\_CH\_frami | de\_DE\_frami | en\_AU | gd\_GB | hr\_HR | hu\_HU | it\_IT | ko | nl\_NL | se | sv\_FI | sv\_SE |
|
||||
|---|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
|
||||
| LANGCODE | 1 | | | | | | | | | | | | | | |
|
||||
| NAME | | | | | | | | | 1 | 1 | | | | | |
|
||||
| HOME | | | | | | | | | 1 | 1 | | | | | |
|
||||
| VERSION | | | | | | | | | 1 | 1 | 1 | | | | |
|
||||
| COMPOUNDFIRST | | | | | | | | | 1 | | | | | | |
|
||||
| ONLYROOT | | | | | | | | | 1 | | | | | | |
|
||||
| HU_KOTOHANGZO | | | | | | | | | 1 | | | | | | |
|
||||
| COMPOUNDEND | | 1 | 1 | 1 | 1 | | | 1 | 1 | | | 1 | 1 | 1 | 1 |
|
||||
| GENERATE | | | | | | | | | 1 | | | | | | |
|
||||
| LEFTHYPHENMIN | | | | | | | 1 | | | | | | | | |
|
||||
| MIDWORD | | | | | | 1 | | | | | | | | | |
|
||||
| BAD | | | | | | 1 | | | | | | | | | |
|
||||
|
||||
|
550
3rdparty/hunspell/src/tools/unmunch.cxx
vendored
550
3rdparty/hunspell/src/tools/unmunch.cxx
vendored
@ -1,550 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
/* Un-munch a root word list with affix tags
|
||||
* to recreate the original word list
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits>
|
||||
|
||||
#include "unmunch.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int i;
|
||||
int al;
|
||||
|
||||
FILE* wrdlst;
|
||||
FILE* afflst;
|
||||
|
||||
char *wf, *af;
|
||||
char ts[MAX_LN_LEN];
|
||||
|
||||
(void)argc;
|
||||
|
||||
/* first parse the command line options */
|
||||
/* arg1 - munched wordlist, arg2 - affix file */
|
||||
|
||||
if (argv[1]) {
|
||||
wf = mystrdup(argv[1]);
|
||||
} else {
|
||||
fprintf(stderr, "correct syntax is:\n");
|
||||
fprintf(stderr, "unmunch dic_file affix_file\n");
|
||||
exit(1);
|
||||
}
|
||||
if (argv[2]) {
|
||||
af = mystrdup(argv[2]);
|
||||
} else {
|
||||
fprintf(stderr, "correct syntax is:\n");
|
||||
fprintf(stderr, "unmunch dic_file affix_file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* open the affix file */
|
||||
afflst = fopen(af, "r");
|
||||
if (!afflst) {
|
||||
fprintf(stderr, "Error - could not open affix description file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* step one is to parse the affix file building up the internal
|
||||
affix data structures */
|
||||
|
||||
numpfx = 0;
|
||||
numsfx = 0;
|
||||
fullstrip = 0;
|
||||
|
||||
if (parse_aff_file(afflst)) {
|
||||
fprintf(stderr, "Error - in affix file loading\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fclose(afflst);
|
||||
|
||||
fprintf(stderr, "parsed in %d prefixes and %d suffixes\n", numpfx, numsfx);
|
||||
|
||||
/* affix file is now parsed so create hash table of wordlist on the fly */
|
||||
|
||||
/* open the wordlist */
|
||||
wrdlst = fopen(wf, "r");
|
||||
if (!wrdlst) {
|
||||
fprintf(stderr, "Error - could not open word list file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* skip over the hash table size */
|
||||
if (!fgets(ts, MAX_LN_LEN - 1, wrdlst)) {
|
||||
fclose(wrdlst);
|
||||
return 2;
|
||||
}
|
||||
mychomp(ts);
|
||||
|
||||
while (fgets(ts, MAX_LN_LEN - 1, wrdlst)) {
|
||||
mychomp(ts);
|
||||
/* split each line into word and affix char strings */
|
||||
char* ap = strchr(ts, '/');
|
||||
if (ap) {
|
||||
*ap = '\0';
|
||||
ap++;
|
||||
al = strlen(ap);
|
||||
} else {
|
||||
al = 0;
|
||||
ap = NULL;
|
||||
}
|
||||
|
||||
int wl = strlen(ts);
|
||||
|
||||
numwords = 0;
|
||||
wlist[numwords].word = mystrdup(ts);
|
||||
wlist[numwords].pallow = 0;
|
||||
numwords++;
|
||||
|
||||
if (al)
|
||||
expand_rootword(ts, wl, ap);
|
||||
|
||||
for (i = 0; i < numwords; i++) {
|
||||
fprintf(stdout, "%s\n", wlist[i].word);
|
||||
free(wlist[i].word);
|
||||
wlist[i].word = NULL;
|
||||
wlist[i].pallow = 0;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(wrdlst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int parse_aff_file(FILE* afflst) {
|
||||
int i, j;
|
||||
int numents = 0;
|
||||
char achar = '\0';
|
||||
short ff = 0;
|
||||
struct affent* ptr = NULL;
|
||||
struct affent* nptr = NULL;
|
||||
char* line = (char*)malloc(MAX_LN_LEN);
|
||||
|
||||
while (fgets(line, MAX_LN_LEN, afflst)) {
|
||||
mychomp(line);
|
||||
char ft = ' ';
|
||||
fprintf(stderr, "parsing line: %s\n", line);
|
||||
if (strncmp(line, "FULLSTRIP", 9) == 0)
|
||||
fullstrip = 1;
|
||||
if (strncmp(line, "PFX", 3) == 0)
|
||||
ft = 'P';
|
||||
if (strncmp(line, "SFX", 3) == 0)
|
||||
ft = 'S';
|
||||
if (ft != ' ') {
|
||||
char* tp = line;
|
||||
char* piece;
|
||||
ff = 0;
|
||||
i = 0;
|
||||
while ((piece = mystrsep(&tp, ' '))) {
|
||||
if (*piece != '\0') {
|
||||
switch (i) {
|
||||
case 0:
|
||||
break;
|
||||
case 1: {
|
||||
achar = *piece;
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
if (*piece == 'Y')
|
||||
ff = XPRODUCT;
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
numents = atoi(piece);
|
||||
if ((numents <= 0) || ((std::numeric_limits<size_t>::max() /
|
||||
sizeof(struct affent)) < static_cast<size_t>(numents))) {
|
||||
fprintf(stderr, "Error: too many entries: %d\n", numents);
|
||||
numents = 0;
|
||||
} else {
|
||||
ptr = (struct affent*)malloc(numents * sizeof(struct affent));
|
||||
ptr->achar = achar;
|
||||
ptr->xpflg = ff;
|
||||
fprintf(stderr, "parsing %c entries %d\n", achar, numents);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
free(piece);
|
||||
}
|
||||
/* now parse all of the sub entries*/
|
||||
nptr = ptr;
|
||||
for (j = 0; j < numents; j++) {
|
||||
if (!fgets(line, MAX_LN_LEN, afflst))
|
||||
return 1;
|
||||
mychomp(line);
|
||||
tp = line;
|
||||
i = 0;
|
||||
while ((piece = mystrsep(&tp, ' '))) {
|
||||
if (*piece != '\0') {
|
||||
switch (i) {
|
||||
case 0: {
|
||||
if (nptr != ptr) {
|
||||
nptr->achar = ptr->achar;
|
||||
nptr->xpflg = ptr->xpflg;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
break;
|
||||
case 2: {
|
||||
nptr->strip = mystrdup(piece);
|
||||
nptr->stripl = strlen(nptr->strip);
|
||||
if (strcmp(nptr->strip, "0") == 0) {
|
||||
free(nptr->strip);
|
||||
nptr->strip = mystrdup("");
|
||||
nptr->stripl = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
nptr->appnd = mystrdup(piece);
|
||||
nptr->appndl = strlen(nptr->appnd);
|
||||
if (strcmp(nptr->appnd, "0") == 0) {
|
||||
free(nptr->appnd);
|
||||
nptr->appnd = mystrdup("");
|
||||
nptr->appndl = 0;
|
||||
}
|
||||
if (strchr(nptr->appnd, '/')) {
|
||||
char* addseparator =
|
||||
(char*)realloc(nptr->appnd, nptr->appndl + 2);
|
||||
if (addseparator) {
|
||||
nptr->appndl++;
|
||||
addseparator[nptr->appndl - 1] = '|';
|
||||
addseparator[nptr->appndl] = '\0';
|
||||
nptr->appnd = addseparator;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
encodeit(nptr, piece);
|
||||
}
|
||||
fprintf(stderr, " affix: %s %d, strip: %s %d\n", nptr->appnd,
|
||||
nptr->appndl, nptr->strip, nptr->stripl);
|
||||
// no break
|
||||
default:
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
free(piece);
|
||||
}
|
||||
nptr++;
|
||||
}
|
||||
if (ptr) {
|
||||
if (ft == 'P') {
|
||||
ptable[numpfx].aep = ptr;
|
||||
ptable[numpfx].num = numents;
|
||||
fprintf(stderr, "ptable %d num is %d flag %c\n", numpfx,
|
||||
ptable[numpfx].num, ptr->achar);
|
||||
numpfx++;
|
||||
} else if (ft == 'S') {
|
||||
stable[numsfx].aep = ptr;
|
||||
stable[numsfx].num = numents;
|
||||
fprintf(stderr, "stable %d num is %d flag %c\n", numsfx,
|
||||
stable[numsfx].num, ptr->achar);
|
||||
numsfx++;
|
||||
}
|
||||
ptr = NULL;
|
||||
}
|
||||
nptr = NULL;
|
||||
numents = 0;
|
||||
achar = '\0';
|
||||
}
|
||||
}
|
||||
free(line);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void encodeit(struct affent* ptr, char* cs) {
|
||||
int nc;
|
||||
int neg;
|
||||
int grp;
|
||||
int n;
|
||||
int ec;
|
||||
int nm;
|
||||
int i, j, k;
|
||||
unsigned char mbr[MAX_WD_LEN];
|
||||
|
||||
/* now clear the conditions array */
|
||||
for (i = 0; i < SET_SIZE; i++)
|
||||
ptr->conds[i] = (unsigned char)0;
|
||||
|
||||
/* now parse the string to create the conds array */
|
||||
nc = strlen(cs);
|
||||
neg = 0; /* complement indicator */
|
||||
grp = 0; /* group indicator */
|
||||
n = 0; /* number of conditions */
|
||||
ec = 0; /* end condition indicator */
|
||||
nm = 0; /* number of member in group */
|
||||
i = 0;
|
||||
if (strcmp(cs, ".") == 0) {
|
||||
ptr->numconds = 0;
|
||||
return;
|
||||
}
|
||||
while (i < nc) {
|
||||
unsigned char c = *((unsigned char*)(cs + i));
|
||||
if (c == '[') {
|
||||
grp = 1;
|
||||
c = 0;
|
||||
}
|
||||
if ((grp == 1) && (c == '^')) {
|
||||
neg = 1;
|
||||
c = 0;
|
||||
}
|
||||
if (c == ']') {
|
||||
ec = 1;
|
||||
c = 0;
|
||||
}
|
||||
if ((grp == 1) && (c != 0)) {
|
||||
*(mbr + nm) = c;
|
||||
nm++;
|
||||
c = 0;
|
||||
}
|
||||
if (c != 0) {
|
||||
ec = 1;
|
||||
}
|
||||
if (ec) {
|
||||
if (grp == 1) {
|
||||
if (neg == 0) {
|
||||
for (j = 0; j < nm; j++) {
|
||||
k = (unsigned int)mbr[j];
|
||||
ptr->conds[k] = ptr->conds[k] | (1 << n);
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < SET_SIZE; j++)
|
||||
ptr->conds[j] = ptr->conds[j] | (1 << n);
|
||||
for (j = 0; j < nm; j++) {
|
||||
k = (unsigned int)mbr[j];
|
||||
ptr->conds[k] = ptr->conds[k] & ~(1 << n);
|
||||
}
|
||||
}
|
||||
neg = 0;
|
||||
grp = 0;
|
||||
nm = 0;
|
||||
} else {
|
||||
/* not a group so just set the proper bit for this char */
|
||||
/* but first handle special case of . inside condition */
|
||||
if (c == '.') {
|
||||
/* wild card character so set them all */
|
||||
for (j = 0; j < SET_SIZE; j++)
|
||||
ptr->conds[j] = ptr->conds[j] | (1 << n);
|
||||
} else {
|
||||
ptr->conds[(unsigned int)c] = ptr->conds[(unsigned int)c] | (1 << n);
|
||||
}
|
||||
}
|
||||
n++;
|
||||
ec = 0;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
ptr->numconds = n;
|
||||
return;
|
||||
}
|
||||
|
||||
/* add a prefix to word */
|
||||
void pfx_add(const char* word, int len, struct affent* ep, int num) {
|
||||
struct affent* aent;
|
||||
int cond;
|
||||
unsigned char* cp;
|
||||
int i;
|
||||
|
||||
for (aent = ep, i = num; i > 0; aent++, i--) {
|
||||
/* now make sure all conditions match */
|
||||
if ((len + fullstrip > aent->stripl) && (len >= aent->numconds) &&
|
||||
((aent->stripl == 0) ||
|
||||
(strncmp(aent->strip, word, aent->stripl) == 0))) {
|
||||
cp = (unsigned char*)word;
|
||||
for (cond = 0; cond < aent->numconds; cond++) {
|
||||
if ((aent->conds[*cp++] & (1 << cond)) == 0)
|
||||
break;
|
||||
}
|
||||
if (cond >= aent->numconds) {
|
||||
std::string tword;
|
||||
/* we have a match so add prefix */
|
||||
if (aent->appndl) {
|
||||
tword.append(aent->appnd);
|
||||
}
|
||||
tword.append(word + aent->stripl);
|
||||
|
||||
if (numwords < MAX_WORDS) {
|
||||
wlist[numwords].word = mystrdup(tword.c_str());
|
||||
wlist[numwords].pallow = 0;
|
||||
numwords++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* add a suffix to a word */
|
||||
void suf_add(const char* word, int len, struct affent* ep, int num) {
|
||||
struct affent* aent;
|
||||
int cond;
|
||||
unsigned char* cp;
|
||||
int i;
|
||||
|
||||
for (aent = ep, i = num; i > 0; aent++, i--) {
|
||||
/* if conditions hold on root word
|
||||
* then strip off strip string and add suffix
|
||||
*/
|
||||
|
||||
if ((len + fullstrip > aent->stripl) && (len >= aent->numconds) &&
|
||||
((aent->stripl == 0) ||
|
||||
(strcmp(aent->strip, word + len - aent->stripl) == 0))) {
|
||||
cp = (unsigned char*)(word + len);
|
||||
for (cond = aent->numconds; --cond >= 0;) {
|
||||
if ((aent->conds[*--cp] & (1 << cond)) == 0)
|
||||
break;
|
||||
}
|
||||
if (cond < 0) {
|
||||
/* we have a matching condition */
|
||||
std::string tword(word);
|
||||
tword.resize(len - aent->stripl);
|
||||
tword.append(aent->appnd);
|
||||
|
||||
if (numwords < MAX_WORDS) {
|
||||
wlist[numwords].word = mystrdup(tword.c_str());
|
||||
wlist[numwords].pallow = (aent->xpflg & XPRODUCT);
|
||||
numwords++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int expand_rootword(const char* ts, int wl, const char* ap) {
|
||||
int i;
|
||||
int nh = 0;
|
||||
|
||||
for (i = 0; i < numsfx; i++) {
|
||||
if (strchr(ap, (stable[i].aep)->achar)) {
|
||||
suf_add(ts, wl, stable[i].aep, stable[i].num);
|
||||
}
|
||||
}
|
||||
|
||||
nh = numwords;
|
||||
|
||||
if (nh > 1) {
|
||||
for (int j = 1; j < nh; j++) {
|
||||
if (wlist[j].pallow) {
|
||||
for (i = 0; i < numpfx; i++) {
|
||||
if (strchr(ap, (ptable[i].aep)->achar)) {
|
||||
if ((ptable[i].aep)->xpflg & XPRODUCT) {
|
||||
int nwl = strlen(wlist[j].word);
|
||||
pfx_add(wlist[j].word, nwl, ptable[i].aep, ptable[i].num);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < numpfx; i++) {
|
||||
if (strchr(ap, (ptable[i].aep)->achar)) {
|
||||
pfx_add(ts, wl, ptable[i].aep, ptable[i].num);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Split off the next token of *stringp at the single delimiter character
 * `delim` (like strsep(), but with a delimiter char instead of a string).
 *
 * Returns a freshly malloc'ed copy of the token, which the caller must
 * free(), and advances *stringp past the token and its delimiter. Adjacent
 * delimiters produce empty tokens. Returns NULL when *stringp is empty or
 * when memory cannot be allocated. The input string is not modified.
 */
char* mystrsep(char** stringp, const char delim) {
  char* start = *stringp;
  const int remaining = strlen(start);
  if (remaining <= 0)
    return NULL;

  char* hit = (char*)memchr(start, (int)((unsigned char)delim), remaining);
  if (hit) {
    /* token ends right before the delimiter; continue right after it */
    const ptrdiff_t toklen = hit - start;
    *stringp = hit + 1;
    char* token = (char*)malloc(toklen + 1);
    if (token) {
      memcpy(token, start, toklen);
      token[toklen] = '\0';
    }
    return token;
  }

  /* no delimiter left: the rest of the string is the last token */
  char* token = (char*)malloc(remaining + 1);
  if (token) {
    memcpy(token, start, remaining);
    token[remaining] = '\0';
    *stringp = start + remaining;
  }
  return token;
}
|
||||
|
||||
/* Return a malloc'ed copy of the C string `s`, to be released with free().
 * Returns NULL when `s` is NULL or when memory cannot be allocated. */
char* mystrdup(const char* s) {
  if (!s)
    return NULL;

  /* size of the copy including the terminating NUL */
  const int bytes = strlen(s) + 1;
  char* copy = (char*)malloc(bytes);
  if (!copy)
    return NULL;

  memcpy(copy, s, bytes);
  return copy;
}
|
||||
|
||||
/* Remove one trailing line terminator ("\n" or "\r\n") from `s` in place.
 *
 * A '\r' is only removed when it directly precedes the removed '\n'. The
 * previous version tested the second-to-last character unconditionally and
 * therefore truncated strings such as "a\rb" to "a".
 */
void mychomp(char* s) {
  size_t k = strlen(s);
  if ((k > 0) && (s[k - 1] == '\n')) {
    s[--k] = '\0';
    /* the newline is gone; also drop the '\r' of a CRLF pair */
    if ((k > 0) && (s[k - 1] == '\r'))
      s[k - 1] = '\0';
  }
}
|
109
3rdparty/hunspell/src/tools/unmunch.h
vendored
109
3rdparty/hunspell/src/tools/unmunch.h
vendored
@ -1,109 +0,0 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* Copyright (C) 2002-2017 Németh László
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
/* unmunch header file */

#ifndef UNMUNCH_H_
#define UNMUNCH_H_

#include <stdio.h> /* FILE, used in the parse_aff_file() prototype */

/* NOTE: this header defines (not merely declares) global variables, so it
 * must be included from exactly one translation unit. */

#define MAX_LN_LEN 200   /* size of the line buffers */
#define MAX_WD_LEN 200   /* size of the group-member buffer in encodeit() */
#define MAX_PREFIXES 256 /* capacity of ptable[] */
#define MAX_SUFFIXES 256 /* capacity of stable[] */
#define MAX_WORDS 500000 /* capacity of wlist[] */

#define ROTATE_LEN 5

/* Rotate the 32-bit value v left by q bits, in place. The macro keeps its
 * historical trailing semicolon so that existing uses still compile; q is
 * now parenthesised everywhere so that an expression argument such as
 * `a + b` is handled correctly. */
#define ROTATE(v, q) \
  (v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q)) - 1));

#define SET_SIZE 256 /* number of cells in affent.conds, one per byte value */

#define XPRODUCT (1 << 0) /* affent.xpflg bit: cross product allowed */

/* the affix table entry */

struct affent {
  char* appnd;          /* string to add */
  char* strip;          /* string to remove before adding */
  short appndl;         /* length of appnd */
  short stripl;         /* length of strip */
  char achar;           /* flag character of the affix class */
  char xpflg;           /* XPRODUCT or 0 */
  short numconds;       /* number of condition positions */
  char conds[SET_SIZE]; /* per byte value: bit n set = accepted at position n */
};

struct affixptr {
  struct affent* aep; /* first entry of the class */
  int num;            /* number of entries in the class */
};

/* the prefix and suffix table */
int numpfx; /* Number of prefixes in table */
int numsfx; /* Number of suffixes in table */

/* the prefix table */
struct affixptr ptable[MAX_PREFIXES];

/* the suffix table */
struct affixptr stable[MAX_SUFFIXES];

int fullstrip; /* set to 1 when the affix file contains FULLSTRIP */

int numwords; /* number of words found */
struct dwords {
  char* word; /* malloc'ed word form */
  int pallow; /* non-zero when prefixes may be combined with this form */
};

struct dwords wlist[MAX_WORDS]; /* list words found */

/* the routines */

int parse_aff_file(FILE* afflst);

void encodeit(struct affent* ptr, char* cs);

int expand_rootword(const char*, int, const char*);

void pfx_add(const char* word, int len, struct affent* ep, int num);

void suf_add(const char* word, int len, struct affent* ep, int num);

char* mystrsep(char** stringp, const char delim);

char* mystrdup(const char* s);

void mychomp(char* s);

#endif /* UNMUNCH_H_ */
|
35
3rdparty/hunspell/src/tools/wordforms
vendored
35
3rdparty/hunspell/src/tools/wordforms
vendored
@ -1,35 +0,0 @@
|
||||
#!/bin/sh
# wordforms: build candidate forms of a word from the SFX/PFX entries of an
# affix file and pipe them through hunspell, using a dictionary that holds
# only the matching entries of the query word.
#
# Usage: wordforms [-s | -p] dictionary.aff dictionary.dic word

usage() {
    echo "Usage: wordforms [-s | -p] dictionary.aff dictionary.dic word
-s: print only suffixed forms
-p: print only prefixed forms
"
    exit 1
}

case $# in
0|1|2) usage;;
esac

# fx selects what is generated: 0 = everything, 1 = suffixes, 2 = prefixes.
fx=0
case $1 in
-s) fx=1; shift;;
-p) fx=2; shift;;
esac

# An option may have been shifted away; all three operands must still remain.
[ $# -ge 3 ] || usage

aff=$1
dic=$2
word=$3

# The symlink target must be absolute; only prepend $PWD to relative paths.
case $aff in
/*) ;;
*) aff=$PWD/$aff;;
esac

# Private scratch directory instead of fixed /tmp/wordforms.* names, so that
# concurrent runs do not clobber each other and pre-planted symlinks in /tmp
# cannot be followed.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/wordforms.XXXXXX") || exit 1
trap 'rm -rf "$tmpdir"' EXIT
trap 'exit 1' HUP INT TERM

# hunspell -d takes a base name, so .aff and .dic must share one.
ln -s "$aff" "$tmpdir/wordforms.aff"

# prepared dic only with the query word
# (note: the word is used as a regular expression by grep)
echo 1 >"$tmpdir/wordforms.dic"
grep "^$word/" "$dic" >>"$tmpdir/wordforms.dic"

# awk reads the affix file first (collecting strip lengths and appended
# strings per SFX/PFX rule), then the query word from stdin ("-").
printf '%s\n' "$word" | awk -v "fx=$fx" '
fx!=2 && FILENAME!="-" && /^SFX/ && NF > 4{split($4,a,"/");clen=($3=="0") ? 0 : length($3);sfx[a[1],clen]=a[1];sfxc[a[1],clen]=clen;next}
fx!=1 && FILENAME!="-" && /^PFX/ && NF > 4{split($4,a,"/");clen=($3=="0") ? 0 : length($3);pfx[a[1],clen]=a[1];pfxc[a[1],clen]=clen;next}
FILENAME=="-"{
wlen=length($1)
if (fx==0 || fx==2) {
for (j in pfx) {if (wlen<=pfxc[j]) continue; print (pfx[j]=="0" ? "" : pfx[j]) substr($1, pfxc[j]+1)}
}
if (fx==0 || fx==1) {
for(i in sfx){clen=sfxc[i];if (wlen<=clen) continue; print substr($1, 1, wlen-clen) (sfx[i]=="0" ? "": sfx[i]) }
}
if (fx==0) {
for (j in pfx) {if (wlen<=pfxc[j]) continue;
for(i in sfx){clen=sfxc[i];if (wlen<=clen || wlen <= (clen + pfxc[j]))continue;
print (pfx[j]=="0" ? "" : pfx[j]) substr($1, pfxc[j]+1, wlen-clen-pfxc[j]) (sfx[i]=="0" ? "": sfx[i]) }}
}
}
' "$tmpdir/wordforms.aff" - | hunspell -d "$tmpdir/wordforms" -G -l
|
37
3rdparty/hunspell/src/tools/wordlist2hunspell
vendored
37
3rdparty/hunspell/src/tools/wordlist2hunspell
vendored
@ -1,37 +0,0 @@
|
||||
#!/bin/sh
#
# (C) 2008 Caolán McNamara <caolanm@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# This creates a LANG_TERRITORY .aff & .dic from a wordlist.
# It is only a simple wordlist spellchecking dictionary output, no
# knowledge of language rules can be extrapolated to shrink the
# wordlist or provide .aff rules for extending wordstems

if [ $# -lt 2 ]; then
    echo "Usage: wordlist2hunspell wordlist_file locale"
    echo "e.g. wordlist2hunspell breton.words br_FR to create br_FR.dic and br_FR.aff in cwd"
    exit 1
fi

wordlist=$1
locale=$2

# Fail before truncating any output file if the input cannot be read.
if [ ! -r "$wordlist" ]; then
    echo "wordlist2hunspell: cannot read $wordlist" >&2
    exit 1
fi

export LANG="$locale.utf8"

echo "# A basic .aff for a raw wordlist, created through wordlist2hunspell" > "$locale.aff"
echo SET UTF-8 >> "$locale.aff"

#see https://bugzilla.redhat.com/show_bug.cgi?id=462184 for the "C" hacks
# Split the list into one character per line, count each character and
# join them in descending order of frequency.
try=$(sed 's/./&\n/g' "$wordlist" | sed '/^$/d' | LC_ALL=C sort -n | LC_ALL=C uniq -c | LC_ALL=C sort -rn | tr -s ' ' | cut -d ' ' -f 3 | tr -d '\n')
# Quoted on purpose: characters such as * or ? must not be glob-expanded.
printf 'TRY %s\n' "$try" >> "$locale.aff"

# First line of the .dic is the number of non-empty entries, then the
# sorted entries themselves.
sed '/^$/d' "$wordlist" | wc -l > "$locale.dic"
LC_ALL=C sort "$wordlist" | sed '/^$/d' >> "$locale.dic"

echo "Basic $locale.dic and $locale.aff created"
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user