![Uwe Rathmann](/assets/img/avatar_default.png)
The 3rdparty files are now compiled as part of the corresponding input method, so that the project files can be written without using platform-specific linker flags.
833 lines
22 KiB
C++
833 lines
22 KiB
C++
/*
|
|
* Copyright (C) 2009 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include "../include/dictdef.h"
|
|
|
|
#ifdef _WIN32
|
|
#define snprintf _snprintf
|
|
#endif
|
|
|
|
#ifdef ___BUILD_MODEL___
|
|
#include "../include/spellingtable.h"
|
|
#endif
|
|
|
|
#include "../include/spellingtrie.h"
|
|
|
|
namespace ime_pinyin {
|
|
|
|
SpellingTrie* SpellingTrie::instance_ = NULL;
|
|
|
|
// z/c/s is for Zh/Ch/Sh
|
|
const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
|
|
"0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
|
|
|
|
// Bit 0 : is it a Shengmu char?
|
|
// Bit 1 : is it a Yunmu char? (one char is a Yunmu)
|
|
// Bit 2 : is it enabled in ShouZiMu(first char) mode?
|
|
unsigned char SpellingTrie::char_flags_[] = {
|
|
// a b c d e f g
|
|
0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
|
|
// h i j k l m n
|
|
0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
|
|
// o p q r s t
|
|
0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
|
|
// u v w x y z
|
|
0x00, 0x00, 0x01, 0x01, 0x01, 0x01
|
|
};
|
|
|
|
// qsort()-compatible comparator: orders two spelling items by strcmp() on
// their NUL-terminated text.
int compare_spl(const void* p1, const void* p2) {
  const char* lhs = static_cast<const char*>(p1);
  const char* rhs = static_cast<const char*>(p2);
  return strcmp(lhs, rhs);
}
|
|
|
|
// Creates an empty trie: every owned buffer and node pointer starts as NULL
// and all ShouZiMu (first-char) flags are switched on.
SpellingTrie::SpellingTrie() {
  // Spelling table storage.
  spelling_buf_ = NULL;
  spelling_num_ = 0;
  spelling_size_ = 0;

  // Scratch buffers handed out by the get_spelling_str*() queries.
  splstr_queried_ = NULL;
  splstr16_queried_ = NULL;

  // Trie nodes.
  root_ = NULL;
  dumb_node_ = NULL;
  splitter_node_ = NULL;

  // Lookup tables built by construct().
  f2h_ = NULL;
  ym_buf_ = NULL;
  spl_ym_ids_ = NULL;

  // Note: this resets the static singleton pointer on every construction.
  instance_ = NULL;

  szm_enable_shm(true);
  szm_enable_ym(true);

#ifdef ___BUILD_MODEL___
  node_num_ = 0;
#endif
}
|
|
|
|
// Releases every buffer and trie node owned by this object.
SpellingTrie::~SpellingTrie() {
  // Array buffers (all allocated with new[]). Deleting a null pointer is a
  // no-op, so no explicit NULL checks are required.
  delete [] spelling_buf_;
  delete [] splstr_queried_;
  delete [] splstr16_queried_;
  delete [] spl_ym_ids_;
  delete [] ym_buf_;
  delete [] f2h_;

  if (NULL != root_) {
    free_son_trie(root_);
    delete root_;
  }

  // dumb_node_ and splitter_node_ are single objects created with scalar
  // new in construct(), so they must be released with scalar delete
  // (delete [] on them is undefined behavior).
  delete dumb_node_;
  delete splitter_node_;

  // If this object is the singleton, just forget the pointer. Calling
  // "delete instance_" here would re-enter this destructor on the same
  // object (endless recursion / double free).
  if (this == instance_)
    instance_ = NULL;
}
|
|
|
|
// Checks whether *splid is usable and, where needed, rewrites it.
// - NULL pointer or id 0: invalid, returns false.
// - Full ids (>= kFullSplIdStart) and the Zh/Ch/Sh half ids: valid as is.
// - Other half ids: valid if the char is enabled in ShouZiMu mode; otherwise,
//   if the char is a Yunmu, *splid is replaced by the first full id of that
//   half id and true is returned.
// Returns false in every other case.
bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
  if (NULL == splid)
    return false;

  const uint16 id = *splid;
  if (0 == id)
    return false;
  if (id >= kFullSplIdStart)
    return true;

  const char ch = kHalfId2Sc_[id];
  // Chars above 'Z' are the lower-case markers for Zh/Ch/Sh.
  if (ch > 'Z')
    return true;
  if (szm_is_enabled(ch))
    return true;

  if (is_yunmu_char(ch)) {
    assert(h2f_num_[id] > 0);
    *splid = h2f_start_[id];
    return true;
  }
  return false;
}
|
|
|
|
// A half id is any non-zero id below kFullSplIdStart.
bool SpellingTrie::is_half_id(uint16 splid) const {
  return 0 != splid && splid < kFullSplIdStart;
}
|
|
|
|
// A full id lies in [kFullSplIdStart, kFullSplIdStart + spelling_num_).
bool SpellingTrie::is_full_id(uint16 splid) const {
  return splid >= kFullSplIdStart &&
         splid < kFullSplIdStart + spelling_num_;
}
|
|
|
|
// Returns true if the full spelling id full_id can be matched by the half
// spelling id half_id: either full_id maps exactly to half_id, or full_id
// maps to Zh/Ch/Sh and half_id is the corresponding Z/C/S.
bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
  uint16 half_fr_full = full_to_half(full_id);

  if (half_fr_full == half_id)
    return true;

  // half_id indexes kHalfId2Sc_ below; reject anything that is not a half id
  // so an out-of-range value cannot read past the table.
  if (half_id >= kFullSplIdStart)
    return false;

  // &~0x20 is used to convert the char to upper case, so that
  // Zh/Ch/Sh (whose chars are z/c/s) can be matched with Z/C/S.
  char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
  char ch_h = kHalfId2Sc_[half_id];
  return ch_f == ch_h;
}
|
|
|
|
// Returns true if splid is a half id whose char is flagged as a Yunmu.
bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
  const bool is_half = (splid > 0) && (splid < kFullSplIdStart);
  if (!is_half)
    return false;

  const char ch = kHalfId2Sc_[splid];
  // Lower-case chars denote Zh/Ch/Sh, which are never Yunmu.
  if (ch >= 'a')
    return false;

  return 0 != (char_flags_[ch - 'A'] & kHalfIdYunmuMask);
}
|
|
|
|
bool SpellingTrie::is_shengmu_char(char ch) const {
|
|
return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
|
|
}
|
|
|
|
bool SpellingTrie::is_yunmu_char(char ch) const {
|
|
return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
|
|
}
|
|
|
|
bool SpellingTrie::is_szm_char(char ch) const {
|
|
return is_shengmu_char(ch) || is_yunmu_char(ch);
|
|
}
|
|
|
|
bool SpellingTrie::szm_is_enabled(char ch) const {
|
|
return char_flags_[ch - 'A'] & kHalfIdSzmMask;
|
|
}
|
|
|
|
void SpellingTrie::szm_enable_shm(bool enable) {
|
|
if (enable) {
|
|
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
|
if (is_shengmu_char(ch))
|
|
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
|
|
}
|
|
} else {
|
|
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
|
if (is_shengmu_char(ch))
|
|
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
|
|
}
|
|
}
|
|
}
|
|
|
|
void SpellingTrie::szm_enable_ym(bool enable) {
|
|
if (enable) {
|
|
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
|
if (is_yunmu_char(ch))
|
|
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
|
|
}
|
|
} else {
|
|
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
|
if (is_yunmu_char(ch))
|
|
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool SpellingTrie::is_szm_enabled(char ch) const {
|
|
return char_flags_[ch - 'A'] & kHalfIdSzmMask;
|
|
}
|
|
|
|
// Returns the singleton as a pointer-to-const, creating it if necessary.
const SpellingTrie* SpellingTrie::get_cpinstance() {
  SpellingTrie& trie = get_instance();
  return &trie;
}
|
|
|
|
// Returns the singleton, allocating it on the first call.
SpellingTrie& SpellingTrie::get_instance() {
  if (NULL != instance_)
    return *instance_;

  instance_ = new SpellingTrie();
  return *instance_;
}
|
|
|
|
// Returns how many full ids belong to half_id, or 0 if the trie has not been
// built or half_id is not below kFullSplIdStart.
uint16 SpellingTrie::half2full_num(uint16 half_id) const {
  const bool usable = (NULL != root_) && (half_id < kFullSplIdStart);
  if (!usable)
    return 0;
  return h2f_num_[half_id];
}
|
|
|
|
// Looks up the range of full ids covered by half_id.
// On success stores the first full id in *spl_id_start and returns the number
// of full ids. Returns 0 (leaving *spl_id_start untouched) if spl_id_start is
// NULL, the trie has not been built, or half_id is not below kFullSplIdStart.
uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
  if (NULL == spl_id_start)
    return 0;
  if (NULL == root_)
    return 0;
  if (half_id >= kFullSplIdStart)
    return 0;

  *spl_id_start = h2f_start_[half_id];
  return h2f_num_[half_id];
}
|
|
|
|
// Maps a full spelling id to its half id.
// Returns 0 if the trie or the mapping table has not been built, or if
// full_id is outside [kFullSplIdStart, kFullSplIdStart + spelling_num_).
uint16 SpellingTrie::full_to_half(uint16 full_id) const {
  if (NULL == root_ || NULL == f2h_)
    return 0;

  // f2h_ holds exactly spelling_num_ entries, so the upper bound is
  // exclusive (same range as is_full_id()).
  if (full_id < kFullSplIdStart ||
      full_id >= spelling_num_ + kFullSplIdStart)
    return 0;

  return f2h_[full_id - kFullSplIdStart];
}
|
|
|
|
// Recursively frees all descendants of node. The node itself is not freed,
// and its first_son pointer is left as is.
void SpellingTrie::free_son_trie(SpellingNode* node) {
  if (NULL == node)
    return;

  SpellingNode* const sons = node->first_son;
  const size_t son_count = node->num_of_son;

  // Free the grandchildren first, then the array of sons.
  for (size_t idx = 0; idx < son_count; idx++)
    free_son_trie(sons + idx);

  if (NULL != sons)
    delete [] sons;
}
|
|
|
|
bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
|
|
size_t item_num, float score_amplifier,
|
|
unsigned char average_score) {
|
|
if (spelling_arr == NULL)
|
|
return false;
|
|
|
|
memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
|
|
memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
|
|
|
|
// If the arr is the same as the buf, means this function is called by
|
|
// load_table(), the table data are ready; otherwise the array should be
|
|
// saved.
|
|
if (spelling_arr != spelling_buf_) {
|
|
if (NULL != spelling_buf_)
|
|
delete [] spelling_buf_;
|
|
spelling_buf_ = new char[item_size * item_num];
|
|
if (NULL == spelling_buf_)
|
|
return false;
|
|
memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
|
|
}
|
|
|
|
spelling_size_ = item_size;
|
|
spelling_num_ = item_num;
|
|
|
|
score_amplifier_ = score_amplifier;
|
|
average_score_ = average_score;
|
|
|
|
if (NULL != splstr_queried_)
|
|
delete [] splstr_queried_;
|
|
splstr_queried_ = new char[spelling_size_];
|
|
if (NULL == splstr_queried_)
|
|
return false;
|
|
|
|
if (NULL != splstr16_queried_)
|
|
delete [] splstr16_queried_;
|
|
splstr16_queried_ = new char16[spelling_size_];
|
|
if (NULL == splstr16_queried_)
|
|
return false;
|
|
|
|
// First, sort the buf to ensure they are in ascendant order
|
|
qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
|
|
|
|
#ifdef ___BUILD_MODEL___
|
|
node_num_ = 1;
|
|
#endif
|
|
|
|
root_ = new SpellingNode();
|
|
memset(root_, 0, sizeof(SpellingNode));
|
|
|
|
dumb_node_ = new SpellingNode();
|
|
memset(dumb_node_, 0, sizeof(SpellingNode));
|
|
dumb_node_->score = average_score_;
|
|
|
|
splitter_node_ = new SpellingNode();
|
|
memset(splitter_node_, 0, sizeof(SpellingNode));
|
|
splitter_node_->score = average_score_;
|
|
|
|
memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);
|
|
|
|
root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
|
|
|
|
// Root's score should be cleared.
|
|
root_->score = 0;
|
|
|
|
if (NULL == root_->first_son)
|
|
return false;
|
|
|
|
h2f_start_[0] = h2f_num_[0] = 0;
|
|
|
|
if (!build_f2h())
|
|
return false;
|
|
|
|
#ifdef ___BUILD_MODEL___
|
|
if (kPrintDebug0) {
|
|
printf("---SpellingTrie Nodes: %d\n", (int)node_num_);
|
|
}
|
|
return build_ym_info();
|
|
#else
|
|
return true;
|
|
#endif
|
|
}
|
|
|
|
#ifdef ___BUILD_MODEL___
|
|
const char* SpellingTrie::get_ym_str(const char *spl_str) {
|
|
bool start_ZCS = false;
|
|
if (is_shengmu_char(*spl_str)) {
|
|
if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
|
|
start_ZCS = true;
|
|
spl_str += 1;
|
|
if (start_ZCS && 'h' == *spl_str)
|
|
spl_str += 1;
|
|
}
|
|
return spl_str;
|
|
}
|
|
|
|
bool SpellingTrie::build_ym_info() {
|
|
bool sucess;
|
|
SpellingTable *spl_table = new SpellingTable();
|
|
|
|
sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
|
|
assert(sucess);
|
|
|
|
for (uint16 pos = 0; pos < spelling_num_; pos++) {
|
|
const char *spl_str = spelling_buf_ + spelling_size_ * pos;
|
|
spl_str = get_ym_str(spl_str);
|
|
if ('\0' != spl_str[0]) {
|
|
sucess = spl_table->put_spelling(spl_str, 0);
|
|
assert(sucess);
|
|
}
|
|
}
|
|
|
|
size_t ym_item_size; // '\0' is included
|
|
size_t ym_num;
|
|
const char* ym_buf;
|
|
ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
|
|
|
|
if (NULL != ym_buf_)
|
|
delete [] ym_buf_;
|
|
ym_buf_ = new char[ym_item_size * ym_num];
|
|
if (NULL == ym_buf_) {
|
|
delete spl_table;
|
|
return false;
|
|
}
|
|
|
|
memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
|
|
ym_size_ = ym_item_size;
|
|
ym_num_ = ym_num;
|
|
|
|
delete spl_table;
|
|
|
|
// Generate the maping from the spelling ids to the Yunmu ids.
|
|
if (spl_ym_ids_)
|
|
delete spl_ym_ids_;
|
|
spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
|
|
if (NULL == spl_ym_ids_)
|
|
return false;
|
|
|
|
memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
|
|
|
|
for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
|
|
const char *str = get_spelling_str(id);
|
|
|
|
str = get_ym_str(str);
|
|
if ('\0' != str[0]) {
|
|
uint8 ym_id = get_ym_id(str);
|
|
spl_ym_ids_[id] = ym_id;
|
|
assert(ym_id > 0);
|
|
} else {
|
|
spl_ym_ids_[id] = 0;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
// Builds one level of the trie from the sorted spelling items
// [item_start, item_end), which all share the same first `level` chars.
//
// One son node is created for each distinct char found at position `level`,
// and the sub-trie below each son is built recursively. While doing so the
// half-id tables h2f_start_/h2f_num_ are filled: at level 0 for chars that
// can be a ShouZiMu, and at level 1 for the 'h' of Ch/Sh/Zh. At level 0 the
// quick-lookup table level1_sons_ is filled as well.
//
// On return parent->num_of_son is the number of sons and parent->score the
// minimum score among them. Returns the newly allocated array of sons, or
// NULL if level is not below spelling_size_, the range is empty, or parent is
// NULL (parent is left untouched in that case).
SpellingNode* SpellingTrie::construct_spellings_subset(
    size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
  if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
    return NULL;

  SpellingNode *first_son = NULL;
  uint16 num_of_son = 0;
  unsigned char min_son_score = 255;

  const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
  char char_for_node = spelling_last_start[level];
  assert((char_for_node >= 'A' && char_for_node <= 'Z') ||
         'h' == char_for_node);

  // Pass 1: scan the array to find how many sons there are, i.e. how many
  // times the char at `level` changes (plus one).
  for (size_t i = item_start + 1; i < item_end; i++) {
    const char *spelling_current = spelling_buf_ + spelling_size_ * i;
    char char_current = spelling_current[level];
    if (char_current != char_for_node) {
      num_of_son++;
      char_for_node = char_current;
    }
  }
  num_of_son++;

  // Allocate memory
#ifdef ___BUILD_MODEL___
  node_num_ += num_of_son;
#endif
  first_son = new SpellingNode[num_of_son];
  memset(first_son, 0, sizeof(SpellingNode)*num_of_son);

  // Pass 2: construct the nodes.
  size_t son_pos = 0;

  spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
  char_for_node = spelling_last_start[level];

  // True when the first item of the current run ends exactly at this level,
  // i.e. the node itself is a complete spelling.
  bool spelling_endable = ('\0' == spelling_last_start[level + 1]);

  size_t item_start_next = item_start;

  // i walks one step past the last item so that the final run of items is
  // emitted by the same code as every other run.
  for (size_t i = item_start + 1; i <= item_end; i++) {
    const bool past_last_item = (i == item_end);
    const char *spelling_current = NULL;
    char char_current = '\0';

    if (!past_last_item) {
      spelling_current = spelling_buf_ + spelling_size_ * i;
      char_current = spelling_current[level];
      assert(is_valid_spl_char(char_current));

      // Still inside the run of items sharing char_for_node.
      if (char_current == char_for_node)
        continue;
    }

    // Items [item_start_next, i) share char_for_node: construct their node.
    SpellingNode *node_current = first_son + son_pos;
    node_current->char_this_node = char_for_node;

    // For quick search in the first level
    if (0 == level)
      level1_sons_[char_for_node - 'A'] = node_current;

    if (spelling_endable) {
      node_current->spelling_idx = kFullSplIdStart + item_start_next;
    }

    if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
      // The run contains longer spellings: build the sub-trie. If the first
      // item ends here it belongs to this node, not to the sub-trie.
      size_t real_start = item_start_next;
      if (spelling_last_start[level + 1] == '\0')
        real_start++;

      node_current->first_son =
          construct_spellings_subset(real_start, i, level + 1,
                                     node_current);

      if (real_start == item_start_next + 1) {
        // The last byte of an item holds its score.
        uint16 score_this = static_cast<unsigned char>(
            spelling_last_start[spelling_size_ - 1]);
        if (score_this < node_current->score)
          node_current->score = score_this;
      }
    } else {
      // A single item that ends here: a leaf.
      node_current->first_son = NULL;
      node_current->score = static_cast<unsigned char>(
          spelling_last_start[spelling_size_ - 1]);
    }

    if (node_current->score < min_son_score)
      min_son_score = node_current->score;

    // Half-id bookkeeping. is_szm_char() is used for every sibling,
    // including the last one, so the tables built here do not depend on
    // which ShouZiMu chars happen to be enabled at construction time.
    bool is_half = false;
    if (level == 0 && is_szm_char(char_for_node)) {
      node_current->spelling_idx =
          static_cast<uint16>(char_for_node - 'A' + 1);

      // Skip the id slots reserved for Ch and Sh.
      if (char_for_node > 'C')
        node_current->spelling_idx++;
      if (char_for_node > 'S')
        node_current->spelling_idx++;

      h2f_num_[node_current->spelling_idx] = i - item_start_next;
      is_half = true;
    } else if (level == 1 && char_for_node == 'h') {
      char ch_level0 = spelling_last_start[0];
      uint16 part_id = 0;
      if (ch_level0 == 'C')
        part_id = 'C' - 'A' + 1 + 1;
      else if (ch_level0 == 'S')
        part_id = 'S' - 'A' + 1 + 2;
      else if (ch_level0 == 'Z')
        part_id = 'Z' - 'A' + 1 + 3;
      if (0 != part_id) {
        node_current->spelling_idx = part_id;
        h2f_num_[node_current->spelling_idx] = i - item_start_next;
        is_half = true;
      }
    }

    if (is_half) {
      if (h2f_num_[node_current->spelling_idx] > 0)
        h2f_start_[node_current->spelling_idx] =
            item_start_next + kFullSplIdStart;
      else
        h2f_start_[node_current->spelling_idx] = 0;
    }

    if (past_last_item)
      break;

    // for next sibling
    spelling_last_start = spelling_current;
    char_for_node = char_current;
    item_start_next = i;
    spelling_endable = ('\0' == spelling_current[level + 1]);

    son_pos++;
  }

  assert(son_pos + 1 == num_of_son);

  parent->num_of_son = num_of_son;
  parent->score = min_son_score;
  return first_son;
}
|
|
|
|
bool SpellingTrie::save_spl_trie(FILE *fp) {
|
|
if (NULL == fp || NULL == spelling_buf_)
|
|
return false;
|
|
|
|
if (fwrite(&spelling_size_, sizeof(uint32), 1, fp) != 1)
|
|
return false;
|
|
|
|
if (fwrite(&spelling_num_, sizeof(uint32), 1, fp) != 1)
|
|
return false;
|
|
|
|
if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
|
|
return false;
|
|
|
|
if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
|
|
return false;
|
|
|
|
if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
|
|
spelling_num_, fp) != spelling_num_)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool SpellingTrie::load_spl_trie(FILE *fp) {
|
|
if (NULL == fp)
|
|
return false;
|
|
|
|
if (fread(&spelling_size_, sizeof(uint32), 1, fp) != 1)
|
|
return false;
|
|
|
|
if (fread(&spelling_num_, sizeof(uint32), 1, fp) != 1)
|
|
return false;
|
|
|
|
if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
|
|
return false;
|
|
|
|
if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
|
|
return false;
|
|
|
|
if (NULL != spelling_buf_)
|
|
delete [] spelling_buf_;
|
|
|
|
spelling_buf_ = new char[spelling_size_ * spelling_num_];
|
|
if (NULL == spelling_buf_)
|
|
return false;
|
|
|
|
if (fread(spelling_buf_, sizeof(char) * spelling_size_,
|
|
spelling_num_, fp) != spelling_num_)
|
|
return false;
|
|
|
|
return construct(spelling_buf_, spelling_size_, spelling_num_,
|
|
score_amplifier_, average_score_);
|
|
}
|
|
|
|
bool SpellingTrie::build_f2h() {
|
|
if (NULL != f2h_)
|
|
delete [] f2h_;
|
|
f2h_ = new uint16[spelling_num_];
|
|
if (NULL == f2h_)
|
|
return false;
|
|
|
|
for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
|
|
for (uint16 fid = h2f_start_[hid];
|
|
fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
|
|
f2h_[fid - kFullSplIdStart] = hid;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
size_t SpellingTrie::get_spelling_num() {
|
|
return spelling_num_;
|
|
}
|
|
|
|
// Linear search for ym_str in the Yunmu table.
// Returns the 1-based position of the matching entry, or 0 if ym_str is
// NULL, the table has not been built, or there is no match.
uint8 SpellingTrie::get_ym_id(const char *ym_str) {
  if (NULL == ym_str)
    return 0;
  if (NULL == ym_buf_)
    return 0;

  for (uint8 pos = 0; pos < ym_num_; pos++) {
    const char *entry = ym_buf_ + ym_size_ * pos;
    if (0 == strcmp(entry, ym_str))
      return pos + 1;
  }

  return 0;
}
|
|
|
|
// Returns the spelling string of splid. The result lives in the internal
// buffer splstr_queried_, so it is overwritten by the next call.
// Full ids yield the stored spelling; the half ids of Ch/Sh/Zh yield
// "Ch"/"Sh"/"Zh"; any other half id yields its single letter.
const char* SpellingTrie::get_spelling_str(uint16 splid) {
  splstr_queried_[0] = '\0';

  if (splid >= kFullSplIdStart) {
    const uint16 full_idx = splid - kFullSplIdStart;
    snprintf(splstr_queried_, spelling_size_, "%s",
             spelling_buf_ + full_idx * spelling_size_);
    return splstr_queried_;
  }

  // Half ids: pick out the two-letter Shengmu first.
  const char *two_letter = NULL;
  switch (splid) {
    case 'C' - 'A' + 1 + 1:
      two_letter = "Ch";
      break;
    case 'S' - 'A' + 1 + 2:
      two_letter = "Sh";
      break;
    case 'Z' - 'A' + 1 + 3:
      two_letter = "Zh";
      break;
    default:
      break;
  }

  if (NULL != two_letter) {
    snprintf(splstr_queried_, spelling_size_, "%s", two_letter);
  } else {
    // Undo the id slots taken by Ch and Sh to get back to the letter.
    if (splid > 'C' - 'A' + 1)
      splid--;
    if (splid > 'S' - 'A' + 1)
      splid--;
    splstr_queried_[0] = 'A' + splid - 1;
    splstr_queried_[1] = '\0';
  }
  return splstr_queried_;
}
|
|
|
|
// 16-bit variant of get_spelling_str(): returns the spelling of splid in the
// internal buffer splstr16_queried_, which the next call overwrites.
// For a full id the whole item (spelling_size_ chars) is copied.
const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
  splstr16_queried_[0] = '\0';

  if (splid >= kFullSplIdStart) {
    const uint16 full_idx = splid - kFullSplIdStart;
    for (size_t pos = 0; pos < spelling_size_; pos++) {
      splstr16_queried_[pos] = static_cast<char16>
          (spelling_buf_[full_idx * spelling_size_ + pos]);
    }
    return splstr16_queried_;
  }

  // Half ids: Ch/Sh/Zh are written as their letter followed by 'h'.
  char two_letter_lead = '\0';
  switch (splid) {
    case 'C' - 'A' + 1 + 1:
      two_letter_lead = 'C';
      break;
    case 'S' - 'A' + 1 + 2:
      two_letter_lead = 'S';
      break;
    case 'Z' - 'A' + 1 + 3:
      two_letter_lead = 'Z';
      break;
    default:
      break;
  }

  if ('\0' != two_letter_lead) {
    splstr16_queried_[0] = static_cast<char16>(two_letter_lead);
    splstr16_queried_[1] = static_cast<char16>('h');
    splstr16_queried_[2] = static_cast<char16>('\0');
  } else {
    // Undo the id slots taken by Ch and Sh to get back to the letter.
    if (splid > 'C' - 'A' + 1)
      splid--;
    if (splid > 'S' - 'A' + 1)
      splid--;
    splstr16_queried_[0] = 'A' + splid - 1;
    splstr16_queried_[1] = '\0';
  }
  return splstr16_queried_;
}
|
|
|
|
// Writes the NUL-terminated 16-bit spelling of splid into the caller's buffer
// splstr16 and returns its length (terminator excluded).
// Returns 0 if splstr16 is NULL or splstr16_len is below kMaxPinyinSize + 1,
// and also if a full spelling has no terminator within kMaxPinyinSize + 1
// chars.
size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
                                        size_t splstr16_len) {
  if (NULL == splstr16)
    return 0;
  if (splstr16_len < kMaxPinyinSize + 1)
    return 0;

  if (splid >= kFullSplIdStart) {
    const uint16 full_idx = splid - kFullSplIdStart;
    for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) {
      splstr16[pos] = static_cast<char16>
          (spelling_buf_[full_idx * spelling_size_ + pos]);
      if (static_cast<char16>('\0') == splstr16[pos])
        return pos;
    }
    // No terminator found (not expected for well-formed data).
    return 0;
  }

  // Half ids: Ch/Sh/Zh are written as their letter followed by 'h'.
  char two_letter_lead = '\0';
  switch (splid) {
    case 'C' - 'A' + 1 + 1:
      two_letter_lead = 'C';
      break;
    case 'S' - 'A' + 1 + 2:
      two_letter_lead = 'S';
      break;
    case 'Z' - 'A' + 1 + 3:
      two_letter_lead = 'Z';
      break;
    default:
      break;
  }

  if ('\0' != two_letter_lead) {
    splstr16[0] = static_cast<char16>(two_letter_lead);
    splstr16[1] = static_cast<char16>('h');
    splstr16[2] = static_cast<char16>('\0');
    return 2;
  }

  // Undo the id slots taken by Ch and Sh to get back to the letter.
  if (splid > 'C' - 'A' + 1)
    splid--;
  if (splid > 'S' - 'A' + 1)
    splid--;
  splstr16[0] = 'A' + splid - 1;
  splstr16[1] = '\0';
  return 1;
}
|
|
|
|
} // namespace ime_pinyin
|