qskinny/inputcontext/3rdparty/pinyin/include/userdict.h

/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef PINYINIME_INCLUDE_USERDICT_H__
#define PINYINIME_INCLUDE_USERDICT_H__

#define ___CACHE_ENABLED___
#define ___SYNC_ENABLED___
#define ___PREDICT_ENABLED___

// Debug performance for operations
// #define ___DEBUG_PERF___

#ifdef _WIN32
#include <winsock.h> // timeval
#else
#include <pthread.h>
#endif
#include "atomdictbase.h"

namespace ime_pinyin {

class UserDict : public AtomDictBase {
 public:
  UserDict();
  ~UserDict();

  // NOTE(review): load_dict() through remove_lemma() look like the dictionary
  // interface declared by AtomDictBase (atomdictbase.h); this header only
  // shows the signatures, so confirm the exact contracts there.

  // Takes the dictionary file path plus the start/end lemma ids given to this
  // dictionary. Presumably returns true on success -- confirm in the .cpp.
  bool load_dict(const char *file_name, LemmaIdType start_id,
                 LemmaIdType end_id);

  // Counterpart of load_dict(); the bool result presumably reports success.
  bool close_dict();

  // Presumably the number of lemmas currently held -- confirm in the .cpp.
  size_t number_of_lemmas();

  // Milestone bookkeeping; pairs with extend_dict(), which consumes and
  // produces MileStoneHandle values.
  void reset_milestones(uint16 from_step, MileStoneHandle from_handle);

  // lpi_items is an output array with room for lpi_max entries and *lpi_num
  // receives a count -- assumed from the parameter names, TODO confirm.
  MileStoneHandle extend_dict(MileStoneHandle from_handle,
                              const DictExtPara *dep, LmaPsbItem *lpi_items,
                              size_t lpi_max, size_t *lpi_num);

  // Lookup keyed by a spelling id string (splid_str, splid_str_len elements).
  // Presumably fills lpi_items (capacity lpi_max) and returns how many entries
  // were written -- confirm.
  size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
                  LmaPsbItem *lpi_items, size_t lpi_max);

  // str_buf is the output buffer with capacity str_max.
  // NOTE(review): the uint16 result is presumably the lemma length -- confirm.
  uint16 get_lemma_str(LemmaIdType id_lemma, char16* str_buf,
                       uint16 str_max);

  // Same shape as get_lemma_str(), but the output buffer holds spelling ids.
  // NOTE(review): meaning of arg_valid is not visible in this header.
  uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
                          uint16 splids_max, bool arg_valid);

  // last_hzs holds hzs_len characters; npre_items is an output array with room
  // for npre_max entries.
  // NOTE(review): b4_used presumably counts entries already used -- confirm.
  size_t predict(const char16 last_hzs[], uint16 hzs_len,
                 NPredictItem *npre_items, size_t npre_max,
                 size_t b4_used);

  // Full spelling ids are required
  LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
                        uint16 lemma_len, uint16 count);

  // delta_count is signed, so it can express an increase or a decrease.
  // NOTE(review): effect of 'selected' is defined in the .cpp -- confirm.
  LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
                           bool selected);

  // Maps a lemma (text plus spelling ids, lemma_len elements each) to an id.
  LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
                           uint16 lemma_len);

  // Two overloads: look the score up by lemma id, or by lemma text plus its
  // spelling ids.
  LmaScoreType get_lemma_score(LemmaIdType lemma_id);

  LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
                        uint16 lemma_len);

  bool remove_lemma(LemmaIdType lemma_id);

  size_t get_total_lemma_count();
  // NOTE(review): presumably feeds total_other_nfreq_ -- confirm in the .cpp.
  void set_total_lemma_count_of_others(size_t count);

  void flush_cache();

  // The three arguments have the same names as the limit fields of
  // UserDictInfo (limit_lemma_count, limit_lemma_size, reclaim_ratio); see the
  // comments on that struct for their meaning.
  void set_limit(uint32 max_lemma_count, uint32 max_lemma_size,
                 uint32 reclaim_ratio);

  void reclaim();

  void defragment();

  // Sync API. ___SYNC_ENABLED___ is defined unconditionally at the top of this
  // header, so this section is always compiled.
#ifdef ___SYNC_ENABLED___
  void clear_sync_lemmas(unsigned int start, unsigned int end);

  int get_sync_count();

  // Takes an explicit last-modified time (lmt) in addition to the put_lemma()
  // arguments.
  LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[],
                        uint16 lemma_len, uint16 count, uint64 lmt);
  /**
   * Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
   *
   * @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
   * @param len length of lemmas string in UTF-16LE
   * @return newly added lemma count
   */
  int put_lemmas_no_sync_from_utf16le_string(char16 * lemmas, int len);

  /**
   * Get the lemmas that need syncing as a UTF-16LE string in the format
   * described above.
   * Note: the output buffer (str) must not be too small. If it cannot hold
   *       even a single lemma, the call might loop forever.
   *
   * @param str buffer to write lemmas
   * @param size buffer size in UTF-16LE
   * @param count output value of lemma returned
   * @return UTF-16LE string length
   */
  int get_sync_lemmas_in_utf16le_string_from_beginning(
      char16 * str, int size, int * count);

#endif

  // Statistics record passed to state() through a pointer.
  // NOTE(review): presumably state() fills it in for the caller -- confirm.
  struct UserDictStat {
    uint32 version;
    // NOTE(review): ownership of this string is not visible here; assume it is
    // borrowed from the dictionary until confirmed.
    const char * file_name;
    // struct timeval comes from <winsock.h> on _WIN32; on other platforms this
    // header relies on <pthread.h> to make it visible.
    struct timeval load_time;
    struct timeval last_update;
    uint32 disk_size;
    uint32 lemma_count;
    uint32 lemma_size;
    uint32 delete_count;
    uint32 delete_size;
#ifdef ___SYNC_ENABLED___
    uint32 sync_count;
#endif
    // The three fields below share their names with the limit fields of
    // UserDictInfo.
    uint32 reclaim_ratio;
    uint32 limit_lemma_count;
    uint32 limit_lemma_size;
  };

  // NOTE(review): the bool result presumably reports whether *stat was
  // filled -- confirm in the .cpp.
  bool state(UserDictStat * stat);

 private:
  // NOTE(review): presumably set through set_total_lemma_count_of_others() --
  // confirm in the .cpp.
  uint32 total_other_nfreq_;
  // Same type and name stem as UserDictStat::load_time.
  struct timeval load_time_;
  LemmaIdType start_id_;
  uint32 version_;
  // Raw byte buffer. NOTE(review): presumably the packed lemma records shown
  // in the on-disk format diagram at the end of this class -- confirm.
  uint8 * lemmas_;

  // In-Memory-Only flag for each lemma
  static const uint8 kUserDictLemmaFlagRemove = 1;
  // Inuse lemmas' offset
  uint32 * offsets_;
  // Highest bit in offset tells whether corresponding lemma is removed.
  // The literal is unsigned on purpose: shifting a signed 1 into bit 31
  // overflows int, which is undefined behaviour before C++14 and therefore
  // not a valid constant expression on C++11 compilers.
  static const uint32 kUserDictOffsetFlagRemove = (1U << 31);
  // Maximum possible for the offset
  static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
  // Bit width for last modified time, from 1 to 16
  static const uint32 kUserDictLMTBitWidth = 16;
  // Granularity for last modified time in second (60 * 60 * 24 * 7 = one week)
  static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
  // Maximum frequency count
  static const uint16 kUserDictMaxFrequency = 0xFFFF;

// Coarse "seconds since 1970" estimate: every year counts as 365 days and
// every month as 30 days, so the result is NOT a real UTC timestamp.
// Every argument is parenthesized so that expression arguments (for example
// `base | 1`) expand with the intended precedence, and the products are
// computed in 64 bits so that dates past 2038 do not overflow a 32-bit int.
#define COARSE_UTC(year, month, day, hour, minute, second) \
  ( \
    ((year) - 1970) * 365LL * 24 * 60 * 60 + \
    ((month) - 1) * 30LL * 24 * 60 * 60 + \
    ((day) - 1) * 24LL * 60 * 60 + \
    ((hour) - 0) * 60LL * 60 + \
    ((minute) - 0) * 60LL + \
    ((second) - 0) \
  )
  // Reference point for last-modified times: 1 Jan 2009 00:00:00 expressed in
  // COARSE_UTC units (365-day years, 30-day months).
  static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);

  // Correspond to offsets_
  uint32 * scores_;
  // Following two fields are only valid in memory
  uint32 * ids_;
#ifdef ___PREDICT_ENABLED___
  // NOTE(review): presumably the "Offset by_lemma" table from the on-disk
  // format diagram at the end of this class -- confirm.
  uint32 * predicts_;
#endif
#ifdef ___SYNC_ENABLED___
  // NOTE(review): presumably the "NewAdd" table from the on-disk format
  // diagram, with sync_count_size_ as its allocated capacity -- confirm.
  uint32 * syncs_;
  size_t sync_count_size_;
#endif
  uint32 * offsets_by_id_;

  // NOTE(review): names suggest remaining pre-allocated capacity (entries and
  // bytes) -- confirm in the .cpp.
  size_t lemma_count_left_;
  size_t lemma_size_left_;

  // NOTE(review): ownership of this string is not visible in the header.
  const char * dict_file_;

  // Be sure size is 4xN: sizeof(UserDictInfo) has to stay a multiple of
  // 4 bytes. The on-disk format diagram at the end of this class lists this
  // record as the trailing "Dict Info (4x)" section, so do not reorder, resize
  // or insert fields without treating it as a file-format change.
  struct UserDictInfo {
    // When limitation reached, how much percentage will be reclaimed (1 ~ 100)
    uint32 reclaim_ratio;
    // maximum lemma count, 0 means no limitation
    uint32 limit_lemma_count;
    // Maximum lemma size, it's different from
    // whole disk file size or in-mem dict size
    // 0 means no limitation
    uint32 limit_lemma_size;
    // Total lemma count including deleted and inuse
    // Also indicate offsets_ size
    uint32 lemma_count;
    // Total size of lemmas including used and freed
    uint32 lemma_size;
    // Freed lemma count
    uint32 free_count;
    // Freed lemma size in byte
    uint32 free_size;
#ifdef ___SYNC_ENABLED___
    // Present only when ___SYNC_ENABLED___ is defined, so the struct size (and
    // with it the record described above) depends on that macro.
    uint32 sync_count;
#endif
    // Signed, unlike the other counters in this struct.
    int32 total_nfreq;
  } dict_info_;

  // NOTE(review): presumably the value of the 4-byte "Version" field that
  // opens the on-disk format (see the diagram at the end of this class).
  static const uint32 kUserDictVersion = 0x0ABCDEF0;

  // NOTE(review): names suggest allocation sizing hints (entries to
  // pre-allocate, average characters per lemma) -- confirm in the .cpp.
  static const uint32 kUserDictPreAlloc = 32;
  static const uint32 kUserDictAverageNchar = 8;

  enum UserDictState {
    // Keep in order
    // NOTE(review): the ordering requirement suggests the values are compared
    // numerically somewhere in the .cpp -- confirm before inserting new
    // states. USER_DICT_SYNC_DIRTY only exists when ___SYNC_ENABLED___ is
    // defined, which shifts the numeric value of every later enumerator.
    USER_DICT_NONE = 0,
    USER_DICT_SYNC,
#ifdef ___SYNC_ENABLED___
    USER_DICT_SYNC_DIRTY,
#endif
    USER_DICT_SCORE_DIRTY,
    USER_DICT_OFFSET_DIRTY,
    USER_DICT_LEMMA_DIRTY,

    USER_DICT_DEFRAGMENTED,
  } state_;

  // Search key built by prepare_locate() and consumed (as const) by the
  // compare/locate helpers declared further down in this class.
  struct UserDictSearchable {
    uint16 splids_len;
    // Per-position arrays with kMaxLemmaSize slots each.
    // NOTE(review): presumably each position matches the spelling id range
    // [splid_start, splid_start + splid_count) -- confirm in the .cpp.
    uint16 splid_start[kMaxLemmaSize];
    uint16 splid_count[kMaxLemmaSize];
    // Compact initial letters for both FuzzyCompareSpellId and cache system.
    // kMaxLemmaSize / 4 words of 32 bits leave one byte per position.
    uint32 signature[kMaxLemmaSize / 4];
  };

#ifdef ___CACHE_ENABLED___
  // Lookup caches. ___CACHE_ENABLED___ is defined unconditionally at the top
  // of this header, so this section is always compiled.

  // Selects which of the two caches cache_push() targets.
  enum UserDictCacheType {
    USER_DICT_CACHE,
    USER_DICT_MISS_CACHE,
  };

  static const int kUserDictCacheSize = 4;
  static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;

  // Stores signatures only (same width as UserDictSearchable::signature).
  // NOTE(review): presumably records searches that found nothing, one
  // miss_caches_ slot per lemma length -- confirm in the .cpp.
  struct UserDictMissCache {
    uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
    // NOTE(review): presumably ring-buffer indices like UserDictCache's.
    uint16 head, tail;
  } miss_caches_[kMaxLemmaSize];

  // Stores, per entry, a signature together with an offset and a length.
  struct UserDictCache {
    uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
    uint32 offsets[kUserDictCacheSize];
    uint32 lengths[kUserDictCacheSize];
    // Ring buffer
    uint16 head, tail;
  } caches_[kMaxLemmaSize];

  void cache_init();

  void cache_push(UserDictCacheType type,
                 UserDictSearchable *searchable,
                 uint32 offset, uint32 length);

  // offset and length are output parameters (pointers) here and in
  // load_cache(); save_cache() takes them by value.
  bool cache_hit(UserDictSearchable *searchable,
                 uint32 *offset, uint32 *length);

  bool load_cache(UserDictSearchable *searchable,
                  uint32 *offset, uint32 *length);

  void save_cache(UserDictSearchable *searchable,
                  uint32 offset, uint32 length);

  void reset_cache();

  bool load_miss_cache(UserDictSearchable *searchable);

  void save_miss_cache(UserDictSearchable *searchable);

  void reset_miss_cache();
#endif

  // Score helpers.
  // NOTE(review): judging by the signatures, a raw int score packs a frequency
  // and a last-modified time (lmt): build_score() combines them,
  // extract_score_freq()/extract_score_lmt() split them again, and
  // translate_score() converts to the LmaScoreType used by the public API.
  // Confirm the bit layout in the .cpp (see kUserDictLMTBitWidth).
  LmaScoreType translate_score(int f);

  int extract_score_freq(int raw_score);

  uint64 extract_score_lmt(int raw_score);

  // NOTE(review): the three functions below are declared inline but have no
  // body in this header, so they can only be called from the translation unit
  // that defines them.
  inline int build_score(uint64 lmt, int freq);

  // Number <-> UTF-16LE text conversion; s points at 16-bit code units and
  // len/size count those units -- assumed from the names, TODO confirm.
  inline int64 utf16le_atoll(uint16 *s, int len);

  inline int utf16le_lltoa(int64 v, uint16 *s, int size);

  // Underscore-prefixed internals. _put_lemma() has the same parameter list as
  // the public put_lemma_no_sync() (explicit lmt).
  LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[],
                        uint16 lemma_len, uint16 count, uint64 lmt);

  // Same parameters as the public get_lpis() plus the need_extend output flag.
  size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len,
                   LmaPsbItem *lpi_items, size_t lpi_max, bool * need_extend);

  // Same lookups as the public get_lemma_score() overloads, but returning a
  // plain int instead of LmaScoreType.
  int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);

  int _get_lemma_score(LemmaIdType lemma_id);

  // Prefix tests of a spelling id array against a prepared searchable.
  // NOTE(review): the fuzzy variant returns int while the exact variant
  // returns bool; the meaning of the int is not visible in this header.
  int is_fuzzy_prefix_spell_id(const uint16 * id1, uint16 len1,
                               const UserDictSearchable *searchable);

  bool is_prefix_spell_id(const uint16 * fullids,
                          uint16 fulllen, const UserDictSearchable *searchable);

  // NOTE(review): presumably computes the file size implied by *info (see the
  // on-disk format diagram at the end of this class) -- confirm.
  uint32 get_dict_file_size(UserDictInfo * info);

  // File-level helpers; each takes the dictionary file path.
  bool reset(const char *file);

  bool validate(const char *file);

  bool load(const char *file, LemmaIdType start_id);

  bool is_valid_state();

  bool is_valid_lemma_id(LemmaIdType id);

  LemmaIdType get_max_lemma_id();

  // Accessors addressed by an offset.
  // NOTE(review): presumably a byte offset into lemmas_ -- confirm.
  // NOTE(review): set_lemma_flag() takes uint8 but get_lemma_flag() and
  // get_lemma_nchar() return plain char, whose signedness is
  // implementation-defined; be careful when comparing the results.
  void set_lemma_flag(uint32 offset, uint8 flag);

  char get_lemma_flag(uint32 offset);

  char get_lemma_nchar(uint32 offset);

  uint16 * get_lemma_spell_ids(uint32 offset);

  uint16 * get_lemma_word(uint32 offset);

  // Prepare searchable to speed up the locate process
  void prepare_locate(UserDictSearchable *searchable,
                      const uint16 * splids, uint16 len);

  // Compare initial letters only
  int32 fuzzy_compare_spell_id(const uint16 * id1, uint16 len1,
                               const UserDictSearchable *searchable);

  // Compare two spell ids exactly
  // First argument must be a full spell id
  bool equal_spell_id(const uint16 * fullids,
                      uint16 fulllen, const UserDictSearchable *searchable);

  // Find first item by initial letters
  // NOTE(review): the signed int32 result presumably uses a negative value
  // for "not found" -- confirm.
  int32 locate_first_in_offsets(const UserDictSearchable *searchable);

  // Same parameter list as _put_lemma().
  LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[],
                           uint16 lemma_len, uint16 count, uint64 lmt);

  // Check if a lemma is in dictionary
  int32 locate_in_offsets(char16 lemma_str[],
                          uint16 splid_str[], uint16 lemma_len);
  // Takes an index (not an offset value).
  // NOTE(review): presumably an index into offsets_ -- confirm.
  bool remove_lemma_by_offset_index(int offset_index);
#ifdef ___PREDICT_ENABLED___
  // Helpers for the prediction table; words holds lemma_len 16-bit units.
  // NOTE(review): presumably they operate on predicts_ -- confirm.
  uint32 locate_where_to_insert_in_predicts(const uint16 * words,
                                            int lemma_len);

  int32 locate_first_in_predicts(const uint16 * words, int lemma_len);

  void remove_lemma_from_predict_list(uint32 offset);
#endif
#ifdef ___SYNC_ENABLED___
  // Helpers for the sync list.
  // NOTE(review): presumably they operate on syncs_ -- confirm.
  void queue_lemma_for_sync(LemmaIdType id);

  void remove_lemma_from_sync_list(uint32 offset);

  void write_back_sync(int fd);
#endif
  // Persistence helpers. Each fd-taking variant receives an int file
  // descriptor.
  // NOTE(review): the variants look like they line up with the *_DIRTY values
  // of UserDictState, with write_back() choosing among them -- confirm.
  void write_back_score(int fd);
  void write_back_offset(int fd);
  void write_back_lemma(int fd);
  void write_back_all(int fd);
  void write_back();

  // Pairs a score with an index.
  // NOTE(review): offset_index is presumably an index into offsets_ -- confirm.
  struct UserDictScoreOffsetPair {
    int score;
    uint32 offset_index;
  };

  // Both helpers work on an array of pairs addressed by index.
  // NOTE(review): swap/shift_down with an (i, n) signature look like binary
  // heap primitives, presumably used by reclaim() -- confirm in the .cpp.
  // swap() is declared inline without a body here, so it is only callable from
  // the translation unit that defines it.
  inline void swap(UserDictScoreOffsetPair * sop, int i, int j);

  void shift_down(UserDictScoreOffsetPair * sop, int i, int n);

  // On-disk format of the dictionary file (the second row is one lemma record)
  // +-------------+
  // | Version (4) |
  // +-------------+
  // +-----------+-----------+--------------------+-------------------+
  // | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) |
  // +-----------+-----------+--------------------+-------------------+
  // ...
  // +-----------------------+     +-------------+      <---Offset of offset
  // | Offset1 by_splids (4) | ... | OffsetN (4) |
  // +-----------------------+     +-------------+
#ifdef ___PREDICT_ENABLED___
  // +----------------------+     +-------------+
  // | Offset1 by_lemma (4) | ... | OffsetN (4) |
  // +----------------------+     +-------------+
#endif
  // +------------+     +------------+
  // | Score1 (4) | ... | ScoreN (4) |
  // +------------+     +------------+
#ifdef ___SYNC_ENABLED___
  // +-------------+     +-------------+
  // | NewAdd1 (4) | ... | NewAddN (4) |
  // +-------------+     +-------------+
#endif
  // +----------------+
  // | Dict Info (4x) |
  // +----------------+
};
}

#endif
Add Pinyin / Hunspell and some more refactoring (#58) * more keyboard refactoring * keyboard: Add Pinyin and Hunspell, and change suggestions API * Add object files to gitignore 2018-03-30 01:15:05 -07:00			`/*`
			`* Copyright (C) 2009 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`#ifndef PINYINIME_INCLUDE_USERDICT_H__`
			`#define PINYINIME_INCLUDE_USERDICT_H__`

			`#define ___CACHE_ENABLED___`
			`#define ___SYNC_ENABLED___`
			`#define ___PREDICT_ENABLED___`

			`// Debug performance for operations`
			`// #define ___DEBUG_PERF___`

			`#ifdef _WIN32`
			`#include <winsock.h> // timeval`
			`#else`
			`#include <pthread.h>`
			`#endif`
			`#include "atomdictbase.h"`

			`namespace ime_pinyin {`

			`class UserDict : public AtomDictBase {`
			`public:`
			`UserDict();`
			`~UserDict();`

			`bool load_dict(const char *file_name, LemmaIdType start_id,`
			`LemmaIdType end_id);`

			`bool close_dict();`

			`size_t number_of_lemmas();`

			`void reset_milestones(uint16 from_step, MileStoneHandle from_handle);`

			`MileStoneHandle extend_dict(MileStoneHandle from_handle,`
			`const DictExtPara *dep, LmaPsbItem *lpi_items,`
			`size_t lpi_max, size_t *lpi_num);`

			`size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,`
			`LmaPsbItem *lpi_items, size_t lpi_max);`

			`uint16 get_lemma_str(LemmaIdType id_lemma, char16* str_buf,`
			`uint16 str_max);`

			`uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,`
			`uint16 splids_max, bool arg_valid);`

			`size_t predict(const char16 last_hzs[], uint16 hzs_len,`
			`NPredictItem *npre_items, size_t npre_max,`
			`size_t b4_used);`

			`// Full spelling ids are required`
			`LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],`
			`uint16 lemma_len, uint16 count);`

			`LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,`
			`bool selected);`

			`LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],`
			`uint16 lemma_len);`

			`LmaScoreType get_lemma_score(LemmaIdType lemma_id);`

			`LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],`
			`uint16 lemma_len);`

			`bool remove_lemma(LemmaIdType lemma_id);`

			`size_t get_total_lemma_count();`
			`void set_total_lemma_count_of_others(size_t count);`

			`void flush_cache();`

			`void set_limit(uint32 max_lemma_count, uint32 max_lemma_size,`
			`uint32 reclaim_ratio);`

			`void reclaim();`

			`void defragment();`

			`#ifdef ___SYNC_ENABLED___`
			`void clear_sync_lemmas(unsigned int start, unsigned int end);`

			`int get_sync_count();`

			`LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[],`
			`uint16 lemma_len, uint16 count, uint64 lmt);`
			`/**`
			`* Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.`
			`*`
			`* @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'`
			`* @param len length of lemmas string in UTF-16LE`
			`* @return newly added lemma count`
			`*/`
			`int put_lemmas_no_sync_from_utf16le_string(char16 * lemmas, int len);`

			`/**`
			`* Get lemmas need sync to a UTF-16LE string of above format.`
			`* Note: input buffer (str) must not be too small. If str is too small to`
			`* contain single one lemma, there might be a dead loop.`
			`*`
			`* @param str buffer to write lemmas`
			`* @param size buffer size in UTF-16LE`
			`* @param count output value of lemma returned`
			`* @return UTF-16LE string length`
			`*/`
			`int get_sync_lemmas_in_utf16le_string_from_beginning(`
			`char16 * str, int size, int * count);`

			`#endif`

			`struct UserDictStat {`
			`uint32 version;`
			`const char * file_name;`
			`struct timeval load_time;`
			`struct timeval last_update;`
			`uint32 disk_size;`
			`uint32 lemma_count;`
			`uint32 lemma_size;`
			`uint32 delete_count;`
			`uint32 delete_size;`
			`#ifdef ___SYNC_ENABLED___`
			`uint32 sync_count;`
			`#endif`
			`uint32 reclaim_ratio;`
			`uint32 limit_lemma_count;`
			`uint32 limit_lemma_size;`
			`};`

			`bool state(UserDictStat * stat);`

			`private:`
			`uint32 total_other_nfreq_;`
			`struct timeval load_time_;`
			`LemmaIdType start_id_;`
			`uint32 version_;`
			`uint8 * lemmas_;`

			`// In-Memory-Only flag for each lemma`
			`static const uint8 kUserDictLemmaFlagRemove = 1;`
			`// Inuse lemmas' offset`
			`uint32 * offsets_;`
			`// Highest bit in offset tells whether corresponding lemma is removed`
			`static const uint32 kUserDictOffsetFlagRemove = (1 << 31);`
			`// Maximum possible for the offset`
			`static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);`
			`// Bit width for last modified time, from 1 to 16`
			`static const uint32 kUserDictLMTBitWidth = 16;`
			`// Granularity for last modified time in second`
			`static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;`
			`// Maximum frequency count`
			`static const uint16 kUserDictMaxFrequency = 0xFFFF;`

			`#define COARSE_UTC(year, month, day, hour, minute, second) \`
			`( \`
			`(year - 1970) * 365 * 24 * 60 * 60 + \`
			`(month - 1) * 30 * 24 * 60 * 60 + \`
			`(day - 1) * 24 * 60 * 60 + \`
			`(hour - 0) * 60 * 60 + \`
			`(minute - 0) * 60 + \`
			`(second - 0) \`
			`)`
			`static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);`

			`// Correspond to offsets_`
			`uint32 * scores_;`
			`// Following two fields are only valid in memory`
			`uint32 * ids_;`
			`#ifdef ___PREDICT_ENABLED___`
			`uint32 * predicts_;`
			`#endif`
			`#ifdef ___SYNC_ENABLED___`
			`uint32 * syncs_;`
			`size_t sync_count_size_;`
			`#endif`
			`uint32 * offsets_by_id_;`

			`size_t lemma_count_left_;`
			`size_t lemma_size_left_;`

			`const char * dict_file_;`

			`// Be sure size is 4xN`
			`struct UserDictInfo {`
			`// When limitation reached, how much percentage will be reclaimed (1 ~ 100)`
			`uint32 reclaim_ratio;`
			`// maximum lemma count, 0 means no limitation`
			`uint32 limit_lemma_count;`
			`// Maximum lemma size, it's different from`
			`// whole disk file size or in-mem dict size`
			`// 0 means no limitation`
			`uint32 limit_lemma_size;`
			`// Total lemma count including deleted and inuse`
			`// Also indicate offsets_ size`
			`uint32 lemma_count;`
			`// Total size of lemmas including used and freed`
			`uint32 lemma_size;`
			`// Freed lemma count`
			`uint32 free_count;`
			`// Freed lemma size in byte`
			`uint32 free_size;`
			`#ifdef ___SYNC_ENABLED___`
			`uint32 sync_count;`
			`#endif`
			`int32 total_nfreq;`
			`} dict_info_;`

			`static const uint32 kUserDictVersion = 0x0ABCDEF0;`

			`static const uint32 kUserDictPreAlloc = 32;`
			`static const uint32 kUserDictAverageNchar = 8;`

			`enum UserDictState {`
			`// Keep in order`
			`USER_DICT_NONE = 0,`
			`USER_DICT_SYNC,`
			`#ifdef ___SYNC_ENABLED___`
			`USER_DICT_SYNC_DIRTY,`
			`#endif`
			`USER_DICT_SCORE_DIRTY,`
			`USER_DICT_OFFSET_DIRTY,`
			`USER_DICT_LEMMA_DIRTY,`

			`USER_DICT_DEFRAGMENTED,`
			`} state_;`

			`struct UserDictSearchable {`
			`uint16 splids_len;`
			`uint16 splid_start[kMaxLemmaSize];`
			`uint16 splid_count[kMaxLemmaSize];`
			`// Compact inital letters for both FuzzyCompareSpellId and cache system`
			`uint32 signature[kMaxLemmaSize / 4];`
			`};`

			`#ifdef ___CACHE_ENABLED___`
			`enum UserDictCacheType {`
			`USER_DICT_CACHE,`
			`USER_DICT_MISS_CACHE,`
			`};`

			`static const int kUserDictCacheSize = 4;`
			`static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;`

			`struct UserDictMissCache {`
			`uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];`
			`uint16 head, tail;`
			`} miss_caches_[kMaxLemmaSize];`

			`struct UserDictCache {`
			`uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];`
			`uint32 offsets[kUserDictCacheSize];`
			`uint32 lengths[kUserDictCacheSize];`
			`// Ring buffer`
			`uint16 head, tail;`
			`} caches_[kMaxLemmaSize];`

			`void cache_init();`

			`void cache_push(UserDictCacheType type,`
			`UserDictSearchable *searchable,`
			`uint32 offset, uint32 length);`

			`bool cache_hit(UserDictSearchable *searchable,`
			`uint32 *offset, uint32 *length);`

			`bool load_cache(UserDictSearchable *searchable,`
			`uint32 *offset, uint32 *length);`

			`void save_cache(UserDictSearchable *searchable,`
			`uint32 offset, uint32 length);`

			`void reset_cache();`

			`bool load_miss_cache(UserDictSearchable *searchable);`

			`void save_miss_cache(UserDictSearchable *searchable);`

			`void reset_miss_cache();`
			`#endif`

			`LmaScoreType translate_score(int f);`

			`int extract_score_freq(int raw_score);`

			`uint64 extract_score_lmt(int raw_score);`

			`inline int build_score(uint64 lmt, int freq);`

			`inline int64 utf16le_atoll(uint16 *s, int len);`

			`inline int utf16le_lltoa(int64 v, uint16 *s, int size);`

			`LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[],`
			`uint16 lemma_len, uint16 count, uint64 lmt);`

			`size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len,`
			`LmaPsbItem *lpi_items, size_t lpi_max, bool * need_extend);`

			`int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);`

			`int _get_lemma_score(LemmaIdType lemma_id);`

			`int is_fuzzy_prefix_spell_id(const uint16 * id1, uint16 len1,`
			`const UserDictSearchable *searchable);`

			`bool is_prefix_spell_id(const uint16 * fullids,`
			`uint16 fulllen, const UserDictSearchable *searchable);`

			`uint32 get_dict_file_size(UserDictInfo * info);`

			`bool reset(const char *file);`

			`bool validate(const char *file);`

			`bool load(const char *file, LemmaIdType start_id);`

			`bool is_valid_state();`

			`bool is_valid_lemma_id(LemmaIdType id);`

			`LemmaIdType get_max_lemma_id();`

			`void set_lemma_flag(uint32 offset, uint8 flag);`

			`char get_lemma_flag(uint32 offset);`

			`char get_lemma_nchar(uint32 offset);`

			`uint16 * get_lemma_spell_ids(uint32 offset);`

			`uint16 * get_lemma_word(uint32 offset);`

			`// Prepare searchable to fasten locate process`
			`void prepare_locate(UserDictSearchable *searchable,`
			`const uint16 * splids, uint16 len);`

			`// Compare initial letters only`
			`int32 fuzzy_compare_spell_id(const uint16 * id1, uint16 len1,`
			`const UserDictSearchable *searchable);`

			`// Compare exactly two spell ids`
			`// First argument must be a full id spell id`
			`bool equal_spell_id(const uint16 * fullids,`
			`uint16 fulllen, const UserDictSearchable *searchable);`

			`// Find first item by initial letters`
			`int32 locate_first_in_offsets(const UserDictSearchable *searchable);`

			`LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[],`
			`uint16 lemma_len, uint16 count, uint64 lmt);`

			`// Check if a lemma is in dictionary`
			`int32 locate_in_offsets(char16 lemma_str[],`
			`uint16 splid_str[], uint16 lemma_len);`

			`bool remove_lemma_by_offset_index(int offset_index);`
			`#ifdef ___PREDICT_ENABLED___`
			`uint32 locate_where_to_insert_in_predicts(const uint16 * words,`
			`int lemma_len);`

			`int32 locate_first_in_predicts(const uint16 * words, int lemma_len);`

			`void remove_lemma_from_predict_list(uint32 offset);`
			`#endif`
			`#ifdef ___SYNC_ENABLED___`
			`void queue_lemma_for_sync(LemmaIdType id);`

			`void remove_lemma_from_sync_list(uint32 offset);`

			`void write_back_sync(int fd);`
			`#endif`
			`void write_back_score(int fd);`
			`void write_back_offset(int fd);`
			`void write_back_lemma(int fd);`
			`void write_back_all(int fd);`
			`void write_back();`

			`struct UserDictScoreOffsetPair {`
			`int score;`
			`uint32 offset_index;`
			`};`

			`inline void swap(UserDictScoreOffsetPair * sop, int i, int j);`

			`void shift_down(UserDictScoreOffsetPair * sop, int i, int n);`

			`// On-disk format for each lemma`
			`// +-------------+`
			`// \| Version (4) \|`
			`// +-------------+`
			`// +-----------+-----------+--------------------+-------------------+`
			`// \| Spare (1) \| Nchar (1) \| Splids (2 x Nchar) \| Lemma (2 x Nchar) \|`
			`// +-----------+-----------+--------------------+-------------------+`
			`// ...`
			`// +-----------------------+ +-------------+ <---Offset of offset`
			`// \| Offset1 by_splids (4) \| ... \| OffsetN (4) \|`
			`// +-----------------------+ +-------------+`
			`#ifdef ___PREDICT_ENABLED___`
			`// +----------------------+ +-------------+`
			`// \| Offset1 by_lemma (4) \| ... \| OffsetN (4) \|`
			`// +----------------------+ +-------------+`
			`#endif`
			`// +------------+ +------------+`
			`// \| Score1 (4) \| ... \| ScoreN (4) \|`
			`// +------------+ +------------+`
			`#ifdef ___SYNC_ENABLED___`
			`// +-------------+ +-------------+`
			`// \| NewAdd1 (4) \| ... \| NewAddN (4) \|`
			`// +-------------+ +-------------+`
			`#endif`
			`// +----------------+`
			`// \| Dict Info (4x) \|`
			`// +----------------+`
			`};`
			`}`

			`#endif`