530 lines
16 KiB
C++
530 lines
16 KiB
C++
#include "zh.h"
|
|
|
|
template< int n >
|
|
constexpr PinyinCandidateSection section( const std::uint16_t( &arr )[n] )
|
|
{
|
|
static_assert( n <= 127, "Check the character map or increase the permitted size of the PinyinCandidateList." );
|
|
return { arr, n };
|
|
}
|
|
|
|
struct CandidateGroup
|
|
{
|
|
const PinyinCandidateSections a;
|
|
const PinyinCandidateSections ai;
|
|
const PinyinCandidateSections an;
|
|
const PinyinCandidateSections ang;
|
|
const PinyinCandidateSections ao;
|
|
const PinyinCandidateSections e;
|
|
const PinyinCandidateSections ei;
|
|
const PinyinCandidateSections en;
|
|
const PinyinCandidateSections eng;
|
|
const PinyinCandidateSections er;
|
|
const PinyinCandidateSections i;
|
|
const PinyinCandidateSections ia;
|
|
const PinyinCandidateSections ian;
|
|
const PinyinCandidateSections iang;
|
|
const PinyinCandidateSections iao;
|
|
const PinyinCandidateSections ie;
|
|
const PinyinCandidateSections in;
|
|
const PinyinCandidateSections ing;
|
|
const PinyinCandidateSections io;
|
|
const PinyinCandidateSections iong;
|
|
const PinyinCandidateSections iu;
|
|
const PinyinCandidateSections o;
|
|
const PinyinCandidateSections ong;
|
|
const PinyinCandidateSections ou;
|
|
const PinyinCandidateSections u;
|
|
const PinyinCandidateSections ua;
|
|
const PinyinCandidateSections uai;
|
|
const PinyinCandidateSections uan;
|
|
const PinyinCandidateSections uang;
|
|
const PinyinCandidateSections ue;
|
|
const PinyinCandidateSections ui;
|
|
const PinyinCandidateSections un;
|
|
const PinyinCandidateSections uo;
|
|
const PinyinCandidateSections v;
|
|
const PinyinCandidateSections ve;
|
|
};
|
|
|
|
/// Pinyin initials (the consonant that starts a syllable).
///
/// A single-letter initial has the value of its ASCII letter. The two-letter
/// initials ch, sh and zh are the base letter with Initial_hMask set, so any
/// value above Initial_hMask denotes an initial that is two characters long.
enum PinyinInitial
{
    Initial_invalid = -1,
    Initial_none    = 0,    // the syllable starts directly with its final
    Initial_hMask   = 0x80, // marks the "followed by h" digraphs

    Initial_b  = 'b',
    Initial_c  = 'c',
    Initial_ch = Initial_c | Initial_hMask,
    Initial_d  = 'd',
    Initial_f  = 'f',
    Initial_g  = 'g',
    Initial_h  = 'h',
    Initial_j  = 'j',
    Initial_k  = 'k',
    Initial_l  = 'l',
    Initial_m  = 'm',
    Initial_n  = 'n',
    Initial_p  = 'p',
    Initial_q  = 'q',
    Initial_r  = 'r',
    Initial_s  = 's',
    Initial_sh = Initial_s | Initial_hMask,
    Initial_t  = 't',
    Initial_w  = 'w',
    Initial_x  = 'x',
    Initial_y  = 'y',
    Initial_z  = 'z',
    Initial_zh = Initial_z | Initial_hMask
};
|
|
|
|
// Helpers to create an enum from a preedit string
|
|
/// Packs a one-letter final (a string literal of length 1 plus terminator)
/// into an integer: the value is simply the letter itself.
inline constexpr std::int32_t makePinyinFinal( const char ( &c )[2] )
{
    return c[0];
}
|
|
/// Packs a two-letter final into an integer, first letter in the lowest byte.
inline constexpr std::int32_t makePinyinFinal( const char ( &c )[3] )
{
    return c[0] | ( c[1] << 8 );
}
|
|
/// Packs a three-letter final into an integer, first letter in the lowest byte.
inline constexpr std::int32_t makePinyinFinal( const char ( &c )[4] )
{
    return c[0] | ( c[1] << 8 ) | ( c[2] << 16 );
}
|
|
/// Packs a four-letter final into an integer, first letter in the lowest byte.
inline constexpr std::int32_t makePinyinFinal( const char ( &c )[5] )
{
    return c[0] | ( c[1] << 8 ) | ( c[2] << 16 ) | ( c[3] << 24 );
}
|
|
|
|
enum PinyinFinal : std::int32_t
|
|
{
|
|
Final_invalid = -1,
|
|
Final_empty = 0,
|
|
Final_a = makePinyinFinal("a"),
|
|
Final_ai = makePinyinFinal("ai"),
|
|
Final_an = makePinyinFinal("an"),
|
|
Final_ang = makePinyinFinal("ang"),
|
|
Final_ao = makePinyinFinal("ao"),
|
|
Final_e = makePinyinFinal("e"),
|
|
Final_ei = makePinyinFinal("ei"),
|
|
Final_en = makePinyinFinal("en"),
|
|
Final_eng = makePinyinFinal("eng"),
|
|
Final_er = makePinyinFinal("er"),
|
|
Final_ia = makePinyinFinal("ia"),
|
|
Final_ian = makePinyinFinal("ian"),
|
|
Final_iang = makePinyinFinal("iang"),
|
|
Final_iao = makePinyinFinal("iao"),
|
|
Final_i = makePinyinFinal("i"),
|
|
Final_ie = makePinyinFinal("ie"),
|
|
Final_in = makePinyinFinal("in"),
|
|
Final_ing = makePinyinFinal("ing"),
|
|
Final_io = makePinyinFinal("io"),
|
|
Final_ion = makePinyinFinal("ion"), // intermediate to iong
|
|
Final_iong = makePinyinFinal("iong"),
|
|
Final_iu = makePinyinFinal("iu"),
|
|
Final_o = makePinyinFinal("o"),
|
|
Final_on = makePinyinFinal("on"), // intermediate to ong
|
|
Final_ong = makePinyinFinal("ong"),
|
|
Final_ou = makePinyinFinal("ou"),
|
|
Final_ua = makePinyinFinal("ua"),
|
|
Final_uai = makePinyinFinal("uai"),
|
|
Final_uan = makePinyinFinal("uan"),
|
|
Final_uang = makePinyinFinal("uang"),
|
|
Final_u = makePinyinFinal("u"),
|
|
Final_ue = makePinyinFinal("ue"),
|
|
Final_ui = makePinyinFinal("ui"),
|
|
Final_un = makePinyinFinal("un"),
|
|
Final_uo = makePinyinFinal("uo"),
|
|
Final_v = makePinyinFinal("v"),
|
|
Final_ve = makePinyinFinal("ve"),
|
|
};
|
|
|
|
// character codes are split into separate files based on the Initial segment
|
|
#include "zh_none.cpp"
|
|
#include "zh_b.cpp"
|
|
#include "zh_c.cpp"
|
|
#include "zh_ch.cpp"
|
|
#include "zh_d.cpp"
|
|
#include "zh_f.cpp"
|
|
#include "zh_g.cpp"
|
|
#include "zh_h.cpp"
|
|
#include "zh_j.cpp"
|
|
#include "zh_k.cpp"
|
|
#include "zh_l.cpp"
|
|
#include "zh_m.cpp"
|
|
#include "zh_n.cpp"
|
|
#include "zh_p.cpp"
|
|
#include "zh_q.cpp"
|
|
#include "zh_r.cpp"
|
|
#include "zh_s.cpp"
|
|
#include "zh_sh.cpp"
|
|
#include "zh_t.cpp"
|
|
#include "zh_w.cpp"
|
|
#include "zh_x.cpp"
|
|
#include "zh_y.cpp"
|
|
#include "zh_z.cpp"
|
|
#include "zh_zh.cpp"
|
|
|
|
using namespace zh;
|
|
|
|
/// Determines the initial of the pinyin syllable spelled by @p text.
///
/// @return Initial_none when the text starts with a, e, o or u; the matching
///         initial for a leading consonant (c, s and z become ch, sh and zh
///         when the second character is 'h'); Initial_invalid for empty text
///         or any other first character.
static PinyinInitial pinyinInitial( const QByteArray& text )
{
    if ( text.isEmpty() )
        return Initial_invalid;

    // Only consulted for c, s and z, which combine with 'h' into a digraph.
    const bool hFollows = text.length() > 1 && text.at( 1 ) == 'h';

    switch ( text.at( 0 ) )
    {
        case 'a':
        case 'e':
        case 'o':
        case 'u':
            return Initial_none;

        case 'c': return hFollows ? Initial_ch : Initial_c;
        case 's': return hFollows ? Initial_sh : Initial_s;
        case 'z': return hFollows ? Initial_zh : Initial_z;

        case 'b': return Initial_b;
        case 'd': return Initial_d;
        case 'f': return Initial_f;
        case 'g': return Initial_g;
        case 'h': return Initial_h;
        case 'j': return Initial_j;
        case 'k': return Initial_k;
        case 'l': return Initial_l;
        case 'm': return Initial_m;
        case 'n': return Initial_n;
        case 'p': return Initial_p;
        case 'q': return Initial_q;
        case 'r': return Initial_r;
        case 't': return Initial_t;
        case 'w': return Initial_w;
        case 'x': return Initial_x;
        case 'y': return Initial_y;

        default:
            break;
    }

    return Initial_invalid;
}
|
|
|
|
/// Extracts the final (ending) of the syllable in @p text, given its already
/// parsed initial @p i.
///
/// The bytes following the initial are packed into an integer with the first
/// byte in the lowest position - the layout makePinyinFinal() produces - and
/// the result is accepted only if it equals one of the PinyinFinal
/// enumerators.
///
/// @return The matching final; Final_empty if nothing follows the initial;
///         Final_invalid if the text is shorter than the initial, the ending
///         is longer than four bytes, or it is not a known final.
static PinyinFinal pinyinFinal( PinyinInitial i, const QByteArray& text )
{
    // Characters consumed by the initial: none, one letter, or two letters
    // for the "h" digraphs (their values lie above Initial_hMask).
    const int offset = i == Initial_none ? 0 : ( i > Initial_hMask ? 2 : 1 );

    const int remaining = text.length() - offset;
    if ( remaining < 0 || remaining > 4 )
        return Final_invalid; // nothing after the initial, or the ending is too long

    const char* const data = text.constData() + offset;

    // Rebuild the packed value to match the enum. Each byte is widened through
    // unsigned char: plain char may be signed, and a sign-extended byte would
    // both corrupt the higher bytes and shift a negative value.
    uint hash = 0;
    for ( int k = remaining - 1; k >= 0; --k )
        hash = ( hash << 8 ) | static_cast< unsigned char >( data[k] );

    // Verify the packed value. The enumerators are listed explicitly (no
    // default label) so the compiler can flag a final that is added to the
    // enum but forgotten here.
    switch ( static_cast< PinyinFinal >( hash ) )
    {
        case Final_invalid:
            // Arbitrary input bytes can produce this value (four 0xFF bytes),
            // so it must be handled like any other unknown ending rather than
            // being declared unreachable.
            break;

        case Final_empty:
        case Final_a:
        case Final_ai:
        case Final_an:
        case Final_ang:
        case Final_ao:
        case Final_e:
        case Final_ei:
        case Final_en:
        case Final_eng:
        case Final_er:
        case Final_ia:
        case Final_ian:
        case Final_iang:
        case Final_iao:
        case Final_i:
        case Final_ie:
        case Final_in:
        case Final_ing:
        case Final_io:
        case Final_ion:
        case Final_iong:
        case Final_iu:
        case Final_o:
        case Final_on:
        case Final_ong:
        case Final_ou:
        case Final_ua:
        case Final_uai:
        case Final_uan:
        case Final_uang:
        case Final_u:
        case Final_ue:
        case Final_ui:
        case Final_un:
        case Final_uo:
        case Final_v:
        case Final_ve:
            return static_cast< PinyinFinal >( hash );
    }

    // all others are invalid
    return Final_invalid;
}
|
|
|
|
constexpr static std::uint8_t tones( const PinyinCandidateSections& candidates )
|
|
{
|
|
return ( candidates[1].data ? FlatTone : NoTone )
|
|
| ( candidates[2].data ? RisingTone : NoTone )
|
|
| ( candidates[3].data ? LowTone : NoTone )
|
|
| ( candidates[4].data ? FallingTone : NoTone )
|
|
| ( candidates[5].data ? NeutralTone : NoTone );
|
|
}
|
|
|
|
/// Section list whose six sections all have null data and a zero count; used
/// wherever no candidates are available.
constexpr static const PinyinCandidateSections emptySection =
{
    { nullptr, 0 },
    { nullptr, 0 },
    { nullptr, 0 },
    { nullptr, 0 },
    { nullptr, 0 },
    { nullptr, 0 }
};
|
|
|
|
/// Selects the candidates of @p group that belong to the ending @p final.
///
/// Each result carries the section list, the vowel that receives the tone
/// mark, and the bitmask of available tones. Final_invalid yields
/// { emptySection, -1, 0 }.
static PinyinCandidates candidatesForFinal(
    const CandidateGroup& group, PinyinFinal final )
{
    // This table is organized such that if there are no candidates for a given
    // ending (final), an empty section is returned (signaling that we are in
    // an intermediate editing phase). If there is only one possible remaining
    // ending (e.g. in->ing), that possible ending is returned.
    // The tone mark is placed by the order: a, o, e, i, u, v (umlaut u).
    // The only exception is "iu" where the tone mark is placed on the u instead.
    static const PinyinCandidates emptyCandidates = { emptySection, 0, 0 };

    switch ( final )
    {
        case Final_empty:
            // An empty list is returned for intermediate values
            return emptyCandidates;

        case Final_invalid:
            break;

        // ---- a ----
        case Final_a:
            if ( !group.a[0].data )
                return emptyCandidates;
            return { group.a, 'a', tones( group.a ) };

        case Final_ai:
            return { group.ai, 'a', tones( group.ai ) };

        case Final_an:
            if ( group.an[0].data )
                return { group.an, 'a', tones( group.an ) };
            // fall through
        case Final_ang:
            return { group.ang, 'a', tones( group.ang ) };

        case Final_ao:
            return { group.ao, 'a', tones( group.ao ) };

        // ---- e ----
        case Final_e:
            if ( !group.e[0].data )
                return emptyCandidates;
            return { group.e, 'e', tones( group.e ) };

        case Final_ei:
            return { group.ei, 'e', tones( group.ei ) };

        case Final_en:
            if ( group.en[0].data )
                return { group.en, 'e', tones( group.en ) };
            // fall through
        case Final_eng:
            return { group.eng, 'e', tones( group.eng ) };

        case Final_er:
            return { group.er, 'e', tones( group.er ) };

        // ---- i ----
        case Final_ia:
            if ( !group.ia[0].data )
                return emptyCandidates;
            return { group.ia, 'a', tones( group.ia ) };

        case Final_ian:
            if ( group.ian[0].data )
                return { group.ian, 'a', tones( group.ian ) };
            // fall through
        case Final_iang:
            return { group.iang, 'a', tones( group.iang ) };

        case Final_iao:
            return { group.iao, 'a', tones( group.iao ) };

        case Final_i:
            if ( !group.i[0].data )
                return emptyCandidates;
            return { group.i, 'i', tones( group.i ) };

        case Final_ie:
            return { group.ie, 'e', tones( group.ie ) };

        case Final_in:
            if ( group.in[0].data )
                return { group.in, 'i', tones( group.in ) };
            // fall through
        case Final_ing:
            return { group.ing, 'i', tones( group.ing ) };

        case Final_io:
            if ( group.io[0].data )
                return { group.io, 'o', tones( group.io ) };
            // fall through
        case Final_ion:
        case Final_iong:
            return { group.iong, 'o', tones( group.iong ) };

        case Final_iu:
            return { group.iu, 'u', tones( group.iu ) };

        // ---- o ----
        case Final_o:
            if ( !group.o[0].data )
                return emptyCandidates;
            return { group.o, 'o', tones( group.o ) };

        case Final_on:
        case Final_ong:
            return { group.ong, 'o', tones( group.ong ) };

        case Final_ou:
            return { group.ou, 'o', tones( group.ou ) };

        // ---- u ----
        case Final_ua:
            if ( !group.ua[0].data )
                return emptyCandidates;
            return { group.ua, 'a', tones( group.ua ) };

        case Final_uai:
            return { group.uai, 'a', tones( group.uai ) };

        case Final_uan:
            if ( group.uan[0].data )
                return { group.uan, 'a', tones( group.uan ) };
            // fall through
        case Final_uang:
            return { group.uang, 'a', tones( group.uang ) };

        case Final_u:
            if ( !group.u[0].data )
                return emptyCandidates;
            return { group.u, 'u', tones( group.u ) };

        case Final_ue:
            return { group.ue, 'e', tones( group.ue ) };

        case Final_ui:
            return { group.ui, 'i', tones( group.ui ) };

        case Final_un:
            return { group.un, 'u', tones( group.un ) };

        case Final_uo:
            return { group.uo, 'o', tones( group.uo ) };

        // ---- v ----
        case Final_v:
            if ( group.v[0].data )
                return { group.v, 'v', tones( group.v ) };
            // fall through
        case Final_ve:
            return { group.ve, 'e', tones( group.ve ) };
    }

    // Only Final_invalid (or a value outside the enum) reaches this point.
    return { emptySection, -1, 0 };
}
|
|
|
|
/// Looks up the candidates for the pinyin preedit string @p text.
///
/// The text is split into an initial and a final; the initial selects the
/// candidate table in namespace zh and candidatesForFinal() picks the entry
/// for the final. An unrecognized initial yields { emptySection, -1, 0 }.
PinyinCandidates pinyinCandidates( const QByteArray& text )
{
    const auto initialPart = pinyinInitial( text );
    const auto finalPart = pinyinFinal( initialPart, text );

    switch ( initialPart )
    {
        case Initial_invalid:
        case Initial_hMask: // a mask value, never a real initial
            break;

        case Initial_none: return candidatesForFinal( zh::none, finalPart );
        case Initial_b:    return candidatesForFinal( zh::b, finalPart );
        case Initial_c:    return candidatesForFinal( zh::c, finalPart );
        case Initial_ch:   return candidatesForFinal( zh::ch, finalPart );
        case Initial_d:    return candidatesForFinal( zh::d, finalPart );
        case Initial_f:    return candidatesForFinal( zh::f, finalPart );
        case Initial_g:    return candidatesForFinal( zh::g, finalPart );
        case Initial_h:    return candidatesForFinal( zh::h, finalPart );
        case Initial_j:    return candidatesForFinal( zh::j, finalPart );
        case Initial_k:    return candidatesForFinal( zh::k, finalPart );
        case Initial_l:    return candidatesForFinal( zh::l, finalPart );
        case Initial_m:    return candidatesForFinal( zh::m, finalPart );
        case Initial_n:    return candidatesForFinal( zh::n, finalPart );
        case Initial_p:    return candidatesForFinal( zh::p, finalPart );
        case Initial_q:    return candidatesForFinal( zh::q, finalPart );
        case Initial_r:    return candidatesForFinal( zh::r, finalPart );
        case Initial_s:    return candidatesForFinal( zh::s, finalPart );
        case Initial_sh:   return candidatesForFinal( zh::sh, finalPart );
        case Initial_t:    return candidatesForFinal( zh::t, finalPart );
        case Initial_w:    return candidatesForFinal( zh::w, finalPart );
        case Initial_x:    return candidatesForFinal( zh::x, finalPart );
        case Initial_y:    return candidatesForFinal( zh::y, finalPart );
        case Initial_z:    return candidatesForFinal( zh::z, finalPart );
        case Initial_zh:   return candidatesForFinal( zh::zh, finalPart );
    }

    return { emptySection, -1, 0 };
}
|
|
|
|
/// Unicode code points of the pinyin vowels, one row per vowel.
///
/// Column 0 is the unmarked letter; columns 1 to 4 are the same letter with a
/// macron, acute accent, caron and grave accent respectively.
static constexpr std::uint16_t vowelsWithTones[6][5] =
{
    //  plain   macron  acute   caron   grave
    { 0x0061, 0x0101, 0x00E1, 0x01CE, 0x00E0 }, // a
    { 0x0065, 0x0113, 0x00E9, 0x011B, 0x00E8 }, // e
    { 0x0069, 0x012B, 0x00ED, 0x01D0, 0x00EC }, // i
    { 0x006F, 0x014D, 0x00F3, 0x01D2, 0x00F2 }, // o
    { 0x0075, 0x016B, 0x00FA, 0x01D4, 0x00F9 }, // u
    { 0x00FC, 0x01D6, 0x01D8, 0x01DA, 0x01DC }  // umlaut u
};
|
|
|
|
/// Returns the code point of @p vowel carrying the tone mark @p toneIndex.
///
/// Tone index 5 is treated like 0 and selects the unmarked column of
/// vowelsWithTones. For a tone index outside 0..5, or a character other than
/// a, e, i, o, u or v, the Latin-1 value of @p vowel is returned unchanged.
Qt::Key vowelWithTone( QChar vowel, int toneIndex )
{
    const char letter = vowel.toLatin1();
    const int column = ( toneIndex == 5 ) ? 0 : toneIndex;

    // Row of the vowel in vowelsWithTones, or -1 when no lookup applies.
    int row = -1;
    if ( column >= 0 && column <= 4 )
    {
        switch ( letter )
        {
            case 'a': row = 0; break;
            case 'e': row = 1; break;
            case 'i': row = 2; break;
            case 'o': row = 3; break;
            case 'u': row = 4; break;
            case 'v': row = 5; break; // typed in place of umlaut u
        }
    }

    if ( row < 0 )
        return Qt::Key( letter );

    return Qt::Key( vowelsWithTones[ row ][ column ] );
}
|