2017-07-21 18:21:34 +02:00

530 lines
16 KiB
C++

#include "zh.h"
// Wraps a fixed-size array of character codes in a PinyinCandidateSection.
// The element count is taken from the array type itself and is checked at
// compile time against the upper bound of 127 entries.
template< int n >
constexpr PinyinCandidateSection section( const std::uint16_t( &arr )[n] )
{
    static_assert( !( n > 127 ), "Check the character map or increase the permitted size of the PinyinCandidateList." );

    return PinyinCandidateSection{ arr, n };
}
// All candidate sections belonging to one pinyin initial, one member per
// final (ending). pinyinCandidates() picks the group for the initial and
// candidatesForFinal() then reads the member that matches the final.
// There are no members for the intermediate finals "ion" and "on"; those are
// served from iong and ong respectively.
// NOTE(review): the groups are defined in the included zh_*.cpp files,
// presumably via aggregate initialization - if so, the member order below is
// significant and must not be changed. Confirm before reordering.
struct CandidateGroup
{
const PinyinCandidateSections a;
const PinyinCandidateSections ai;
const PinyinCandidateSections an;
const PinyinCandidateSections ang;
const PinyinCandidateSections ao;
const PinyinCandidateSections e;
const PinyinCandidateSections ei;
const PinyinCandidateSections en;
const PinyinCandidateSections eng;
const PinyinCandidateSections er;
const PinyinCandidateSections i;
const PinyinCandidateSections ia;
const PinyinCandidateSections ian;
const PinyinCandidateSections iang;
const PinyinCandidateSections iao;
const PinyinCandidateSections ie;
const PinyinCandidateSections in;
const PinyinCandidateSections ing;
const PinyinCandidateSections io;
const PinyinCandidateSections iong;
const PinyinCandidateSections iu;
const PinyinCandidateSections o;
const PinyinCandidateSections ong;
const PinyinCandidateSections ou;
const PinyinCandidateSections u;
const PinyinCandidateSections ua;
const PinyinCandidateSections uai;
const PinyinCandidateSections uan;
const PinyinCandidateSections uang;
const PinyinCandidateSections ue;
const PinyinCandidateSections ui;
const PinyinCandidateSections un;
const PinyinCandidateSections uo;
// "v" is the ASCII stand-in for u-umlaut (see vowelWithTone()).
const PinyinCandidateSections v;
const PinyinCandidateSections ve;
};
// The initial (leading consonant) of a pinyin syllable.
// Single-letter initials use the character code of that letter as their
// value. The two-letter initials ch, sh and zh use the code of their first
// letter plus Initial_hMask, which makes them the only values greater than
// Initial_hMask; pinyinFinal() relies on that to know whether the initial
// occupies one or two characters of the preedit text.
enum PinyinInitial
{
// the text does not start with a recognized initial or vowel
Initial_invalid = -1,
// the syllable starts directly with its final (no consonant)
Initial_none = 0,
// added to the first letter to mark a following 'h' (ch, sh, zh)
Initial_hMask = 0x80,
Initial_b = 'b',
Initial_c = 'c',
Initial_ch = 'c' + Initial_hMask,
Initial_d = 'd',
Initial_f = 'f',
Initial_g = 'g',
Initial_h = 'h',
Initial_j = 'j',
Initial_k = 'k',
Initial_l = 'l',
Initial_m = 'm',
Initial_n = 'n',
Initial_p = 'p',
Initial_q = 'q',
Initial_r = 'r',
Initial_s = 's',
Initial_sh = 's' + Initial_hMask,
Initial_t = 't',
Initial_w = 'w',
Initial_x = 'x',
Initial_y = 'y',
Initial_z = 'z',
Initial_zh = 'z' + Initial_hMask
};
// Compile-time helpers that turn the spelling of a final (one to four
// letters, passed as a string literal) into the integer used as enumerator
// value. The letters are packed one per byte: the first letter ends up in the
// lowest byte, the second in the next higher byte, and so on. The terminating
// '\0' of the literal is not part of the result.
inline constexpr std::int32_t makePinyinFinal( const char(&c)[2] )
{
    return c[0];
}

inline constexpr std::int32_t makePinyinFinal( const char(&c)[3] )
{
    return c[0] | ( c[1] << 8 );
}

inline constexpr std::int32_t makePinyinFinal( const char(&c)[4] )
{
    return c[0] | ( c[1] << 8 ) | ( c[2] << 16 );
}

inline constexpr std::int32_t makePinyinFinal( const char(&c)[5] )
{
    return c[0] | ( c[1] << 8 ) | ( c[2] << 16 ) | ( c[3] << 24 );
}
// The final (ending) of a pinyin syllable.
// Each enumerator value is the spelling of the final packed into an integer
// by makePinyinFinal() (first letter in the lowest byte). pinyinFinal()
// builds the same packed value from the preedit text at runtime and compares
// it against these enumerators, so the two encodings have to stay in sync.
enum PinyinFinal : std::int32_t
{
// the text after the initial is not a known final
Final_invalid = -1,
// nothing has been typed after the initial yet
Final_empty = 0,
Final_a = makePinyinFinal("a"),
Final_ai = makePinyinFinal("ai"),
Final_an = makePinyinFinal("an"),
Final_ang = makePinyinFinal("ang"),
Final_ao = makePinyinFinal("ao"),
Final_e = makePinyinFinal("e"),
Final_ei = makePinyinFinal("ei"),
Final_en = makePinyinFinal("en"),
Final_eng = makePinyinFinal("eng"),
Final_er = makePinyinFinal("er"),
Final_ia = makePinyinFinal("ia"),
Final_ian = makePinyinFinal("ian"),
Final_iang = makePinyinFinal("iang"),
Final_iao = makePinyinFinal("iao"),
Final_i = makePinyinFinal("i"),
Final_ie = makePinyinFinal("ie"),
Final_in = makePinyinFinal("in"),
Final_ing = makePinyinFinal("ing"),
Final_io = makePinyinFinal("io"),
Final_ion = makePinyinFinal("ion"), // intermediate to iong
Final_iong = makePinyinFinal("iong"),
Final_iu = makePinyinFinal("iu"),
Final_o = makePinyinFinal("o"),
Final_on = makePinyinFinal("on"), // intermediate to ong
Final_ong = makePinyinFinal("ong"),
Final_ou = makePinyinFinal("ou"),
Final_ua = makePinyinFinal("ua"),
Final_uai = makePinyinFinal("uai"),
Final_uan = makePinyinFinal("uan"),
Final_uang = makePinyinFinal("uang"),
Final_u = makePinyinFinal("u"),
Final_ue = makePinyinFinal("ue"),
Final_ui = makePinyinFinal("ui"),
Final_un = makePinyinFinal("un"),
Final_uo = makePinyinFinal("uo"),
// "v" is the ASCII stand-in for u-umlaut
Final_v = makePinyinFinal("v"),
Final_ve = makePinyinFinal("ve"),
};
// character codes are split into separate files based on the Initial segment
#include "zh_none.cpp"
#include "zh_b.cpp"
#include "zh_c.cpp"
#include "zh_ch.cpp"
#include "zh_d.cpp"
#include "zh_f.cpp"
#include "zh_g.cpp"
#include "zh_h.cpp"
#include "zh_j.cpp"
#include "zh_k.cpp"
#include "zh_l.cpp"
#include "zh_m.cpp"
#include "zh_n.cpp"
#include "zh_p.cpp"
#include "zh_q.cpp"
#include "zh_r.cpp"
#include "zh_s.cpp"
#include "zh_sh.cpp"
#include "zh_t.cpp"
#include "zh_w.cpp"
#include "zh_x.cpp"
#include "zh_y.cpp"
#include "zh_z.cpp"
#include "zh_zh.cpp"
using namespace zh;
// Determines the initial of the pinyin syllable typed so far.
//
// Returns Initial_invalid for an empty text or a first character that cannot
// start a syllable, Initial_none when the text starts with one of the vowels
// a, e, o or u, and otherwise the enumerator of the leading consonant.
// c, s and z become ch, sh and zh when they are directly followed by 'h'.
static PinyinInitial pinyinInitial( const QByteArray& text )
{
    if ( text.length() <= 0 )
        return Initial_invalid;

    const char first = text.at( 0 );
    const bool followedByH = text.length() > 1 && text.at( 1 ) == 'h';

    switch ( first )
    {
        case 'a': case 'e': case 'o': case 'u':
            return Initial_none;

        // consonants that may form a digraph together with 'h'
        case 'c':
            return followedByH ? Initial_ch : Initial_c;
        case 's':
            return followedByH ? Initial_sh : Initial_s;
        case 'z':
            return followedByH ? Initial_zh : Initial_z;

        // every single-letter initial has the character code of its letter
        // as enumerator value, so the character maps onto the enum directly
        case 'b': case 'd': case 'f': case 'g': case 'h': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
        case 'r': case 't': case 'w': case 'x': case 'y':
            return static_cast< PinyinInitial >( first );

        default:
            break;
    }

    return Initial_invalid;
}
// Determines the final (ending) of the pinyin syllable typed so far.
//
// i    - the initial previously detected for text by pinyinInitial()
// text - the complete preedit text, including the initial
//
// The characters following the initial are packed into an integer the same
// way makePinyinFinal() does for the PinyinFinal enumerators (first character
// in the lowest byte) and the result is accepted only when it equals one of
// the known enumerators.
//
// Returns Final_empty when nothing follows the initial and Final_invalid when
// the initial is invalid, the remainder is longer than four characters or the
// remainder does not spell a known final.
static PinyinFinal pinyinFinal( PinyinInitial i, const QByteArray& text )
{
    // without a valid initial it is unknown where the final would start
    if ( i == Initial_invalid )
        return Final_invalid;

    // Skip the initial: nothing for Initial_none, two characters for the
    // digraphs ch/sh/zh (the only values above Initial_hMask), one otherwise.
    const int offset = i == Initial_none ? 0 : ( i > Initial_hMask ? 2 : 1 );

    const int count = text.length() - offset;
    if ( count < 0 || count > 4 )
        return Final_invalid; // the ending is too long (or the text too short)

    // rebuild the string to match the enum
    const char* data = text.constData() + offset;

    std::uint32_t hash = 0;
    for ( int k = 0; k < count; ++k )
    {
        // Go through unsigned char: a byte >= 0x80 must neither be sign
        // extended into the upper bytes of the hash nor be shifted as a
        // negative int.
        const std::uint32_t byte = static_cast< unsigned char >( data[k] );
        hash |= byte << ( 8 * k );
    }

    // verify hash
    const PinyinFinal candidate =
        static_cast< PinyinFinal >( static_cast< std::int32_t >( hash ) );

    switch ( candidate )
    {
        case Final_invalid:
            // Reachable with arbitrary input (four 0xFF bytes pack to -1),
            // so this has to be a regular "invalid" result.
            break;

        case Final_empty:
        case Final_a:
        case Final_ai:
        case Final_an:
        case Final_ang:
        case Final_ao:
        case Final_e:
        case Final_ei:
        case Final_en:
        case Final_eng:
        case Final_er:
        case Final_ia:
        case Final_ian:
        case Final_iang:
        case Final_iao:
        case Final_i:
        case Final_ie:
        case Final_in:
        case Final_ing:
        case Final_io:
        case Final_ion:
        case Final_iong:
        case Final_iu:
        case Final_o:
        case Final_on:
        case Final_ong:
        case Final_ou:
        case Final_ua:
        case Final_uai:
        case Final_uan:
        case Final_uang:
        case Final_u:
        case Final_ue:
        case Final_ui:
        case Final_un:
        case Final_uo:
        case Final_v:
        case Final_ve:
            return candidate;
    }

    // all others are invalid
    return Final_invalid;
}
// Builds the bit mask of tones for which the given sections provide data.
// Sections 1 to 5 are tested for a non-null data member and contribute
// FlatTone, RisingTone, LowTone, FallingTone and NeutralTone respectively;
// a section without data contributes NoTone. Section 0 is not looked at here.
// Kept as a single return expression so that it stays usable as constexpr.
constexpr static std::uint8_t tones( const PinyinCandidateSections& candidates )
{
return ( candidates[1].data ? FlatTone : NoTone )
| ( candidates[2].data ? RisingTone : NoTone )
| ( candidates[3].data ? LowTone : NoTone )
| ( candidates[4].data ? FallingTone : NoTone )
| ( candidates[5].data ? NeutralTone : NoTone );
}
// Six sections that all carry no data. Returned (wrapped in PinyinCandidates)
// whenever there is nothing to offer: for intermediate input as well as for
// invalid input.
constexpr static const PinyinCandidateSections emptySection =
{ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } };
// Looks up the candidates for a final inside the group of one initial.
//
// The result consists of the candidate sections, the vowel of the final that
// carries the tone mark and the mask of available tones (see tones()).
//
// Rules implemented by the table below:
// - A final that may still grow into several longer finals (a, e, i, ia, o,
//   u, ua) and has no candidates of its own yields an empty result with the
//   vowel 0, signaling an intermediate editing phase.
// - A final with exactly one possible continuation (e.g. in -> ing) and no
//   candidates of its own yields the candidates of that continuation.
// - The tone mark is placed by the order a, o, e, i, u, v (umlaut u). The
//   only exception is "iu", where the mark is placed on the u instead.
// - Final_invalid, and any value that is not an enumerator, yields an empty
//   result with the vowel -1.
static PinyinCandidates candidatesForFinal(
    const CandidateGroup& group, PinyinFinal final )
{
    static const PinyinCandidates intermediate = { emptySection, 0, 0 };

    switch ( final )
    {
        case Final_invalid:
            return { emptySection, -1, 0 };

        case Final_empty:
            // only the initial has been typed so far
            return intermediate;

        // ---- finals starting with a ----
        case Final_a:
            if ( group.a[0].data )
                return { group.a, 'a', tones( group.a ) };
            return intermediate;

        case Final_ai:
            return { group.ai, 'a', tones( group.ai ) };

        case Final_an:
            if ( group.an[0].data )
                return { group.an, 'a', tones( group.an ) };
            return { group.ang, 'a', tones( group.ang ) }; // an -> ang

        case Final_ang:
            return { group.ang, 'a', tones( group.ang ) };

        case Final_ao:
            return { group.ao, 'a', tones( group.ao ) };

        // ---- finals starting with e ----
        case Final_e:
            if ( group.e[0].data )
                return { group.e, 'e', tones( group.e ) };
            return intermediate;

        case Final_ei:
            return { group.ei, 'e', tones( group.ei ) };

        case Final_en:
            if ( group.en[0].data )
                return { group.en, 'e', tones( group.en ) };
            return { group.eng, 'e', tones( group.eng ) }; // en -> eng

        case Final_eng:
            return { group.eng, 'e', tones( group.eng ) };

        case Final_er:
            return { group.er, 'e', tones( group.er ) };

        // ---- finals starting with i ----
        case Final_ia:
            if ( group.ia[0].data )
                return { group.ia, 'a', tones( group.ia ) };
            return intermediate;

        case Final_ian:
            if ( group.ian[0].data )
                return { group.ian, 'a', tones( group.ian ) };
            return { group.iang, 'a', tones( group.iang ) }; // ian -> iang

        case Final_iang:
            return { group.iang, 'a', tones( group.iang ) };

        case Final_iao:
            return { group.iao, 'a', tones( group.iao ) };

        case Final_i:
            if ( group.i[0].data )
                return { group.i, 'i', tones( group.i ) };
            return intermediate;

        case Final_ie:
            return { group.ie, 'e', tones( group.ie ) };

        case Final_in:
            if ( group.in[0].data )
                return { group.in, 'i', tones( group.in ) };
            return { group.ing, 'i', tones( group.ing ) }; // in -> ing

        case Final_ing:
            return { group.ing, 'i', tones( group.ing ) };

        case Final_io:
            if ( group.io[0].data )
                return { group.io, 'o', tones( group.io ) };
            return { group.iong, 'o', tones( group.iong ) }; // io -> iong

        case Final_ion: // intermediate to iong
        case Final_iong:
            return { group.iong, 'o', tones( group.iong ) };

        case Final_iu:
            // the exception: the mark goes onto the u
            return { group.iu, 'u', tones( group.iu ) };

        // ---- finals starting with o ----
        case Final_o:
            if ( group.o[0].data )
                return { group.o, 'o', tones( group.o ) };
            return intermediate;

        case Final_on: // intermediate to ong
        case Final_ong:
            return { group.ong, 'o', tones( group.ong ) };

        case Final_ou:
            return { group.ou, 'o', tones( group.ou ) };

        // ---- finals starting with u ----
        case Final_ua:
            if ( group.ua[0].data )
                return { group.ua, 'a', tones( group.ua ) };
            return intermediate;

        case Final_uai:
            return { group.uai, 'a', tones( group.uai ) };

        case Final_uan:
            if ( group.uan[0].data )
                return { group.uan, 'a', tones( group.uan ) };
            return { group.uang, 'a', tones( group.uang ) }; // uan -> uang

        case Final_uang:
            return { group.uang, 'a', tones( group.uang ) };

        case Final_u:
            if ( group.u[0].data )
                return { group.u, 'u', tones( group.u ) };
            return intermediate;

        case Final_ue:
            return { group.ue, 'e', tones( group.ue ) };

        case Final_ui:
            return { group.ui, 'i', tones( group.ui ) };

        case Final_un:
            return { group.un, 'u', tones( group.un ) };

        case Final_uo:
            return { group.uo, 'o', tones( group.uo ) };

        // ---- finals starting with v (umlaut u) ----
        case Final_v:
            if ( group.v[0].data )
                return { group.v, 'v', tones( group.v ) };
            return { group.ve, 'e', tones( group.ve ) }; // v -> ve

        case Final_ve:
            return { group.ve, 'e', tones( group.ve ) };
    }

    // a value that is not one of the enumerators
    return { emptySection, -1, 0 };
}
// Returns the candidates for a pinyin syllable given as preedit text.
//
// The text is split into its initial and its final; the initial selects the
// candidate group and the final selects the sections inside that group (see
// candidatesForFinal()). A text without a valid initial yields empty sections
// with the vowel -1.
PinyinCandidates pinyinCandidates( const QByteArray& text )
{
    const PinyinInitial initial = pinyinInitial( text );
    const PinyinFinal ending = pinyinFinal( initial, text );

    // the final is the same for whichever group gets selected below
    const auto lookup = [ending]( const CandidateGroup& group )
    {
        return candidatesForFinal( group, ending );
    };

    switch ( initial )
    {
        case Initial_none: return lookup( zh::none );
        case Initial_b:    return lookup( zh::b );
        case Initial_c:    return lookup( zh::c );
        case Initial_ch:   return lookup( zh::ch );
        case Initial_d:    return lookup( zh::d );
        case Initial_f:    return lookup( zh::f );
        case Initial_g:    return lookup( zh::g );
        case Initial_h:    return lookup( zh::h );
        case Initial_j:    return lookup( zh::j );
        case Initial_k:    return lookup( zh::k );
        case Initial_l:    return lookup( zh::l );
        case Initial_m:    return lookup( zh::m );
        case Initial_n:    return lookup( zh::n );
        case Initial_p:    return lookup( zh::p );
        case Initial_q:    return lookup( zh::q );
        case Initial_r:    return lookup( zh::r );
        case Initial_s:    return lookup( zh::s );
        case Initial_sh:   return lookup( zh::sh );
        case Initial_t:    return lookup( zh::t );
        case Initial_w:    return lookup( zh::w );
        case Initial_x:    return lookup( zh::x );
        case Initial_y:    return lookup( zh::y );
        case Initial_z:    return lookup( zh::z );
        case Initial_zh:   return lookup( zh::zh );

        // not an initial: nothing to look up
        case Initial_invalid:
        case Initial_hMask:
            break;
    }

    return { emptySection, -1, 0 };
}
// Character codes of the pinyin vowels with their tone marks.
// One row per vowel, in the order a, e, i, o, u, u-umlaut - vowelWithTone()
// addresses the rows by these fixed indices.
// The columns are addressed by the tone index: column 0 holds the unmarked
// vowel, columns 1 to 4 hold the vowel with macron, acute, caron and grave
// accent respectively.
static constexpr const std::uint16_t vowelsWithTones[][5] =
{
{ 0x0061, 0x0101, 0x00E1, 0x01CE, 0x00E0 }, // a
{ 0x0065, 0x0113, 0x00E9, 0x011B, 0x00E8 }, // e
{ 0x0069, 0x012B, 0x00ED, 0x01D0, 0x00EC }, // i
{ 0x006F, 0x014D, 0x00F3, 0x01D2, 0x00F2 }, // o
{ 0x0075, 0x016B, 0x00FA, 0x01D4, 0x00F9 }, // u
{ 0x00FC, 0x01D6, 0x01D8, 0x01DA, 0x01DC } // umlaut u
};
// Returns the key code of a vowel decorated with the mark of a tone.
//
// vowel     - one of a, e, i, o, u or v (v stands for u-umlaut)
// toneIndex - 0 to 4 select a column of vowelsWithTones; 5 is treated like 0
//             and therefore yields the unmarked vowel
//
// For any other vowel or tone index the Latin-1 value of the vowel itself is
// returned unchanged.
Qt::Key vowelWithTone( QChar vowel, int toneIndex )
{
    // row order of vowelsWithTones
    static constexpr char rowVowels[] = { 'a', 'e', 'i', 'o', 'u', 'v' };

    const char latin = vowel.toLatin1();
    const int column = ( toneIndex == 5 ) ? 0 : toneIndex;

    if ( column >= 0 && column <= 4 )
    {
        for ( int row = 0; row < 6; ++row )
        {
            if ( rowVowels[ row ] == latin )
                return Qt::Key( vowelsWithTones[ row ][ column ] );
        }
    }

    return Qt::Key( latin );
}