AimRT/_deps/boost-src/libs/locale/test/test_codepage_converter.cpp
2025-01-12 20:40:48 +08:00

266 lines
7.5 KiB
C++

//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
#include <boost/locale/util.hpp>
#ifdef BOOST_LOCALE_WITH_ICU
# include "../src/boost/locale/icu/codecvt.hpp"
#endif
#include "../src/boost/locale/shared/iconv_codecvt.hpp"
#include <cstring>
#include <iostream>
#include "boostLocale/test/tools.hpp"
#include "boostLocale/test/unit_test.hpp"
constexpr auto illegal = boost::locale::util::base_converter::illegal;
constexpr auto incomplete = boost::locale::util::base_converter::incomplete;
bool test_to(boost::locale::util::base_converter& cvt, const char* s, unsigned codepoint)
{
size_t len = strlen(s);
const char* end = s + len;
return cvt.to_unicode(s, end) == codepoint;
}
bool test_from(boost::locale::util::base_converter& cvt, unsigned codepoint, const char* str)
{
char buf[32] = {0};
unsigned res = cvt.from_unicode(codepoint, buf, buf + sizeof(buf));
if(res == boost::locale::util::base_converter::illegal) {
return str == 0;
} else {
return str != 0 && strlen(str) == res && memcmp(str, buf, res) == 0;
}
}
bool test_incomplete(boost::locale::util::base_converter& cvt, unsigned codepoint, int len)
{
char buf[32] = {0};
unsigned res = cvt.from_unicode(codepoint, buf, buf + len);
return res == boost::locale::util::base_converter::incomplete;
}
#define TEST_TO(str, codepoint) TEST(test_to(*cvt, str, codepoint))
#define TEST_FROM(str, codepoint) TEST(test_from(*cvt, codepoint, str))
#define TEST_INC(codepoint, len) TEST(test_incomplete(*cvt, codepoint, len))
void test_shiftjis(std::unique_ptr<boost::locale::util::base_converter>& cvt)
{
std::cout << "- Correct" << std::endl;
TEST_TO("a", 'a');
TEST_TO("X", 'X');
TEST_TO("\xCB", 0xFF8b); // half width katakana Hi ヒ
TEST_TO("\x83\x71", 0x30d2); // Full width katakana Hi ヒ
TEST_TO("\x82\xd0", 0x3072); // Full width hiragana Hi ひ
TEST_FROM("a", 'a');
TEST_FROM("X", 'X');
TEST_FROM("\xCB", 0xFF8b); // half width katakana Hi ヒ
TEST_FROM("\x83\x71", 0x30d2); // Full width katakana Hi ヒ
TEST_FROM("\x82\xd0", 0x3072); // Full width hiragana Hi ひ
std::cout << "- Illegal/incomplete" << std::endl;
TEST_TO("\xa0", illegal);
TEST_TO("\x82", incomplete);
TEST_TO("\x83\xf0", illegal);
TEST_INC(0x30d2, 1); // Full width katakana Hi ヒ
TEST_INC(0x3072, 1); // Full width hiragana Hi ひ
TEST_FROM(0, 0x5e9); // Hebrew ש not in ShiftJIS
}
void test_main(int /*argc*/, char** /*argv*/)
{
using namespace boost::locale::util;
std::cout << "Test UTF-8\n";
std::cout << "- From UTF-8" << std::endl;
std::unique_ptr<base_converter> cvt = create_utf8_converter();
TEST(cvt.get());
TEST(cvt->is_thread_safe());
TEST_EQ(cvt->max_len(), 4);
std::cout << "-- Correct" << std::endl;
TEST_TO("\x7f", 0x7f);
TEST_TO("\xC2\x80", 0x80);
TEST_TO("\xdf\xBF", 0x7FF);
TEST_TO("\xe0\xa0\x80", 0x800);
TEST_TO("\xef\xbf\xbf", 0xFFFF);
TEST_TO("\xf0\x90\x80\x80", 0x10000);
TEST_TO("\xf4\x8f\xbf\xbf", 0x10FFFF);
std::cout << "-- Too big" << std::endl;
TEST_TO("\xf4\x9f\x80\x80", illegal); // 11 0000
TEST_TO("\xfb\xbf\xbf\xbf", illegal); // 3FF FFFF
TEST_TO("\xf8\x90\x80\x80\x80", illegal); // 400 0000
TEST_TO("\xfd\xbf\xbf\xbf\xbf\xbf", illegal); // 7fff ffff
std::cout << "-- Invalid trail" << std::endl;
TEST_TO("\xC2\x7F", illegal);
TEST_TO("\xdf\x7F", illegal);
TEST_TO("\xe0\x7F\x80", illegal);
TEST_TO("\xef\xbf\x7F", illegal);
TEST_TO("\xe0\x7F\x80", illegal);
TEST_TO("\xef\xbf\x7F", illegal);
TEST_TO("\xf0\x7F\x80\x80", illegal);
TEST_TO("\xf4\x7f\xbf\xbf", illegal);
TEST_TO("\xf0\x90\x7F\x80", illegal);
TEST_TO("\xf4\x8f\x7F\xbf", illegal);
TEST_TO("\xf0\x90\x80\x7F", illegal);
TEST_TO("\xf4\x8f\xbf\x7F", illegal);
std::cout << "-- Invalid length" << std::endl;
/// Test that this actually works
TEST_TO(make2(0x80), 0x80);
TEST_TO(make2(0x7ff), 0x7ff);
TEST_TO(make3(0x800), 0x800);
TEST_TO(make3(0xffff), 0xffff);
TEST_TO(make4(0x10000), 0x10000);
TEST_TO(make4(0x10ffff), 0x10ffff);
TEST_TO(make4(0x110000), illegal);
TEST_TO(make4(0x1fffff), illegal);
TEST_TO(make2(0), illegal);
TEST_TO(make3(0), illegal);
TEST_TO(make4(0), illegal);
TEST_TO(make2(0x7f), illegal);
TEST_TO(make3(0x7f), illegal);
TEST_TO(make4(0x7f), illegal);
TEST_TO(make3(0x80), illegal);
TEST_TO(make4(0x80), illegal);
TEST_TO(make3(0x7ff), illegal);
TEST_TO(make4(0x7ff), illegal);
TEST_TO(make4(0x8000), illegal);
TEST_TO(make4(0xffff), illegal);
std::cout << "-- Invalid surrogate" << std::endl;
TEST_TO(make3(0xD800), illegal);
TEST_TO(make3(0xDBFF), illegal);
TEST_TO(make3(0xDC00), illegal);
TEST_TO(make3(0xDFFF), illegal);
TEST_TO(make4(0xD800), illegal);
TEST_TO(make4(0xDBFF), illegal);
TEST_TO(make4(0xDC00), illegal);
TEST_TO(make4(0xDFFF), illegal);
std::cout << "-- Incomplete" << std::endl;
TEST_TO("\x80", illegal);
TEST_TO("\xC2", incomplete);
TEST_TO("\xdf", incomplete);
TEST_TO("\xe0", incomplete);
TEST_TO("\xe0\xa0", incomplete);
TEST_TO("\xef\xbf", incomplete);
TEST_TO("\xef", incomplete);
TEST_TO("\xf0\x90\x80", incomplete);
TEST_TO("\xf0\x90", incomplete);
TEST_TO("\xf0", incomplete);
TEST_TO("\xf4\x8f\xbf", incomplete);
TEST_TO("\xf4\x8f", incomplete);
TEST_TO("\xf4", incomplete);
std::cout << "- To UTF-8\n";
std::cout << "-- Test correct" << std::endl;
TEST_FROM("\x7f", 0x7f);
TEST_FROM("\xC2\x80", 0x80);
TEST_FROM("\xdf\xBF", 0x7FF);
TEST_INC(0x7FF, 1);
TEST_FROM("\xe0\xa0\x80", 0x800);
TEST_INC(0x800, 2);
TEST_INC(0x800, 1);
TEST_FROM("\xef\xbf\xbf", 0xFFFF);
TEST_INC(0x10000, 3);
TEST_INC(0x10000, 2);
TEST_INC(0x10000, 1);
TEST_FROM("\xf0\x90\x80\x80", 0x10000);
TEST_FROM("\xf4\x8f\xbf\xbf", 0x10FFFF);
std::cout << "-- Test no surrogate " << std::endl;
TEST_FROM(0, 0xD800);
TEST_FROM(0, 0xDBFF);
TEST_FROM(0, 0xDC00);
TEST_FROM(0, 0xDFFF);
std::cout << "-- Test invalid " << std::endl;
TEST_FROM(0, 0x110000);
TEST_FROM(0, 0x1FFFFF);
std::cout << "Test windows-1255" << std::endl;
cvt = create_simple_converter("windows-1255");
TEST(cvt.get());
TEST(cvt->is_thread_safe());
TEST_EQ(cvt->max_len(), 1);
std::cout << "- From 1255" << std::endl;
TEST_TO("\xa4", 0x20aa);
TEST_TO("\xe0", 0x05d0);
TEST_TO("\xc4", 0x5b4);
TEST_TO("\xfb", illegal);
TEST_TO("\xdd", illegal);
TEST_TO("\xff", illegal);
TEST_TO("\xfe", 0x200f);
std::cout << "- To 1255" << std::endl;
TEST_FROM("\xa4", 0x20aa);
TEST_FROM("\xe0", 0x05d0);
TEST_FROM("\xc4", 0x5b4);
TEST_FROM("\xfe", 0x200f);
TEST_FROM(0, 0xe4);
TEST_FROM(0, 0xd0);
#ifdef BOOST_LOCALE_WITH_ICU
std::cout << "Testing Shift-JIS using ICU/uconv" << std::endl;
cvt = boost::locale::impl_icu::create_uconv_converter("Shift-JIS");
TEST_REQUIRE(cvt);
test_shiftjis(cvt);
#endif
std::cout << "Testing Shift-JIS using POSIX/iconv" << std::endl;
cvt = boost::locale::create_iconv_converter("Shift-JIS");
#ifndef BOOST_LOCALE_WITH_ICONV
TEST(!cvt);
#endif
if(cvt)
test_shiftjis(cvt);
else {
#ifdef BOOST_LOCALE_WITH_ICONV
std::cout << "- Shift-JIS is not supported!" << std::endl;
#endif
}
}
// boostinspect:noascii