180 lines
5.9 KiB
C++
180 lines
5.9 KiB
C++
//
|
|
// Copyright (c) 2022 Dmitry Arkhipov (grisumbras@yandex.ru)
|
|
//
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
//
|
|
// Official repository: https://github.com/boostorg/json
|
|
//
|
|
|
|
#include <boost/json/detail/utf8.hpp>
|
|
#include <boost/json/string_view.hpp>
|
|
|
|
#include "test_suite.hpp"
|
|
|
|
namespace boost {
|
|
namespace json {
|
|
|
|
class utf8_test
|
|
{
|
|
public:
|
|
void
|
|
testLoadLittleEndian()
|
|
{
|
|
BOOST_TEST(
|
|
detail::load_little_endian<4>("\x01\x02\x03\x04\xFF")
|
|
== 0x04030201);
|
|
|
|
BOOST_TEST(
|
|
detail::load_little_endian<4>("\x12\x34\x56\x78\xFF")
|
|
== 0x78563412);
|
|
|
|
BOOST_TEST(
|
|
detail::load_little_endian<4>("\xFE\xDC\xBA\x98\xFF")
|
|
== 0x98BADCFE);
|
|
|
|
BOOST_TEST(
|
|
detail::load_little_endian<3>("\x12\x45\xFE\xFF")
|
|
== 0x00FE4512);
|
|
|
|
BOOST_TEST(
|
|
detail::load_little_endian<3>("\xE0\xA0\x80\xFF")
|
|
== 0x0080A0E0);
|
|
|
|
BOOST_TEST(
|
|
detail::load_little_endian<2>("\x37\xFC\xFF")
|
|
== 0x0000FC37);
|
|
|
|
BOOST_TEST(detail::load_little_endian<1>("\xF1\xFF") == 0x000000F1);
|
|
}
|
|
|
|
void
|
|
testClassifyUtf8()
|
|
{
|
|
BOOST_TEST((detail::classify_utf8('\x00') & 0xFF) == 0);
|
|
// from code point U+0080 (0xC280 in UTF-8)
|
|
BOOST_TEST((detail::classify_utf8('\xC2') & 0xFF) == 2);
|
|
// from code point U+07FF (0xDFBF in UTF-8)
|
|
BOOST_TEST((detail::classify_utf8('\xDF') & 0xFF) == 2);
|
|
// from code point U+0800 (0xE0A080 in UTF-8)
|
|
BOOST_TEST((detail::classify_utf8('\xE0') & 0xFF) == 3);
|
|
// from code point U+0FFFF (0xEFBFBF in UTF-8)
|
|
BOOST_TEST((detail::classify_utf8('\xEF') & 0xFF) == 3);
|
|
// from code point U+010000 (0xF0908080 in UTF-8)
|
|
BOOST_TEST((detail::classify_utf8('\xF0') & 0xFF) == 4);
|
|
// from code point U+010000 (0xF0908080 in UTF-8)
|
|
BOOST_TEST((detail::classify_utf8('\xF0') & 0xFF) == 4);
|
|
// from code point U+010FFFF (0xF48FBFBF in UTF-8)
|
|
BOOST_TEST((detail::classify_utf8('\xF4') & 0xFF) == 4);
|
|
}
|
|
|
|
void
|
|
testIsValidUtf8()
|
|
{
|
|
auto is_valid_utf8 = [](char const* str) {
|
|
std::uint16_t first = detail::classify_utf8(*str);
|
|
return detail::is_valid_utf8(str, first);
|
|
};
|
|
|
|
BOOST_TEST(is_valid_utf8("\xC2\x80")); // code point U+0080
|
|
BOOST_TEST(is_valid_utf8("\xDF\xBF")); // code point U+07FF
|
|
BOOST_TEST(is_valid_utf8("\xE0\xA0\x80")); // code point U+0800
|
|
BOOST_TEST(is_valid_utf8("\xEF\xBF\xBF")); // from code point U+0FFFF
|
|
BOOST_TEST(is_valid_utf8("\xF0\x90\x80\x80")); // code point U+010000
|
|
BOOST_TEST(is_valid_utf8("\xF4\x8F\xBF\xBF")); // code point U+010FFFF
|
|
|
|
BOOST_TEST(! is_valid_utf8("\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xBF"));
|
|
|
|
BOOST_TEST(! is_valid_utf8("\xDF\x00"));
|
|
BOOST_TEST(! is_valid_utf8("\xDF\x7F"));
|
|
BOOST_TEST(! is_valid_utf8("\xDF\xFF"));
|
|
|
|
BOOST_TEST(! is_valid_utf8("\xE0\x00\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xE1\x7F\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xE4\xFF\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xE8\x80\x00"));
|
|
BOOST_TEST(! is_valid_utf8("\xEC\x80\x7F"));
|
|
BOOST_TEST(! is_valid_utf8("\xEF\x80\xFF"));
|
|
|
|
BOOST_TEST(! is_valid_utf8("\xF0\x00\x80\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xF1\x7F\x80\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xF2\xFF\x80\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xF3\x80\x00\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xF4\x80\x7F\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xF0\x80\xFF\x80"));
|
|
BOOST_TEST(! is_valid_utf8("\xF1\x80\x80\x00"));
|
|
BOOST_TEST(! is_valid_utf8("\xF2\x80\x80\x7F"));
|
|
BOOST_TEST(! is_valid_utf8("\xF3\x80\x80\xFF"));
|
|
}
|
|
|
|
void
|
|
testUtf8Sequence()
|
|
{
|
|
char const* str = "\xE0\xA0\x80\00\00";
|
|
detail::utf8_sequence seq;
|
|
seq.save(str, std::strlen(str));
|
|
BOOST_TEST(seq.complete());
|
|
BOOST_TEST(seq.length() == 3);
|
|
BOOST_TEST(seq.needed() == 0);
|
|
BOOST_TEST(string_view(seq.data(), 3) == str);
|
|
BOOST_TEST(seq.valid());
|
|
|
|
seq.save(str, 1);
|
|
BOOST_TEST(!seq.complete());
|
|
BOOST_TEST(seq.length() == 3);
|
|
BOOST_TEST(seq.needed() == 2);
|
|
BOOST_TEST(string_view(seq.data(), 1) == string_view(str, 1));
|
|
|
|
seq.append(str + 1, 1);
|
|
BOOST_TEST(!seq.complete());
|
|
BOOST_TEST(seq.length() == 3);
|
|
BOOST_TEST(seq.needed() == 1);
|
|
BOOST_TEST(string_view(seq.data(), 2) == string_view(str, 2));
|
|
|
|
seq.append(str + 2, 2);
|
|
BOOST_TEST(seq.complete());
|
|
BOOST_TEST(seq.length() == 3);
|
|
BOOST_TEST(seq.needed() == 0);
|
|
BOOST_TEST(string_view(seq.data(), 3) == str);
|
|
BOOST_TEST(seq.valid());
|
|
|
|
seq.append(str + 3, 1);
|
|
BOOST_TEST(seq.complete());
|
|
BOOST_TEST(seq.length() == 3);
|
|
BOOST_TEST(seq.needed() == 0);
|
|
BOOST_TEST(string_view(seq.data(), 3) == str);
|
|
BOOST_TEST(seq.valid());
|
|
|
|
str = "\xF0\x90\x80\x80";
|
|
seq.save(str, std::strlen(str));
|
|
BOOST_TEST(seq.complete());
|
|
BOOST_TEST(seq.length() == 4);
|
|
BOOST_TEST(seq.needed() == 0);
|
|
BOOST_TEST(string_view(seq.data(), 4) == str);
|
|
BOOST_TEST(seq.valid());
|
|
|
|
str = "\xF0\x90\x80\xC0";
|
|
seq.save(str, std::strlen(str));
|
|
BOOST_TEST(seq.complete());
|
|
BOOST_TEST(seq.length() == 4);
|
|
BOOST_TEST(seq.needed() == 0);
|
|
BOOST_TEST(string_view(seq.data(), 4) == str);
|
|
BOOST_TEST(!seq.valid());
|
|
}
|
|
|
|
void
|
|
run()
|
|
{
|
|
testLoadLittleEndian();
|
|
testClassifyUtf8();
|
|
testIsValidUtf8();
|
|
testUtf8Sequence();
|
|
}
|
|
};
|
|
|
|
TEST_SUITE(utf8_test, "boost.json.utf8");
|
|
|
|
} // namespace json
|
|
} // namespace boost
|