refactor(string): refactor TChar to enhance support for unicode

This commit is contained in:
Redstone1024 2024-10-26 21:15:27 +08:00
parent 66dc21310f
commit 3df9122ef0
2 changed files with 413 additions and 202 deletions

View File

@ -22,10 +22,11 @@ void TestChar()
{
always_check(!CCharType<int>);
always_check(CCharType<char>);
always_check(CCharType<wchar_t>);
always_check(CCharType<char8_t>);
always_check(CCharType<char16_t>);
always_check(CCharType<char32_t>);
always_check(CCharType<wchar>);
always_check(CCharType<u8char>);
always_check(CCharType<u16char>);
always_check(CCharType<u32char>);
always_check(CCharType<unicodechar>);
}
{
@ -71,31 +72,31 @@ void TestChar()
}
{
// always_check(FU16Char::IsAlnum(U16TEXT('0')));
// always_check(FU16Char::IsAlpha(U16TEXT('A')));
// always_check(FU16Char::IsLower(U16TEXT('a')));
// always_check(FU16Char::IsUpper(U16TEXT('A')));
always_check(FU16Char::IsAlnum(U16TEXT('0')));
always_check(FU16Char::IsAlpha(U16TEXT('A')));
always_check(FU16Char::IsLower(U16TEXT('a')));
always_check(FU16Char::IsUpper(U16TEXT('A')));
always_check(FU16Char::IsDigit(U16TEXT('0')));
always_check(FU16Char::IsCntrl(U16TEXT('\n')));
// always_check(FU16Char::IsGraph(U16TEXT('!')));
always_check(FU16Char::IsGraph(U16TEXT('!')));
always_check(FU16Char::IsSpace(U16TEXT('\t')));
always_check(FU16Char::IsBlank(U16TEXT(' ')));
// always_check(FU16Char::IsPrint(U16TEXT('#')));
// always_check(FU16Char::IsPunct(U16TEXT('[')));
always_check(FU16Char::IsPrint(U16TEXT('#')));
always_check(FU16Char::IsPunct(U16TEXT('[')));
}
{
// always_check(FU32Char::IsAlnum(U32TEXT('0')));
// always_check(FU32Char::IsAlpha(U32TEXT('A')));
// always_check(FU32Char::IsLower(U32TEXT('a')));
// always_check(FU32Char::IsUpper(U32TEXT('A')));
always_check(FU32Char::IsAlnum(U32TEXT('0')));
always_check(FU32Char::IsAlpha(U32TEXT('A')));
always_check(FU32Char::IsLower(U32TEXT('a')));
always_check(FU32Char::IsUpper(U32TEXT('A')));
always_check(FU32Char::IsDigit(U32TEXT('0')));
always_check(FU32Char::IsCntrl(U32TEXT('\n')));
// always_check(FU32Char::IsGraph(U32TEXT('!')));
always_check(FU32Char::IsGraph(U32TEXT('!')));
always_check(FU32Char::IsSpace(U32TEXT('\t')));
always_check(FU32Char::IsBlank(U32TEXT(' ')));
// always_check(FU32Char::IsPrint(U32TEXT('#')));
// always_check(FU32Char::IsPunct(U32TEXT('[')));
always_check(FU32Char::IsPrint(U32TEXT('#')));
always_check(FU32Char::IsPunct(U32TEXT('[')));
}
{
@ -107,7 +108,7 @@ void TestChar()
always_check(!FChar::IsCntrl(TEXT('_')));
always_check(!FChar::IsGraph(TEXT(' ')));
always_check(!FChar::IsSpace(TEXT('=')));
always_check(!FChar::IsBlank(TEXT('\r')));
always_check(!FChar::IsBlank(TEXT('+')));
always_check(!FChar::IsPrint(TEXT('\n')));
always_check(!FChar::IsPunct(TEXT('H')));
}
@ -121,7 +122,7 @@ void TestChar()
always_check(!FWChar::IsCntrl(WTEXT('_')));
always_check(!FWChar::IsGraph(WTEXT(' ')));
always_check(!FWChar::IsSpace(WTEXT('=')));
always_check(!FWChar::IsBlank(WTEXT('\r')));
always_check(!FWChar::IsBlank(WTEXT('+')));
always_check(!FWChar::IsPrint(WTEXT('\n')));
always_check(!FWChar::IsPunct(WTEXT('H')));
}
@ -135,37 +136,37 @@ void TestChar()
always_check(!FU8Char::IsCntrl(U8TEXT('_')));
always_check(!FU8Char::IsGraph(U8TEXT(' ')));
always_check(!FU8Char::IsSpace(U8TEXT('=')));
always_check(!FU8Char::IsBlank(U8TEXT('\r')));
always_check(!FU8Char::IsBlank(U8TEXT('+')));
always_check(!FU8Char::IsPrint(U8TEXT('\n')));
always_check(!FU8Char::IsPunct(U8TEXT('H')));
}
{
// always_check(!FU16Char::IsAlnum(U16TEXT('$')));
// always_check(!FU16Char::IsAlpha(U16TEXT('0')));
// always_check(!FU16Char::IsLower(U16TEXT('A')));
// always_check(!FU16Char::IsUpper(U16TEXT('a')));
always_check(!FU16Char::IsAlnum(U16TEXT('$')));
always_check(!FU16Char::IsAlpha(U16TEXT('0')));
always_check(!FU16Char::IsLower(U16TEXT('A')));
always_check(!FU16Char::IsUpper(U16TEXT('a')));
always_check(!FU16Char::IsDigit(U16TEXT('I')));
always_check(!FU16Char::IsCntrl(U16TEXT('_')));
// always_check(!FU16Char::IsGraph(U16TEXT(' ')));
always_check(!FU16Char::IsGraph(U16TEXT(' ')));
always_check(!FU16Char::IsSpace(U16TEXT('=')));
always_check(!FU16Char::IsBlank(U16TEXT('\r')));
// always_check(!FU16Char::IsPrint(U16TEXT('\n')));
// always_check(!FU16Char::IsPunct(U16TEXT('H')));
always_check(!FU16Char::IsBlank(U16TEXT('+')));
always_check(!FU16Char::IsPrint(U16TEXT('\n')));
always_check(!FU16Char::IsPunct(U16TEXT('H')));
}
{
// always_check(!FU32Char::IsAlnum(U32TEXT('$')));
// always_check(!FU32Char::IsAlpha(U32TEXT('0')));
// always_check(!FU32Char::IsLower(U32TEXT('A')));
// always_check(!FU32Char::IsUpper(U32TEXT('a')));
always_check(!FU32Char::IsAlnum(U32TEXT('$')));
always_check(!FU32Char::IsAlpha(U32TEXT('0')));
always_check(!FU32Char::IsLower(U32TEXT('A')));
always_check(!FU32Char::IsUpper(U32TEXT('a')));
always_check(!FU32Char::IsDigit(U32TEXT('I')));
always_check(!FU32Char::IsCntrl(U32TEXT('_')));
// always_check(!FU32Char::IsGraph(U32TEXT(' ')));
always_check(!FU32Char::IsGraph(U32TEXT(' ')));
always_check(!FU32Char::IsSpace(U32TEXT('=')));
always_check(!FU32Char::IsBlank(U32TEXT('\r')));
// always_check(!FU32Char::IsPrint(U32TEXT('\n')));
// always_check(!FU32Char::IsPunct(U32TEXT('H')));
always_check(!FU32Char::IsBlank(U32TEXT('+')));
always_check(!FU32Char::IsPrint(U32TEXT('\n')));
always_check(!FU32Char::IsPunct(U32TEXT('H')));
}
{
@ -188,10 +189,10 @@ void TestChar()
always_check(FWChar::ToLower(WTEXT('i')) == WTEXT('i'));
always_check(FU8Char::ToLower(U8TEXT('i')) == U8TEXT('i'));
always_check(FU8Char::ToUpper(U8TEXT('l')) == U8TEXT('L'));
// always_check(FU16Char::ToLower(U16TEXT('i')) == U16TEXT('i'));
// always_check(FU16Char::ToUpper(U16TEXT('l')) == U16TEXT('L'));
// always_check(FU32Char::ToLower(U32TEXT('i')) == U32TEXT('i'));
// always_check(FU32Char::ToUpper(U32TEXT('l')) == U32TEXT('L'));
always_check(FU16Char::ToLower(U16TEXT('i')) == U16TEXT('i'));
always_check(FU16Char::ToUpper(U16TEXT('l')) == U16TEXT('L'));
always_check(FU32Char::ToLower(U32TEXT('i')) == U32TEXT('i'));
always_check(FU32Char::ToUpper(U32TEXT('l')) == U32TEXT('L'));
}
{
@ -279,14 +280,14 @@ void TestCString()
Memory::Memzero(StrD);
always_check(TCString<T>::Copy(StrD, &StrD[4], StrA, nullptr) == nullptr);
always_check(TCString<T>::Compare(StrC, EndC , StrD, EndD ) == 0);
always_check(TCString<T>::Compare(StrC, nullptr, StrD, nullptr) == 0);
always_check(TCString<T>::Copy(StrD, nullptr, StrA, &StrA[4]) != nullptr);
always_check(TCString<T>::Length(StrD, nullptr) == 4);
always_check(TCString<T>::Compare(StrA, &StrA[4], StrD, &StrD[4]) == 0);
always_check(TCString<T>::Compare(StrA, nullptr , StrD, nullptr ) > 0);
@ -304,7 +305,7 @@ void TestCString()
always_check(TCString<T>::Compare(StrD, nullptr, LITERAL(T, "Hello "), nullptr) == 0);
Memory::Memzero(StrD);
always_check(TCString<T>::Cat(StrD, nullptr, StrA, nullptr) != nullptr);
always_check(TCString<T>::Cat(StrD, nullptr, LITERAL(T, " "), nullptr) != nullptr);
always_check(TCString<T>::Cat(StrD, nullptr, StrB, nullptr) != nullptr);
@ -323,13 +324,13 @@ void TestCString()
always_check(TCString<T>::Find(StrA, nullptr, [](T A) { return A == LITERAL(T, 'l'); })
!= TCString<T>::Find(StrA, nullptr, [](T A) { return A == LITERAL(T, 'l'); }, ESearchDirection::FromEnd));
always_check(TCString<T>::Find(StrA, EndA, [](T A) { return A == LITERAL(T, 'o'); })
== TCString<T>::Find(StrA, EndA, [](T A) { return A == LITERAL(T, 'o'); }, ESearchDirection::FromEnd));
always_check(TCString<T>::Find(StrA, EndA, [](T A) { return A == LITERAL(T, 'l'); })
!= TCString<T>::Find(StrA, EndA, [](T A) { return A == LITERAL(T, 'l'); }, ESearchDirection::FromEnd));
always_check(TCString<T>::Find(StrA, &StrA[4], [](T A) { return A == LITERAL(T, 'o'); })
== TCString<T>::Find(StrA, &StrA[4], [](T A) { return A == LITERAL(T, 'o'); }, ESearchDirection::FromEnd));
@ -346,13 +347,13 @@ void TestCString()
always_check(TCString<T>::FindChar(StrA, nullptr, LITERAL(T, 'l'))
!= TCString<T>::FindChar(StrA, nullptr, LITERAL(T, 'l'), ESearchDirection::FromEnd));
always_check(TCString<T>::FindChar(StrA, EndA, LITERAL(T, 'o'))
== TCString<T>::FindChar(StrA, EndA, LITERAL(T, 'o'), ESearchDirection::FromEnd));
always_check(TCString<T>::FindChar(StrA, EndA, LITERAL(T, 'l'))
!= TCString<T>::FindChar(StrA, EndA, LITERAL(T, 'l'), ESearchDirection::FromEnd));
always_check(TCString<T>::FindChar(StrA, &StrA[4], LITERAL(T, 'o'))
== TCString<T>::FindChar(StrA, &StrA[4], LITERAL(T, 'o'), ESearchDirection::FromEnd));
@ -370,7 +371,7 @@ void TestCString()
always_check(TCString<T>::FindNotChar(StrA, EndA , LITERAL(T, '\0')) == StrA);
always_check(TCString<T>::FindNotChar(StrA, nullptr , LITERAL(T, 'I')) == StrA);
always_check(TCString<T>::FindNotChar(StrA, &StrA[2], LITERAL(T, 'I')) == StrA);
always_check(TCString<T>::FindNotChar(StrA, nullptr , LITERAL(T, '\0'), ESearchDirection::FromEnd) == StrA + 2);
always_check(TCString<T>::FindNotChar(StrA, EndA , LITERAL(T, '\0'), ESearchDirection::FromEnd) == StrA + 2);
always_check(TCString<T>::FindNotChar(StrA, nullptr , LITERAL(T, 'I'), ESearchDirection::FromEnd) == StrA + 3);
@ -400,10 +401,11 @@ void TestCString()
};
TestTCString(InPlaceType<char>);
TestTCString(InPlaceType<wchar_t>);
TestTCString(InPlaceType<char8_t>);
TestTCString(InPlaceType<char16_t>);
TestTCString(InPlaceType<char32_t>);
TestTCString(InPlaceType<wchar>);
TestTCString(InPlaceType<u8char>);
TestTCString(InPlaceType<u16char>);
TestTCString(InPlaceType<u32char>);
TestTCString(InPlaceType<unicodechar>);
}
NAMESPACE_END(Testing)

View File

@ -1,18 +1,19 @@
#pragma once
#include "CoreTypes.h"
#include "Templates/Optional.h"
#include "TypeTraits/TypeTraits.h"
#include "Miscellaneous/AssertionMacros.h"
#include <cctype>
#include <cwctype>
#include <locale>
#include <climits>
NAMESPACE_REDCRAFT_BEGIN
NAMESPACE_MODULE_BEGIN(Redcraft)
NAMESPACE_MODULE_BEGIN(Utility)
template <typename T>
concept CCharType = CSameAs<T, char> || CSameAs<T, wchar_t> || CSameAs<T, char8_t> || CSameAs<T, char16_t> || CSameAs<T, char32_t>;
concept CCharType = CSameAs<T, char> || CSameAs<T, wchar> || CSameAs<T, u8char> || CSameAs<T, u16char> || CSameAs<T, u32char>;
NAMESPACE_PRIVATE_BEGIN
@ -22,78 +23,161 @@ struct TLiteral;
template <>
struct TLiteral<char>
{
NODISCARD FORCEINLINE static constexpr const char Select(const char X, const wchar_t , const char8_t , const char16_t , const char32_t ) { return X; }
NODISCARD FORCEINLINE static constexpr const char* Select(const char* X, const wchar_t*, const char8_t*, const char16_t*, const char32_t*) { return X; }
NODISCARD FORCEINLINE static constexpr char Select(const char X, const wchar , const u8char , const u16char , const u32char ) { return X; }
NODISCARD FORCEINLINE static constexpr const char* Select(const char* X, const wchar*, const u8char*, const u16char*, const u32char*) { return X; }
};
template <>
struct TLiteral<wchar_t>
struct TLiteral<wchar>
{
NODISCARD FORCEINLINE static constexpr const wchar_t Select(const char , const wchar_t X, const char8_t , const char16_t , const char32_t ) { return X; }
NODISCARD FORCEINLINE static constexpr const wchar_t* Select(const char*, const wchar_t* X, const char8_t*, const char16_t*, const char32_t*) { return X; }
NODISCARD FORCEINLINE static constexpr wchar Select(const char , const wchar X, const u8char , const u16char , const u32char ) { return X; }
NODISCARD FORCEINLINE static constexpr const wchar* Select(const char*, const wchar* X, const u8char*, const u16char*, const u32char*) { return X; }
};
template <>
struct TLiteral<char8_t>
struct TLiteral<u8char>
{
NODISCARD FORCEINLINE static constexpr const char8_t Select(const char , const wchar_t , const char8_t X, const char16_t , const char32_t ) { return X; }
NODISCARD FORCEINLINE static constexpr const char8_t* Select(const char*, const wchar_t*, const char8_t* X, const char16_t*, const char32_t*) { return X; }
NODISCARD FORCEINLINE static constexpr u8char Select(const char , const wchar , const u8char X, const u16char , const u32char ) { return X; }
NODISCARD FORCEINLINE static constexpr const u8char* Select(const char*, const wchar*, const u8char* X, const u16char*, const u32char*) { return X; }
};
template <>
struct TLiteral<char16_t>
struct TLiteral<u16char>
{
NODISCARD FORCEINLINE static constexpr const char16_t Select(const char , const wchar_t , const char8_t , const char16_t X, const char32_t ) { return X; }
NODISCARD FORCEINLINE static constexpr const char16_t* Select(const char*, const wchar_t*, const char8_t*, const char16_t* X, const char32_t*) { return X; }
NODISCARD FORCEINLINE static constexpr u16char Select(const char , const wchar , const u8char , const u16char X, const u32char ) { return X; }
NODISCARD FORCEINLINE static constexpr const u16char* Select(const char*, const wchar*, const u8char*, const u16char* X, const u32char*) { return X; }
};
template <>
struct TLiteral<char32_t>
struct TLiteral<u32char>
{
NODISCARD FORCEINLINE static constexpr const char32_t Select(const char , const wchar_t , const char8_t , const char16_t , const char32_t X) { return X; }
NODISCARD FORCEINLINE static constexpr const char32_t* Select(const char*, const wchar_t*, const char8_t*, const char16_t*, const char32_t* X) { return X; }
NODISCARD FORCEINLINE static constexpr u32char Select(const char , const wchar , const u8char , const u16char , const u32char X) { return X; }
NODISCARD FORCEINLINE static constexpr const u32char* Select(const char*, const wchar*, const u8char*, const u16char*, const u32char* X) { return X; }
};
NAMESPACE_PRIVATE_END
/** Templated literal struct to allow selection of string literals based on the character type provided, and not on compiler switches. */
#define LITERAL(CharType, StringLiteral) NAMESPACE_PRIVATE::TLiteral<CharType>::Select(StringLiteral, WTEXT(StringLiteral), U8TEXT(StringLiteral), U16TEXT(StringLiteral), U32TEXT(StringLiteral))
#define LITERAL(CharType, StringLiteral) NAMESPACE_PRIVATE::TLiteral<CharType>::Select(TEXT(StringLiteral), WTEXT(StringLiteral), U8TEXT(StringLiteral), U16TEXT(StringLiteral), U32TEXT(StringLiteral))
/** Set of utility functions operating on a single character. Implemented based on ISO 30112 "i18n". */
/** Set of utility functions operating on a single character. Implemented based on user-preferred locale and ISO 30112 "i18n". */
template <CCharType T>
struct TChar
{
using CharType = T;
inline static constexpr CharType NONE = CharType(-1);
/** The maximum number of code units required to represent a single character. if unknown, guess 1. */
static constexpr size_t MaxCodeUnitLength =
CSameAs<CharType, char> ? MB_LEN_MAX :
CSameAs<CharType, wchar> ?
PLATFORM_WINDOWS ? 2 :
PLATFORM_LINUX ? 1 : 1 :
CSameAs<CharType, u8char> ? 4 :
CSameAs<CharType, u16char> ? 2 :
CSameAs<CharType, u32char> ? 1 : 1;
/** Whether the character type is fixed-length. */
static constexpr bool bIsFixedLength = MaxCodeUnitLength == 1;
NODISCARD FORCEINLINE static constexpr bool IsValid(CharType InChar)
{
if constexpr (CSameAs<CharType, u8char>)
{
if ((InChar & 0b10000000) == 0b00000000) return true;
return false;
}
else if constexpr (CSameAs<CharType, u16char> || CSameAs<CharType, u32char>)
{
if (InChar >= 0xD800 && InChar <= 0xDBFF) return false;
if (InChar >= 0xDC00 && InChar <= 0xDFFF) return false;
return InChar <= 0x10FFFF;
}
// Windows uses UTF-16 encoding for wchar.
else if constexpr (PLATFORM_WINDOWS && (CSameAs<CharType, wchar>))
{
return TChar::IsValid(static_cast<u16char>(InChar));
}
// Linux uses UTF-32 encoding for wchar.
else if constexpr (PLATFORM_LINUX && (CSameAs<CharType, wchar>))
{
return TChar::IsValid(static_cast<u32char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsNonch(CharType InChar)
{
if constexpr (CSameAs<CharType, u8char>)
{
return false;
}
else if constexpr (CSameAs<CharType, u16char>)
{
if (InChar >= U16TEXT('\uFDD0') && InChar <= U16TEXT('\uFDEF')) return true;
if (InChar == U16TEXT('\uFFFE')) return true;
if (InChar == U16TEXT('\uFFFF')) return true;
return false;
}
else if constexpr (CSameAs<CharType, u32char>)
{
if (InChar >= U32TEXT('\uFDD0') && InChar <= U32TEXT('\uFDEF')) return true;
if ((InChar & 0x0000FFFE) == 0x0000FFFE) return TChar::IsValid(InChar);
return false;
}
// Windows uses UTF-16 encoding for wchar.
else if constexpr (PLATFORM_WINDOWS && (CSameAs<CharType, wchar>))
{
return TChar::IsNonch(static_cast<u16char>(InChar));
}
// Linux uses UTF-32 encoding for wchar.
else if constexpr (PLATFORM_LINUX && (CSameAs<CharType, wchar>))
{
return TChar::IsNonch(static_cast<u32char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsAlnum(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::isalnum(static_cast<unsigned char>(InChar));
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswalnum(InChar);
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isalnum(InChar, Loc);
}
else
{
return IsAlpha(InChar) || IsDigit(InChar);
return TChar::IsAlpha(InChar) || TChar::IsDigit(InChar);
}
}
NODISCARD FORCEINLINE static constexpr bool IsAlpha(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::isalpha(static_cast<unsigned char>(InChar));
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isalpha(InChar, Loc);
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswalpha(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/*
* BASIC LATIN
@ -105,23 +189,30 @@ struct TChar
return false;
}
else
else if constexpr (CSameAs<CharType, u16char> || CSameAs<CharType, u32char>)
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
checkf(InChar <= LITERAL(CharType, '\u007F'), TEXT("TChar::IsAlpha() only supports basic latin block."));
if (InChar > LITERAL(CharType, '\u007F')) return false;
return TChar<u8char>::IsAlpha(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsLower(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::islower(static_cast<unsigned char>(InChar));
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::islower(InChar, Loc);
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswlower(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/*
* BASIC LATIN
@ -131,23 +222,39 @@ struct TChar
return false;
}
else
else if constexpr (CSameAs<CharType, u16char>)
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsLower() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsLower(static_cast<u8char>(InChar));
}
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsLower() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsLower(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsUpper(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::isupper(static_cast<unsigned char>(InChar));
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isupper(InChar, Loc);
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswupper(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/*
* BASIC LATIN
@ -157,12 +264,30 @@ struct TChar
return false;
}
else
else if constexpr (CSameAs<CharType, u16char>)
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsUpper() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsUpper(static_cast<u8char>(InChar));
}
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsUpper() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsUpper(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsDigit(CharType InChar)
{
/* <U0030>..<U0039>; */
@ -182,20 +307,19 @@ struct TChar
NODISCARD FORCEINLINE static constexpr bool IsCntrl(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::iscntrl(static_cast<unsigned char>(InChar));
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::iscntrl(InChar, Loc);
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswcntrl(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/* <U0000>..<U001F>;<U007F>; */
return (InChar >= U8TEXT('\u0000') && InChar <= U8TEXT('\u001F')) || InChar == U8TEXT('\u007F');
}
else if constexpr (CSameAs<CharType, char16_t>)
else if constexpr (CSameAs<CharType, u16char>)
{
/* <U0000>..<U001F>;<U007F>..<U009F>;<U2028>;<U2029>; */
return
@ -203,7 +327,8 @@ struct TChar
(InChar >= U16TEXT('\u007F') && InChar <= U16TEXT('\u009F')) ||
(InChar == U16TEXT('\u2028') || InChar == U16TEXT('\u2029'));
}
else if constexpr (CSameAs<CharType, char32_t>)
else if constexpr (CSameAs<CharType, u32char>)
{
/* <U0000>..<U001F>;<U007F>..<U009F>;<U2028>;<U2029>; */
return
@ -211,23 +336,21 @@ struct TChar
(InChar >= U32TEXT('\u007F') && InChar <= U32TEXT('\u009F')) ||
(InChar == U32TEXT('\u2028') || InChar == U32TEXT('\u2029'));
}
else
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsGraph(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::isgraph(static_cast<unsigned char>(InChar));
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isgraph(InChar, Loc);
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswgraph(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/*
* BASIC LATIN
@ -237,23 +360,39 @@ struct TChar
return false;
}
else
else if constexpr (CSameAs<CharType, u16char>)
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsGraph() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsGraph(static_cast<u8char>(InChar));
}
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsGraph() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsGraph(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsSpace(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::isspace(static_cast<unsigned char>(InChar));
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isspace(InChar, Loc);
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswspace(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/*
* ISO/IEC 6429
@ -269,7 +408,8 @@ struct TChar
return false;
}
else if constexpr (CSameAs<CharType, char16_t>)
else if constexpr (CSameAs<CharType, u16char>)
{
/*
* ISO/IEC 6429
@ -313,7 +453,8 @@ struct TChar
return false;
}
else if constexpr (CSameAs<CharType, char32_t>)
else if constexpr (CSameAs<CharType, u32char>)
{
/*
* ISO/IEC 6429
@ -357,28 +498,27 @@ struct TChar
return false;
}
else
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsBlank(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::isblank(static_cast<unsigned char>(InChar));
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isblank(InChar, Loc);
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswblank(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/* <U0009>;<U0020>; */
return InChar == U8TEXT('\u0009') || InChar == U8TEXT('\u0020');
}
else if constexpr (CSameAs<CharType, char16_t>)
else if constexpr (CSameAs<CharType, u16char>)
{
/* <U0009>;<U0020>;<U1680>;<U180E>;<U2000>..<U2006>;<U2008>..<U200A>;<U205F>;<U3000>; */
return
@ -388,7 +528,8 @@ struct TChar
(InChar == U16TEXT('\u2008') || InChar == U16TEXT('\u200A')) ||
(InChar == U16TEXT('\u205F') || InChar == U16TEXT('\u3000'));
}
else if constexpr (CSameAs<CharType, char32_t>)
else if constexpr (CSameAs<CharType, u32char>)
{
/* <U0009>;<U0020>;<U1680>;<U180E>;<U2000>..<U2006>;<U2008>..<U200A>;<U205F>;<U3000>; */
return
@ -398,23 +539,21 @@ struct TChar
(InChar == U32TEXT('\u2008') || InChar == U32TEXT('\u200A')) ||
(InChar == U32TEXT('\u205F') || InChar == U32TEXT('\u3000'));
}
else
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsPrint(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::isprint(static_cast<unsigned char>(InChar));
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isprint(InChar, Loc);
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswprint(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/*
* BASIC LATIN
@ -424,23 +563,39 @@ struct TChar
return false;
}
else
else if constexpr (CSameAs<CharType, u16char>)
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsPrint() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsPrint(static_cast<u8char>(InChar));
}
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsPrint() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsPrint(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsPunct(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return NAMESPACE_STD::ispunct(static_cast<unsigned char>(InChar));
NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::ispunct(InChar, Loc);
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswpunct(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/*
* BASIC LATIN
@ -454,23 +609,39 @@ struct TChar
return false;
}
else
else if constexpr (CSameAs<CharType, u16char>)
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsPunct() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsPunct(static_cast<u8char>(InChar));
}
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsPunct() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsPunct(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr CharType ToLower(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return static_cast<CharType>(NAMESPACE_STD::tolower(static_cast<unsigned char>(InChar)));
NAMESPACE_STD::locale Loc("");
return static_cast<CharType>(NAMESPACE_STD::tolower(InChar, Loc));
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return static_cast<CharType>(NAMESPACE_STD::towlower(InChar));
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/*
* BASIC LATIN
@ -486,23 +657,39 @@ struct TChar
return InChar;
}
else
else if constexpr (CSameAs<CharType, u16char>)
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::ToLower() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return static_cast<u16char>(TChar<u8char>::ToLower(static_cast<u8char>(InChar)));
}
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::ToLower() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return static_cast<u16char>(TChar<u8char>::ToLower(static_cast<u8char>(InChar)));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return InChar;
}
NODISCARD FORCEINLINE static constexpr CharType ToUpper(CharType InChar)
{
if constexpr (CSameAs<CharType, char>)
if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{
return static_cast<CharType>(NAMESPACE_STD::toupper(static_cast<unsigned char>(InChar)));
NAMESPACE_STD::locale Loc("");
return static_cast<CharType>(NAMESPACE_STD::toupper(InChar, Loc));
}
else if constexpr (CSameAs<CharType, wchar_t>)
{
return static_cast<CharType>(NAMESPACE_STD::towupper(InChar));
}
else if constexpr (CSameAs<CharType, char8_t>)
else if constexpr (CSameAs<CharType, u8char>)
{
/*
* BASIC LATIN
@ -518,13 +705,31 @@ struct TChar
return InChar;
}
else
else if constexpr (CSameAs<CharType, u16char>)
{
static_assert(sizeof(CharType) == -1, "Unsupported character type");
checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::ToUpper() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return static_cast<u16char>(TChar<u8char>::ToUpper(static_cast<u8char>(InChar)));
}
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::ToUpper() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return static_cast<u16char>(TChar<u8char>::ToUpper(static_cast<u8char>(InChar)));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return InChar;
}
NODISCARD FORCEINLINE static constexpr int ToDigit(CharType InChar)
NODISCARD FORCEINLINE static constexpr TOptional<unsigned> ToDigit(CharType InChar)
{
switch (InChar)
{
@ -564,7 +769,7 @@ struct TChar
case LITERAL(CharType, 'x'): return 33;
case LITERAL(CharType, 'y'): return 34;
case LITERAL(CharType, 'z'): return 35;
case LITERAL(CharType, 'A'): return 10;
case LITERAL(CharType, 'A'): return 10;
case LITERAL(CharType, 'B'): return 11;
case LITERAL(CharType, 'C'): return 12;
case LITERAL(CharType, 'D'): return 13;
@ -590,23 +795,27 @@ struct TChar
case LITERAL(CharType, 'X'): return 33;
case LITERAL(CharType, 'Y'): return 34;
case LITERAL(CharType, 'Z'): return 35;
default: return -1;
default: return Invalid;
}
}
NODISCARD FORCEINLINE static constexpr CharType FromDigit(int InDigit)
NODISCARD FORCEINLINE static constexpr TOptional<CharType> FromDigit(int InDigit)
{
if (InDigit < 0 || InDigit >= 36) return NONE;
if (InDigit < 0 || InDigit > 35) return Invalid;
return LITERAL(CharType, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")[InDigit];
}
};
using FChar = TChar<char>;
using FWChar = TChar<wchar_t>;
using FU8Char = TChar<char8_t>;
using FU16Char = TChar<char16_t>;
using FU32Char = TChar<char32_t>;
using FChar = TChar<char>;
using FWChar = TChar<wchar>;
using FU8Char = TChar<u8char>;
using FU16Char = TChar<u16char>;
using FU32Char = TChar<u32char>;
using FUnicodeChar = TChar<unicodechar>;
static_assert(FUnicodeChar::bIsFixedLength);
NAMESPACE_MODULE_END(Utility)
NAMESPACE_MODULE_END(Redcraft)