feat(strings): add formatter for TStringView and TString

This commit is contained in:
Redstone1024 2025-01-15 19:33:13 +08:00
parent 8a834a9c05
commit d8adf47d10
3 changed files with 701 additions and 5 deletions

View File

@ -928,14 +928,21 @@ public:
*Iter++ = LITERAL(FCharType, '\"');
}
const FCharType* Ptr = Object - 1;
const FCharType* Ptr = Object;
bool bComplete = false;
// Write the object, include escaped quotes in the counter.
for (size_t Index = bEscape ? 1 : 0; Index != MaxDynamicField; ++Index)
{
FCharType Char = *++Ptr;
if (*Ptr == LITERAL(FCharType, '\0'))
{
bComplete = true;
if (Char == LITERAL(FCharType, '\0')) break;
break;
}
FCharType Char = *Ptr++;
if (Iter == Sent) UNLIKELY return Iter;
@ -998,7 +1005,7 @@ public:
}
// Write the right quote, if the field width is enough.
if (bEscape && *Ptr == LITERAL(FCharType, '\0'))
if (bEscape && bComplete)
{
if (Iter == Sent) UNLIKELY return Iter;

View File

@ -1442,7 +1442,67 @@ using FU32String = TString<u32char>;
using FUnicodeString = TString<unicodechar>;
template <CCharType T> template <typename Allocator> constexpr TStringView<T>::TStringView(const TString<FElementType, Allocator>& InString)
: TStringView(InString.GetData(), InString.Num()) { }
: TStringView(InString.GetData(), InString.Num())
{ }
/**
* A formatter for TString.
*
* The syntax of format specifications is:
*
* [Fill And Align] [Width] [Precision] [Type] [!] [?]
*
* 1. The fill and align part:
*
* [Fill Character] <Align Option>
*
* i. Fill Character: The character is used to fill width of the object. It is optional and cannot be '{' or '}'.
* It should be representable as a single unicode otherwise it is undefined behavior.
*
* ii. Align Option: The character is used to indicate the direction of alignment.
*
* - '<': Align the formatted argument to the left of the available space
* by inserting n fill characters after the formatted argument.
* This is default option.
* - '^': Align the formatted argument to the center of the available space
* by inserting n fill characters around the formatted argument.
* If exact centering is not possible, the argument is offset to the left.
* - '>': Align the formatted argument to the right of the available space
* by inserting n fill characters before the formatted argument.
*
* 2. The width part:
*
* - 'N': The number is used to specify the minimum field width of the object.
* N should be an unsigned non-zero decimal number.
* - '{N}': Dynamically determine the minimum field width of the object.
* N should be a valid index of the format integral argument.
* N is optional, and the default value is automatic indexing.
*
* 3. The precision part:
*
* - '.N': The number is used to specify the maximum field width of the object.
* N should be an unsigned non-zero decimal number.
* - '.{N}': Dynamically determine the maximum field width of the object.
* N should be a valid index of the format integral argument.
* N is optional, and the default value is automatic indexing.
*
* 4. The type indicator part:
*
* - none: Indicates the as-is formatting.
* - 'S': Indicates the as-is formatting.
* - 's': Indicates lowercase formatting.
*
* 5. The case indicators part:
*
* - '!': Indicates capitalize the entire string.
*
* 6. The escape indicators part:
*
* - '?': Indicates the escape formatting.
*
*/
// Intentionally empty: this specialization adds nothing of its own and publicly inherits
// Parse() and Format() from the TStringView formatter of the same character type.
template <CCharType T, typename Allocator>
class TFormatter<TString<T, Allocator>, T> : public TFormatter<TStringView<T>, T> { };
NAMESPACE_MODULE_END(Utility)
NAMESPACE_MODULE_END(Redcraft)

View File

@ -13,6 +13,7 @@
#include "Iterators/Sentinel.h"
#include "Strings/Char.h"
#include "Strings/Convert.h"
#include "Strings/Formatting.h"
#include "Miscellaneous/AssertionMacros.h"
#include <cstring>
@ -644,6 +645,634 @@ using FUnicodeStringView = TStringView<unicodechar>;
// ReSharper restore CppInconsistentNaming
/**
* A formatter for TStringView.
*
* The syntax of format specifications is:
*
* [Fill And Align] [Width] [Precision] [Type] [!] [?]
*
* 1. The fill and align part:
*
* [Fill Character] <Align Option>
*
* i. Fill Character: The character is used to fill width of the object. It is optional and cannot be '{' or '}'.
* It should be representable as a single unicode otherwise it is undefined behavior.
*
* ii. Align Option: The character is used to indicate the direction of alignment.
*
* - '<': Align the formatted argument to the left of the available space
* by inserting n fill characters after the formatted argument.
* This is default option.
* - '^': Align the formatted argument to the center of the available space
* by inserting n fill characters around the formatted argument.
* If exact centering is not possible, the argument is offset to the left.
* - '>': Align the formatted argument to the right of the available space
* by inserting n fill characters before the formatted argument.
*
* 2. The width part:
*
* - 'N': The number is used to specify the minimum field width of the object.
* N should be an unsigned non-zero decimal number.
* - '{N}': Dynamically determine the minimum field width of the object.
* N should be a valid index of the format integral argument.
* N is optional, and the default value is automatic indexing.
*
* 3. The precision part:
*
* - '.N': The number is used to specify the maximum field width of the object.
* N should be an unsigned non-zero decimal number.
* - '.{N}': Dynamically determine the maximum field width of the object.
* N should be a valid index of the format integral argument.
* N is optional, and the default value is automatic indexing.
*
* 4. The type indicator part:
*
* - none: Indicates the as-is formatting.
* - 'S': Indicates the as-is formatting.
* - 's': Indicates lowercase formatting.
*
* 5. The case indicators part:
*
* - '!': Indicates capitalize the entire string.
*
* 6. The escape indicators part:
*
* - '?': Indicates the escape formatting.
*
*/
template <CCharType T>
class TFormatter<TStringView<T>, T>
{
private:

	using FCharType      = T;
	using FCharTraits    = TChar<FCharType>;
	using FFillCharacter = TStaticArray<FCharType, FCharTraits::MaxCodeUnitLength>;

public:

	/**
	 * Parse the format specification and store the resulting options in this formatter.
	 * All options are reset to their defaults before parsing starts.
	 *
	 * @param Context - The format string context, positioned at the start of the specification.
	 *
	 * @return The iterator pointing at the closing '}' (or the end of the range) on success.
	 *         On an illegal specification a check fires and the current position is returned.
	 */
	template <CFormatStringContext<FCharType> CTX>
	constexpr TRangeIterator<CTX> Parse(CTX& Context)
	{
		auto Iter = Ranges::Begin(Context);
		auto Sent = Ranges::End  (Context);

		// Set the default values.
		{
			FillUnitLength   = 1;
			FillCharacter[0] = LITERAL(FCharType, ' ');
			AlignOption      = LITERAL(FCharType, '<');

			MinFieldWidth =  0;
			MaxFieldWidth = -1;

			bDynamicMin = false;
			bDynamicMax = false;

			bLowercase = false;
			bUppercase = false;
			bEscape    = false;
		}

		// If the format description string is empty.
		if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter;

		// 'Char' always holds the last consumed code unit, 'Iter' points one past it.
		FCharType Char = *Iter; ++Iter;

		// Try to parse the fill and align part.
		// This code assumes that the format string does not contain multi-unit characters, except for fill character.

		// If the fill character is multi-unit.
		if (!FCharTraits::IsValid(Char))
		{
			FillUnitLength   = 1;
			FillCharacter[0] = Char;

			while (true)
			{
				if (Iter == Sent) UNLIKELY
				{
					checkf(false, TEXT("Illegal format string. Missing '}' in format string."));

					return Iter;
				}

				Char = *Iter; ++Iter;

				// If the fill character ends.
				if (FillUnitLength == FCharTraits::MaxCodeUnitLength || FCharTraits::IsValid(Char)) break;

				FillCharacter[FillUnitLength++] = Char;
			}

			// A multi-unit fill character must be followed directly by an align option.
			if (Char != LITERAL(FCharType, '<') && Char != LITERAL(FCharType, '^') && Char != LITERAL(FCharType, '>')) UNLIKELY
			{
				checkf(false, TEXT("Illegal format string. The fill character is not representable as a single unicode."));

				return Iter;
			}

			AlignOption = Char;

			if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter;

			Char = *Iter; ++Iter;
		}

		// If the fill character is single-unit.
		else do
		{
			if (Iter == Sent) break;

			// If the fill character is specified.
			if (*Iter == LITERAL(FCharType, '<') || *Iter == LITERAL(FCharType, '^') || *Iter == LITERAL(FCharType, '>'))
			{
				FillUnitLength   = 1;
				FillCharacter[0] = Char;

				Char = *Iter; ++Iter;
			}

			// If the fill character is not specified and the align option is not specified.
			else if (Char != LITERAL(FCharType, '<') && Char != LITERAL(FCharType, '^') && Char != LITERAL(FCharType, '>')) break;

			AlignOption = Char;

			if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter;

			Char = *Iter; ++Iter;
		}
		while (false);

		// Try to parse the width part.
		{
			if (Char == LITERAL(FCharType, '{'))
			{
				bDynamicMin   = true;
				MinFieldWidth = INDEX_NONE;

				if (Iter == Sent) UNLIKELY
				{
					checkf(false, TEXT("Illegal format string. Missing '}' in format string."));

					return Iter;
				}

				Char = *Iter; ++Iter;
			}

			// A static width cannot start with '0', an argument index can.
			if ((bDynamicMin || Char != LITERAL(FCharType, '0')) && FCharTraits::IsDigit(Char))
			{
				MinFieldWidth = FCharTraits::ToDigit(Char);

				while (true)
				{
					if (Iter == Sent)
					{
						checkf(!bDynamicMin, TEXT("Illegal format string. Missing '}' in format string."));

						return Iter;
					}

					if (!bDynamicMin && *Iter == LITERAL(FCharType, '}')) return Iter;

					Char = *Iter; ++Iter;

					const uint Digit = FCharTraits::ToDigit(Char);

					if (Digit >= 10) break;

					MinFieldWidth = MinFieldWidth * 10 + Digit;
				}
			}

			if (bDynamicMin)
			{
				if (Char != LITERAL(FCharType, '}')) UNLIKELY
				{
					checkf(false, TEXT("Illegal format string. Missing '}' in format string."));

					return Iter;
				}

				// Resolve the argument index; on failure fall through and disable the dynamic width.
				do
				{
					// Try to automatic indexing.
					if (MinFieldWidth == INDEX_NONE)
					{
						MinFieldWidth = Context.GetNextIndex();

						if (MinFieldWidth == INDEX_NONE) UNLIKELY
						{
							checkf(false, TEXT("Illegal index. Please check the field width."));
						}
						else break;
					}

					// Try to manual indexing.
					else if (!Context.CheckIndex(MinFieldWidth)) UNLIKELY
					{
						checkf(false, TEXT("Illegal index. Please check the field width."));
					}

					else break;

					bDynamicMin   = false;
					MinFieldWidth = 0;
				}
				while (false);

				if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter;

				Char = *Iter; ++Iter;
			}
		}

		// Try to parse the precision part.
		if (Char == LITERAL(FCharType, '.'))
		{
			if (Iter == Sent) UNLIKELY
			{
				checkf(false, TEXT("Illegal format string. Missing precision in format string."));

				return Iter;
			}

			Char = *Iter; ++Iter;

			if (Char == LITERAL(FCharType, '{'))
			{
				bDynamicMax   = true;
				MaxFieldWidth = INDEX_NONE;

				if (Iter == Sent) UNLIKELY
				{
					checkf(false, TEXT("Illegal format string. Missing '}' in format string."));

					return Iter;
				}

				Char = *Iter; ++Iter;
			}

			// A static precision cannot start with '0', an argument index can.
			if ((bDynamicMax || Char != LITERAL(FCharType, '0')) && FCharTraits::IsDigit(Char))
			{
				MaxFieldWidth = FCharTraits::ToDigit(Char);

				while (true)
				{
					if (Iter == Sent)
					{
						checkf(!bDynamicMax, TEXT("Illegal format string. Missing '}' in format string."));

						return Iter;
					}

					if (!bDynamicMax && *Iter == LITERAL(FCharType, '}')) return Iter;

					Char = *Iter; ++Iter;

					const uint Digit = FCharTraits::ToDigit(Char);

					if (Digit >= 10) break;

					MaxFieldWidth = MaxFieldWidth * 10 + Digit;
				}
			}

			else if (!bDynamicMax)
			{
				checkf(false, TEXT("Illegal format string. Missing precision in format string."));

				return Iter;
			}

			if (bDynamicMax)
			{
				if (Char != LITERAL(FCharType, '}')) UNLIKELY
				{
					checkf(false, TEXT("Illegal format string. Missing '}' in format string."));

					return Iter;
				}

				// Resolve the argument index; on failure fall through and disable the dynamic precision.
				do
				{
					// Try to automatic indexing.
					if (MaxFieldWidth == INDEX_NONE)
					{
						MaxFieldWidth = Context.GetNextIndex();

						if (MaxFieldWidth == INDEX_NONE) UNLIKELY
						{
							checkf(false, TEXT("Illegal index. Please check the precision."));
						}
						else break;
					}

					// Try to manual indexing.
					else if (!Context.CheckIndex(MaxFieldWidth)) UNLIKELY
					{
						checkf(false, TEXT("Illegal index. Please check the precision."));
					}

					else break;

					bDynamicMax   = false;
					MaxFieldWidth = -1;
				}
				while (false);

				if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter;

				Char = *Iter; ++Iter;
			}
		}

		// Try to parse the type indicators part.

		switch (Char)
		{
		case LITERAL(FCharType, 's'): bLowercase = true; break;
		default: { }
		}

		// Consume the type indicator, if there was one.
		switch (Char)
		{
		case LITERAL(FCharType, 'S'):
		case LITERAL(FCharType, 's'): if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter; Char = *Iter; ++Iter; break;
		default: { }
		}

		// Try to parse the case indicators part.
		if (Char == LITERAL(FCharType, '!'))
		{
			bUppercase = true;

			if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter;

			Char = *Iter; ++Iter;
		}

		// Try to parse the escape indicators part.
		if (Char == LITERAL(FCharType, '?'))
		{
			bEscape = true;

			if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter;

			Char = *Iter; ++Iter;
		}

		// Every successful path has returned above; anything left over is an unrecognized character.
		checkf(false, TEXT("Illegal format string. Missing '}' in format string."));

		return Iter;
	}

	/**
	 * Write the string view to the output according to the options stored by Parse().
	 *
	 * The output sentinel is checked before every single code unit that is written,
	 * including the second unit of an escape sequence, so the output range is never overrun.
	 *
	 * @param Object  - The string view to format.
	 * @param Context - The format object context that provides the output range and the dynamic arguments.
	 *
	 * @return The iterator past the last written code unit, which equals the end of the
	 *         output range if the output was cut short because the range was exhausted.
	 */
	template <CFormatObjectContext<FCharType> CTX>
	constexpr TRangeIterator<CTX> Format(TStringView<FCharType> Object, CTX& Context) const
	{
		auto Iter = Ranges::Begin(Context);
		auto Sent = Ranges::End  (Context);

		size_t MinDynamicField = MinFieldWidth;
		size_t MaxDynamicField = MaxFieldWidth;

		// Visit the dynamic width argument.
		if (bDynamicMin)
		{
			MinDynamicField = Context.Visit([]<typename U>(U&& Value) -> size_t
				{
					using FDecayU = TRemoveCVRef<U>;

					if constexpr (CIntegral<FDecayU> && !CSameAs<FDecayU, bool>)
					{
						checkf(Value > 0, TEXT("Illegal format argument. The dynamic width argument must be a unsigned non-zero number."));

						return Math::Max(Value, 1);
					}
					else
					{
						checkf(false, TEXT("Illegal format argument. The dynamic width argument must be an integral."));

						return 0;
					}
				}
			, MinFieldWidth);
		}

		// Visit the dynamic precision argument.
		if (bDynamicMax)
		{
			MaxDynamicField = Context.Visit([]<typename U>(U&& Value) -> size_t
				{
					using FDecayU = TRemoveCVRef<U>;

					if constexpr (CIntegral<FDecayU> && !CSameAs<FDecayU, bool>)
					{
						checkf(Value > 0, TEXT("Illegal format argument. The dynamic precision argument must be a unsigned non-zero number."));

						return Math::Max(Value, 1);
					}
					else
					{
						checkf(false, TEXT("Illegal format argument. The dynamic precision argument must be an integral."));

						return 0;
					}
				}
			, MaxFieldWidth);
		}

		size_t LeftPadding  = 0;
		size_t RightPadding = 0;

		// Estimate the field width.
		if (MinDynamicField != 0)
		{
			// If escape formatting is enabled, add quotes characters.
			size_t FieldWidth = bEscape ? 2 : 0;

			for (auto ObjectIter = Object.Begin(); ObjectIter != Object.End(); ++ObjectIter)
			{
				if (bEscape)
				{
					switch (const FCharType Char = *ObjectIter)
					{
					case LITERAL(FCharType, '\"'):
					case LITERAL(FCharType, '\\'):
					case LITERAL(FCharType, '\a'):
					case LITERAL(FCharType, '\b'):
					case LITERAL(FCharType, '\f'):
					case LITERAL(FCharType, '\n'):
					case LITERAL(FCharType, '\r'):
					case LITERAL(FCharType, '\t'):
					case LITERAL(FCharType, '\v'): FieldWidth += 2; break;
					default:
						{
							// Use '\x00' format for other non-printable characters.
							if (!FCharTraits::IsASCII(Char) || !FCharTraits::IsPrint(Char))
							{
								FieldWidth += 2 + sizeof(FCharType) * 2;
							}

							else ++FieldWidth;
						}
					}
				}

				else ++FieldWidth;
			}

			// The written width is clamped to both limits, so the padding can never underflow.
			const size_t PaddingWidth = MinDynamicField - Math::Min(FieldWidth, MinDynamicField, MaxDynamicField);

			switch (AlignOption)
			{
			default:
			case LITERAL(FCharType, '<'): RightPadding = PaddingWidth; break;
			case LITERAL(FCharType, '>'): LeftPadding  = PaddingWidth; break;
			case LITERAL(FCharType, '^'):
				LeftPadding  = Math::DivAndFloor(PaddingWidth, 2);
				RightPadding = PaddingWidth - LeftPadding;
			}
		}

		// Write the left padding.
		for (size_t Index = 0; Index != LeftPadding; ++Index)
		{
			for (size_t Jndex = 0; Jndex != FillUnitLength; ++Jndex)
			{
				if (Iter == Sent) UNLIKELY return Iter;

				*Iter++ = FillCharacter[Jndex];
			}
		}

		// Write the left quote.
		if (bEscape)
		{
			if (Iter == Sent) UNLIKELY return Iter;

			*Iter++ = LITERAL(FCharType, '\"');
		}

		auto ObjectIter = Object.Begin();

		// Set once the whole object has been consumed before the precision limit was reached.
		bool bComplete = false;

		// Write the object, include escaped quotes in the counter.
		// The counter advances by one per source character, whatever its escaped length.
		for (size_t Index = bEscape ? 1 : 0; Index != MaxDynamicField; ++Index)
		{
			if (ObjectIter == Object.End())
			{
				bComplete = true;

				break;
			}

			FCharType Char = *ObjectIter++;

			if (Iter == Sent) UNLIKELY return Iter;

			// Convert the character case.
			if (bLowercase) Char = FCharTraits::ToLower(Char);
			if (bUppercase) Char = FCharTraits::ToUpper(Char);

			if (bEscape)
			{
				// The character that follows the backslash in a two-character escape sequence,
				// or '\0' if the character has no such sequence. No sequence maps to '\0' itself.
				FCharType EscapeCode = LITERAL(FCharType, '\0');

				switch (Char)
				{
				case LITERAL(FCharType, '\"'): EscapeCode = LITERAL(FCharType, '\"'); break;
				case LITERAL(FCharType, '\\'): EscapeCode = LITERAL(FCharType, '\\'); break;
				case LITERAL(FCharType, '\a'): EscapeCode = LITERAL(FCharType, 'a');  break;
				case LITERAL(FCharType, '\b'): EscapeCode = LITERAL(FCharType, 'b');  break;
				case LITERAL(FCharType, '\f'): EscapeCode = LITERAL(FCharType, 'f');  break;
				case LITERAL(FCharType, '\n'): EscapeCode = LITERAL(FCharType, 'n');  break;
				case LITERAL(FCharType, '\r'): EscapeCode = LITERAL(FCharType, 'r');  break;
				case LITERAL(FCharType, '\t'): EscapeCode = LITERAL(FCharType, 't');  break;
				case LITERAL(FCharType, '\v'): EscapeCode = LITERAL(FCharType, 'v');  break;
				default: { }
				}

				if (EscapeCode != LITERAL(FCharType, '\0'))
				{
					*Iter++ = LITERAL(FCharType, '\\');

					// The output may be exhausted between the two units of the sequence.
					if (Iter == Sent) UNLIKELY return Iter;

					*Iter++ = EscapeCode;
				}

				// Use '\x00' format for other non-printable characters.
				else if (!FCharTraits::IsASCII(Char) || !FCharTraits::IsPrint(Char))
				{
					*Iter++ = LITERAL(FCharType, '\\');

					// The output may be exhausted between the two units of the prefix.
					if (Iter == Sent) UNLIKELY return Iter;

					*Iter++ = LITERAL(FCharType, 'x' );

					using FUnsignedT = TMakeUnsigned<FCharType>;

					// Two hexadecimal digits per byte of the character type.
					constexpr size_t DigitNum = sizeof(FCharType) * 2;

					FUnsignedT IntValue = static_cast<FUnsignedT>(Char);

					TStaticArray<FCharType, DigitNum> Buffer;

					// Fill the buffer from the least significant digit backwards.
					for (size_t Jndex = 0; Jndex != DigitNum; ++Jndex)
					{
						Buffer[DigitNum - Jndex - 1] = FCharTraits::FromDigit(IntValue & 0xF);

						IntValue >>= 4;
					}

					check(IntValue == 0);

					for (size_t Jndex = 0; Jndex != DigitNum; ++Jndex)
					{
						if (Iter == Sent) UNLIKELY return Iter;

						*Iter++ = Buffer[Jndex];
					}
				}

				else *Iter++ = Char;
			}

			else *Iter++ = Char;
		}

		// Write the right quote, if the field width is enough.
		if (bEscape && bComplete)
		{
			if (Iter == Sent) UNLIKELY return Iter;

			*Iter++ = LITERAL(FCharType, '\"');
		}

		// Write the right padding.
		for (size_t Index = 0; Index != RightPadding; ++Index)
		{
			for (size_t Jndex = 0; Jndex != FillUnitLength; ++Jndex)
			{
				if (Iter == Sent) UNLIKELY return Iter;

				*Iter++ = FillCharacter[Jndex];
			}
		}

		return Iter;
	}

private:

	// The number of code units used by the fill character, and the code units themselves.
	size_t         FillUnitLength = 1;
	FFillCharacter FillCharacter  = { LITERAL(FCharType, ' ') };

	// One of '<', '^' or '>'.
	FCharType      AlignOption    = LITERAL(FCharType, '<');

	// The minimum and maximum field width, or the index of the format argument
	// that provides it when the matching bDynamic flag is set.
	size_t MinFieldWidth =  0;
	size_t MaxFieldWidth = -1;

	bool bDynamicMin = false;
	bool bDynamicMax = false;

	// Set by the 's' type indicator, the '!' indicator and the '?' indicator respectively.
	bool bLowercase = false;
	bool bUppercase = false;
	bool bEscape    = false;

};
NAMESPACE_MODULE_END(Utility)
NAMESPACE_MODULE_END(Redcraft)
NAMESPACE_REDCRAFT_END