From d8adf47d10b2da72ecd541490a4600efb0520c6c Mon Sep 17 00:00:00 2001 From: Redstone1024 <2824517378@qq.com> Date: Wed, 15 Jan 2025 19:33:13 +0800 Subject: [PATCH] feat(strings): add formatter for TStringView and TString --- .../Source/Public/Strings/Formatting.h | 15 +- .../Source/Public/Strings/String.h | 62 +- .../Source/Public/Strings/StringView.h | 629 ++++++++++++++++++ 3 files changed, 701 insertions(+), 5 deletions(-) diff --git a/Redcraft.Utility/Source/Public/Strings/Formatting.h b/Redcraft.Utility/Source/Public/Strings/Formatting.h index 025659c..692eb8c 100644 --- a/Redcraft.Utility/Source/Public/Strings/Formatting.h +++ b/Redcraft.Utility/Source/Public/Strings/Formatting.h @@ -928,14 +928,21 @@ public: *Iter++ = LITERAL(FCharType, '\"'); } - const FCharType* Ptr = Object - 1; + const FCharType* Ptr = Object; + + bool bComplete = false; // Write the object, include escaped quotes in the counter. for (size_t Index = bEscape ? 1 : 0; Index != MaxDynamicField; ++Index) { - FCharType Char = *++Ptr; + if (*Ptr == LITERAL(FCharType, '\0')) + { + bComplete = true; - if (Char == LITERAL(FCharType, '\0')) break; + break; + } + + FCharType Char = *Ptr++; if (Iter == Sent) UNLIKELY return Iter; @@ -998,7 +1005,7 @@ public: } // Write the right quote, if the field width is enough. - if (bEscape && *Ptr == LITERAL(FCharType, '\0')) + if (bEscape && bComplete) { if (Iter == Sent) UNLIKELY return Iter; diff --git a/Redcraft.Utility/Source/Public/Strings/String.h b/Redcraft.Utility/Source/Public/Strings/String.h index 3d8f4c3..73c47c8 100644 --- a/Redcraft.Utility/Source/Public/Strings/String.h +++ b/Redcraft.Utility/Source/Public/Strings/String.h @@ -1442,7 +1442,67 @@ using FU32String = TString; using FUnicodeString = TString; template template constexpr TStringView::TStringView(const TString& InString) - : TStringView(InString.GetData(), InString.Num()) { } + : TStringView(InString.GetData(), InString.Num()) +{ } + +/** + * A formatter for TString. + * + * The syntax of format specifications is: + * + * [Fill And Align] [Width] [Precision] [Type] [!] [?] + * + * 1. The fill and align part: + * + * [Fill Character] + * + * i. Fill Character: The character is used to fill width of the object. It is optional and cannot be '{' or '}'. + * It should be representable as a single unicode otherwise it is undefined behavior. + * + * ii. Align Option: The character is used to indicate the direction of alignment. + * + * - '<': Align the formatted argument to the left of the available space + * by inserting n fill characters after the formatted argument. + * This is default option. + * - '^': Align the formatted argument to the center of the available space + * by inserting n fill characters around the formatted argument. + * If cannot absolute centering, offset to the left. + * - '>': Align the formatted argument ro the right of the available space + * by inserting n fill characters before the formatted argument. + * + * 2. The width part: + * + * - 'N': The number is used to specify the minimum field width of the object. + * N should be an unsigned non-zero decimal number. + * - '{N}': Dynamically determine the minimum field width of the object. + * N should be a valid index of the format integral argument. + * N is optional, and the default value is automatic indexing. + * + * 3. The precision part: + * + * - '.N': The number is used to specify the maximum field width of the object. + * N should be an unsigned non-zero decimal number. + * - '.{N}': Dynamically determine the maximum field width of the object. + * N should be a valid index of the format integral argument. + * N is optional, and the default value is automatic indexing. + * + * 4. The type indicator part: + * + * - none: Indicates the as-is formatting. + * - 'S': Indicates the as-is formatting. + * - 's': Indicates lowercase formatting. + * + * 5. The case indicators part: + * + * - '!': Indicates capitalize the entire string. + * + * 6. The escape indicators part: + * + * - '?': Indicates the escape formatting. + * + */ +template +class TFormatter, T> : public TFormatter, T> { }; NAMESPACE_MODULE_END(Utility) NAMESPACE_MODULE_END(Redcraft) diff --git a/Redcraft.Utility/Source/Public/Strings/StringView.h b/Redcraft.Utility/Source/Public/Strings/StringView.h index 7abc131..d58decc 100644 --- a/Redcraft.Utility/Source/Public/Strings/StringView.h +++ b/Redcraft.Utility/Source/Public/Strings/StringView.h @@ -13,6 +13,7 @@ #include "Iterators/Sentinel.h" #include "Strings/Char.h" #include "Strings/Convert.h" +#include "Strings/Formatting.h" #include "Miscellaneous/AssertionMacros.h" #include @@ -644,6 +645,634 @@ using FUnicodeStringView = TStringView; // ReSharper restore CppInconsistentNaming +/** + * A formatter for TStringView. + * + * The syntax of format specifications is: + * + * [Fill And Align] [Width] [Precision] [Type] [!] [?] + * + * 1. The fill and align part: + * + * [Fill Character] + * + * i. Fill Character: The character is used to fill width of the object. It is optional and cannot be '{' or '}'. + * It should be representable as a single unicode otherwise it is undefined behavior. + * + * ii. Align Option: The character is used to indicate the direction of alignment. + * + * - '<': Align the formatted argument to the left of the available space + * by inserting n fill characters after the formatted argument. + * This is default option. + * - '^': Align the formatted argument to the center of the available space + * by inserting n fill characters around the formatted argument. + * If cannot absolute centering, offset to the left. + * - '>': Align the formatted argument ro the right of the available space + * by inserting n fill characters before the formatted argument. + * + * 2. The width part: + * + * - 'N': The number is used to specify the minimum field width of the object. + * N should be an unsigned non-zero decimal number. + * - '{N}': Dynamically determine the minimum field width of the object. + * N should be a valid index of the format integral argument. + * N is optional, and the default value is automatic indexing. + * + * 3. The precision part: + * + * - '.N': The number is used to specify the maximum field width of the object. + * N should be an unsigned non-zero decimal number. + * - '.{N}': Dynamically determine the maximum field width of the object. + * N should be a valid index of the format integral argument. + * N is optional, and the default value is automatic indexing. + * + * 4. The type indicator part: + * + * - none: Indicates the as-is formatting. + * - 'S': Indicates the as-is formatting. + * - 's': Indicates lowercase formatting. + * + * 5. The case indicators part: + * + * - '!': Indicates capitalize the entire string. + * + * 6. The escape indicators part: + * + * - '?': Indicates the escape formatting. + * + */ +template +class TFormatter, T> +{ +private: + + using FCharType = T; + using FCharTraits = TChar; + using FFillCharacter = TStaticArray; + +public: + + template CTX> + constexpr TRangeIterator Parse(CTX& Context) + { + auto Iter = Ranges::Begin(Context); + auto Sent = Ranges::End (Context); + + // Set the default values. + { + FillUnitLength = 1; + FillCharacter[0] = LITERAL(FCharType, ' '); + AlignOption = LITERAL(FCharType, '<'); + + MinFieldWidth = 0; + MaxFieldWidth = -1; + + bDynamicMin = false; + bDynamicMax = false; + + bLowercase = false; + bUppercase = false; + bEscape = false; + } + + // If the format description string is empty. + if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter; + + FCharType Char = *Iter; ++Iter; + + // Try to parse the fill and align part. + // This code assumes that the format string does not contain multi-unit characters, except for fill character. + + // If the fill character is multi-unit. + if (!FCharTraits::IsValid(Char)) + { + FillUnitLength = 1; + FillCharacter[0] = Char; + + while (true) + { + if (Iter == Sent) UNLIKELY + { + checkf(false, TEXT("Illegal format string. Missing '}' in format string.")); + + return Iter; + } + + Char = *Iter; ++Iter; + + // If the fill character ends. + if (FillUnitLength == FCharTraits::MaxCodeUnitLength || FCharTraits::IsValid(Char)) break; + + FillCharacter[FillUnitLength++] = Char; + } + + if (Char != LITERAL(FCharType, '<') && Char != LITERAL(FCharType, '^') && Char != LITERAL(FCharType, '>')) UNLIKELY + { + checkf(false, TEXT("Illegal format string. The fill character is not representable as a single unicode.")); + + return Iter; + } + + AlignOption = Char; + + if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter; + + Char = *Iter; ++Iter; + } + + // If the fill character is single-unit. + else do + { + if (Iter == Sent) break; + + // If the fill character is specified. + if (*Iter == LITERAL(FCharType, '<') || *Iter == LITERAL(FCharType, '^') || *Iter == LITERAL(FCharType, '>')) + { + FillUnitLength = 1; + FillCharacter[0] = Char; + + Char = *Iter; ++Iter; + } + + // If the fill character is not specified and the align option is not specified. + else if (Char != LITERAL(FCharType, '<') && Char != LITERAL(FCharType, '^') && Char != LITERAL(FCharType, '>')) break; + + AlignOption = Char; + + if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter; + + Char = *Iter; ++Iter; + } + while (false); + + // Try to parse the width part. + { + if (Char == LITERAL(FCharType, '{')) + { + bDynamicMin = true; + MinFieldWidth = INDEX_NONE; + + if (Iter == Sent) UNLIKELY + { + checkf(false, TEXT("Illegal format string. Missing '}' in format string.")); + + return Iter; + } + + Char = *Iter; ++Iter; + } + + if ((bDynamicMin || Char != LITERAL(FCharType, '0')) && FCharTraits::IsDigit(Char)) + { + MinFieldWidth = FCharTraits::ToDigit(Char); + + while (true) + { + if (Iter == Sent) + { + checkf(!bDynamicMin, TEXT("Illegal format string. Missing '}' in format string.")); + + return Iter; + } + + if (!bDynamicMin && *Iter == LITERAL(FCharType, '}')) return Iter; + + Char = *Iter; ++Iter; + + const uint Digit = FCharTraits::ToDigit(Char); + + if (Digit >= 10) break; + + MinFieldWidth = MinFieldWidth * 10 + Digit; + } + } + + if (bDynamicMin) + { + if (Char != LITERAL(FCharType, '}')) UNLIKELY + { + checkf(false, TEXT("Illegal format string. Missing '}' in format string.")); + + return Iter; + } + + do + { + // Try to automatic indexing. + if (MinFieldWidth == INDEX_NONE) + { + MinFieldWidth = Context.GetNextIndex(); + + if (MinFieldWidth == INDEX_NONE) UNLIKELY + { + checkf(false, TEXT("Illegal index. Please check the field width.")); + } + else break; + } + + // Try to manual indexing. + else if (!Context.CheckIndex(MinFieldWidth)) UNLIKELY + { + checkf(false, TEXT("Illegal index. Please check the field width.")); + } + + else break; + + bDynamicMin = false; + MinFieldWidth = 0; + } + while (false); + + if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter; + + Char = *Iter; ++Iter; + } + } + + // Try to parse the precision part. + if (Char == LITERAL(FCharType, '.')) + { + if (Iter == Sent) UNLIKELY + { + checkf(false, TEXT("Illegal format string. Missing precision in format string.")); + + return Iter; + } + + Char = *Iter; ++Iter; + + if (Char == LITERAL(FCharType, '{')) + { + bDynamicMax = true; + MaxFieldWidth = INDEX_NONE; + + if (Iter == Sent) UNLIKELY + { + checkf(false, TEXT("Illegal format string. Missing '}' in format string.")); + + return Iter; + } + + Char = *Iter; ++Iter; + } + + if ((bDynamicMax || Char != LITERAL(FCharType, '0')) && FCharTraits::IsDigit(Char)) + { + MaxFieldWidth = FCharTraits::ToDigit(Char); + + while (true) + { + if (Iter == Sent) + { + checkf(!bDynamicMax, TEXT("Illegal format string. Missing '}' in format string.")); + + return Iter; + } + + if (!bDynamicMax && *Iter == LITERAL(FCharType, '}')) return Iter; + + Char = *Iter; ++Iter; + + const uint Digit = FCharTraits::ToDigit(Char); + + if (Digit >= 10) break; + + MaxFieldWidth = MaxFieldWidth * 10 + Digit; + } + } + + else if (!bDynamicMax) + { + checkf(false, TEXT("Illegal format string. Missing precision in format string.")); + + return Iter; + } + + if (bDynamicMax) + { + if (Char != LITERAL(FCharType, '}')) UNLIKELY + { + checkf(false, TEXT("Illegal format string. Missing '}' in format string.")); + + return Iter; + } + + do + { + // Try to automatic indexing. + if (MaxFieldWidth == INDEX_NONE) + { + MaxFieldWidth = Context.GetNextIndex(); + + if (MaxFieldWidth == INDEX_NONE) UNLIKELY + { + checkf(false, TEXT("Illegal index. Please check the precision.")); + } + else break; + } + + // Try to manual indexing. + else if (!Context.CheckIndex(MaxFieldWidth)) UNLIKELY + { + checkf(false, TEXT("Illegal index. Please check the precision.")); + } + + else break; + + bDynamicMax = false; + MaxFieldWidth = -1; + } + while (false); + + if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter; + + Char = *Iter; ++Iter; + } + } + + // Try to parse the type indicators part. + + switch (Char) + { + case LITERAL(FCharType, 's'): bLowercase = true; break; + default: { } + } + + switch (Char) + { + case LITERAL(FCharType, 'S'): + case LITERAL(FCharType, 's'): if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter; Char = *Iter; ++Iter; break; + default: { } + } + + // Try to parse the case indicators part. + if (Char == LITERAL(FCharType, '!')) + { + bUppercase = true; + + if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter; + + Char = *Iter; ++Iter; + } + + // Try to parse the escape indicators part. + if (Char == LITERAL(FCharType, '?')) + { + bEscape = true; + + if (Iter == Sent || *Iter == LITERAL(FCharType, '}')) return Iter; + + Char = *Iter; ++Iter; + } + + checkf(false, TEXT("Illegal format string. Missing '}' in format string.")); + + return Iter; + } + + template CTX> + constexpr TRangeIterator Format(TStringView Object, CTX& Context) const + { + auto Iter = Ranges::Begin(Context); + auto Sent = Ranges::End (Context); + + size_t MinDynamicField = MinFieldWidth; + size_t MaxDynamicField = MaxFieldWidth; + + // Visit the dynamic width argument. + if (bDynamicMin) + { + MinDynamicField = Context.Visit([](U&& Value) -> size_t + { + using FDecayU = TRemoveCVRef; + + if constexpr (CIntegral && !CSameAs) + { + checkf(Value > 0, TEXT("Illegal format argument. The dynamic width argument must be a unsigned non-zero number.")); + + return Math::Max(Value, 1); + } + else + { + checkf(false, TEXT("Illegal format argument. The dynamic width argument must be an integral.")); + + return 0; + } + } + , MinFieldWidth); + } + + // Visit the dynamic precision argument. + if (bDynamicMax) + { + MaxDynamicField = Context.Visit([](U&& Value) -> size_t + { + using FDecayU = TRemoveCVRef; + + if constexpr (CIntegral && !CSameAs) + { + checkf(Value > 0, TEXT("Illegal format argument. The dynamic precision argument must be a unsigned non-zero number.")); + + return Math::Max(Value, 1); + } + else + { + checkf(false, TEXT("Illegal format argument. The dynamic precision argument must be an integral.")); + + return 0; + } + } + , MaxFieldWidth); + } + + size_t LeftPadding = 0; + size_t RightPadding = 0; + + // Estimate the field width. + if (MinDynamicField != 0) + { + // If escape formatting is enabled, add quotes characters. + size_t FieldWidth = bEscape ? 2 : 0; + + for (auto ObjectIter = Object.Begin(); ObjectIter != Object.End(); ++ObjectIter) + { + if (bEscape) + { + switch (const FCharType Char = *ObjectIter) + { + case LITERAL(FCharType, '\"'): + case LITERAL(FCharType, '\\'): + case LITERAL(FCharType, '\a'): + case LITERAL(FCharType, '\b'): + case LITERAL(FCharType, '\f'): + case LITERAL(FCharType, '\n'): + case LITERAL(FCharType, '\r'): + case LITERAL(FCharType, '\t'): + case LITERAL(FCharType, '\v'): FieldWidth += 2; break; + default: + { + // Use '\x00' format for other non-printable characters. + if (!FCharTraits::IsASCII(Char) || !FCharTraits::IsPrint(Char)) + { + FieldWidth += 2 + sizeof(FCharType) * 2; + } + + else ++FieldWidth; + } + } + } + + else ++FieldWidth; + } + + const size_t PaddingWidth = MinDynamicField - Math::Min(FieldWidth, MinDynamicField, MaxDynamicField); + + switch (AlignOption) + { + default: + case LITERAL(FCharType, '<'): RightPadding = PaddingWidth; break; + case LITERAL(FCharType, '>'): LeftPadding = PaddingWidth; break; + case LITERAL(FCharType, '^'): + LeftPadding = Math::DivAndFloor(PaddingWidth, 2); + RightPadding = PaddingWidth - LeftPadding; + } + } + + // Write the left padding. + for (size_t Index = 0; Index != LeftPadding; ++Index) + { + for (size_t Jndex = 0; Jndex != FillUnitLength; ++Jndex) + { + if (Iter == Sent) UNLIKELY return Iter; + + *Iter++ = FillCharacter[Jndex]; + } + } + + // Write the left quote. + if (bEscape) + { + if (Iter == Sent) UNLIKELY return Iter; + + *Iter++ = LITERAL(FCharType, '\"'); + } + + auto ObjectIter = Object.Begin(); + + bool bComplete = false; + + // Write the object, include escaped quotes in the counter. + for (size_t Index = bEscape ? 1 : 0; Index != MaxDynamicField; ++Index) + { + if (ObjectIter == Object.End()) + { + bComplete = true; + + break; + } + + FCharType Char = *ObjectIter++; + + if (Iter == Sent) UNLIKELY return Iter; + + // Convert the character case. + if (bLowercase) Char = FCharTraits::ToLower(Char); + if (bUppercase) Char = FCharTraits::ToUpper(Char); + + if (bEscape) + { + switch (Char) + { + case LITERAL(FCharType, '\"'): *Iter++ = LITERAL(FCharType, '\\'); *Iter++ = LITERAL(FCharType, '\"'); break; + case LITERAL(FCharType, '\\'): *Iter++ = LITERAL(FCharType, '\\'); *Iter++ = LITERAL(FCharType, '\\'); break; + case LITERAL(FCharType, '\a'): *Iter++ = LITERAL(FCharType, '\\'); *Iter++ = LITERAL(FCharType, 'a'); break; + case LITERAL(FCharType, '\b'): *Iter++ = LITERAL(FCharType, '\\'); *Iter++ = LITERAL(FCharType, 'b'); break; + case LITERAL(FCharType, '\f'): *Iter++ = LITERAL(FCharType, '\\'); *Iter++ = LITERAL(FCharType, 'f'); break; + case LITERAL(FCharType, '\n'): *Iter++ = LITERAL(FCharType, '\\'); *Iter++ = LITERAL(FCharType, 'n'); break; + case LITERAL(FCharType, '\r'): *Iter++ = LITERAL(FCharType, '\\'); *Iter++ = LITERAL(FCharType, 'r'); break; + case LITERAL(FCharType, '\t'): *Iter++ = LITERAL(FCharType, '\\'); *Iter++ = LITERAL(FCharType, 't'); break; + case LITERAL(FCharType, '\v'): *Iter++ = LITERAL(FCharType, '\\'); *Iter++ = LITERAL(FCharType, 'v'); break; + default: + { + // Use '\x00' format for other non-printable characters. + if (!FCharTraits::IsASCII(Char) || !FCharTraits::IsPrint(Char)) + { + *Iter++ = LITERAL(FCharType, '\\'); + *Iter++ = LITERAL(FCharType, 'x' ); + + using FUnsignedT = TMakeUnsigned; + + constexpr size_t DigitNum = sizeof(FCharType) * 2; + + FUnsignedT IntValue = static_cast(Char); + + TStaticArray Buffer; + + for (size_t Jndex = 0; Jndex != DigitNum; ++Jndex) + { + Buffer[DigitNum - Jndex - 1] = FCharTraits::FromDigit(IntValue & 0xF); + + IntValue >>= 4; + } + + check(IntValue == 0); + + for (size_t Jndex = 0; Jndex != DigitNum; ++Jndex) + { + if (Iter == Sent) UNLIKELY return Iter; + + *Iter++ = Buffer[Jndex]; + } + } + + else *Iter++ = Char; + } + } + } + + else *Iter++ = Char; + } + + // Write the right quote, if the field width is enough. + if (bEscape && bComplete) + { + if (Iter == Sent) UNLIKELY return Iter; + + *Iter++ = LITERAL(FCharType, '\"'); + } + + // Write the right padding. + for (size_t Index = 0; Index != RightPadding; ++Index) + { + for (size_t Jndex = 0; Jndex != FillUnitLength; ++Jndex) + { + if (Iter == Sent) UNLIKELY return Iter; + + *Iter++ = FillCharacter[Jndex]; + } + } + + return Iter; + } + +private: + + size_t FillUnitLength = 1; + FFillCharacter FillCharacter = { LITERAL(FCharType, ' ') }; + FCharType AlignOption = LITERAL(FCharType, '<'); + + size_t MinFieldWidth = 0; + size_t MaxFieldWidth = -1; + + bool bDynamicMin = false; + bool bDynamicMax = false; + + bool bLowercase = false; + bool bUppercase = false; + bool bEscape = false; + +}; + NAMESPACE_MODULE_END(Utility) NAMESPACE_MODULE_END(Redcraft) NAMESPACE_REDCRAFT_END