Revert "unicode: Don't special case ignorable code points"

This reverts commit 5c26d2f1d3f5e4be3e196526bead29ecb139cf91.

It turns out that we can't do this, because while the old behavior of
ignoring ignorable code points was most definitely wrong, we have
case-folding filesystems with on-disk hash values with that wrong
behavior.

So now you can't look up those names, because they hash to something
different.

Of course, it's also entirely possible that in the meantime people have
created *new* files with the new ("more correct") case folding logic,
and reverting will just make other things break.

The correct solution is to not do case folding in filesystems, but
sadly, people seem to never really understand that.  People still see it
as a feature, not a bug.

Reported-by: Qi Han <hanqi@vivo.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
Cc: Gabriel Krisman Bertazi <krisman@suse.de>
Requested-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Linus Torvalds 2024-12-11 14:11:23 -08:00
parent ec8e2d3889
commit 231825b2e1
2 changed files with 3427 additions and 3346 deletions

View File

@ -2230,6 +2230,75 @@ static void nfdicf_init(void)
file_fail(fold_name); file_fail(fold_name);
} }
static void ignore_init(void)
{
FILE *file;
unsigned int unichar;
unsigned int first;
unsigned int last;
unsigned int *um;
int count;
int ret;
if (verbose > 0)
printf("Parsing %s\n", prop_name);
file = fopen(prop_name, "r");
if (!file)
open_fail(prop_name, errno);
assert(file);
count = 0;
while (fgets(line, LINESIZE, file)) {
ret = sscanf(line, "%X..%X ; %s # ", &first, &last, buf0);
if (ret == 3) {
if (strcmp(buf0, "Default_Ignorable_Code_Point"))
continue;
if (!utf32valid(first) || !utf32valid(last))
line_fail(prop_name, line);
for (unichar = first; unichar <= last; unichar++) {
free(unicode_data[unichar].utf32nfdi);
um = malloc(sizeof(unsigned int));
*um = 0;
unicode_data[unichar].utf32nfdi = um;
free(unicode_data[unichar].utf32nfdicf);
um = malloc(sizeof(unsigned int));
*um = 0;
unicode_data[unichar].utf32nfdicf = um;
count++;
}
if (verbose > 1)
printf(" %X..%X Default_Ignorable_Code_Point\n",
first, last);
continue;
}
ret = sscanf(line, "%X ; %s # ", &unichar, buf0);
if (ret == 2) {
if (strcmp(buf0, "Default_Ignorable_Code_Point"))
continue;
if (!utf32valid(unichar))
line_fail(prop_name, line);
free(unicode_data[unichar].utf32nfdi);
um = malloc(sizeof(unsigned int));
*um = 0;
unicode_data[unichar].utf32nfdi = um;
free(unicode_data[unichar].utf32nfdicf);
um = malloc(sizeof(unsigned int));
*um = 0;
unicode_data[unichar].utf32nfdicf = um;
if (verbose > 1)
printf(" %X Default_Ignorable_Code_Point\n",
unichar);
count++;
continue;
}
}
fclose(file);
if (verbose > 0)
printf("Found %d entries\n", count);
if (count == 0)
file_fail(prop_name);
}
static void corrections_init(void) static void corrections_init(void)
{ {
FILE *file; FILE *file;
@ -3342,6 +3411,7 @@ int main(int argc, char *argv[])
ccc_init(); ccc_init();
nfdi_init(); nfdi_init();
nfdicf_init(); nfdicf_init();
ignore_init();
corrections_init(); corrections_init();
hangul_decompose(); hangul_decompose();
nfdi_decompose(); nfdi_decompose();

File diff suppressed because it is too large Load Diff