linux-stable/tools/perf/util/string.c
Masami Hiramatsu (Google) 313026f3ce perf string: Add strpbrk_esq() and strdup_esq() for escape and quote
strpbrk_esq() and strdup_esq() are new variants of strpbrk() and
strdup() which handle escaped characters and quoted strings.

- strpbrk_esq() searches specified set of characters but ignores the
  escaped characters and quoted strings.
  e.g. strpbrk_esq("'quote\d' \queue quiz", "qd") returns "quiz".

- strdup_esq() duplicates the string but removes the backslashes and quotes
  that are used for escaping and quotation. The contents of a quoted part
  (including backslashes) are kept as they are.
  e.g. strdup_esq("'quote\d' \queue quiz") returns "quote\d queue quiz".

The (single, double) quotes in the quoted part should be escaped by
backslash. In this case, strdup_esq() removes that backslash.
The same quotes must be paired. If you use double quotation, you need
to use the double quotation to close the quoted part.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Alexander Lobakin <aleksander.lobakin@intel.com>
Cc: Dima Kogan <dima@secretsauce.net>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://lore.kernel.org/r/173099116045.2431889.15772916605719019533.stgit@mhiramat.roam.corp.google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-11-14 16:56:32 -03:00

452 lines
9.5 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include "string2.h"
#include <linux/kernel.h>
#include <linux/string.h>
#include <stdio.h>
#include <stdlib.h>
#include <linux/ctype.h>
/*
 * A long run of '-' characters, written as three adjacent string literals
 * that the compiler concatenates into one string.
 * NOTE(review): presumably used (possibly truncated to a width) for drawing
 * horizontal separators in output -- confirm against the users of this symbol.
 */
const char *graph_dotted_line =
"---------------------------------------------------------------------"
"---------------------------------------------------------------------"
"---------------------------------------------------------------------";
/*
 * A long run of '.' characters, written as three adjacent string literals
 * that the compiler concatenates into one string.
 * NOTE(review): presumably used as padding/filler in formatted output --
 * confirm against the users of this symbol.
 */
const char *dots =
"....................................................................."
"....................................................................."
".....................................................................";
/*
 * perf_atoll()
 * Convert a decimal size string with an optional unit suffix to a number.
 *
 * Accepted input is (\d+) optionally followed by one of
 * b|B|kb|KB|mb|MB|gb|GB|tb|TB (e.g. "256MB").  Both letters of a two-letter
 * suffix must have the same case, so "kB" and "Kb" are rejected.
 *
 * Returns the scaled value, or -1 if @str is malformed.
 */
s64 perf_atoll(const char *str)
{
	unsigned int shift;
	char *rest;
	char unit;
	s64 value;

	if (!isdigit(str[0]))
		return -1;

	value = strtoll(str, &rest, 10);
	unit = *rest++;

	switch (unit) {
	case '\0':
		/* plain number without any unit */
		return value;
	case 'b': case 'B':
		/* bytes: nothing may follow the unit letter */
		return *rest ? -1 : value;
	/* two-letter suffixes */
	case 'k': case 'K':
		shift = 10;
		break;
	case 'm': case 'M':
		shift = 20;
		break;
	case 'g': case 'G':
		shift = 30;
		break;
	case 't': case 'T':
		shift = 40;
		break;
	default:
		return -1;
	}

	/* the second letter must be a 'b' with the same case as the first */
	if (strcmp(rest, islower(unit) ? "b" : "B") != 0)
		return -1;

	return value << shift;
}
/*
 * Character class matching
 *
 * @pat points just past the opening '[' of a class and @c is the character
 * to test.  A leading '!' complements the class.  The first class character
 * is always taken literally (so it may be ']' or '-'); "x-y" denotes a range.
 *
 * For a well-formed class, *@npat is set to the character following the
 * closing ']' and the (possibly complemented) match result is returned.
 * If the class is malformed (no closing ']', or a reversed range is met
 * while scanning), false is returned and *@npat is left untouched.
 */
static bool __match_charclass(const char *pat, char c, const char **npat)
{
	bool complement = false, ret = true;

	if (*pat == '!') {
		complement = true;
		pat++;
	}
	/*
	 * An unterminated class ("[" or "[!" at the very end of the pattern)
	 * has to be rejected here: the unconditional pat++ below would step
	 * over the terminating NUL and the scan would continue in whatever
	 * memory follows the pattern string.
	 */
	if (!*pat)
		goto error;
	if (*pat++ == c)	/* First character is special */
		goto end;

	while (*pat && *pat != ']') {	/* Matching */
		if (*pat == '-' && *(pat + 1) != ']') {	/* Range */
			if (*(pat - 1) <= c && c <= *(pat + 1))
				goto end;
			if (*(pat - 1) > *(pat + 1))
				goto error;
			pat += 2;
		} else if (*pat++ == c)
			goto end;
	}
	if (!*pat)
		goto error;
	ret = false;

end:
	while (*pat && *pat != ']')	/* Searching closing */
		pat++;
	if (!*pat)
		goto error;
	*npat = pat + 1;
	return complement ? !ret : ret;

error:
	return false;
}
/*
 * Glob/lazy pattern matching
 *
 * Matches @str against @pat, where @pat may contain '?', '*', "[...]"
 * character classes and backslash escapes.  With @ignore_space set,
 * whitespace in either string is skipped while comparing; with @case_ins
 * set, ordinary characters are compared case-insensitively.
 */
static bool __match_glob(const char *str, const char *pat, bool ignore_space,
			 bool case_ins)
{
	while (*str && *pat && *pat != '*') {
		bool differ;

		/* Ignore spaces for lazy matching */
		if (ignore_space && isspace(*str)) {
			str++;
			continue;
		}
		if (ignore_space && isspace(*pat)) {
			pat++;
			continue;
		}

		switch (*pat) {
		case '?':	/* Matches any single character */
			str++;
			pat++;
			continue;
		case '[':	/* Character classes/Ranges */
			if (!__match_charclass(pat + 1, *str, &pat))
				return false;
			str++;
			continue;
		case '\\':	/* Escaped char match as normal char */
			pat++;
			break;
		default:
			break;
		}

		if (case_ins)
			differ = tolower(*str) != tolower(*pat);
		else
			differ = *str != *pat;
		if (differ)
			return false;

		str++;
		pat++;
	}

	/* No wild card pending: both strings must be used up */
	if (*pat != '*')
		return !*str && !*pat;

	/* Collapse a run of consecutive wild cards */
	do {
		pat++;
	} while (*pat == '*');

	if (!*pat)	/* Tail wild card matches all */
		return true;

	/* Try the rest of the pattern against every suffix of @str */
	for (; *str; str++) {
		if (__match_glob(str, pat, ignore_space, case_ins))
			return true;
	}

	/* @str is exhausted while pattern characters are still left */
	return false;
}
/**
 * strglobmatch - glob expression pattern matching
 * @str: the target string to match
 * @pat: the pattern string to match
 *
 * Returns true if @str matches @pat.  @pat may include the wildcards '*'
 * and '?' as well as character classes ([CHARS], with complementation and
 * ranges).  A backslash ('\') in @pat makes the following character match
 * literally.  The comparison is case-sensitive and does not skip spaces.
 *
 * Note: if @pat syntax is broken, this always returns false.
 */
bool strglobmatch(const char *str, const char *pat)
{
	const bool ignore_space = false;
	const bool case_ins = false;

	return __match_glob(str, pat, ignore_space, case_ins);
}
/**
 * strglobmatch_nocase - case-insensitive glob expression pattern matching
 * @str: the target string to match
 * @pat: the pattern string to match
 *
 * Same as strglobmatch(), except that ordinary characters are compared
 * without regard to case.
 */
bool strglobmatch_nocase(const char *str, const char *pat)
{
	const bool ignore_space = false;
	const bool case_ins = true;

	return __match_glob(str, pat, ignore_space, case_ins);
}
/**
 * strlazymatch - matching pattern strings lazily with glob pattern
 * @str: the target string to match
 * @pat: the pattern string to match
 *
 * Same as strglobmatch(), except that whitespace characters met in @str
 * or in @pat while comparing are skipped.
 */
bool strlazymatch(const char *str, const char *pat)
{
	const bool ignore_space = true;
	const bool case_ins = false;

	return __match_glob(str, pat, ignore_space, case_ins);
}
/**
 * strtailcmp - Compare the tail of two strings
 * @s1: 1st string to be compared
 * @s2: 2nd string to be compared
 *
 * The strings are compared backwards from their ends over the length of
 * the shorter one.  Returns 0 if one string is entirely equal to the tail
 * of the other; otherwise returns the difference between the first pair of
 * characters (counted from the end) that differ.
 */
int strtailcmp(const char *s1, const char *s2)
{
	int len1 = strlen(s1);
	int len2 = strlen(s2);
	int common = len1 < len2 ? len1 : len2;
	const char *end1 = s1 + len1;
	const char *end2 = s2 + len2;
	int back;

	for (back = 1; back <= common; back++) {
		int diff = end1[-back] - end2[-back];

		if (diff)
			return diff;
	}
	return 0;
}
/*
 * asprintf_expr_inout_ints - build "<var> == a || <var> == b || ..."
 * @var: name printed on the left-hand side of every comparison
 * @in: true builds an "is one of" expression ("==" joined by "||"),
 *      false builds a "is none of" expression ("!=" joined by "&&")
 * @nints: number of entries in @ints
 * @ints: values to compare @var against
 *
 * Returns a newly allocated string that the caller must free(), or NULL if
 * memory could not be allocated or the expression did not fit the computed
 * buffer.  With @nints == 0 an empty string is returned.
 */
char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
{
	const char *or_and = in ? "||" : "&&";
	const char *eq_neq = in ? "==" : "!=";
	/*
	 * Worst case for one term together with its joiner:
	 *   strlen(var) + " == " (4) + decimal int + " || " (4)
	 * sizeof(int) * 3 + 1 over-estimates the width of any int, sign
	 * included.  The length of @var has to be part of the bound,
	 * otherwise long names get silently truncated.
	 */
	const size_t per_term = strlen(var) + 4 + (sizeof(int) * 3 + 1) + 4;
	size_t size, printed = 0, i;
	char *expr;

	/* Refuse sizes that would wrap around in the multiplication below */
	if (nints > ((size_t)-1 - 1) / per_term)
		return NULL;

	size = nints * per_term + 1; /* \0 */
	expr = malloc(size);
	if (!expr)
		return NULL;

	/* Keep the result a valid string even when the loop never runs */
	expr[0] = '\0';

	for (i = 0; i < nints; ++i) {
		int n;

		if (i > 0) {
			n = snprintf(expr + printed, size - printed, " %s ", or_and);
			/* snprintf() reports the untruncated length */
			if (n < 0 || (size_t)n >= size - printed)
				goto out_err_overflow;
			printed += n;
		}

		n = snprintf(expr + printed, size - printed,
			     "%s %s %d", var, eq_neq, ints[i]);
		if (n < 0 || (size_t)n >= size - printed)
			goto out_err_overflow;
		printed += n;
	}
	return expr;

out_err_overflow:
	free(expr);
	return NULL;
}
/*
 * Like strpbrk(), but not break if it is right after a backslash (escaped).
 *
 * A character from @stopset counts as escaped when it is preceded by an odd
 * number of consecutive backslashes (an even run consists of escaped
 * backslashes only, e.g. "\\\\:" ends in an unescaped ':').  Only characters
 * at or after @str are inspected.
 *
 * Returns a pointer to the first unescaped stop character in @str, or NULL
 * if there is none.
 */
char *strpbrk_esc(char *str, const char *stopset)
{
	char *ptr;

	while ((ptr = strpbrk(str, stopset)) != NULL) {
		const char *bs = ptr;

		/*
		 * Walk back over the backslashes directly in front of the hit,
		 * never looking before the start of the searched range.
		 */
		while (bs > str && bs[-1] == '\\')
			bs--;

		/* Even run: the backslashes pair up, the hit is not escaped */
		if (((ptr - bs) & 1) == 0)
			break;

		/* Escaped stop character: keep searching behind it */
		str = ptr + 1;
	}
	return ptr;
}
/*
 * Like strpbrk_esc(), but not break if it is quoted with single/double quotes.
 *
 * Quoted parts are skipped as a whole; a quote is closed by the next
 * unescaped quote of the same kind.  Returns a pointer to the first stop
 * character found outside of quotes, or NULL if there is none, if a quote
 * is left open, or if the temporary stop set cannot be allocated.
 */
char *strpbrk_esq(char *str, const char *stopset)
{
	char closer[2] = { '\0', '\0' };
	char *full_set = NULL;
	char *hit;

	/* Look for the caller's stop characters plus both kinds of quote */
	if (asprintf(&full_set, "%s%c%c", stopset, '\'', '"') < 0)
		return NULL;

	for (;;) {
		hit = strpbrk_esc(str, full_set);
		if (!hit)
			break;
		if (*hit != '\'' && *hit != '"')
			break;		/* a stop character outside of quotes */

		/* Opening quote: jump to the matching closing quote */
		closer[0] = *hit;
		hit = strpbrk_esc(hit + 1, closer);
		if (!hit)
			break;		/* the quote is never closed */

		str = hit + 1;
	}

	free(full_set);
	return hit;
}
/*
 * Like strdup, but do not copy a single backslash.
 *
 * Every backslash is dropped and the character following it is copied
 * verbatim, so "\\\\" becomes a single backslash.  A backslash at the very
 * end of @str is dropped as well.  Returns a newly allocated string, or
 * NULL if the allocation fails.
 */
char *strdup_esc(const char *str)
{
	char *copy = strdup(str);
	char *rd, *wr;

	if (!copy)
		return NULL;

	/* Nothing to strip: hand back the plain copy */
	rd = strchr(copy, '\\');
	if (!rd)
		return copy;

	/* Compact the string in place, starting at the first backslash */
	for (wr = rd; *rd != '\0'; ) {
		/* skip the backslash; stop if nothing follows it */
		if (*rd == '\\' && *++rd == '\0')
			break;
		*wr++ = *rd++;
	}
	*wr = '\0';

	return copy;
}
/*
 * Remove backslash right before quote and return next quote address.
 *
 * Scans the @len bytes starting at @str.  Each backslash directly followed
 * by @quote is removed by shifting the rest of the range one byte to the
 * left, and the now unescaped quote is skipped.  Returns a pointer to the
 * first quote that was not escaped, or NULL if the range holds none.
 */
static char *remove_consumed_esc(char *str, int len, int quote)
{
	char *ptr = str, *end = str + len;

	/* Test the bound before dereferencing so str[len] is never read */
	while (ptr < end && *ptr != quote) {
		if (*ptr == '\\' && ptr + 1 < end && *(ptr + 1) == quote) {
			memmove(ptr, ptr + 1, end - (ptr + 1));
			/* now *ptr is `quote`. */
			end--;
		}
		ptr++;
	}
	return (ptr < end && *ptr == quote) ? ptr : NULL;
}
/*
 * Like strdup_esc, but keep quoted string as it is (and single backslash
 * before quote is removed). If there is no closed quote, return NULL.
 *
 * Outside of quotes a backslash is dropped and the character following it
 * is kept literally, including an escaped quote.  The quotes that delimit a
 * quoted part are removed.  Returns a newly allocated string, or NULL on
 * allocation failure or when a quote is left open.
 */
char *strdup_esq(const char *str)
{
	char *d, *ret;

	/* If there is no quote, return normal strdup_esc() */
	d = strpbrk_esc((char *)str, "\"'");
	if (!d)
		return strdup_esc(str);

	ret = strdup(str);
	if (!ret)
		return NULL;

	d = ret;
	do {
		d = strpbrk(d, "\\\"\'");
		if (!d)
			break;

		if (*d == '"' || *d == '\'') {
			/* This is non-escaped quote */
			int quote = *d;
			int len = strlen(d + 1) + 1;

			/*
			 * Remove the start quote and remove consumed escape (backslash
			 * before quote) and remove the end quote. If there is no end
			 * quote, it is the input error.
			 */
			memmove(d, d + 1, len);
			d = remove_consumed_esc(d, len, quote);
			if (!d)
				goto error;
			memmove(d, d + 1, strlen(d + 1) + 1);
		}
		if (*d == '\\') {
			/* Drop the backslash; *d becomes the escaped character */
			memmove(d, d + 1, strlen(d + 1) + 1);
			/*
			 * Step over the escaped character, whatever it is.  If
			 * an escaped quote were left under the cursor, the next
			 * search would take it for an opening quote (and an
			 * escaped backslash for a new escape).
			 */
			if (*d != '\0')
				d++;
		}
	} while (*d != '\0');

	return ret;

error:
	free(ret);
	return NULL;
}
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * '0'-'9' map to 0-9 and 'a'-'f' map to 10-15.  Every other character is
 * treated like an upper-case digit (c - 'A' + 10), so the result is only
 * meaningful for 'A'-'F' there; the input is not validated.
 */
unsigned int hex(char c)
{
	/* upper-case digits, and the fallback for anything else */
	int bias = 'A' - 10;

	if (c >= '0' && c <= '9')
		bias = '0';
	else if (c >= 'a' && c <= 'f')
		bias = 'a' - 10;

	return c - bias;
}
/*
 * Replace all occurrences of character 'needle' in string 'haystack' with
 * string 'replace'
 *
 * The new string could be longer so a new string is returned which must be
 * freed.  'replace' may be empty, which deletes every 'needle'.  A NUL
 * 'needle' never occurs inside a string, so it yields a plain copy of
 * 'haystack'.  Returns NULL if the allocation fails.
 */
char *strreplace_chars(char needle, const char *haystack, const char *replace)
{
	size_t replace_len = strlen(replace);
	size_t num = 0;
	const char *from = haystack;
	const char *loc = NULL;
	char *new_s, *to;

	/*
	 * Count occurrences.  strchr() matches the terminator when asked for
	 * '\0', and searching behind it would leave the string, so a NUL
	 * needle is treated as "no occurrence".
	 */
	if (needle != '\0') {
		for (loc = strchr(haystack, needle); loc; loc = strchr(loc + 1, needle))
			num++;
	}

	/* Each occurrence removes one byte and adds replace_len bytes */
	new_s = malloc(strlen(haystack) - num + num * replace_len + 1);
	if (!new_s)
		return NULL;

	to = new_s;
	loc = num ? strchr(haystack, needle) : NULL;
	while (loc) {
		size_t chunk = loc - from;

		/* Copy original string up to (not including) the found char */
		memcpy(to, from, chunk);
		to += chunk;
		from = loc + 1;

		/* Copy replacement string and update positions */
		memcpy(to, replace, replace_len);
		to += replace_len;

		/* needle next occurrence or end of string */
		loc = strchr(from, needle);
	}

	/* Copy any remaining chars + null */
	strcpy(to, from);

	return new_s;
}