Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lib / string_helpers: introduce string_escape_mem()

This is almost the inverse of string_unescape(). However, it also
handles \0, so it can be used on any byte buffer.

The documentation is supplied together with the function definition.

The test cases cover most of the scenarios and will be expanded later
on.

[akpm@linux-foundation.org: avoid 1k stack consumption]
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: "John W . Linville" <linville@tuxdriver.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Joe Perches <joe@perches.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Andy Shevchenko and committed by
Linus Torvalds
c8250381 45ff337a

+541 -4
+31
include/linux/string_helpers.h
··· 37 37 return string_unescape_any(buf, buf, 0); 38 38 } 39 39 40 + #define ESCAPE_SPACE 0x01 41 + #define ESCAPE_SPECIAL 0x02 42 + #define ESCAPE_NULL 0x04 43 + #define ESCAPE_OCTAL 0x08 44 + #define ESCAPE_ANY \ 45 + (ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_SPECIAL | ESCAPE_NULL) 46 + #define ESCAPE_NP 0x10 47 + #define ESCAPE_ANY_NP (ESCAPE_ANY | ESCAPE_NP) 48 + #define ESCAPE_HEX 0x20 49 + 50 + int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, 51 + unsigned int flags, const char *esc); 52 + 53 + static inline int string_escape_mem_any_np(const char *src, size_t isz, 54 + char **dst, size_t osz, const char *esc) 55 + { 56 + return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, esc); 57 + } 58 + 59 + static inline int string_escape_str(const char *src, char **dst, size_t sz, 60 + unsigned int flags, const char *esc) 61 + { 62 + return string_escape_mem(src, strlen(src), dst, sz, flags, esc); 63 + } 64 + 65 + static inline int string_escape_str_any_np(const char *src, char **dst, 66 + size_t sz, const char *esc) 67 + { 68 + return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, esc); 69 + } 70 + 40 71 #endif
+274
lib/string_helpers.c
··· 8 8 #include <linux/math64.h> 9 9 #include <linux/export.h> 10 10 #include <linux/ctype.h> 11 + #include <linux/errno.h> 12 + #include <linux/string.h> 11 13 #include <linux/string_helpers.h> 12 14 13 15 /** ··· 242 240 return out - dst; 243 241 } 244 242 EXPORT_SYMBOL(string_unescape); 243 + 244 + static int escape_passthrough(unsigned char c, char **dst, size_t *osz) 245 + { 246 + char *out = *dst; 247 + 248 + if (*osz < 1) 249 + return -ENOMEM; 250 + 251 + *out++ = c; 252 + 253 + *dst = out; 254 + *osz -= 1; 255 + 256 + return 1; 257 + } 258 + 259 + static int escape_space(unsigned char c, char **dst, size_t *osz) 260 + { 261 + char *out = *dst; 262 + unsigned char to; 263 + 264 + if (*osz < 2) 265 + return -ENOMEM; 266 + 267 + switch (c) { 268 + case '\n': 269 + to = 'n'; 270 + break; 271 + case '\r': 272 + to = 'r'; 273 + break; 274 + case '\t': 275 + to = 't'; 276 + break; 277 + case '\v': 278 + to = 'v'; 279 + break; 280 + case '\f': 281 + to = 'f'; 282 + break; 283 + default: 284 + return 0; 285 + } 286 + 287 + *out++ = '\\'; 288 + *out++ = to; 289 + 290 + *dst = out; 291 + *osz -= 2; 292 + 293 + return 1; 294 + } 295 + 296 + static int escape_special(unsigned char c, char **dst, size_t *osz) 297 + { 298 + char *out = *dst; 299 + unsigned char to; 300 + 301 + if (*osz < 2) 302 + return -ENOMEM; 303 + 304 + switch (c) { 305 + case '\\': 306 + to = '\\'; 307 + break; 308 + case '\a': 309 + to = 'a'; 310 + break; 311 + case '\e': 312 + to = 'e'; 313 + break; 314 + default: 315 + return 0; 316 + } 317 + 318 + *out++ = '\\'; 319 + *out++ = to; 320 + 321 + *dst = out; 322 + *osz -= 2; 323 + 324 + return 1; 325 + } 326 + 327 + static int escape_null(unsigned char c, char **dst, size_t *osz) 328 + { 329 + char *out = *dst; 330 + 331 + if (*osz < 2) 332 + return -ENOMEM; 333 + 334 + if (c) 335 + return 0; 336 + 337 + *out++ = '\\'; 338 + *out++ = '0'; 339 + 340 + *dst = out; 341 + *osz -= 2; 342 + 343 + return 1; 344 + } 345 + 346 + static int 
escape_octal(unsigned char c, char **dst, size_t *osz) 347 + { 348 + char *out = *dst; 349 + 350 + if (*osz < 4) 351 + return -ENOMEM; 352 + 353 + *out++ = '\\'; 354 + *out++ = ((c >> 6) & 0x07) + '0'; 355 + *out++ = ((c >> 3) & 0x07) + '0'; 356 + *out++ = ((c >> 0) & 0x07) + '0'; 357 + 358 + *dst = out; 359 + *osz -= 4; 360 + 361 + return 1; 362 + } 363 + 364 + static int escape_hex(unsigned char c, char **dst, size_t *osz) 365 + { 366 + char *out = *dst; 367 + 368 + if (*osz < 4) 369 + return -ENOMEM; 370 + 371 + *out++ = '\\'; 372 + *out++ = 'x'; 373 + *out++ = hex_asc_hi(c); 374 + *out++ = hex_asc_lo(c); 375 + 376 + *dst = out; 377 + *osz -= 4; 378 + 379 + return 1; 380 + } 381 + 382 + /** 383 + * string_escape_mem - quote characters in the given memory buffer 384 + * @src: source buffer (unescaped) 385 + * @isz: source buffer size 386 + * @dst: destination buffer (escaped) 387 + * @osz: destination buffer size 388 + * @flags: combination of the flags (bitwise OR): 389 + * %ESCAPE_SPACE: 390 + * '\f' - form feed 391 + * '\n' - new line 392 + * '\r' - carriage return 393 + * '\t' - horizontal tab 394 + * '\v' - vertical tab 395 + * %ESCAPE_SPECIAL: 396 + * '\\' - backslash 397 + * '\a' - alert (BEL) 398 + * '\e' - escape 399 + * %ESCAPE_NULL: 400 + * '\0' - null 401 + * %ESCAPE_OCTAL: 402 + * '\NNN' - byte with octal value NNN (3 digits) 403 + * %ESCAPE_ANY: 404 + * all previous together 405 + * %ESCAPE_NP: 406 + * escape only non-printable characters (checked by isprint) 407 + * %ESCAPE_ANY_NP: 408 + * all previous together 409 + * %ESCAPE_HEX: 410 + * '\xHH' - byte with hexadecimal value HH (2 digits) 411 + * @esc: NULL-terminated string of characters any of which, if found in 412 + * the source, has to be escaped 413 + * 414 + * Description: 415 + * The process of escaping byte buffer includes several parts. They are applied 416 + * in the following sequence. 417 + * 1. 
The character is matched to the printable class, if asked, and in 418 + * case of match it passes through to the output. 419 + * 2. The character is not matched to the one from @esc string and thus 420 + * must go as is to the output. 421 + * 3. The character is checked if it falls into the class given by @flags. 422 + * %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any 423 + * character. Note that they actually can't go together, otherwise 424 + * %ESCAPE_HEX will be ignored. 425 + * 426 + * Caller must provide valid source and destination pointers. Be aware that 427 + * destination buffer will not be NULL-terminated, thus caller have to append 428 + * it if needs. 429 + * 430 + * Return: 431 + * The amount of the characters processed to the destination buffer, or 432 + * %-ENOMEM if the size of buffer is not enough to put an escaped character is 433 + * returned. 434 + * 435 + * Even in the case of error @dst pointer will be updated to point to the byte 436 + * after the last processed character. 437 + */ 438 + int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, 439 + unsigned int flags, const char *esc) 440 + { 441 + char *out = *dst, *p = out; 442 + bool is_dict = esc && *esc; 443 + int ret = 0; 444 + 445 + while (isz--) { 446 + unsigned char c = *src++; 447 + 448 + /* 449 + * Apply rules in the following sequence: 450 + * - the character is printable, when @flags has 451 + * %ESCAPE_NP bit set 452 + * - the @esc string is supplied and does not contain a 453 + * character under question 454 + * - the character doesn't fall into a class of symbols 455 + * defined by given @flags 456 + * In these cases we just pass through a character to the 457 + * output buffer. 
458 + */ 459 + if ((flags & ESCAPE_NP && isprint(c)) || 460 + (is_dict && !strchr(esc, c))) { 461 + /* do nothing */ 462 + } else { 463 + if (flags & ESCAPE_SPACE) { 464 + ret = escape_space(c, &p, &osz); 465 + if (ret < 0) 466 + break; 467 + if (ret > 0) 468 + continue; 469 + } 470 + 471 + if (flags & ESCAPE_SPECIAL) { 472 + ret = escape_special(c, &p, &osz); 473 + if (ret < 0) 474 + break; 475 + if (ret > 0) 476 + continue; 477 + } 478 + 479 + if (flags & ESCAPE_NULL) { 480 + ret = escape_null(c, &p, &osz); 481 + if (ret < 0) 482 + break; 483 + if (ret > 0) 484 + continue; 485 + } 486 + 487 + /* ESCAPE_OCTAL and ESCAPE_HEX always go last */ 488 + if (flags & ESCAPE_OCTAL) { 489 + ret = escape_octal(c, &p, &osz); 490 + if (ret < 0) 491 + break; 492 + continue; 493 + } 494 + if (flags & ESCAPE_HEX) { 495 + ret = escape_hex(c, &p, &osz); 496 + if (ret < 0) 497 + break; 498 + continue; 499 + } 500 + } 501 + 502 + ret = escape_passthrough(c, &p, &osz); 503 + if (ret < 0) 504 + break; 505 + } 506 + 507 + *dst = p; 508 + 509 + if (ret < 0) 510 + return ret; 511 + 512 + return p - out; 513 + } 514 + EXPORT_SYMBOL(string_escape_mem);
+236 -4
lib/test-string_helpers.c
··· 5 5 6 6 #include <linux/init.h> 7 7 #include <linux/kernel.h> 8 + #include <linux/slab.h> 8 9 #include <linux/module.h> 9 10 #include <linux/random.h> 10 11 #include <linux/string.h> ··· 63 62 static void __init test_string_unescape(const char *name, unsigned int flags, 64 63 bool inplace) 65 64 { 66 - char in[256]; 67 - char out_test[256]; 68 - char out_real[256]; 69 - int i, p = 0, q_test = 0, q_real = sizeof(out_real); 65 + int q_real = 256; 66 + char *in = kmalloc(q_real, GFP_KERNEL); 67 + char *out_test = kmalloc(q_real, GFP_KERNEL); 68 + char *out_real = kmalloc(q_real, GFP_KERNEL); 69 + int i, p = 0, q_test = 0; 70 + 71 + if (!in || !out_test || !out_real) 72 + goto out; 70 73 71 74 for (i = 0; i < ARRAY_SIZE(strings); i++) { 72 75 const char *s = strings[i].in; ··· 105 100 106 101 test_string_check_buf(name, flags, in, p - 1, out_real, q_real, 107 102 out_test, q_test); 103 + out: 104 + kfree(out_real); 105 + kfree(out_test); 106 + kfree(in); 107 + } 108 + 109 + struct test_string_1 { 110 + const char *out; 111 + unsigned int flags; 112 + }; 113 + 114 + #define TEST_STRING_2_MAX_S1 32 115 + struct test_string_2 { 116 + const char *in; 117 + struct test_string_1 s1[TEST_STRING_2_MAX_S1]; 118 + }; 119 + 120 + #define TEST_STRING_2_DICT_0 NULL 121 + static const struct test_string_2 escape0[] __initconst = {{ 122 + .in = "\f\\ \n\r\t\v", 123 + .s1 = {{ 124 + .out = "\\f\\ \\n\\r\\t\\v", 125 + .flags = ESCAPE_SPACE, 126 + },{ 127 + .out = "\\f\\134\\040\\n\\r\\t\\v", 128 + .flags = ESCAPE_SPACE | ESCAPE_OCTAL, 129 + },{ 130 + .out = "\\f\\x5c\\x20\\n\\r\\t\\v", 131 + .flags = ESCAPE_SPACE | ESCAPE_HEX, 132 + },{ 133 + /* terminator */ 134 + }}, 135 + },{ 136 + .in = "\\h\\\"\a\e\\", 137 + .s1 = {{ 138 + .out = "\\\\h\\\\\"\\a\\e\\\\", 139 + .flags = ESCAPE_SPECIAL, 140 + },{ 141 + .out = "\\\\\\150\\\\\\042\\a\\e\\\\", 142 + .flags = ESCAPE_SPECIAL | ESCAPE_OCTAL, 143 + },{ 144 + .out = "\\\\\\x68\\\\\\x22\\a\\e\\\\", 145 + .flags = ESCAPE_SPECIAL | 
ESCAPE_HEX, 146 + },{ 147 + /* terminator */ 148 + }}, 149 + },{ 150 + .in = "\eb \\C\007\"\x90\r]", 151 + .s1 = {{ 152 + .out = "\eb \\C\007\"\x90\\r]", 153 + .flags = ESCAPE_SPACE, 154 + },{ 155 + .out = "\\eb \\\\C\\a\"\x90\r]", 156 + .flags = ESCAPE_SPECIAL, 157 + },{ 158 + .out = "\\eb \\\\C\\a\"\x90\\r]", 159 + .flags = ESCAPE_SPACE | ESCAPE_SPECIAL, 160 + },{ 161 + .out = "\\033\\142\\040\\134\\103\\007\\042\\220\\015\\135", 162 + .flags = ESCAPE_OCTAL, 163 + },{ 164 + .out = "\\033\\142\\040\\134\\103\\007\\042\\220\\r\\135", 165 + .flags = ESCAPE_SPACE | ESCAPE_OCTAL, 166 + },{ 167 + .out = "\\e\\142\\040\\\\\\103\\a\\042\\220\\015\\135", 168 + .flags = ESCAPE_SPECIAL | ESCAPE_OCTAL, 169 + },{ 170 + .out = "\\e\\142\\040\\\\\\103\\a\\042\\220\\r\\135", 171 + .flags = ESCAPE_SPACE | ESCAPE_SPECIAL | ESCAPE_OCTAL, 172 + },{ 173 + .out = "\eb \\C\007\"\x90\r]", 174 + .flags = ESCAPE_NP, 175 + },{ 176 + .out = "\eb \\C\007\"\x90\\r]", 177 + .flags = ESCAPE_SPACE | ESCAPE_NP, 178 + },{ 179 + .out = "\\eb \\C\\a\"\x90\r]", 180 + .flags = ESCAPE_SPECIAL | ESCAPE_NP, 181 + },{ 182 + .out = "\\eb \\C\\a\"\x90\\r]", 183 + .flags = ESCAPE_SPACE | ESCAPE_SPECIAL | ESCAPE_NP, 184 + },{ 185 + .out = "\\033b \\C\\007\"\\220\\015]", 186 + .flags = ESCAPE_OCTAL | ESCAPE_NP, 187 + },{ 188 + .out = "\\033b \\C\\007\"\\220\\r]", 189 + .flags = ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_NP, 190 + },{ 191 + .out = "\\eb \\C\\a\"\\220\\r]", 192 + .flags = ESCAPE_SPECIAL | ESCAPE_SPACE | ESCAPE_OCTAL | 193 + ESCAPE_NP, 194 + },{ 195 + .out = "\\x1bb \\C\\x07\"\\x90\\x0d]", 196 + .flags = ESCAPE_NP | ESCAPE_HEX, 197 + },{ 198 + /* terminator */ 199 + }}, 200 + },{ 201 + /* terminator */ 202 + }}; 203 + 204 + #define TEST_STRING_2_DICT_1 "b\\ \t\r" 205 + static const struct test_string_2 escape1[] __initconst = {{ 206 + .in = "\f\\ \n\r\t\v", 207 + .s1 = {{ 208 + .out = "\f\\134\\040\n\\015\\011\v", 209 + .flags = ESCAPE_OCTAL, 210 + },{ 211 + .out = "\f\\x5c\\x20\n\\x0d\\x09\v", 212 + 
.flags = ESCAPE_HEX, 213 + },{ 214 + /* terminator */ 215 + }}, 216 + },{ 217 + .in = "\\h\\\"\a\e\\", 218 + .s1 = {{ 219 + .out = "\\134h\\134\"\a\e\\134", 220 + .flags = ESCAPE_OCTAL, 221 + },{ 222 + /* terminator */ 223 + }}, 224 + },{ 225 + .in = "\eb \\C\007\"\x90\r]", 226 + .s1 = {{ 227 + .out = "\e\\142\\040\\134C\007\"\x90\\015]", 228 + .flags = ESCAPE_OCTAL, 229 + },{ 230 + /* terminator */ 231 + }}, 232 + },{ 233 + /* terminator */ 234 + }}; 235 + 236 + static __init const char *test_string_find_match(const struct test_string_2 *s2, 237 + unsigned int flags) 238 + { 239 + const struct test_string_1 *s1 = s2->s1; 240 + unsigned int i; 241 + 242 + if (!flags) 243 + return s2->in; 244 + 245 + /* Test cases are NULL-aware */ 246 + flags &= ~ESCAPE_NULL; 247 + 248 + /* ESCAPE_OCTAL has a higher priority */ 249 + if (flags & ESCAPE_OCTAL) 250 + flags &= ~ESCAPE_HEX; 251 + 252 + for (i = 0; i < TEST_STRING_2_MAX_S1 && s1->out; i++, s1++) 253 + if (s1->flags == flags) 254 + return s1->out; 255 + return NULL; 256 + } 257 + 258 + static __init void test_string_escape(const char *name, 259 + const struct test_string_2 *s2, 260 + unsigned int flags, const char *esc) 261 + { 262 + int q_real = 512; 263 + char *out_test = kmalloc(q_real, GFP_KERNEL); 264 + char *out_real = kmalloc(q_real, GFP_KERNEL); 265 + char *in = kmalloc(256, GFP_KERNEL); 266 + char *buf = out_real; 267 + int p = 0, q_test = 0; 268 + 269 + if (!out_test || !out_real || !in) 270 + goto out; 271 + 272 + for (; s2->in; s2++) { 273 + const char *out; 274 + int len; 275 + 276 + /* NULL injection */ 277 + if (flags & ESCAPE_NULL) { 278 + in[p++] = '\0'; 279 + out_test[q_test++] = '\\'; 280 + out_test[q_test++] = '0'; 281 + } 282 + 283 + /* Don't try strings that have no output */ 284 + out = test_string_find_match(s2, flags); 285 + if (!out) 286 + continue; 287 + 288 + /* Copy string to in buffer */ 289 + len = strlen(s2->in); 290 + memcpy(&in[p], s2->in, len); 291 + p += len; 292 + 293 + /* Copy 
expected result for given flags */ 294 + len = strlen(out); 295 + memcpy(&out_test[q_test], out, len); 296 + q_test += len; 297 + } 298 + 299 + q_real = string_escape_mem(in, p, &buf, q_real, flags, esc); 300 + 301 + test_string_check_buf(name, flags, in, p, out_real, q_real, out_test, 302 + q_test); 303 + out: 304 + kfree(in); 305 + kfree(out_real); 306 + kfree(out_test); 307 + } 308 + 309 + static __init void test_string_escape_nomem(void) 310 + { 311 + char *in = "\eb \\C\007\"\x90\r]"; 312 + char out[64], *buf = out; 313 + int rc = -ENOMEM, ret; 314 + 315 + ret = string_escape_str_any_np(in, &buf, strlen(in), NULL); 316 + if (ret == rc) 317 + return; 318 + 319 + pr_err("Test 'escape nomem' failed: got %d instead of %d\n", ret, rc); 108 320 } 109 321 110 322 static int __init test_string_helpers_init(void) ··· 333 111 test_string_unescape("unescape", i, false); 334 112 test_string_unescape("unescape inplace", 335 113 get_random_int() % (UNESCAPE_ANY + 1), true); 114 + 115 + /* Without dictionary */ 116 + for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) 117 + test_string_escape("escape 0", escape0, i, TEST_STRING_2_DICT_0); 118 + 119 + /* With dictionary */ 120 + for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) 121 + test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); 122 + 123 + test_string_escape_nomem(); 336 124 337 125 return -EINVAL; 338 126 }