Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: Add support for DISCARD in the UBD Driver

Add support for DISCARD and WRITE_ZEROES to the UBD driver, implemented
using fallocate() hole punching.

DISCARD is enabled by default and can be disabled per device using the
new 't' UBD command line flag.

If the underlying filesystem on which the UBD image is stored does not
support DISCARD, support for both DISCARD and WRITE_ZEROES is turned
off.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

authored by

Anton Ivanov and committed by
Richard Weinberger
50109b5a a41421ed

+65 -11
+54 -11
arch/um/drivers/ubd_kern.c
··· 154 154 struct openflags openflags; 155 155 unsigned shared:1; 156 156 unsigned no_cow:1; 157 + unsigned no_trim:1; 157 158 struct cow cow; 158 159 struct platform_device pdev; 159 160 struct request_queue *queue; ··· 178 177 .boot_openflags = OPEN_FLAGS, \ 179 178 .openflags = OPEN_FLAGS, \ 180 179 .no_cow = 0, \ 180 + .no_trim = 0, \ 181 181 .shared = 0, \ 182 182 .cow = DEFAULT_COW, \ 183 183 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ ··· 325 323 *index_out = n; 326 324 327 325 err = -EINVAL; 328 - for (i = 0; i < sizeof("rscd="); i++) { 326 + for (i = 0; i < sizeof("rscdt="); i++) { 329 327 switch (*str) { 330 328 case 'r': 331 329 flags.w = 0; ··· 339 337 case 'c': 340 338 ubd_dev->shared = 1; 341 339 break; 340 + case 't': 341 + ubd_dev->no_trim = 1; 342 + break; 342 343 case '=': 343 344 str++; 344 345 goto break_loop; 345 346 default: 346 347 *error_out = "Expected '=' or flag letter " 347 - "(r, s, c, or d)"; 348 + "(r, s, c, t or d)"; 348 349 goto out; 349 350 } 350 351 str++; ··· 420 415 " 'c' will cause the device to be treated as being shared between multiple\n" 421 416 " UMLs and file locking will be turned off - this is appropriate for a\n" 422 417 " cluster filesystem and inappropriate at almost all other times.\n\n" 418 + " 't' will disable trim/discard support on the device (enabled by default).\n\n" 423 419 ); 424 420 425 421 static int udb_setup(char *str) ··· 519 513 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { 520 514 struct io_thread_req *io_req = (*irq_req_buffer)[count]; 521 515 522 - if (!blk_update_request(io_req->req, io_req->error, io_req->length)) 523 - __blk_mq_end_request(io_req->req, io_req->error); 524 - 516 + if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { 517 + blk_queue_max_discard_sectors(io_req->req->q, 0); 518 + blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); 519 + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); 520 + } 521 + if 
((io_req->error) || (io_req->buffer == NULL)) 522 + blk_mq_end_request(io_req->req, io_req->error); 523 + else { 524 + if (!blk_update_request(io_req->req, io_req->error, io_req->length)) 525 + __blk_mq_end_request(io_req->req, io_req->error); 526 + } 525 527 kfree(io_req); 526 528 } 527 529 } ··· 842 828 NULL, NULL, NULL, NULL); 843 829 if(err < 0) goto error; 844 830 ubd_dev->cow.fd = err; 831 + } 832 + if (ubd_dev->no_trim == 0) { 833 + ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE; 834 + ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE; 835 + blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST); 836 + blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST); 837 + blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue); 845 838 } 846 839 blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue); 847 840 return 0; ··· 1393 1372 case REQ_OP_WRITE: 1394 1373 ret = queue_rw_req(hctx, req); 1395 1374 break; 1375 + case REQ_OP_DISCARD: 1376 + case REQ_OP_WRITE_ZEROES: 1377 + ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL); 1378 + break; 1396 1379 default: 1397 1380 WARN_ON_ONCE(1); 1398 1381 res = BLK_STS_NOTSUPP; ··· 1488 1463 1489 1464 n = os_pwrite_file(req->fds[1], &req->bitmap_words, 1490 1465 sizeof(req->bitmap_words), req->cow_offset); 1491 - if(n != sizeof(req->bitmap_words)) 1466 + if (n != sizeof(req->bitmap_words)) 1492 1467 return map_error(-n); 1493 1468 1494 1469 return map_error(0); ··· 1496 1471 1497 1472 static void do_io(struct io_thread_req *req) 1498 1473 { 1499 - char *buf; 1474 + char *buf = NULL; 1500 1475 unsigned long len; 1501 1476 int n, nsectors, start, end, bit; 1502 1477 __u64 off; 1478 + 1479 + /* FLUSH is really a special case, we cannot "case" it with others */ 1503 1480 1504 1481 if (req_op(req->req) == REQ_OP_FLUSH) { 1505 1482 /* fds[0] is always either the rw image or our cow file */ ··· 1522 1495 off = req->offset + req->offsets[bit] + 1523 1496 start * req->sectorsize; 1524 
1497 len = (end - start) * req->sectorsize; 1525 - buf = &req->buffer[start * req->sectorsize]; 1498 + if (req->buffer != NULL) 1499 + buf = &req->buffer[start * req->sectorsize]; 1526 1500 1527 - if (req_op(req->req) == REQ_OP_READ) { 1501 + switch (req_op(req->req)) { 1502 + case REQ_OP_READ: 1528 1503 n = 0; 1529 1504 do { 1530 1505 buf = &buf[n]; 1531 1506 len -= n; 1532 1507 n = os_pread_file(req->fds[bit], buf, len, off); 1533 - if(n < 0){ 1508 + if (n < 0) { 1534 1509 req->error = map_error(-n); 1535 1510 return; 1536 1511 } 1537 1512 } while((n < len) && (n != 0)); 1538 1513 if (n < len) memset(&buf[n], 0, len - n); 1539 - } else { 1514 + break; 1515 + case REQ_OP_WRITE: 1540 1516 n = os_pwrite_file(req->fds[bit], buf, len, off); 1541 1517 if(n != len){ 1542 1518 req->error = map_error(-n); 1543 1519 return; 1544 1520 } 1521 + break; 1522 + case REQ_OP_DISCARD: 1523 + case REQ_OP_WRITE_ZEROES: 1524 + n = os_falloc_punch(req->fds[bit], off, len); 1525 + if (n) { 1526 + req->error = map_error(-n); 1527 + return; 1528 + } 1529 + break; 1530 + default: 1531 + WARN_ON_ONCE(1); 1532 + req->error = BLK_STS_NOTSUPP; 1533 + return; 1545 1534 } 1546 1535 1547 1536 start = end;
+1
arch/um/include/shared/os.h
··· 175 175 extern unsigned os_major(unsigned long long dev); 176 176 extern unsigned os_minor(unsigned long long dev); 177 177 extern unsigned long long os_makedev(unsigned major, unsigned minor); 178 + extern int os_falloc_punch(int fd, unsigned long long offset, int count); 178 179 179 180 /* start_up.c */ 180 181 extern void os_early_checks(void);
+10
arch/um/os-Linux/file.c
··· 610 610 { 611 611 return makedev(major, minor); 612 612 } 613 + 614 + int os_falloc_punch(int fd, unsigned long long offset, int len) 615 + { 616 + int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len); 617 + 618 + if (n < 0) 619 + return -errno; 620 + return n; 621 + } 622 +