Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:

- Dead code removal for loop/sunvdc (Chengguang)

- Mark BIDI support for bsg as deprecated, logging a single dmesg
warning if anyone is actually using it (Christoph)

- blkcg cleanup, killing a dead function and making the tryget_closest
variant easier to read (Dennis)

- Floppy fixes, one fixing a regression in swim3 (Finn)

- lightnvm use-after-free fix (Gustavo)

- gdrom leak fix (Wenwen)

- a set of drbd updates (Lars, Luc, Nathan, Roland)

* tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block: (28 commits)
block/swim3: Fix regression on PowerBook G3
block/swim3: Fix -EBUSY error when re-opening device after unmount
block/swim3: Remove dead return statement
block/amiflop: Don't log error message on invalid ioctl
gdrom: fix a memory leak bug
lightnvm: pblk: fix use-after-free bug
block: sunvdc: remove redundant code
block: loop: remove redundant code
bsg: deprecate BIDI support in bsg
blkcg: remove unused __blkg_release_rcu()
blkcg: clean up blkg_tryget_closest()
drbd: Change drbd_request_detach_interruptible's return type to int
drbd: Avoid Clang warning about pointless switch statement
drbd: introduce P_ZEROES (REQ_OP_WRITE_ZEROES on the "wire")
drbd: skip spurious timeout (ping-timeo) when failing promote
drbd: don't retry connection if peers do not agree on "authentication" settings
drbd: fix print_st_err()'s prototype to match the definition
drbd: avoid spurious self-outdating with concurrent disconnect / down
drbd: do not block when adjusting "disk-options" while IO is frozen
drbd: fix comment typos
...

+468 -132
+4
block/bio.c
··· 2097 2097 */ 2098 2098 void bio_clone_blkg_association(struct bio *dst, struct bio *src) 2099 2099 { 2100 + rcu_read_lock(); 2101 + 2100 2102 if (src->bi_blkg) 2101 2103 __bio_associate_blkg(dst, src->bi_blkg); 2104 + 2105 + rcu_read_unlock(); 2102 2106 } 2103 2107 EXPORT_SYMBOL_GPL(bio_clone_blkg_association); 2104 2108 #endif /* CONFIG_BLK_CGROUP */
-23
block/blk-cgroup.c
··· 438 438 spin_unlock_irq(&q->queue_lock); 439 439 } 440 440 441 - /* 442 - * A group is RCU protected, but having an rcu lock does not mean that one 443 - * can access all the fields of blkg and assume these are valid. For 444 - * example, don't try to follow throtl_data and request queue links. 445 - * 446 - * Having a reference to blkg under an rcu allows accesses to only values 447 - * local to groups like group stats and group rate limits. 448 - */ 449 - void __blkg_release_rcu(struct rcu_head *rcu_head) 450 - { 451 - struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); 452 - 453 - /* release the blkcg and parent blkg refs this blkg has been holding */ 454 - css_put(&blkg->blkcg->css); 455 - if (blkg->parent) 456 - blkg_put(blkg->parent); 457 - 458 - wb_congested_put(blkg->wb_congested); 459 - 460 - blkg_free(blkg); 461 - } 462 - EXPORT_SYMBOL_GPL(__blkg_release_rcu); 463 - 464 441 static int blkcg_reset_stats(struct cgroup_subsys_state *css, 465 442 struct cftype *cftype, u64 val) 466 443 {
+4
block/bsg.c
··· 177 177 goto out; 178 178 } 179 179 180 + pr_warn_once( 181 + "BIDI support in bsg has been deprecated and might be removed. " 182 + "Please report your use case to linux-scsi@vger.kernel.org\n"); 183 + 180 184 next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); 181 185 if (IS_ERR(next_rq)) { 182 186 ret = PTR_ERR(next_rq);
-2
drivers/block/amiflop.c
··· 1601 1601 return p->type->read_size; 1602 1602 #endif 1603 1603 default: 1604 - printk(KERN_DEBUG "fd_ioctl: unknown cmd %d for drive %d.", 1605 - cmd, drive); 1606 1604 return -ENOSYS; 1607 1605 } 1608 1606 return 0;
+2
drivers/block/drbd/drbd_debugfs.c
··· 237 237 seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL"); 238 238 seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C"); 239 239 seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync"); 240 + seq_print_rq_state_bit(m, f & EE_TRIM, &sep, "trim"); 241 + seq_print_rq_state_bit(m, f & EE_ZEROOUT, &sep, "zero-out"); 240 242 seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same"); 241 243 seq_putc(m, '\n'); 242 244 }
+10 -9
drivers/block/drbd/drbd_int.h
··· 430 430 __EE_MAY_SET_IN_SYNC, 431 431 432 432 /* is this a TRIM aka REQ_OP_DISCARD? */ 433 - __EE_IS_TRIM, 433 + __EE_TRIM, 434 + /* explicit zero-out requested, or 435 + * our lower level cannot handle trim, 436 + * and we want to fall back to zeroout instead */ 437 + __EE_ZEROOUT, 434 438 435 439 /* In case a barrier failed, 436 440 * we need to resubmit without the barrier flag. */ ··· 476 472 }; 477 473 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) 478 474 #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) 479 - #define EE_IS_TRIM (1<<__EE_IS_TRIM) 475 + #define EE_TRIM (1<<__EE_TRIM) 476 + #define EE_ZEROOUT (1<<__EE_ZEROOUT) 480 477 #define EE_RESUBMITTED (1<<__EE_RESUBMITTED) 481 478 #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) 482 479 #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) ··· 1561 1556 extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); 1562 1557 1563 1558 /* drbd_receiver.c */ 1559 + extern int drbd_issue_discard_or_zero_out(struct drbd_device *device, 1560 + sector_t start, unsigned int nr_sectors, int flags); 1564 1561 extern int drbd_receiver(struct drbd_thread *thi); 1565 1562 extern int drbd_ack_receiver(struct drbd_thread *thi); 1566 1563 extern void drbd_send_ping_wf(struct work_struct *ws); ··· 1616 1609 } 1617 1610 1618 1611 /* sets the number of 512 byte sectors of our virtual device */ 1619 - static inline void drbd_set_my_capacity(struct drbd_device *device, 1620 - sector_t size) 1621 - { 1622 - /* set_capacity(device->this_bdev->bd_disk, size); */ 1623 - set_capacity(device->vdisk, size); 1624 - device->this_bdev->bd_inode->i_size = (loff_t)size << 9; 1625 - } 1612 + void drbd_set_my_capacity(struct drbd_device *device, sector_t size); 1626 1613 1627 1614 /* 1628 1615 * used to submit our private bio
+24 -4
drivers/block/drbd/drbd_main.c
··· 1668 1668 (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) | 1669 1669 (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) | 1670 1670 (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) | 1671 - (bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0); 1671 + (bio_op(bio) == REQ_OP_WRITE_ZEROES ? 1672 + ((connection->agreed_features & DRBD_FF_WZEROES) ? 1673 + (DP_ZEROES |(!(bio->bi_opf & REQ_NOUNMAP) ? DP_DISCARD : 0)) 1674 + : DP_DISCARD) 1675 + : 0); 1672 1676 else 1673 1677 return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0; 1674 1678 } ··· 1716 1712 } 1717 1713 p->dp_flags = cpu_to_be32(dp_flags); 1718 1714 1719 - if (dp_flags & DP_DISCARD) { 1715 + if (dp_flags & (DP_DISCARD|DP_ZEROES)) { 1716 + enum drbd_packet cmd = (dp_flags & DP_ZEROES) ? P_ZEROES : P_TRIM; 1720 1717 struct p_trim *t = (struct p_trim*)p; 1721 1718 t->size = cpu_to_be32(req->i.size); 1722 - err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0); 1719 + err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*t), NULL, 0); 1723 1720 goto out; 1724 1721 } 1725 1722 if (dp_flags & DP_WSAME) { ··· 2039 2034 device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; 2040 2035 } 2041 2036 2037 + static void _drbd_set_my_capacity(struct drbd_device *device, sector_t size) 2038 + { 2039 + /* set_capacity(device->this_bdev->bd_disk, size); */ 2040 + set_capacity(device->vdisk, size); 2041 + device->this_bdev->bd_inode->i_size = (loff_t)size << 9; 2042 + } 2043 + 2044 + void drbd_set_my_capacity(struct drbd_device *device, sector_t size) 2045 + { 2046 + char ppb[10]; 2047 + _drbd_set_my_capacity(device, size); 2048 + drbd_info(device, "size = %s (%llu KB)\n", 2049 + ppsize(ppb, size>>1), (unsigned long long)size>>1); 2050 + } 2051 + 2042 2052 void drbd_device_cleanup(struct drbd_device *device) 2043 2053 { 2044 2054 int i; ··· 2079 2059 } 2080 2060 D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); 2081 2061 2082 - 
drbd_set_my_capacity(device, 0); 2062 + _drbd_set_my_capacity(device, 0); 2083 2063 if (device->bitmap) { 2084 2064 /* maybe never allocated. */ 2085 2065 drbd_bm_resize(device, 0, 1);
+107 -26
drivers/block/drbd/drbd_nl.c
··· 127 127 return 0; 128 128 } 129 129 130 + __printf(2, 3) 131 + static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...) 132 + { 133 + va_list args; 134 + struct nlattr *nla, *txt; 135 + int err = -EMSGSIZE; 136 + int len; 137 + 138 + nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY); 139 + if (!nla) 140 + return err; 141 + 142 + txt = nla_reserve(skb, T_info_text, 256); 143 + if (!txt) { 144 + nla_nest_cancel(skb, nla); 145 + return err; 146 + } 147 + va_start(args, fmt); 148 + len = vscnprintf(nla_data(txt), 256, fmt, args); 149 + va_end(args); 150 + 151 + /* maybe: retry with larger reserve, if truncated */ 152 + txt->nla_len = nla_attr_size(len+1); 153 + nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len)); 154 + nla_nest_end(skb, nla); 155 + 156 + return 0; 157 + } 158 + 130 159 /* This would be a good candidate for a "pre_doit" hook, 131 160 * and per-family private info->pointers. 132 161 * But we need to stay compatible with older kernels. ··· 697 668 if (rv == SS_TWO_PRIMARIES) { 698 669 /* Maybe the peer is detected as dead very soon... 699 670 retry at most once more in this case. */ 700 - int timeo; 701 - rcu_read_lock(); 702 - nc = rcu_dereference(connection->net_conf); 703 - timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; 704 - rcu_read_unlock(); 705 - schedule_timeout_interruptible(timeo); 706 - if (try < max_tries) 671 + if (try < max_tries) { 672 + int timeo; 707 673 try = max_tries - 1; 674 + rcu_read_lock(); 675 + nc = rcu_dereference(connection->net_conf); 676 + timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; 677 + rcu_read_unlock(); 678 + schedule_timeout_interruptible(timeo); 679 + } 708 680 continue; 709 681 } 710 682 if (rv < SS_SUCCESS) { ··· 951 921 } prev; 952 922 sector_t u_size, size; 953 923 struct drbd_md *md = &device->ldev->md; 954 - char ppb[10]; 955 924 void *buffer; 956 925 957 926 int md_moved, la_size_changed; ··· 1028 999 /* racy, see comments above. 
*/ 1029 1000 drbd_set_my_capacity(device, size); 1030 1001 md->la_size_sect = size; 1031 - drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), 1032 - (unsigned long long)size>>1); 1033 1002 } 1034 1003 if (rv <= DS_ERROR) 1035 1004 goto err_out; ··· 1261 1234 } 1262 1235 } 1263 1236 1237 + static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q) 1238 + { 1239 + /* Fixup max_write_zeroes_sectors after blk_queue_stack_limits(): 1240 + * if we can handle "zeroes" efficiently on the protocol, 1241 + * we want to do that, even if our backend does not announce 1242 + * max_write_zeroes_sectors itself. */ 1243 + struct drbd_connection *connection = first_peer_device(device)->connection; 1244 + /* If the peer announces WZEROES support, use it. Otherwise, rather 1245 + * send explicit zeroes than rely on some discard-zeroes-data magic. */ 1246 + if (connection->agreed_features & DRBD_FF_WZEROES) 1247 + q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; 1248 + else 1249 + q->limits.max_write_zeroes_sectors = 0; 1250 + } 1251 + 1264 1252 static void decide_on_write_same_support(struct drbd_device *device, 1265 1253 struct request_queue *q, 1266 1254 struct request_queue *b, struct o_qlim *o, ··· 1386 1344 } 1387 1345 } 1388 1346 fixup_discard_if_not_supported(q); 1347 + fixup_write_zeroes(device, q); 1389 1348 } 1390 1349 1391 1350 void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) ··· 1557 1514 } 1558 1515 } 1559 1516 1517 + static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc) 1518 + { 1519 + int err = -EBUSY; 1520 + 1521 + if (device->act_log && 1522 + device->act_log->nr_elements == dc->al_extents) 1523 + return 0; 1524 + 1525 + drbd_suspend_io(device); 1526 + /* If IO completion is currently blocked, we would likely wait 1527 + * "forever" for the activity log to become unused. So we don't. 
*/ 1528 + if (atomic_read(&device->ap_bio_cnt)) 1529 + goto out; 1530 + 1531 + wait_event(device->al_wait, lc_try_lock(device->act_log)); 1532 + drbd_al_shrink(device); 1533 + err = drbd_check_al_size(device, dc); 1534 + lc_unlock(device->act_log); 1535 + wake_up(&device->al_wait); 1536 + out: 1537 + drbd_resume_io(device); 1538 + return err; 1539 + } 1540 + 1560 1541 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) 1561 1542 { 1562 1543 struct drbd_config_context adm_ctx; ··· 1643 1576 } 1644 1577 } 1645 1578 1646 - drbd_suspend_io(device); 1647 - wait_event(device->al_wait, lc_try_lock(device->act_log)); 1648 - drbd_al_shrink(device); 1649 - err = drbd_check_al_size(device, new_disk_conf); 1650 - lc_unlock(device->act_log); 1651 - wake_up(&device->al_wait); 1652 - drbd_resume_io(device); 1653 - 1579 + err = disk_opts_check_al_size(device, new_disk_conf); 1654 1580 if (err) { 1581 + /* Could be just "busy". Ignore? 1582 + * Introduce dedicated error code? */ 1583 + drbd_msg_put_info(adm_ctx.reply_skb, 1584 + "Try again without changing current al-extents setting"); 1655 1585 retcode = ERR_NOMEM; 1656 1586 goto fail_unlock; 1657 1587 } ··· 1998 1934 } 1999 1935 } 2000 1936 2001 - if (device->state.conn < C_CONNECTED && 2002 - device->state.role == R_PRIMARY && device->ed_uuid && 2003 - (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { 1937 + if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid && 1938 + (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) && 1939 + (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { 2004 1940 drbd_err(device, "Can only attach to data with current UUID=%016llX\n", 2005 1941 (unsigned long long)device->ed_uuid); 2006 1942 retcode = ERR_DATA_NOT_CURRENT; ··· 2014 1950 } 2015 1951 2016 1952 /* Prevent shrinking of consistent devices ! 
*/ 2017 - if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && 2018 - drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { 2019 - drbd_warn(device, "refusing to truncate a consistent device\n"); 2020 - retcode = ERR_DISK_TOO_SMALL; 2021 - goto force_diskless_dec; 1953 + { 1954 + unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0); 1955 + unsigned long long eff = nbc->md.la_size_sect; 1956 + if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) { 1957 + if (nsz == nbc->disk_conf->disk_size) { 1958 + drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff); 1959 + } else { 1960 + drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff); 1961 + drbd_msg_sprintf_info(adm_ctx.reply_skb, 1962 + "To-be-attached device has last effective > current size, and is consistent\n" 1963 + "(%llu > %llu sectors). Refusing to attach.", eff, nsz); 1964 + retcode = ERR_IMPLICIT_SHRINK; 1965 + goto force_diskless_dec; 1966 + } 1967 + } 2022 1968 } 2023 1969 2024 1970 lock_all_resources(); ··· 2728 2654 2729 2655 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force) 2730 2656 { 2657 + enum drbd_conns cstate; 2731 2658 enum drbd_state_rv rv; 2732 2659 2660 + repeat: 2733 2661 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), 2734 2662 force ? CS_HARD : 0); 2735 2663 ··· 2749 2673 2750 2674 break; 2751 2675 case SS_CW_FAILED_BY_PEER: 2676 + spin_lock_irq(&connection->resource->req_lock); 2677 + cstate = connection->cstate; 2678 + spin_unlock_irq(&connection->resource->req_lock); 2679 + if (cstate <= C_WF_CONNECTION) 2680 + goto repeat; 2752 2681 /* The peer probably wants to see us outdated. */ 2753 2682 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, 2754 2683 disk, D_OUTDATED), 0);
+47
drivers/block/drbd/drbd_protocol.h
··· 70 70 * we may fall back to an opencoded loop instead. */ 71 71 P_WSAME = 0x34, 72 72 73 + /* 0x35 already claimed in DRBD 9 */ 74 + P_ZEROES = 0x36, /* data sock: zero-out, WRITE_ZEROES */ 75 + 76 + /* 0x40 .. 0x48 already claimed in DRBD 9 */ 77 + 73 78 P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ 74 79 P_MAX_OPT_CMD = 0x101, 75 80 ··· 135 130 #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ 136 131 #define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ 137 132 #define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */ 133 + #define DP_ZEROES 1024 /* equiv. REQ_OP_WRITE_ZEROES */ 134 + 135 + /* possible combinations: 136 + * REQ_OP_WRITE_ZEROES: DP_DISCARD | DP_ZEROES 137 + * REQ_OP_WRITE_ZEROES + REQ_NOUNMAP: DP_ZEROES 138 + */ 138 139 139 140 struct p_data { 140 141 u64 sector; /* 64 bits sector number */ ··· 207 196 * drbd_send_sizes()/receive_sizes() 208 197 */ 209 198 #define DRBD_FF_WSAME 4 199 + 200 + /* supports REQ_OP_WRITE_ZEROES on the "wire" protocol. 201 + * 202 + * We used to map that to "discard" on the sending side, and if we cannot 203 + * guarantee that discard zeroes data, the receiving side would map discard 204 + * back to zero-out. 205 + * 206 + * With the introduction of REQ_OP_WRITE_ZEROES, 207 + * we started to use that for both WRITE_ZEROES and DISCARDS, 208 + * hoping that WRITE_ZEROES would "do what we want", 209 + * UNMAP if possible, zero-out the rest. 210 + * 211 + * The example scenario is some LVM "thin" backend. 212 + * 213 + * While an un-allocated block on dm-thin reads as zeroes, on a dm-thin 214 + * with "skip_block_zeroing=true", after a partial block write allocated 215 + * that block, that same block may well map "undefined old garbage" from 216 + * the backends on LBAs that have not yet been written to. 
217 + * 218 + * If we cannot distinguish between zero-out and discard on the receiving 219 + * side, to avoid "undefined old garbage" to pop up randomly at later times 220 + * on supposedly zero-initialized blocks, we'd need to map all discards to 221 + * zero-out on the receiving side. But that would potentially do a full 222 + * alloc on thinly provisioned backends, even when the expectation was to 223 + * unmap/trim/discard/de-allocate. 224 + * 225 + * We need to distinguish on the protocol level, whether we need to guarantee 226 + * zeroes (and thus use zero-out, potentially doing the mentioned full-alloc), 227 + * or if we want to put the emphasis on discard, and only do a "best effort 228 + * zeroing" (by "discarding" blocks aligned to discard-granularity, and zeroing 229 + * only potential unaligned head and tail clippings), to at least *try* to 230 + * avoid "false positives" in an online-verify later, hoping that someone 231 + * set skip_block_zeroing=false. 232 + */ 233 + #define DRBD_FF_WZEROES 8 234 + 210 235 211 236 struct p_connection_features { 212 237 u32 protocol_min;
+220 -31
drivers/block/drbd/drbd_receiver.c
··· 50 50 #include "drbd_req.h" 51 51 #include "drbd_vli.h" 52 52 53 - #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME) 53 + #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES) 54 54 55 55 struct packet_info { 56 56 enum drbd_packet cmd; ··· 1490 1490 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); 1491 1491 } 1492 1492 1493 - static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req) 1493 + /* 1494 + * Mapping "discard" to ZEROOUT with UNMAP does not work for us: 1495 + * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it 1496 + * will directly go to fallback mode, submitting normal writes, and 1497 + * never even try to UNMAP. 1498 + * 1499 + * And dm-thin does not do this (yet), mostly because in general it has 1500 + * to assume that "skip_block_zeroing" is set. See also: 1501 + * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html 1502 + * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html 1503 + * 1504 + * We *may* ignore the discard-zeroes-data setting, if so configured. 1505 + * 1506 + * Assumption is that this "discard_zeroes_data=0" is only because the backend 1507 + * may ignore partial unaligned discards. 1508 + * 1509 + * LVM/DM thin as of at least 1510 + * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) 1511 + * Library version: 1.02.93-RHEL7 (2015-01-28) 1512 + * Driver version: 4.29.0 1513 + * still behaves this way. 1514 + * 1515 + * For unaligned (wrt. alignment and granularity) or too small discards, 1516 + * we zero-out the initial (and/or) trailing unaligned partial chunks, 1517 + * but discard all the aligned full chunks. 1518 + * 1519 + * At least for LVM/DM thin, with skip_block_zeroing=false, 1520 + * the result is effectively "discard_zeroes_data=1". 
1521 + */ 1522 + /* flags: EE_TRIM|EE_ZEROOUT */ 1523 + int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags) 1494 1524 { 1495 1525 struct block_device *bdev = device->ldev->backing_bdev; 1526 + struct request_queue *q = bdev_get_queue(bdev); 1527 + sector_t tmp, nr; 1528 + unsigned int max_discard_sectors, granularity; 1529 + int alignment; 1530 + int err = 0; 1496 1531 1497 - if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9, 1498 - GFP_NOIO, 0)) 1532 + if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM)) 1533 + goto zero_out; 1534 + 1535 + /* Zero-sector (unknown) and one-sector granularities are the same. */ 1536 + granularity = max(q->limits.discard_granularity >> 9, 1U); 1537 + alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; 1538 + 1539 + max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); 1540 + max_discard_sectors -= max_discard_sectors % granularity; 1541 + if (unlikely(!max_discard_sectors)) 1542 + goto zero_out; 1543 + 1544 + if (nr_sectors < granularity) 1545 + goto zero_out; 1546 + 1547 + tmp = start; 1548 + if (sector_div(tmp, granularity) != alignment) { 1549 + if (nr_sectors < 2*granularity) 1550 + goto zero_out; 1551 + /* start + gran - (start + gran - align) % gran */ 1552 + tmp = start + granularity - alignment; 1553 + tmp = start + granularity - sector_div(tmp, granularity); 1554 + 1555 + nr = tmp - start; 1556 + /* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many 1557 + * layers are below us, some may have smaller granularity */ 1558 + err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); 1559 + nr_sectors -= nr; 1560 + start = tmp; 1561 + } 1562 + while (nr_sectors >= max_discard_sectors) { 1563 + err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0); 1564 + nr_sectors -= max_discard_sectors; 1565 + start += max_discard_sectors; 1566 + } 1567 + if (nr_sectors) { 1568 + /* max_discard_sectors is 
unsigned int (and a multiple of 1569 + * granularity, we made sure of that above already); 1570 + * nr is < max_discard_sectors; 1571 + * I don't need sector_div here, even though nr is sector_t */ 1572 + nr = nr_sectors; 1573 + nr -= (unsigned int)nr % granularity; 1574 + if (nr) { 1575 + err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); 1576 + nr_sectors -= nr; 1577 + start += nr; 1578 + } 1579 + } 1580 + zero_out: 1581 + if (nr_sectors) { 1582 + err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 1583 + (flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP); 1584 + } 1585 + return err != 0; 1586 + } 1587 + 1588 + static bool can_do_reliable_discards(struct drbd_device *device) 1589 + { 1590 + struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); 1591 + struct disk_conf *dc; 1592 + bool can_do; 1593 + 1594 + if (!blk_queue_discard(q)) 1595 + return false; 1596 + 1597 + rcu_read_lock(); 1598 + dc = rcu_dereference(device->ldev->disk_conf); 1599 + can_do = dc->discard_zeroes_if_aligned; 1600 + rcu_read_unlock(); 1601 + return can_do; 1602 + } 1603 + 1604 + static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req) 1605 + { 1606 + /* If the backend cannot discard, or does not guarantee 1607 + * read-back zeroes in discarded ranges, we fall back to 1608 + * zero-out. Unless configuration specifically requested 1609 + * otherwise. */ 1610 + if (!can_do_reliable_discards(device)) 1611 + peer_req->flags |= EE_ZEROOUT; 1612 + 1613 + if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, 1614 + peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM))) 1499 1615 peer_req->flags |= EE_WAS_ERROR; 1500 - 1501 1616 drbd_endio_write_sec_final(peer_req); 1502 1617 } 1503 1618 ··· 1665 1550 * Correctness first, performance later. Next step is to code an 1666 1551 * asynchronous variant of the same. 
1667 1552 */ 1668 - if (peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) { 1553 + if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) { 1669 1554 /* wait for all pending IO completions, before we start 1670 1555 * zeroing things out. */ 1671 1556 conn_wait_active_ee_empty(peer_req->peer_device->connection); ··· 1682 1567 spin_unlock_irq(&device->resource->req_lock); 1683 1568 } 1684 1569 1685 - if (peer_req->flags & EE_IS_TRIM) 1686 - drbd_issue_peer_discard(device, peer_req); 1570 + if (peer_req->flags & (EE_TRIM|EE_ZEROOUT)) 1571 + drbd_issue_peer_discard_or_zero_out(device, peer_req); 1687 1572 else /* EE_WRITE_SAME */ 1688 1573 drbd_issue_peer_wsame(device, peer_req); 1689 1574 return 0; ··· 1880 1765 void *dig_vv = peer_device->connection->int_dig_vv; 1881 1766 unsigned long *data; 1882 1767 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL; 1768 + struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL; 1883 1769 struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL; 1884 1770 1885 1771 digest_size = 0; ··· 1902 1786 if (!expect(data_size == 0)) 1903 1787 return NULL; 1904 1788 ds = be32_to_cpu(trim->size); 1789 + } else if (zeroes) { 1790 + if (!expect(data_size == 0)) 1791 + return NULL; 1792 + ds = be32_to_cpu(zeroes->size); 1905 1793 } else if (wsame) { 1906 1794 if (data_size != queue_logical_block_size(device->rq_queue)) { 1907 1795 drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n", ··· 1922 1802 1923 1803 if (!expect(IS_ALIGNED(ds, 512))) 1924 1804 return NULL; 1925 - if (trim || wsame) { 1805 + if (trim || wsame || zeroes) { 1926 1806 if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9))) 1927 1807 return NULL; 1928 1808 } else if (!expect(ds <= DRBD_MAX_BIO_SIZE)) ··· 1947 1827 1948 1828 peer_req->flags |= EE_WRITE; 1949 1829 if (trim) { 1950 - peer_req->flags |= EE_IS_TRIM; 1830 + peer_req->flags |= EE_TRIM; 1831 + return peer_req; 1832 + } 1833 + if (zeroes) { 1834 + peer_req->flags |= EE_ZEROOUT; 1951 
1835 return peer_req; 1952 1836 } 1953 1837 if (wsame) ··· 2450 2326 2451 2327 static unsigned long wire_flags_to_bio_op(u32 dpf) 2452 2328 { 2453 - if (dpf & DP_DISCARD) 2329 + if (dpf & DP_ZEROES) 2454 2330 return REQ_OP_WRITE_ZEROES; 2331 + if (dpf & DP_DISCARD) 2332 + return REQ_OP_DISCARD; 2333 + if (dpf & DP_WSAME) 2334 + return REQ_OP_WRITE_SAME; 2455 2335 else 2456 2336 return REQ_OP_WRITE; 2457 2337 } ··· 2646 2518 op_flags = wire_flags_to_bio_flags(dp_flags); 2647 2519 if (pi->cmd == P_TRIM) { 2648 2520 D_ASSERT(peer_device, peer_req->i.size > 0); 2521 + D_ASSERT(peer_device, op == REQ_OP_DISCARD); 2522 + D_ASSERT(peer_device, peer_req->pages == NULL); 2523 + /* need to play safe: an older DRBD sender 2524 + * may mean zero-out while sending P_TRIM. */ 2525 + if (0 == (connection->agreed_features & DRBD_FF_WZEROES)) 2526 + peer_req->flags |= EE_ZEROOUT; 2527 + } else if (pi->cmd == P_ZEROES) { 2528 + D_ASSERT(peer_device, peer_req->i.size > 0); 2649 2529 D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES); 2650 2530 D_ASSERT(peer_device, peer_req->pages == NULL); 2531 + /* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */ 2532 + if (dp_flags & DP_DISCARD) 2533 + peer_req->flags |= EE_TRIM; 2651 2534 } else if (peer_req->pages == NULL) { 2652 2535 D_ASSERT(device, peer_req->i.size == 0); 2653 2536 D_ASSERT(device, dp_flags & DP_FLUSH); ··· 2726 2587 * we wait for all pending requests, respectively wait for 2727 2588 * active_ee to become empty in drbd_submit_peer_request(); 2728 2589 * better not add ourselves here. 
*/ 2729 - if ((peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) == 0) 2590 + if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0) 2730 2591 list_add_tail(&peer_req->w.list, &device->active_ee); 2731 2592 spin_unlock_irq(&device->resource->req_lock); 2732 2593 ··· 3503 3364 enum drbd_conns rv = C_MASK; 3504 3365 enum drbd_disk_state mydisk; 3505 3366 struct net_conf *nc; 3506 - int hg, rule_nr, rr_conflict, tentative; 3367 + int hg, rule_nr, rr_conflict, tentative, always_asbp; 3507 3368 3508 3369 mydisk = device->state.disk; 3509 3370 if (mydisk == D_NEGOTIATING) ··· 3554 3415 3555 3416 rcu_read_lock(); 3556 3417 nc = rcu_dereference(peer_device->connection->net_conf); 3418 + always_asbp = nc->always_asbp; 3419 + rr_conflict = nc->rr_conflict; 3420 + tentative = nc->tentative; 3421 + rcu_read_unlock(); 3557 3422 3558 - if (hg == 100 || (hg == -100 && nc->always_asbp)) { 3423 + if (hg == 100 || (hg == -100 && always_asbp)) { 3559 3424 int pcount = (device->state.role == R_PRIMARY) 3560 3425 + (peer_role == R_PRIMARY); 3561 3426 int forced = (hg == -100); ··· 3598 3455 "Sync from %s node\n", 3599 3456 (hg < 0) ? "peer" : "this"); 3600 3457 } 3601 - rr_conflict = nc->rr_conflict; 3602 - tentative = nc->tentative; 3603 - rcu_read_unlock(); 3604 3458 3605 3459 if (hg == -100) { 3606 3460 /* FIXME this log message is not correct if we end up here ··· 4120 3980 struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? 
p->qlim : NULL; 4121 3981 enum determine_dev_size dd = DS_UNCHANGED; 4122 3982 sector_t p_size, p_usize, p_csize, my_usize; 3983 + sector_t new_size, cur_size; 4123 3984 int ldsc = 0; /* local disk size changed */ 4124 3985 enum dds_flags ddsf; 4125 3986 ··· 4128 3987 if (!peer_device) 4129 3988 return config_unknown_volume(connection, pi); 4130 3989 device = peer_device->device; 3990 + cur_size = drbd_get_capacity(device->this_bdev); 4131 3991 4132 3992 p_size = be64_to_cpu(p->d_size); 4133 3993 p_usize = be64_to_cpu(p->u_size); ··· 4139 3997 device->p_size = p_size; 4140 3998 4141 3999 if (get_ldev(device)) { 4142 - sector_t new_size, cur_size; 4143 4000 rcu_read_lock(); 4144 4001 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; 4145 4002 rcu_read_unlock(); ··· 4153 4012 if (device->state.conn == C_WF_REPORT_PARAMS) 4154 4013 p_usize = min_not_zero(my_usize, p_usize); 4155 4014 4156 - /* Never shrink a device with usable data during connect. 4157 - But allow online shrinking if we are connected. */ 4015 + /* Never shrink a device with usable data during connect, 4016 + * or "attach" on the peer. 4017 + * But allow online shrinking if we are connected. */ 4158 4018 new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0); 4159 - cur_size = drbd_get_capacity(device->this_bdev); 4160 4019 if (new_size < cur_size && 4161 4020 device->state.disk >= D_OUTDATED && 4162 - device->state.conn < C_CONNECTED) { 4021 + (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) { 4163 4022 drbd_err(device, "The peer's disk size is too small! 
(%llu < %llu sectors)\n", 4164 4023 (unsigned long long)new_size, (unsigned long long)cur_size); 4165 4024 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); ··· 4187 4046 synchronize_rcu(); 4188 4047 kfree(old_disk_conf); 4189 4048 4190 - drbd_info(device, "Peer sets u_size to %lu sectors\n", 4191 - (unsigned long)my_usize); 4049 + drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n", 4050 + (unsigned long)p_usize, (unsigned long)my_usize); 4192 4051 } 4193 4052 4194 4053 put_ldev(device); ··· 4221 4080 * 4222 4081 * However, if he sends a zero current size, 4223 4082 * take his (user-capped or) backing disk size anyways. 4083 + * 4084 + * Unless of course he does not have a disk himself. 4085 + * In which case we ignore this completely. 4224 4086 */ 4087 + sector_t new_size = p_csize ?: p_usize ?: p_size; 4225 4088 drbd_reconsider_queue_parameters(device, NULL, o); 4226 - drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); 4089 + if (new_size == 0) { 4090 + /* Ignore, peer does not know nothing. */ 4091 + } else if (new_size == cur_size) { 4092 + /* nothing to do */ 4093 + } else if (cur_size != 0 && p_size == 0) { 4094 + drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n", 4095 + (unsigned long long)new_size, (unsigned long long)cur_size); 4096 + } else if (new_size < cur_size && device->state.role == R_PRIMARY) { 4097 + drbd_err(device, "The peer's device size is too small! 
(%llu < %llu sectors); demote me first!\n", 4098 + (unsigned long long)new_size, (unsigned long long)cur_size); 4099 + conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 4100 + return -EIO; 4101 + } else { 4102 + /* I believe the peer, if 4103 + * - I don't have a current size myself 4104 + * - we agree on the size anyways 4105 + * - I do have a current size, am Secondary, 4106 + * and he has the only disk 4107 + * - I do have a current size, am Primary, 4108 + * and he has the only disk, 4109 + * which is larger than my current size 4110 + */ 4111 + drbd_set_my_capacity(device, new_size); 4112 + } 4227 4113 } 4228 4114 4229 4115 if (get_ldev(device)) { ··· 4310 4142 kfree(device->p_uuid); 4311 4143 device->p_uuid = p_uuid; 4312 4144 4313 - if (device->state.conn < C_CONNECTED && 4145 + if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) && 4314 4146 device->state.disk < D_INCONSISTENT && 4315 4147 device->state.role == R_PRIMARY && 4316 4148 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { ··· 4536 4368 if (peer_state.conn == C_AHEAD) 4537 4369 ns.conn = C_BEHIND; 4538 4370 4371 + /* TODO: 4372 + * if (primary and diskless and peer uuid != effective uuid) 4373 + * abort attach on peer; 4374 + * 4375 + * If this node does not have good data, was already connected, but 4376 + * the peer did a late attach only now, trying to "negotiate" with me, 4377 + * AND I am currently Primary, possibly frozen, with some specific 4378 + * "effective" uuid, this should never be reached, really, because 4379 + * we first send the uuids, then the current state. 4380 + * 4381 + * In this scenario, we already dropped the connection hard 4382 + * when we received the unsuitable uuids (receive_uuids(). 
4383 + * 4384 + * Should we want to change this, that is: not drop the connection in 4385 + * receive_uuids() already, then we would need to add a branch here 4386 + * that aborts the attach of "unsuitable uuids" on the peer in case 4387 + * this node is currently Diskless Primary. 4388 + */ 4389 + 4539 4390 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING && 4540 4391 get_ldev_if_state(device, D_NEGOTIATING)) { 4541 4392 int cr; /* consider resync */ ··· 4567 4380 (peer_state.disk == D_NEGOTIATING || 4568 4381 os.disk == D_NEGOTIATING)); 4569 4382 /* if we have both been inconsistent, and the peer has been 4570 - * forced to be UpToDate with --overwrite-data */ 4383 + * forced to be UpToDate with --force */ 4571 4384 cr |= test_bit(CONSIDER_RESYNC, &device->flags); 4572 4385 /* if we had been plain connected, and the admin requested to 4573 4386 * start a sync by "invalidate" or "invalidate-remote" */ ··· 5032 4845 5033 4846 peer_req->w.cb = e_end_resync_block; 5034 4847 peer_req->submit_jif = jiffies; 5035 - peer_req->flags |= EE_IS_TRIM; 4848 + peer_req->flags |= EE_TRIM; 5036 4849 5037 4850 spin_lock_irq(&device->resource->req_lock); 5038 4851 list_add_tail(&peer_req->w.list, &device->sync_ee); ··· 5100 4913 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, 5101 4914 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, 5102 4915 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, 4916 + [P_ZEROES] = { 0, sizeof(struct p_trim), receive_Data }, 5103 4917 [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated }, 5104 4918 [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data }, 5105 4919 }; ··· 5385 5197 drbd_info(connection, "Handshake successful: " 5386 5198 "Agreed network protocol version %d\n", connection->agreed_pro_version); 5387 5199 5388 - drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s.\n", 5200 + drbd_info(connection, "Feature flags enabled on 
protocol level: 0x%x%s%s%s%s.\n", 5389 5201 connection->agreed_features, 5390 5202 connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "", 5391 5203 connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "", 5392 - connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : 5204 + connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "", 5205 + connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" : 5393 5206 connection->agreed_features ? "" : " none"); 5394 5207 5395 5208 return 1; ··· 5473 5284 if (pi.cmd != P_AUTH_CHALLENGE) { 5474 5285 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n", 5475 5286 cmdname(pi.cmd), pi.cmd); 5476 - rv = 0; 5287 + rv = -1; 5477 5288 goto fail; 5478 5289 } 5479 5290
+10 -9
drivers/block/drbd/drbd_req.c
··· 63 63 drbd_req_make_private_bio(req, bio_src); 64 64 req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0) 65 65 | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0) 66 - | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_UNMAP : 0) 66 + | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0) 67 67 | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0); 68 68 req->device = device; 69 69 req->master_bio = bio_src; ··· 1155 1155 return remote; 1156 1156 } 1157 1157 1158 - static void drbd_process_discard_req(struct drbd_request *req) 1158 + static void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags) 1159 1159 { 1160 - struct block_device *bdev = req->device->ldev->backing_bdev; 1161 - 1162 - if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9, 1163 - GFP_NOIO, 0)) 1160 + int err = drbd_issue_discard_or_zero_out(req->device, 1161 + req->i.sector, req->i.size >> 9, flags); 1162 + if (err) 1164 1163 req->private_bio->bi_status = BLK_STS_IOERR; 1165 1164 bio_endio(req->private_bio); 1166 1165 } ··· 1188 1189 if (get_ldev(device)) { 1189 1190 if (drbd_insert_fault(device, type)) 1190 1191 bio_io_error(bio); 1191 - else if (bio_op(bio) == REQ_OP_WRITE_ZEROES || 1192 - bio_op(bio) == REQ_OP_DISCARD) 1193 - drbd_process_discard_req(req); 1192 + else if (bio_op(bio) == REQ_OP_WRITE_ZEROES) 1193 + drbd_process_discard_or_zeroes_req(req, EE_ZEROOUT | 1194 + ((bio->bi_opf & REQ_NOUNMAP) ? 0 : EE_TRIM)); 1195 + else if (bio_op(bio) == REQ_OP_DISCARD) 1196 + drbd_process_discard_or_zeroes_req(req, EE_TRIM); 1194 1197 else 1195 1198 generic_make_request(bio); 1196 1199 put_ldev(device);
+2
drivers/block/drbd/drbd_req.h
··· 208 208 __RQ_WRITE, 209 209 __RQ_WSAME, 210 210 __RQ_UNMAP, 211 + __RQ_ZEROES, 211 212 212 213 /* Should call drbd_al_complete_io() for this request... */ 213 214 __RQ_IN_ACT_LOG, ··· 254 253 #define RQ_WRITE (1UL << __RQ_WRITE) 255 254 #define RQ_WSAME (1UL << __RQ_WSAME) 256 255 #define RQ_UNMAP (1UL << __RQ_UNMAP) 256 + #define RQ_ZEROES (1UL << __RQ_ZEROES) 257 257 #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) 258 258 #define RQ_UNPLUG (1UL << __RQ_UNPLUG) 259 259 #define RQ_POSTPONED (1UL << __RQ_POSTPONED)
+4 -7
drivers/block/drbd/drbd_state.c
··· 688 688 CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO); 689 689 } 690 690 691 - enum drbd_state_rv 692 - drbd_request_detach_interruptible(struct drbd_device *device) 691 + int drbd_request_detach_interruptible(struct drbd_device *device) 693 692 { 694 - enum drbd_state_rv rv; 695 - int ret; 693 + int ret, rv; 696 694 697 695 drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ 698 696 wait_event_interruptible(device->state_wait, ··· 1122 1124 ns.pdsk = D_UP_TO_DATE; 1123 1125 } 1124 1126 1125 - /* Implications of the connection stat on the disk states */ 1127 + /* Implications of the connection state on the disk states */ 1126 1128 disk_min = D_DISKLESS; 1127 1129 disk_max = D_UP_TO_DATE; 1128 1130 pdsk_min = D_INCONSISTENT; ··· 2107 2109 spin_unlock_irq(&connection->resource->req_lock); 2108 2110 } 2109 2111 } 2110 - kref_put(&connection->kref, drbd_destroy_connection); 2111 - 2112 2112 conn_md_sync(connection); 2113 + kref_put(&connection->kref, drbd_destroy_connection); 2113 2114 2114 2115 return 0; 2115 2116 }
+2 -3
drivers/block/drbd/drbd_state.h
··· 131 131 enum chg_state_flags, 132 132 struct completion *done); 133 133 extern void print_st_err(struct drbd_device *, union drbd_state, 134 - union drbd_state, int); 134 + union drbd_state, enum drbd_state_rv); 135 135 136 136 enum drbd_state_rv 137 137 _conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, ··· 162 162 } 163 163 164 164 /* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */ 165 - enum drbd_state_rv 166 - drbd_request_detach_interruptible(struct drbd_device *device); 165 + int drbd_request_detach_interruptible(struct drbd_device *device); 167 166 168 167 enum drbd_role conn_highest_role(struct drbd_connection *connection); 169 168 enum drbd_role conn_highest_peer(struct drbd_connection *connection);
+1 -1
drivers/block/drbd/drbd_worker.c
··· 153 153 do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); 154 154 155 155 /* FIXME do we want to detach for failed REQ_OP_DISCARD? 156 - * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ 156 + * ((peer_req->flags & (EE_WAS_ERROR|EE_TRIM)) == EE_WAS_ERROR) */ 157 157 if (peer_req->flags & EE_WAS_ERROR) 158 158 __drbd_chk_io_error(device, DRBD_WRITE_ERROR); 159 159
-1
drivers/block/loop.c
··· 616 616 default: 617 617 WARN_ON_ONCE(1); 618 618 return -EIO; 619 - break; 620 619 } 621 620 } 622 621
-1
drivers/block/sunvdc.c
··· 633 633 case VD_OP_GET_EFI: 634 634 case VD_OP_SET_EFI: 635 635 return -EOPNOTSUPP; 636 - break; 637 636 }; 638 637 639 638 map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
+8 -6
drivers/block/swim3.c
··· 995 995 struct swim3 __iomem *sw = fs->swim3; 996 996 997 997 mutex_lock(&swim3_mutex); 998 - if (fs->ref_count > 0 && --fs->ref_count == 0) { 998 + if (fs->ref_count > 0) 999 + --fs->ref_count; 1000 + else if (fs->ref_count == -1) 1001 + fs->ref_count = 0; 1002 + if (fs->ref_count == 0) { 999 1003 swim3_action(fs, MOTOR_OFF); 1000 1004 out_8(&sw->control_bic, 0xff); 1001 1005 swim3_select(fs, RELAX); ··· 1091 1087 struct floppy_state *fs = &floppy_states[index]; 1092 1088 int rc = -EBUSY; 1093 1089 1094 - /* Do this first for message macros */ 1095 - memset(fs, 0, sizeof(*fs)); 1096 1090 fs->mdev = mdev; 1097 1091 fs->index = index; 1098 1092 ··· 1153 1151 swim3_err("%s", "Couldn't request interrupt\n"); 1154 1152 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0); 1155 1153 goto out_unmap; 1156 - return -EBUSY; 1157 1154 } 1158 1155 1159 1156 timer_setup(&fs->timeout, NULL, 0); ··· 1189 1188 return rc; 1190 1189 } 1191 1190 1192 - fs = &floppy_states[floppy_count]; 1193 - 1194 1191 disk = alloc_disk(1); 1195 1192 if (disk == NULL) { 1196 1193 rc = -ENOMEM; 1197 1194 goto out_unregister; 1198 1195 } 1196 + 1197 + fs = &floppy_states[floppy_count]; 1198 + memset(fs, 0, sizeof(*fs)); 1199 1199 1200 1200 disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2, 1201 1201 BLK_MQ_F_SHOULD_MERGE);
+1
drivers/cdrom/gdrom.c
··· 873 873 platform_device_unregister(pd); 874 874 platform_driver_unregister(&gdrom_driver); 875 875 kfree(gd.toc); 876 + kfree(gd.cd_info); 876 877 } 877 878 878 879 module_init(init_gdrom);
-1
drivers/lightnvm/pblk-recovery.c
··· 418 418 if (ret) { 419 419 pblk_err(pblk, "I/O submission failed: %d\n", ret); 420 420 bio_put(bio); 421 - bio_put(bio); 422 421 return ret; 423 422 } 424 423
+17 -6
include/linux/blk-cgroup.h
··· 499 499 */ 500 500 static inline bool blkg_tryget(struct blkcg_gq *blkg) 501 501 { 502 - return percpu_ref_tryget(&blkg->refcnt); 502 + return blkg && percpu_ref_tryget(&blkg->refcnt); 503 503 } 504 504 505 505 /** 506 506 * blkg_tryget_closest - try and get a blkg ref on the closet blkg 507 507 * @blkg: blkg to get 508 508 * 509 - * This walks up the blkg tree to find the closest non-dying blkg and returns 510 - * the blkg that it did association with as it may not be the passed in blkg. 509 + * This needs to be called rcu protected. As the failure mode here is to walk 510 + * up the blkg tree, this ensure that the blkg->parent pointers are always 511 + * valid. This returns the blkg that it ended up taking a reference on or %NULL 512 + * if no reference was taken. 511 513 */ 512 514 static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) 513 515 { 514 - while (blkg && !percpu_ref_tryget(&blkg->refcnt)) 515 - blkg = blkg->parent; 516 + struct blkcg_gq *ret_blkg = NULL; 516 517 517 - return blkg; 518 + WARN_ON_ONCE(!rcu_read_lock_held()); 519 + 520 + while (blkg) { 521 + if (blkg_tryget(blkg)) { 522 + ret_blkg = blkg; 523 + break; 524 + } 525 + blkg = blkg->parent; 526 + } 527 + 528 + return ret_blkg; 518 529 } 519 530 520 531 /**
+1 -1
include/linux/drbd.h
··· 51 51 #endif 52 52 53 53 extern const char *drbd_buildtag(void); 54 - #define REL_VERSION "8.4.10" 54 + #define REL_VERSION "8.4.11" 55 55 #define API_VERSION 1 56 56 #define PRO_VERSION_MIN 86 57 57 #define PRO_VERSION_MAX 101
+4 -1
include/linux/genl_magic_struct.h
··· 191 191 { 192 192 switch (0) { 193 193 #include GENL_MAGIC_INCLUDE_FILE 194 + case 0: 194 195 ; 195 196 } 196 197 } ··· 210 209 { 211 210 switch (0) { 212 211 #include GENL_MAGIC_INCLUDE_FILE 212 + case 0: 213 213 ; 214 214 } 215 215 } ··· 220 218 static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ 221 219 { \ 222 220 switch (0) { \ 223 - s_fields \ 221 + s_fields \ 222 + case 0: \ 224 223 ; \ 225 224 } \ 226 225 }