Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

btrfs: qgroup: Cleanup the old ref_node-oriented mechanism.

Goodbye, the old mechanism.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>

authored by

Qu Wenruo and committed by
Chris Mason
e69bcee3 442244c9

+3 -972
+1 -1
fs/btrfs/ctree.h
··· 1736 1736 /* list of dirty qgroups to be written at next commit */ 1737 1737 struct list_head dirty_qgroups; 1738 1738 1739 - /* used by btrfs_qgroup_record_ref for an efficient tree traversal */ 1739 + /* used by qgroup for an efficient tree traversal */ 1740 1740 u64 qgroup_seq; 1741 1741 1742 1742 /* qgroup rescan items */
-5
fs/btrfs/extent-tree.c
··· 1981 1981 u64 refs; 1982 1982 int ret; 1983 1983 int no_quota = node->no_quota; 1984 - enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL; 1985 1984 1986 1985 path = btrfs_alloc_path(); 1987 1986 if (!path) ··· 2008 2009 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2009 2010 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2010 2011 refs = btrfs_extent_refs(leaf, item); 2011 - if (refs) 2012 - type = BTRFS_QGROUP_OPER_ADD_SHARED; 2013 2012 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 2014 2013 if (extent_op) 2015 2014 __run_delayed_extent_op(extent_op, leaf, item); ··· 6109 6112 u64 bytenr = node->bytenr; 6110 6113 u64 num_bytes = node->num_bytes; 6111 6114 int last_ref = 0; 6112 - enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL; 6113 6115 bool skinny_metadata = btrfs_fs_incompat(root->fs_info, 6114 6116 SKINNY_METADATA); 6115 6117 ··· 6289 6293 refs -= refs_to_drop; 6290 6294 6291 6295 if (refs > 0) { 6292 - type = BTRFS_QGROUP_OPER_SUB_SHARED; 6293 6296 if (extent_op) 6294 6297 __run_delayed_extent_op(extent_op, leaf, ei); 6295 6298 /*
+2 -862
fs/btrfs/qgroup.c
··· 34 34 #include "extent_io.h" 35 35 #include "qgroup.h" 36 36 37 + 37 38 /* TODO XXX FIXME 38 39 * - subvol delete -> delete when ref goes to 0? delete limits also? 39 40 * - reorganize keys ··· 1388 1387 return ret; 1389 1388 } 1390 1389 1391 - static int comp_oper_exist(struct btrfs_qgroup_operation *oper1, 1392 - struct btrfs_qgroup_operation *oper2) 1393 - { 1394 - /* 1395 - * Ignore seq and type here, we're looking for any operation 1396 - * at all related to this extent on that root. 1397 - */ 1398 - if (oper1->bytenr < oper2->bytenr) 1399 - return -1; 1400 - if (oper1->bytenr > oper2->bytenr) 1401 - return 1; 1402 - if (oper1->ref_root < oper2->ref_root) 1403 - return -1; 1404 - if (oper1->ref_root > oper2->ref_root) 1405 - return 1; 1406 - return 0; 1407 - } 1408 - 1409 - static int qgroup_oper_exists(struct btrfs_fs_info *fs_info, 1410 - struct btrfs_qgroup_operation *oper) 1411 - { 1412 - struct rb_node *n; 1413 - struct btrfs_qgroup_operation *cur; 1414 - int cmp; 1415 - 1416 - spin_lock(&fs_info->qgroup_op_lock); 1417 - n = fs_info->qgroup_op_tree.rb_node; 1418 - while (n) { 1419 - cur = rb_entry(n, struct btrfs_qgroup_operation, n); 1420 - cmp = comp_oper_exist(cur, oper); 1421 - if (cmp < 0) { 1422 - n = n->rb_right; 1423 - } else if (cmp) { 1424 - n = n->rb_left; 1425 - } else { 1426 - spin_unlock(&fs_info->qgroup_op_lock); 1427 - return -EEXIST; 1428 - } 1429 - } 1430 - spin_unlock(&fs_info->qgroup_op_lock); 1431 - return 0; 1432 - } 1433 - 1434 - static int comp_oper(struct btrfs_qgroup_operation *oper1, 1435 - struct btrfs_qgroup_operation *oper2) 1436 - { 1437 - if (oper1->bytenr < oper2->bytenr) 1438 - return -1; 1439 - if (oper1->bytenr > oper2->bytenr) 1440 - return 1; 1441 - if (oper1->ref_root < oper2->ref_root) 1442 - return -1; 1443 - if (oper1->ref_root > oper2->ref_root) 1444 - return 1; 1445 - if (oper1->seq < oper2->seq) 1446 - return -1; 1447 - if (oper1->seq > oper2->seq) 1448 - return 1; 1449 - if (oper1->type < oper2->type) 1450 
- return -1; 1451 - if (oper1->type > oper2->type) 1452 - return 1; 1453 - return 0; 1454 - } 1455 - 1456 - static int insert_qgroup_oper(struct btrfs_fs_info *fs_info, 1457 - struct btrfs_qgroup_operation *oper) 1458 - { 1459 - struct rb_node **p; 1460 - struct rb_node *parent = NULL; 1461 - struct btrfs_qgroup_operation *cur; 1462 - int cmp; 1463 - 1464 - spin_lock(&fs_info->qgroup_op_lock); 1465 - p = &fs_info->qgroup_op_tree.rb_node; 1466 - while (*p) { 1467 - parent = *p; 1468 - cur = rb_entry(parent, struct btrfs_qgroup_operation, n); 1469 - cmp = comp_oper(cur, oper); 1470 - if (cmp < 0) { 1471 - p = &(*p)->rb_right; 1472 - } else if (cmp) { 1473 - p = &(*p)->rb_left; 1474 - } else { 1475 - spin_unlock(&fs_info->qgroup_op_lock); 1476 - return -EEXIST; 1477 - } 1478 - } 1479 - rb_link_node(&oper->n, parent, p); 1480 - rb_insert_color(&oper->n, &fs_info->qgroup_op_tree); 1481 - spin_unlock(&fs_info->qgroup_op_lock); 1482 - return 0; 1483 - } 1484 - 1485 - /* 1486 - * Record a quota operation for processing later on. 1487 - * @trans: the transaction we are adding the delayed op to. 1488 - * @fs_info: the fs_info for this fs. 1489 - * @ref_root: the root of the reference we are acting on, 1490 - * @bytenr: the bytenr we are acting on. 1491 - * @num_bytes: the number of bytes in the reference. 1492 - * @type: the type of operation this is. 1493 - * @mod_seq: do we need to get a sequence number for looking up roots. 1494 - * 1495 - * We just add it to our trans qgroup_ref_list and carry on and process these 1496 - * operations in order at some later point. If the reference root isn't a fs 1497 - * root then we don't bother with doing anything. 1498 - * 1499 - * MUST BE HOLDING THE REF LOCK. 
1500 - */ 1501 - int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, 1502 - struct btrfs_fs_info *fs_info, u64 ref_root, 1503 - u64 bytenr, u64 num_bytes, 1504 - enum btrfs_qgroup_operation_type type, int mod_seq) 1505 - { 1506 - struct btrfs_qgroup_operation *oper; 1507 - int ret; 1508 - 1509 - if (!is_fstree(ref_root) || !fs_info->quota_enabled) 1510 - return 0; 1511 - 1512 - oper = kmalloc(sizeof(*oper), GFP_NOFS); 1513 - if (!oper) 1514 - return -ENOMEM; 1515 - 1516 - oper->ref_root = ref_root; 1517 - oper->bytenr = bytenr; 1518 - oper->num_bytes = num_bytes; 1519 - oper->type = type; 1520 - oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); 1521 - INIT_LIST_HEAD(&oper->elem.list); 1522 - oper->elem.seq = 0; 1523 - 1524 - trace_btrfs_qgroup_record_ref(oper); 1525 - 1526 - if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) { 1527 - /* 1528 - * If any operation for this bytenr/ref_root combo 1529 - * exists, then we know it's not exclusively owned and 1530 - * shouldn't be queued up. 1531 - * 1532 - * This also catches the case where we have a cloned 1533 - * extent that gets queued up multiple times during 1534 - * drop snapshot. 
1535 - */ 1536 - if (qgroup_oper_exists(fs_info, oper)) { 1537 - kfree(oper); 1538 - return 0; 1539 - } 1540 - } 1541 - 1542 - ret = insert_qgroup_oper(fs_info, oper); 1543 - if (ret) { 1544 - /* Shouldn't happen so have an assert for developers */ 1545 - ASSERT(0); 1546 - kfree(oper); 1547 - return ret; 1548 - } 1549 - list_add_tail(&oper->list, &trans->qgroup_ref_list); 1550 - 1551 - if (mod_seq) 1552 - btrfs_get_tree_mod_seq(fs_info, &oper->elem); 1553 - 1554 - return 0; 1555 - } 1556 - 1557 1390 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, 1558 1391 struct btrfs_fs_info *fs_info) 1559 1392 { ··· 1439 1604 rb_link_node(&record->node, parent_node, p); 1440 1605 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1441 1606 return NULL; 1442 - } 1443 - 1444 - /* 1445 - * The easy accounting, if we are adding/removing the only ref for an extent 1446 - * then this qgroup and all of the parent qgroups get their refrence and 1447 - * exclusive counts adjusted. 1448 - */ 1449 - static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1450 - struct btrfs_qgroup_operation *oper) 1451 - { 1452 - struct ulist *tmp; 1453 - int sign = 0; 1454 - int ret = 0; 1455 - 1456 - tmp = ulist_alloc(GFP_NOFS); 1457 - if (!tmp) 1458 - return -ENOMEM; 1459 - 1460 - spin_lock(&fs_info->qgroup_lock); 1461 - if (!fs_info->quota_root) 1462 - goto out; 1463 - 1464 - switch (oper->type) { 1465 - case BTRFS_QGROUP_OPER_ADD_EXCL: 1466 - sign = 1; 1467 - break; 1468 - case BTRFS_QGROUP_OPER_SUB_EXCL: 1469 - sign = -1; 1470 - break; 1471 - default: 1472 - ASSERT(0); 1473 - } 1474 - ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root, 1475 - oper->num_bytes, sign); 1476 - out: 1477 - spin_unlock(&fs_info->qgroup_lock); 1478 - ulist_free(tmp); 1479 - return ret; 1480 - } 1481 - 1482 - /* 1483 - * Walk all of the roots that pointed to our bytenr and adjust their refcnts as 1484 - * properly. 
1485 - */ 1486 - static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info, 1487 - u64 root_to_skip, struct ulist *tmp, 1488 - struct ulist *roots, struct ulist *qgroups, 1489 - u64 seq, int *old_roots, int rescan) 1490 - { 1491 - struct ulist_node *unode; 1492 - struct ulist_iterator uiter; 1493 - struct ulist_node *tmp_unode; 1494 - struct ulist_iterator tmp_uiter; 1495 - struct btrfs_qgroup *qg; 1496 - int ret; 1497 - 1498 - ULIST_ITER_INIT(&uiter); 1499 - while ((unode = ulist_next(roots, &uiter))) { 1500 - /* We don't count our current root here */ 1501 - if (unode->val == root_to_skip) 1502 - continue; 1503 - qg = find_qgroup_rb(fs_info, unode->val); 1504 - if (!qg) 1505 - continue; 1506 - /* 1507 - * We could have a pending removal of this same ref so we may 1508 - * not have actually found our ref root when doing 1509 - * btrfs_find_all_roots, so we need to keep track of how many 1510 - * old roots we find in case we removed ours and added a 1511 - * different one at the same time. I don't think this could 1512 - * happen in practice but that sort of thinking leads to pain 1513 - * and suffering and to the dark side. 1514 - */ 1515 - (*old_roots)++; 1516 - 1517 - ulist_reinit(tmp); 1518 - ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), 1519 - GFP_ATOMIC); 1520 - if (ret < 0) 1521 - return ret; 1522 - ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC); 1523 - if (ret < 0) 1524 - return ret; 1525 - ULIST_ITER_INIT(&tmp_uiter); 1526 - while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1527 - struct btrfs_qgroup_list *glist; 1528 - int mod; 1529 - 1530 - qg = u64_to_ptr(tmp_unode->aux); 1531 - /* 1532 - * We use this sequence number to keep from having to 1533 - * run the whole list and 0 out the refcnt every time. 1534 - * We basically use sequnce as the known 0 count and 1535 - * then add 1 everytime we see a qgroup. 
This is how we 1536 - * get how many of the roots actually point up to the 1537 - * upper level qgroups in order to determine exclusive 1538 - * counts. 1539 - * 1540 - * For rescan none of the extent is recorded before so 1541 - * we just don't add old_refcnt. 1542 - */ 1543 - if (rescan) 1544 - mod = 0; 1545 - else 1546 - mod = 1; 1547 - btrfs_qgroup_update_old_refcnt(qg, seq, mod); 1548 - btrfs_qgroup_update_new_refcnt(qg, seq, 1); 1549 - list_for_each_entry(glist, &qg->groups, next_group) { 1550 - ret = ulist_add(qgroups, glist->group->qgroupid, 1551 - ptr_to_u64(glist->group), 1552 - GFP_ATOMIC); 1553 - if (ret < 0) 1554 - return ret; 1555 - ret = ulist_add(tmp, glist->group->qgroupid, 1556 - ptr_to_u64(glist->group), 1557 - GFP_ATOMIC); 1558 - if (ret < 0) 1559 - return ret; 1560 - } 1561 - } 1562 - } 1563 - return 0; 1564 - } 1565 - 1566 - /* 1567 - * We need to walk forward in our operation tree and account for any roots that 1568 - * were deleted after we made this operation. 1569 - */ 1570 - static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info, 1571 - struct btrfs_qgroup_operation *oper, 1572 - struct ulist *tmp, 1573 - struct ulist *qgroups, u64 seq, 1574 - int *old_roots) 1575 - { 1576 - struct ulist_node *unode; 1577 - struct ulist_iterator uiter; 1578 - struct btrfs_qgroup *qg; 1579 - struct btrfs_qgroup_operation *tmp_oper; 1580 - struct rb_node *n; 1581 - int ret; 1582 - 1583 - ulist_reinit(tmp); 1584 - 1585 - /* 1586 - * We only walk forward in the tree since we're only interested in 1587 - * removals that happened _after_ our operation. 1588 - */ 1589 - spin_lock(&fs_info->qgroup_op_lock); 1590 - n = rb_next(&oper->n); 1591 - spin_unlock(&fs_info->qgroup_op_lock); 1592 - if (!n) 1593 - return 0; 1594 - tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); 1595 - while (tmp_oper->bytenr == oper->bytenr) { 1596 - /* 1597 - * If it's not a removal we don't care, additions work out 1598 - * properly with our refcnt tracking. 
1599 - */ 1600 - if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED && 1601 - tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL) 1602 - goto next; 1603 - qg = find_qgroup_rb(fs_info, tmp_oper->ref_root); 1604 - if (!qg) 1605 - goto next; 1606 - ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), 1607 - GFP_ATOMIC); 1608 - if (ret) { 1609 - if (ret < 0) 1610 - return ret; 1611 - /* 1612 - * We only want to increase old_roots if this qgroup is 1613 - * not already in the list of qgroups. If it is already 1614 - * there then that means it must have been re-added or 1615 - * the delete will be discarded because we had an 1616 - * existing ref that we haven't looked up yet. In this 1617 - * case we don't want to increase old_roots. So if ret 1618 - * == 1 then we know that this is the first time we've 1619 - * seen this qgroup and we can bump the old_roots. 1620 - */ 1621 - (*old_roots)++; 1622 - ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), 1623 - GFP_ATOMIC); 1624 - if (ret < 0) 1625 - return ret; 1626 - } 1627 - next: 1628 - spin_lock(&fs_info->qgroup_op_lock); 1629 - n = rb_next(&tmp_oper->n); 1630 - spin_unlock(&fs_info->qgroup_op_lock); 1631 - if (!n) 1632 - break; 1633 - tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); 1634 - } 1635 - 1636 - /* Ok now process the qgroups we found */ 1637 - ULIST_ITER_INIT(&uiter); 1638 - while ((unode = ulist_next(tmp, &uiter))) { 1639 - struct btrfs_qgroup_list *glist; 1640 - 1641 - qg = u64_to_ptr(unode->aux); 1642 - btrfs_qgroup_update_old_refcnt(qg, seq, 1); 1643 - btrfs_qgroup_update_new_refcnt(qg, seq, 1); 1644 - list_for_each_entry(glist, &qg->groups, next_group) { 1645 - ret = ulist_add(qgroups, glist->group->qgroupid, 1646 - ptr_to_u64(glist->group), GFP_ATOMIC); 1647 - if (ret < 0) 1648 - return ret; 1649 - ret = ulist_add(tmp, glist->group->qgroupid, 1650 - ptr_to_u64(glist->group), GFP_ATOMIC); 1651 - if (ret < 0) 1652 - return ret; 1653 - } 1654 - } 1655 - return 0; 1656 - } 1657 - 1658 - /* Add refcnt 
for the newly added reference. */ 1659 - static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info, 1660 - struct btrfs_qgroup_operation *oper, 1661 - struct btrfs_qgroup *qgroup, 1662 - struct ulist *tmp, struct ulist *qgroups, 1663 - u64 seq) 1664 - { 1665 - struct ulist_node *unode; 1666 - struct ulist_iterator uiter; 1667 - struct btrfs_qgroup *qg; 1668 - int ret; 1669 - 1670 - ulist_reinit(tmp); 1671 - ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup), 1672 - GFP_ATOMIC); 1673 - if (ret < 0) 1674 - return ret; 1675 - ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup), 1676 - GFP_ATOMIC); 1677 - if (ret < 0) 1678 - return ret; 1679 - ULIST_ITER_INIT(&uiter); 1680 - while ((unode = ulist_next(tmp, &uiter))) { 1681 - struct btrfs_qgroup_list *glist; 1682 - 1683 - qg = u64_to_ptr(unode->aux); 1684 - if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) 1685 - btrfs_qgroup_update_new_refcnt(qg, seq, 1); 1686 - else 1687 - btrfs_qgroup_update_old_refcnt(qg, seq, 1); 1688 - list_for_each_entry(glist, &qg->groups, next_group) { 1689 - ret = ulist_add(tmp, glist->group->qgroupid, 1690 - ptr_to_u64(glist->group), GFP_ATOMIC); 1691 - if (ret < 0) 1692 - return ret; 1693 - ret = ulist_add(qgroups, glist->group->qgroupid, 1694 - ptr_to_u64(glist->group), GFP_ATOMIC); 1695 - if (ret < 0) 1696 - return ret; 1697 - } 1698 - } 1699 - return 0; 1700 1607 } 1701 1608 1702 1609 #define UPDATE_NEW 0 ··· 1502 1925 /* 1503 1926 * Update qgroup rfer/excl counters. 1504 1927 * Rfer update is easy, codes can explain themselves. 1928 + * 1505 1929 * Excl update is tricky, the update is split into 2 part. 1506 1930 * Part 1: Possible exclusive <-> sharing detect: 1507 1931 * | A | !A | ··· 1620 2042 return 0; 1621 2043 } 1622 2044 1623 - /* 1624 - * This adjusts the counters for all referenced qgroups if need be. 
1625 - */ 1626 - static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info, 1627 - u64 root_to_skip, u64 num_bytes, 1628 - struct ulist *qgroups, u64 seq, 1629 - int old_roots, int new_roots, int rescan) 1630 - { 1631 - struct ulist_node *unode; 1632 - struct ulist_iterator uiter; 1633 - struct btrfs_qgroup *qg; 1634 - u64 cur_new_count, cur_old_count; 1635 - 1636 - ULIST_ITER_INIT(&uiter); 1637 - while ((unode = ulist_next(qgroups, &uiter))) { 1638 - bool dirty = false; 1639 - 1640 - qg = u64_to_ptr(unode->aux); 1641 - cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 1642 - cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 1643 - 1644 - /* 1645 - * Wasn't referenced before but is now, add to the reference 1646 - * counters. 1647 - */ 1648 - if (cur_old_count == 0 && cur_new_count > 0) { 1649 - qg->rfer += num_bytes; 1650 - qg->rfer_cmpr += num_bytes; 1651 - dirty = true; 1652 - } 1653 - 1654 - /* 1655 - * Was referenced before but isn't now, subtract from the 1656 - * reference counters. 1657 - */ 1658 - if (cur_old_count > 0 && cur_new_count == 0) { 1659 - qg->rfer -= num_bytes; 1660 - qg->rfer_cmpr -= num_bytes; 1661 - dirty = true; 1662 - } 1663 - 1664 - /* 1665 - * If our refcount was the same as the roots previously but our 1666 - * new count isn't the same as the number of roots now then we 1667 - * went from having a exclusive reference on this range to not. 1668 - */ 1669 - if (old_roots && cur_old_count == old_roots && 1670 - (cur_new_count != new_roots || new_roots == 0)) { 1671 - WARN_ON(cur_new_count != new_roots && new_roots == 0); 1672 - qg->excl -= num_bytes; 1673 - qg->excl_cmpr -= num_bytes; 1674 - dirty = true; 1675 - } 1676 - 1677 - /* 1678 - * If we didn't reference all the roots before but now we do we 1679 - * have an exclusive reference to this range. 
1680 - */ 1681 - if ((!old_roots || (old_roots && cur_old_count != old_roots)) 1682 - && cur_new_count == new_roots) { 1683 - qg->excl += num_bytes; 1684 - qg->excl_cmpr += num_bytes; 1685 - dirty = true; 1686 - } 1687 - 1688 - if (dirty) 1689 - qgroup_dirty(fs_info, qg); 1690 - } 1691 - return 0; 1692 - } 1693 - 1694 - /* 1695 - * If we removed a data extent and there were other references for that bytenr 1696 - * then we need to lookup all referenced roots to make sure we still don't 1697 - * reference this bytenr. If we do then we can just discard this operation. 1698 - */ 1699 - static int check_existing_refs(struct btrfs_trans_handle *trans, 1700 - struct btrfs_fs_info *fs_info, 1701 - struct btrfs_qgroup_operation *oper) 1702 - { 1703 - struct ulist *roots = NULL; 1704 - struct ulist_node *unode; 1705 - struct ulist_iterator uiter; 1706 - int ret = 0; 1707 - 1708 - ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, 1709 - oper->elem.seq, &roots); 1710 - if (ret < 0) 1711 - return ret; 1712 - ret = 0; 1713 - 1714 - ULIST_ITER_INIT(&uiter); 1715 - while ((unode = ulist_next(roots, &uiter))) { 1716 - if (unode->val == oper->ref_root) { 1717 - ret = 1; 1718 - break; 1719 - } 1720 - } 1721 - ulist_free(roots); 1722 - btrfs_put_tree_mod_seq(fs_info, &oper->elem); 1723 - 1724 - return ret; 1725 - } 1726 - 1727 - /* 1728 - * If we share a reference across multiple roots then we may need to adjust 1729 - * various qgroups referenced and exclusive counters. The basic premise is this 1730 - * 1731 - * 1) We have seq to represent a 0 count. Instead of looping through all of the 1732 - * qgroups and resetting their refcount to 0 we just constantly bump this 1733 - * sequence number to act as the base reference count. This means that if 1734 - * anybody is equal to or below this sequence they were never referenced. We 1735 - * jack this sequence up by the number of roots we found each time in order to 1736 - * make sure we don't have any overlap. 
1737 - * 1738 - * 2) We first search all the roots that reference the area _except_ the root 1739 - * we're acting on currently. This makes up the old_refcnt of all the qgroups 1740 - * before. 1741 - * 1742 - * 3) We walk all of the qgroups referenced by the root we are currently acting 1743 - * on, and will either adjust old_refcnt in the case of a removal or the 1744 - * new_refcnt in the case of an addition. 1745 - * 1746 - * 4) Finally we walk all the qgroups that are referenced by this range 1747 - * including the root we are acting on currently. We will adjust the counters 1748 - * based on the number of roots we had and will have after this operation. 1749 - * 1750 - * Take this example as an illustration 1751 - * 1752 - * [qgroup 1/0] 1753 - * / | \ 1754 - * [qg 0/0] [qg 0/1] [qg 0/2] 1755 - * \ | / 1756 - * [ extent ] 1757 - * 1758 - * Say we are adding a reference that is covered by qg 0/0. The first step 1759 - * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with 1760 - * old_roots being 2. Because it is adding new_roots will be 1. We then go 1761 - * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's 1762 - * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we 1763 - * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a 1764 - * reference and thus must add the size to the referenced bytes. Everything 1765 - * else is the same so nothing else changes. 
1766 - */ 1767 - static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, 1768 - struct btrfs_fs_info *fs_info, 1769 - struct btrfs_qgroup_operation *oper) 1770 - { 1771 - struct ulist *roots = NULL; 1772 - struct ulist *qgroups, *tmp; 1773 - struct btrfs_qgroup *qgroup; 1774 - struct seq_list elem = SEQ_LIST_INIT(elem); 1775 - u64 seq; 1776 - int old_roots = 0; 1777 - int new_roots = 0; 1778 - int ret = 0; 1779 - 1780 - if (oper->elem.seq) { 1781 - ret = check_existing_refs(trans, fs_info, oper); 1782 - if (ret < 0) 1783 - return ret; 1784 - if (ret) 1785 - return 0; 1786 - } 1787 - 1788 - qgroups = ulist_alloc(GFP_NOFS); 1789 - if (!qgroups) 1790 - return -ENOMEM; 1791 - 1792 - tmp = ulist_alloc(GFP_NOFS); 1793 - if (!tmp) { 1794 - ulist_free(qgroups); 1795 - return -ENOMEM; 1796 - } 1797 - 1798 - btrfs_get_tree_mod_seq(fs_info, &elem); 1799 - ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, 1800 - &roots); 1801 - btrfs_put_tree_mod_seq(fs_info, &elem); 1802 - if (ret < 0) { 1803 - ulist_free(qgroups); 1804 - ulist_free(tmp); 1805 - return ret; 1806 - } 1807 - spin_lock(&fs_info->qgroup_lock); 1808 - qgroup = find_qgroup_rb(fs_info, oper->ref_root); 1809 - if (!qgroup) 1810 - goto out; 1811 - seq = fs_info->qgroup_seq; 1812 - 1813 - /* 1814 - * So roots is the list of all the roots currently pointing at the 1815 - * bytenr, including the ref we are adding if we are adding, or not if 1816 - * we are removing a ref. So we pass in the ref_root to skip that root 1817 - * in our calculations. We set old_refnct and new_refcnt cause who the 1818 - * hell knows what everything looked like before, and it doesn't matter 1819 - * except... 
1820 - */ 1821 - ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups, 1822 - seq, &old_roots, 0); 1823 - if (ret < 0) 1824 - goto out; 1825 - 1826 - /* 1827 - * Now adjust the refcounts of the qgroups that care about this 1828 - * reference, either the old_count in the case of removal or new_count 1829 - * in the case of an addition. 1830 - */ 1831 - ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups, 1832 - seq); 1833 - if (ret < 0) 1834 - goto out; 1835 - 1836 - /* 1837 - * ...in the case of removals. If we had a removal before we got around 1838 - * to processing this operation then we need to find that guy and count 1839 - * his references as if they really existed so we don't end up screwing 1840 - * up the exclusive counts. Then whenever we go to process the delete 1841 - * everything will be grand and we can account for whatever exclusive 1842 - * changes need to be made there. We also have to pass in old_roots so 1843 - * we have an accurate count of the roots as it pertains to this 1844 - * operations view of the world. 1845 - */ 1846 - ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq, 1847 - &old_roots); 1848 - if (ret < 0) 1849 - goto out; 1850 - 1851 - /* 1852 - * We are adding our root, need to adjust up the number of roots, 1853 - * otherwise old_roots is the number of roots we want. 1854 - */ 1855 - if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { 1856 - new_roots = old_roots + 1; 1857 - } else { 1858 - new_roots = old_roots; 1859 - old_roots++; 1860 - } 1861 - 1862 - /* 1863 - * Bump qgroup_seq to avoid seq overlap 1864 - * XXX: This makes qgroup_seq mismatch with oper->seq. 1865 - */ 1866 - fs_info->qgroup_seq += old_roots + 1; 1867 - 1868 - 1869 - /* 1870 - * And now the magic happens, bless Arne for having a pretty elegant 1871 - * solution for this. 
1872 - */ 1873 - qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes, 1874 - qgroups, seq, old_roots, new_roots, 0); 1875 - out: 1876 - spin_unlock(&fs_info->qgroup_lock); 1877 - ulist_free(qgroups); 1878 - ulist_free(roots); 1879 - ulist_free(tmp); 1880 - return ret; 1881 - } 1882 - 1883 - /* 1884 - * Process a reference to a shared subtree. This type of operation is 1885 - * queued during snapshot removal when we encounter extents which are 1886 - * shared between more than one root. 1887 - */ 1888 - static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, 1889 - struct btrfs_fs_info *fs_info, 1890 - struct btrfs_qgroup_operation *oper) 1891 - { 1892 - struct ulist *roots = NULL; 1893 - struct ulist_node *unode; 1894 - struct ulist_iterator uiter; 1895 - struct btrfs_qgroup_list *glist; 1896 - struct ulist *parents; 1897 - int ret = 0; 1898 - int err; 1899 - struct btrfs_qgroup *qg; 1900 - u64 root_obj = 0; 1901 - struct seq_list elem = SEQ_LIST_INIT(elem); 1902 - 1903 - parents = ulist_alloc(GFP_NOFS); 1904 - if (!parents) 1905 - return -ENOMEM; 1906 - 1907 - btrfs_get_tree_mod_seq(fs_info, &elem); 1908 - ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, 1909 - elem.seq, &roots); 1910 - btrfs_put_tree_mod_seq(fs_info, &elem); 1911 - if (ret < 0) 1912 - goto out; 1913 - 1914 - if (roots->nnodes != 1) 1915 - goto out; 1916 - 1917 - ULIST_ITER_INIT(&uiter); 1918 - unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */ 1919 - /* 1920 - * If we find our ref root then that means all refs 1921 - * this extent has to the root have not yet been 1922 - * deleted. In that case, we do nothing and let the 1923 - * last ref for this bytenr drive our update. 1924 - * 1925 - * This can happen for example if an extent is 1926 - * referenced multiple times in a snapshot (clone, 1927 - * etc). 
If we are in the middle of snapshot removal, 1928 - * queued updates for such an extent will find the 1929 - * root if we have not yet finished removing the 1930 - * snapshot. 1931 - */ 1932 - if (unode->val == oper->ref_root) 1933 - goto out; 1934 - 1935 - root_obj = unode->val; 1936 - BUG_ON(!root_obj); 1937 - 1938 - spin_lock(&fs_info->qgroup_lock); 1939 - qg = find_qgroup_rb(fs_info, root_obj); 1940 - if (!qg) 1941 - goto out_unlock; 1942 - 1943 - qg->excl += oper->num_bytes; 1944 - qg->excl_cmpr += oper->num_bytes; 1945 - qgroup_dirty(fs_info, qg); 1946 - 1947 - /* 1948 - * Adjust counts for parent groups. First we find all 1949 - * parents, then in the 2nd loop we do the adjustment 1950 - * while adding parents of the parents to our ulist. 1951 - */ 1952 - list_for_each_entry(glist, &qg->groups, next_group) { 1953 - err = ulist_add(parents, glist->group->qgroupid, 1954 - ptr_to_u64(glist->group), GFP_ATOMIC); 1955 - if (err < 0) { 1956 - ret = err; 1957 - goto out_unlock; 1958 - } 1959 - } 1960 - 1961 - ULIST_ITER_INIT(&uiter); 1962 - while ((unode = ulist_next(parents, &uiter))) { 1963 - qg = u64_to_ptr(unode->aux); 1964 - qg->excl += oper->num_bytes; 1965 - qg->excl_cmpr += oper->num_bytes; 1966 - qgroup_dirty(fs_info, qg); 1967 - 1968 - /* Add any parents of the parents */ 1969 - list_for_each_entry(glist, &qg->groups, next_group) { 1970 - err = ulist_add(parents, glist->group->qgroupid, 1971 - ptr_to_u64(glist->group), GFP_ATOMIC); 1972 - if (err < 0) { 1973 - ret = err; 1974 - goto out_unlock; 1975 - } 1976 - } 1977 - } 1978 - 1979 - out_unlock: 1980 - spin_unlock(&fs_info->qgroup_lock); 1981 - 1982 - out: 1983 - ulist_free(roots); 1984 - ulist_free(parents); 1985 - return ret; 1986 - } 1987 - 1988 - /* 1989 - * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 1990 - * from the fs. First, all roots referencing the extent are searched, and 1991 - * then the space is accounted accordingly to the different roots. 
The 1992 - * accounting algorithm works in 3 steps documented inline. 1993 - */ 1994 - static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, 1995 - struct btrfs_fs_info *fs_info, 1996 - struct btrfs_qgroup_operation *oper) 1997 - { 1998 - int ret = 0; 1999 - 2000 - if (!fs_info->quota_enabled) 2001 - return 0; 2002 - 2003 - BUG_ON(!fs_info->quota_root); 2004 - 2005 - mutex_lock(&fs_info->qgroup_rescan_lock); 2006 - if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 2007 - if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) { 2008 - mutex_unlock(&fs_info->qgroup_rescan_lock); 2009 - return 0; 2010 - } 2011 - } 2012 - mutex_unlock(&fs_info->qgroup_rescan_lock); 2013 - 2014 - ASSERT(is_fstree(oper->ref_root)); 2015 - 2016 - trace_btrfs_qgroup_account(oper); 2017 - 2018 - switch (oper->type) { 2019 - case BTRFS_QGROUP_OPER_ADD_EXCL: 2020 - case BTRFS_QGROUP_OPER_SUB_EXCL: 2021 - ret = qgroup_excl_accounting(fs_info, oper); 2022 - break; 2023 - case BTRFS_QGROUP_OPER_ADD_SHARED: 2024 - case BTRFS_QGROUP_OPER_SUB_SHARED: 2025 - ret = qgroup_shared_accounting(trans, fs_info, oper); 2026 - break; 2027 - case BTRFS_QGROUP_OPER_SUB_SUBTREE: 2028 - ret = qgroup_subtree_accounting(trans, fs_info, oper); 2029 - break; 2030 - default: 2031 - ASSERT(0); 2032 - } 2033 - return ret; 2034 - } 2035 - 2036 2045 int 2037 2046 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 2038 2047 struct btrfs_fs_info *fs_info, ··· 1732 2567 rb_erase(node, &delayed_refs->dirty_extent_root); 1733 2568 kfree(record); 1734 2569 1735 - } 1736 - return ret; 1737 - } 1738 - 1739 - /* 1740 - * Needs to be called everytime we run delayed refs, even if there is an error 1741 - * in order to cleanup outstanding operations. 
1742 - */ 1743 - int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, 1744 - struct btrfs_fs_info *fs_info) 1745 - { 1746 - struct btrfs_qgroup_operation *oper; 1747 - int ret = 0; 1748 - 1749 - while (!list_empty(&trans->qgroup_ref_list)) { 1750 - oper = list_first_entry(&trans->qgroup_ref_list, 1751 - struct btrfs_qgroup_operation, list); 1752 - list_del_init(&oper->list); 1753 - if (!ret || !trans->aborted) 1754 - ret = btrfs_qgroup_account(trans, fs_info, oper); 1755 - spin_lock(&fs_info->qgroup_op_lock); 1756 - rb_erase(&oper->n, &fs_info->qgroup_op_tree); 1757 - spin_unlock(&fs_info->qgroup_op_lock); 1758 - btrfs_put_tree_mod_seq(fs_info, &oper->elem); 1759 - kfree(oper); 1760 2570 } 1761 2571 return ret; 1762 2572 }
-49
fs/btrfs/qgroup.h
··· 23 23 #include "delayed-ref.h" 24 24 25 25 /* 26 - * A description of the operations, all of these operations only happen when we 27 - * are adding the 1st reference for that subvolume in the case of adding space 28 - * or on the last reference delete in the case of subtraction. The only 29 - * exception is the last one, which is added for confusion. 30 - * 31 - * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only 32 - * one pointing at the bytes we are adding. This is called on the first 33 - * allocation. 34 - * 35 - * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be 36 - * shared between subvols. This is called on the creation of a ref that already 37 - * has refs from a different subvolume, so basically reflink. 38 - * 39 - * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only 40 - * one referencing the range. 41 - * 42 - * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with 43 - * refs with other subvolumes. 44 - */ 45 - enum btrfs_qgroup_operation_type { 46 - BTRFS_QGROUP_OPER_ADD_EXCL, 47 - BTRFS_QGROUP_OPER_ADD_SHARED, 48 - BTRFS_QGROUP_OPER_SUB_EXCL, 49 - BTRFS_QGROUP_OPER_SUB_SHARED, 50 - BTRFS_QGROUP_OPER_SUB_SUBTREE, 51 - }; 52 - 53 - struct btrfs_qgroup_operation { 54 - u64 ref_root; 55 - u64 bytenr; 56 - u64 num_bytes; 57 - u64 seq; 58 - enum btrfs_qgroup_operation_type type; 59 - struct seq_list elem; 60 - struct rb_node n; 61 - struct list_head list; 62 - }; 63 - 64 - /* 65 26 * Record a dirty extent, and info qgroup to update quota on it 66 27 * TODO: Use kmem cache to alloc it. 
67 28 */ ··· 54 93 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); 55 94 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); 56 95 struct btrfs_delayed_extent_op; 57 - int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, 58 - struct btrfs_fs_info *fs_info, u64 ref_root, 59 - u64 bytenr, u64 num_bytes, 60 - enum btrfs_qgroup_operation_type type, 61 - int mod_seq); 62 96 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, 63 97 struct btrfs_fs_info *fs_info); 64 98 struct btrfs_qgroup_extent_record ··· 66 110 struct ulist *old_roots, struct ulist *new_roots); 67 111 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, 68 112 struct btrfs_fs_info *fs_info); 69 - int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, 70 - struct btrfs_fs_info *fs_info); 71 - void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans, 72 - struct btrfs_fs_info *fs_info, 73 - struct btrfs_qgroup_operation *oper); 74 113 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 75 114 struct btrfs_fs_info *fs_info); 76 115 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
-55
include/trace/events/btrfs.h
··· 1117 1117 TP_ARGS(wq) 1118 1118 ); 1119 1119 1120 - #define show_oper_type(type) \ 1121 - __print_symbolic(type, \ 1122 - { BTRFS_QGROUP_OPER_ADD_EXCL, "OPER_ADD_EXCL" }, \ 1123 - { BTRFS_QGROUP_OPER_ADD_SHARED, "OPER_ADD_SHARED" }, \ 1124 - { BTRFS_QGROUP_OPER_SUB_EXCL, "OPER_SUB_EXCL" }, \ 1125 - { BTRFS_QGROUP_OPER_SUB_SHARED, "OPER_SUB_SHARED" }) 1126 - 1127 - DECLARE_EVENT_CLASS(btrfs_qgroup_oper, 1128 - 1129 - TP_PROTO(struct btrfs_qgroup_operation *oper), 1130 - 1131 - TP_ARGS(oper), 1132 - 1133 - TP_STRUCT__entry( 1134 - __field( u64, ref_root ) 1135 - __field( u64, bytenr ) 1136 - __field( u64, num_bytes ) 1137 - __field( u64, seq ) 1138 - __field( int, type ) 1139 - __field( u64, elem_seq ) 1140 - ), 1141 - 1142 - TP_fast_assign( 1143 - __entry->ref_root = oper->ref_root; 1144 - __entry->bytenr = oper->bytenr, 1145 - __entry->num_bytes = oper->num_bytes; 1146 - __entry->seq = oper->seq; 1147 - __entry->type = oper->type; 1148 - __entry->elem_seq = oper->elem.seq; 1149 - ), 1150 - 1151 - TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, " 1152 - "seq = %llu, elem.seq = %llu, type = %s", 1153 - (unsigned long long)__entry->ref_root, 1154 - (unsigned long long)__entry->bytenr, 1155 - (unsigned long long)__entry->num_bytes, 1156 - (unsigned long long)__entry->seq, 1157 - (unsigned long long)__entry->elem_seq, 1158 - show_oper_type(__entry->type)) 1159 - ); 1160 - 1161 - DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account, 1162 - 1163 - TP_PROTO(struct btrfs_qgroup_operation *oper), 1164 - 1165 - TP_ARGS(oper) 1166 - ); 1167 - 1168 - DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref, 1169 - 1170 - TP_PROTO(struct btrfs_qgroup_operation *oper), 1171 - 1172 - TP_ARGS(oper) 1173 - ); 1174 - 1175 1120 #endif /* _TRACE_BTRFS_H */ 1176 1121 1177 1122 /* This part must be outside protection */