Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm thin metadata: introduce dm_pool_abort_metadata

Introduce dm_pool_abort_metadata to abort the current metadata
transaction. Generally this will only be called when bad things are
happening and dm-thin is trying to roll back to a good state for
read-only mode.

It's complicated by the fact that the metadata device may have failed
completely, leaving the abort unable to read the old transaction.
In this case the metadata object is placed in a 'fail' mode in which
every operation fails with -EINVAL; all that can still usefully be done
is to close (destroy) it.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

Authored by Joe Thornber and committed by Alasdair G Kergon
da105ed5 12ba58af

+158 -61
+146 -61
drivers/md/dm-thin-metadata.c
··· 1 1 /* 2 - * Copyright (C) 2011 Red Hat, Inc. 2 + * Copyright (C) 2011-2012 Red Hat, Inc. 3 3 * 4 4 * This file is released under the GPL. 5 5 */ ··· 185 185 unsigned long flags; 186 186 sector_t data_block_size; 187 187 bool read_only:1; 188 + 189 + /* 190 + * Set if a transaction has to be aborted but the attempt to roll back 191 + * to the previous (good) transaction failed. The only pool metadata 192 + * operation possible in this state is the closing of the device. 193 + */ 194 + bool fail_io:1; 188 195 }; 189 196 190 197 struct dm_thin_device { ··· 200 193 dm_thin_id id; 201 194 202 195 int open_count; 203 - int changed; 196 + bool changed:1; 197 + bool aborted_with_changes:1; 204 198 uint64_t mapped_blocks; 205 199 uint64_t transaction_id; 206 200 uint32_t creation_time; ··· 739 731 740 732 static int __commit_transaction(struct dm_pool_metadata *pmd) 741 733 { 742 - /* 743 - * FIXME: Associated pool should be made read-only on failure. 744 - */ 745 734 int r; 746 735 size_t metadata_len, data_len; 747 736 struct thin_disk_superblock *disk_super; ··· 814 809 pmd->time = 0; 815 810 INIT_LIST_HEAD(&pmd->thin_devices); 816 811 pmd->read_only = false; 812 + pmd->fail_io = false; 817 813 pmd->bdev = bdev; 818 814 pmd->data_block_size = data_block_size; 819 815 ··· 857 851 return -EBUSY; 858 852 } 859 853 860 - if (!pmd->read_only) { 854 + if (!pmd->read_only && !pmd->fail_io) { 861 855 r = __commit_transaction(pmd); 862 856 if (r < 0) 863 857 DMWARN("%s: __commit_transaction() failed, error = %d", 864 858 __func__, r); 865 859 } 866 860 867 - __destroy_persistent_data_objects(pmd); 868 - kfree(pmd); 861 + if (!pmd->fail_io) 862 + __destroy_persistent_data_objects(pmd); 869 863 864 + kfree(pmd); 870 865 return 0; 871 866 } 872 867 ··· 928 921 (*td)->id = dev; 929 922 (*td)->open_count = 1; 930 923 (*td)->changed = changed; 924 + (*td)->aborted_with_changes = false; 931 925 (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); 932 926 
(*td)->transaction_id = le64_to_cpu(details_le.transaction_id); 933 927 (*td)->creation_time = le32_to_cpu(details_le.creation_time); ··· 990 982 991 983 int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) 992 984 { 993 - int r; 985 + int r = -EINVAL; 994 986 995 987 down_write(&pmd->root_lock); 996 - r = __create_thin(pmd, dev); 988 + if (!pmd->fail_io) 989 + r = __create_thin(pmd, dev); 997 990 up_write(&pmd->root_lock); 998 991 999 992 return r; ··· 1081 1072 dm_thin_id dev, 1082 1073 dm_thin_id origin) 1083 1074 { 1084 - int r; 1075 + int r = -EINVAL; 1085 1076 1086 1077 down_write(&pmd->root_lock); 1087 - r = __create_snap(pmd, dev, origin); 1078 + if (!pmd->fail_io) 1079 + r = __create_snap(pmd, dev, origin); 1088 1080 up_write(&pmd->root_lock); 1089 1081 1090 1082 return r; ··· 1124 1114 int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, 1125 1115 dm_thin_id dev) 1126 1116 { 1127 - int r; 1117 + int r = -EINVAL; 1128 1118 1129 1119 down_write(&pmd->root_lock); 1130 - r = __delete_device(pmd, dev); 1120 + if (!pmd->fail_io) 1121 + r = __delete_device(pmd, dev); 1131 1122 up_write(&pmd->root_lock); 1132 1123 1133 1124 return r; ··· 1138 1127 uint64_t current_id, 1139 1128 uint64_t new_id) 1140 1129 { 1130 + int r = -EINVAL; 1131 + 1141 1132 down_write(&pmd->root_lock); 1133 + 1134 + if (pmd->fail_io) 1135 + goto out; 1136 + 1142 1137 if (pmd->trans_id != current_id) { 1143 - up_write(&pmd->root_lock); 1144 1138 DMERR("mismatched transaction id"); 1145 - return -EINVAL; 1139 + goto out; 1146 1140 } 1147 1141 1148 1142 pmd->trans_id = new_id; 1143 + r = 0; 1144 + 1145 + out: 1149 1146 up_write(&pmd->root_lock); 1150 1147 1151 - return 0; 1148 + return r; 1152 1149 } 1153 1150 1154 1151 int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, 1155 1152 uint64_t *result) 1156 1153 { 1154 + int r = -EINVAL; 1155 + 1157 1156 down_read(&pmd->root_lock); 1158 - *result = pmd->trans_id; 1157 + if (!pmd->fail_io) { 1158 + *result 
= pmd->trans_id; 1159 + r = 0; 1160 + } 1159 1161 up_read(&pmd->root_lock); 1160 1162 1161 - return 0; 1163 + return r; 1162 1164 } 1163 1165 1164 1166 static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) ··· 1235 1211 1236 1212 int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) 1237 1213 { 1238 - int r; 1214 + int r = -EINVAL; 1239 1215 1240 1216 down_write(&pmd->root_lock); 1241 - r = __reserve_metadata_snap(pmd); 1217 + if (!pmd->fail_io) 1218 + r = __reserve_metadata_snap(pmd); 1242 1219 up_write(&pmd->root_lock); 1243 1220 1244 1221 return r; ··· 1281 1256 1282 1257 int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) 1283 1258 { 1284 - int r; 1259 + int r = -EINVAL; 1285 1260 1286 1261 down_write(&pmd->root_lock); 1287 - r = __release_metadata_snap(pmd); 1262 + if (!pmd->fail_io) 1263 + r = __release_metadata_snap(pmd); 1288 1264 up_write(&pmd->root_lock); 1289 1265 1290 1266 return r; ··· 1312 1286 int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, 1313 1287 dm_block_t *result) 1314 1288 { 1315 - int r; 1289 + int r = -EINVAL; 1316 1290 1317 1291 down_read(&pmd->root_lock); 1318 - r = __get_metadata_snap(pmd, result); 1292 + if (!pmd->fail_io) 1293 + r = __get_metadata_snap(pmd, result); 1319 1294 up_read(&pmd->root_lock); 1320 1295 1321 1296 return r; ··· 1325 1298 int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, 1326 1299 struct dm_thin_device **td) 1327 1300 { 1328 - int r; 1301 + int r = -EINVAL; 1329 1302 1330 1303 down_write(&pmd->root_lock); 1331 - r = __open_device(pmd, dev, 0, td); 1304 + if (!pmd->fail_io) 1305 + r = __open_device(pmd, dev, 0, td); 1332 1306 up_write(&pmd->root_lock); 1333 1307 1334 1308 return r; ··· 1357 1329 int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, 1358 1330 int can_block, struct dm_thin_lookup_result *result) 1359 1331 { 1360 - int r; 1332 + int r = -EINVAL; 1361 1333 uint64_t block_time = 0; 1362 1334 __le64 value; 1363 1335 
struct dm_pool_metadata *pmd = td->pmd; 1364 1336 dm_block_t keys[2] = { td->id, block }; 1337 + struct dm_btree_info *info; 1365 1338 1366 1339 if (can_block) { 1367 1340 down_read(&pmd->root_lock); 1368 - r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); 1369 - if (!r) 1370 - block_time = le64_to_cpu(value); 1371 - up_read(&pmd->root_lock); 1372 - 1373 - } else if (down_read_trylock(&pmd->root_lock)) { 1374 - r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); 1375 - if (!r) 1376 - block_time = le64_to_cpu(value); 1377 - up_read(&pmd->root_lock); 1378 - 1379 - } else 1341 + info = &pmd->info; 1342 + } else if (down_read_trylock(&pmd->root_lock)) 1343 + info = &pmd->nb_info; 1344 + else 1380 1345 return -EWOULDBLOCK; 1346 + 1347 + if (pmd->fail_io) 1348 + goto out; 1349 + 1350 + r = dm_btree_lookup(info, pmd->root, keys, &value); 1351 + if (!r) 1352 + block_time = le64_to_cpu(value); 1353 + 1354 + out: 1355 + up_read(&pmd->root_lock); 1381 1356 1382 1357 if (!r) { 1383 1358 dm_block_t exception_block; ··· 1420 1389 int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, 1421 1390 dm_block_t data_block) 1422 1391 { 1423 - int r; 1392 + int r = -EINVAL; 1424 1393 1425 1394 down_write(&td->pmd->root_lock); 1426 - r = __insert(td, block, data_block); 1395 + if (!td->pmd->fail_io) 1396 + r = __insert(td, block, data_block); 1427 1397 up_write(&td->pmd->root_lock); 1428 1398 1429 1399 return r; ··· 1448 1416 1449 1417 int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) 1450 1418 { 1451 - int r; 1419 + int r = -EINVAL; 1452 1420 1453 1421 down_write(&td->pmd->root_lock); 1454 - r = __remove(td, block); 1422 + if (!td->pmd->fail_io) 1423 + r = __remove(td, block); 1455 1424 up_write(&td->pmd->root_lock); 1456 1425 1457 1426 return r; ··· 1469 1436 return r; 1470 1437 } 1471 1438 1439 + bool dm_thin_aborted_changes(struct dm_thin_device *td) 1440 + { 1441 + bool r; 1442 + 1443 + down_read(&td->pmd->root_lock); 1444 + r = 
td->aborted_with_changes; 1445 + up_read(&td->pmd->root_lock); 1446 + 1447 + return r; 1448 + } 1449 + 1472 1450 int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) 1473 1451 { 1474 - int r; 1452 + int r = -EINVAL; 1475 1453 1476 1454 down_write(&pmd->root_lock); 1477 - r = dm_sm_new_block(pmd->data_sm, result); 1455 + if (!pmd->fail_io) 1456 + r = dm_sm_new_block(pmd->data_sm, result); 1478 1457 up_write(&pmd->root_lock); 1479 1458 1480 1459 return r; ··· 1494 1449 1495 1450 int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) 1496 1451 { 1497 - int r; 1452 + int r = -EINVAL; 1498 1453 1499 1454 down_write(&pmd->root_lock); 1455 + if (pmd->fail_io) 1456 + goto out; 1500 1457 1501 1458 r = __commit_transaction(pmd); 1502 1459 if (r <= 0) ··· 1513 1466 return r; 1514 1467 } 1515 1468 1469 + static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) 1470 + { 1471 + struct dm_thin_device *td; 1472 + 1473 + list_for_each_entry(td, &pmd->thin_devices, list) 1474 + td->aborted_with_changes = td->changed; 1475 + } 1476 + 1477 + int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) 1478 + { 1479 + int r = -EINVAL; 1480 + 1481 + down_write(&pmd->root_lock); 1482 + if (pmd->fail_io) 1483 + goto out; 1484 + 1485 + __set_abort_with_changes_flags(pmd); 1486 + __destroy_persistent_data_objects(pmd); 1487 + r = __create_persistent_data_objects(pmd, false); 1488 + if (r) 1489 + pmd->fail_io = true; 1490 + 1491 + out: 1492 + up_write(&pmd->root_lock); 1493 + 1494 + return r; 1495 + } 1496 + 1516 1497 int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) 1517 1498 { 1518 - int r; 1499 + int r = -EINVAL; 1519 1500 1520 1501 down_read(&pmd->root_lock); 1521 - r = dm_sm_get_nr_free(pmd->data_sm, result); 1502 + if (!pmd->fail_io) 1503 + r = dm_sm_get_nr_free(pmd->data_sm, result); 1522 1504 up_read(&pmd->root_lock); 1523 1505 1524 1506 return r; ··· 1556 1480 int dm_pool_get_free_metadata_block_count(struct 
dm_pool_metadata *pmd, 1557 1481 dm_block_t *result) 1558 1482 { 1559 - int r; 1483 + int r = -EINVAL; 1560 1484 1561 1485 down_read(&pmd->root_lock); 1562 - r = dm_sm_get_nr_free(pmd->metadata_sm, result); 1486 + if (!pmd->fail_io) 1487 + r = dm_sm_get_nr_free(pmd->metadata_sm, result); 1563 1488 up_read(&pmd->root_lock); 1564 1489 1565 1490 return r; ··· 1569 1492 int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, 1570 1493 dm_block_t *result) 1571 1494 { 1572 - int r; 1495 + int r = -EINVAL; 1573 1496 1574 1497 down_read(&pmd->root_lock); 1575 - r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); 1498 + if (!pmd->fail_io) 1499 + r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); 1576 1500 up_read(&pmd->root_lock); 1577 1501 1578 1502 return r; ··· 1590 1512 1591 1513 int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) 1592 1514 { 1593 - int r; 1515 + int r = -EINVAL; 1594 1516 1595 1517 down_read(&pmd->root_lock); 1596 - r = dm_sm_get_nr_blocks(pmd->data_sm, result); 1518 + if (!pmd->fail_io) 1519 + r = dm_sm_get_nr_blocks(pmd->data_sm, result); 1597 1520 up_read(&pmd->root_lock); 1598 1521 1599 1522 return r; ··· 1602 1523 1603 1524 int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) 1604 1525 { 1526 + int r = -EINVAL; 1605 1527 struct dm_pool_metadata *pmd = td->pmd; 1606 1528 1607 1529 down_read(&pmd->root_lock); 1608 - *result = td->mapped_blocks; 1530 + if (!pmd->fail_io) { 1531 + *result = td->mapped_blocks; 1532 + r = 0; 1533 + } 1609 1534 up_read(&pmd->root_lock); 1610 1535 1611 - return 0; 1536 + return r; 1612 1537 } 1613 1538 1614 1539 static int __highest_block(struct dm_thin_device *td, dm_block_t *result) ··· 1634 1551 int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, 1635 1552 dm_block_t *result) 1636 1553 { 1637 - int r; 1554 + int r = -EINVAL; 1638 1555 struct dm_pool_metadata *pmd = td->pmd; 1639 1556 1640 1557 down_read(&pmd->root_lock); 1641 - r = 
__highest_block(td, result); 1558 + if (!pmd->fail_io) 1559 + r = __highest_block(td, result); 1642 1560 up_read(&pmd->root_lock); 1643 1561 1644 1562 return r; ··· 1667 1583 1668 1584 int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) 1669 1585 { 1670 - int r; 1586 + int r = -EINVAL; 1671 1587 1672 1588 down_write(&pmd->root_lock); 1673 - r = __resize_data_dev(pmd, new_count); 1589 + if (!pmd->fail_io) 1590 + r = __resize_data_dev(pmd, new_count); 1674 1591 up_write(&pmd->root_lock); 1675 1592 1676 1593 return r;
+12
drivers/md/dm-thin-metadata.h
··· 80 80 int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); 81 81 82 82 /* 83 + * Discards all uncommitted changes. Rereads the superblock, rolling back 84 + * to the last good transaction. Thin devices remain open. 85 + * dm_thin_aborted_changes() tells you if they had uncommitted changes. 86 + * 87 + * If this call fails it's only useful to call dm_pool_metadata_close(). 88 + * All other methods will fail with -EINVAL. 89 + */ 90 + int dm_pool_abort_metadata(struct dm_pool_metadata *pmd); 91 + 92 + /* 83 93 * Set/get userspace transaction id. 84 94 */ 85 95 int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, ··· 159 149 * Queries. 160 150 */ 161 151 bool dm_thin_changed_this_transaction(struct dm_thin_device *td); 152 + 153 + bool dm_thin_aborted_changes(struct dm_thin_device *td); 162 154 163 155 int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, 164 156 dm_block_t *highest_mapped);