Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lightnvm: pblk: fail gracefully on irrec. error

Due to user writes being decoupled from media writes because of the need
of an intermediate write buffer, irrecoverable media write errors lead
to pblk stalling; user writes fill up the buffer and end up in an
infinite retry loop.

In order to let user writes fail gracefully, it is necessary for pblk to
keep track of its own internal state and prevent further writes from
being placed into the write buffer.

This patch implements a state machine to keep track of internal errors
and, in case of failure, fail further user writes in a standard way.
Depending on the type of error, pblk will do its best to persist
buffered writes (which are already acknowledged) and close down in a
graceful manner. This way, data might be recovered by re-instantiating
pblk. Such a state machine paves the way for a state-based FTL log.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Javier González and committed by
Jens Axboe
588726d3 ef576494

+335 -117
+7 -1
drivers/lightnvm/pblk-cache.c
··· 31 31 */ 32 32 retry: 33 33 ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos); 34 - if (ret == NVM_IO_REQUEUE) { 34 + switch (ret) { 35 + case NVM_IO_REQUEUE: 35 36 io_schedule(); 36 37 goto retry; 38 + case NVM_IO_ERR: 39 + pblk_pipeline_stop(pblk); 40 + goto out; 37 41 } 38 42 39 43 if (unlikely(!bio_has_data(bio))) ··· 61 57 atomic_long_add(nr_entries, &pblk->inflight_writes); 62 58 atomic_long_add(nr_entries, &pblk->req_writes); 63 59 #endif 60 + 61 + pblk_rl_inserted(&pblk->rl, nr_entries); 64 62 65 63 out: 66 64 pblk_write_should_kick(pblk);
+202 -84
drivers/lightnvm/pblk-core.c
··· 53 53 *ppa = rqd->ppa_addr; 54 54 pblk_mark_bb(pblk, line, ppa); 55 55 } 56 + 57 + atomic_dec(&pblk->inflight_io); 56 58 } 57 59 58 60 /* Erase completion assumes that only one block is erased at the time */ ··· 259 257 complete(waiting); 260 258 } 261 259 262 - void pblk_flush_writer(struct pblk *pblk) 260 + void pblk_wait_for_meta(struct pblk *pblk) 263 261 { 264 - struct bio *bio; 265 - int ret; 266 - DECLARE_COMPLETION_ONSTACK(wait); 262 + do { 263 + if (!atomic_read(&pblk->inflight_io)) 264 + break; 267 265 268 - bio = bio_alloc(GFP_KERNEL, 1); 269 - if (!bio) 270 - return; 266 + schedule(); 267 + } while (1); 268 + } 271 269 272 - bio->bi_iter.bi_sector = 0; /* internal bio */ 273 - bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH); 274 - bio->bi_private = &wait; 275 - bio->bi_end_io = pblk_end_bio_sync; 270 + static void pblk_flush_writer(struct pblk *pblk) 271 + { 272 + pblk_rb_flush(&pblk->rwb); 273 + do { 274 + if (!pblk_rb_read_count(&pblk->rwb)) 275 + break; 276 276 277 - ret = pblk_write_to_cache(pblk, bio, 0); 278 - if (ret == NVM_IO_OK) { 279 - if (!wait_for_completion_io_timeout(&wait, 280 - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 281 - pr_err("pblk: flush cache timed out\n"); 282 - } 283 - } else if (ret != NVM_IO_DONE) { 284 - pr_err("pblk: tear down bio failed\n"); 285 - } 286 - 287 - if (bio->bi_status) 288 - pr_err("pblk: flush sync write failed (%u)\n", bio->bi_status); 289 - 290 - bio_put(bio); 277 + schedule(); 278 + } while (1); 291 279 } 292 280 293 281 struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) ··· 417 425 } 418 426 } 419 427 #endif 428 + 429 + atomic_inc(&pblk->inflight_io); 430 + 420 431 return nvm_submit_io(dev, rqd); 421 432 } 422 433 ··· 671 676 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 672 677 pr_err("pblk: emeta I/O timed out\n"); 673 678 } 679 + atomic_dec(&pblk->inflight_io); 674 680 reinit_completion(&wait); 675 681 676 682 if (likely(pblk->l_mg.emeta_alloc_type == 
PBLK_VMALLOC_META)) ··· 787 791 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 788 792 pr_err("pblk: smeta I/O timed out\n"); 789 793 } 794 + atomic_dec(&pblk->inflight_io); 790 795 791 796 if (rqd.error) { 792 797 if (dir == WRITE) ··· 829 832 static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) 830 833 { 831 834 struct nvm_rq rqd; 832 - int ret; 835 + int ret = 0; 833 836 DECLARE_COMPLETION_ONSTACK(wait); 834 837 835 838 memset(&rqd, 0, sizeof(struct nvm_rq)); ··· 864 867 rqd.private = pblk; 865 868 __pblk_end_io_erase(pblk, &rqd); 866 869 867 - return 0; 870 + return ret; 868 871 } 869 872 870 873 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line) 871 874 { 872 875 struct pblk_line_meta *lm = &pblk->lm; 873 876 struct ppa_addr ppa; 874 - int bit = -1; 877 + int ret, bit = -1; 875 878 876 879 /* Erase only good blocks, one at a time */ 877 880 do { ··· 890 893 WARN_ON(test_and_set_bit(bit, line->erase_bitmap)); 891 894 spin_unlock(&line->lock); 892 895 893 - if (pblk_blk_erase_sync(pblk, ppa)) { 896 + ret = pblk_blk_erase_sync(pblk, ppa); 897 + if (ret) { 894 898 pr_err("pblk: failed to erase line %d\n", line->id); 895 - return -ENOMEM; 899 + return ret; 896 900 } 897 901 } while (1); 898 902 ··· 905 907 struct pblk_line_meta *lm) 906 908 { 907 909 int meta_line; 910 + 911 + lockdep_assert_held(&l_mg->free_lock); 908 912 909 913 retry_meta: 910 914 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); ··· 1039 1039 /* Mark smeta metadata sectors as bad sectors */ 1040 1040 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); 1041 1041 off = bit * geo->sec_per_pl; 1042 - retry_smeta: 1043 1042 bitmap_set(line->map_bitmap, off, lm->smeta_sec); 1044 1043 line->sec_in_line -= lm->smeta_sec; 1045 1044 line->smeta_ssec = off; ··· 1046 1047 1047 1048 if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) { 1048 1049 pr_debug("pblk: line smeta I/O failed. 
Retry\n"); 1049 - off += geo->sec_per_pl; 1050 - goto retry_smeta; 1050 + return 1; 1051 1051 } 1052 1052 1053 1053 bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line); ··· 1108 1110 1109 1111 spin_lock(&line->lock); 1110 1112 if (line->state != PBLK_LINESTATE_FREE) { 1113 + mempool_free(line->invalid_bitmap, pblk->line_meta_pool); 1114 + mempool_free(line->map_bitmap, pblk->line_meta_pool); 1111 1115 spin_unlock(&line->lock); 1112 - WARN(1, "pblk: corrupted line state\n"); 1113 - return -EINTR; 1116 + WARN(1, "pblk: corrupted line %d, state %d\n", 1117 + line->id, line->state); 1118 + return -EAGAIN; 1114 1119 } 1120 + 1115 1121 line->state = PBLK_LINESTATE_OPEN; 1116 1122 1117 1123 atomic_set(&line->left_eblks, blk_in_line); ··· 1171 1169 { 1172 1170 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1173 1171 struct pblk_line_meta *lm = &pblk->lm; 1174 - struct pblk_line *line = NULL; 1175 - int bit; 1172 + struct pblk_line *line; 1173 + int ret, bit; 1176 1174 1177 1175 lockdep_assert_held(&l_mg->free_lock); 1178 1176 1179 - retry_get: 1177 + retry: 1180 1178 if (list_empty(&l_mg->free_list)) { 1181 1179 pr_err("pblk: no free lines\n"); 1182 - goto out; 1180 + return NULL; 1183 1181 } 1184 1182 1185 1183 line = list_first_entry(&l_mg->free_list, struct pblk_line, list); ··· 1195 1193 list_add_tail(&line->list, &l_mg->bad_list); 1196 1194 1197 1195 pr_debug("pblk: line %d is bad\n", line->id); 1198 - goto retry_get; 1196 + goto retry; 1199 1197 } 1200 1198 1201 - if (pblk_line_prepare(pblk, line)) { 1202 - pr_err("pblk: failed to prepare line %d\n", line->id); 1203 - list_add(&line->list, &l_mg->free_list); 1204 - l_mg->nr_free_lines++; 1205 - return NULL; 1199 + ret = pblk_line_prepare(pblk, line); 1200 + if (ret) { 1201 + if (ret == -EAGAIN) { 1202 + list_add(&line->list, &l_mg->corrupt_list); 1203 + goto retry; 1204 + } else { 1205 + pr_err("pblk: failed to prepare line %d\n", line->id); 1206 + list_add(&line->list, &l_mg->free_list); 1207 + 
l_mg->nr_free_lines++; 1208 + return NULL; 1209 + } 1206 1210 } 1207 1211 1208 - out: 1209 1212 return line; 1210 1213 } 1211 1214 ··· 1220 1213 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1221 1214 struct pblk_line *retry_line; 1222 1215 1216 + retry: 1223 1217 spin_lock(&l_mg->free_lock); 1224 1218 retry_line = pblk_line_get(pblk); 1225 1219 if (!retry_line) { ··· 1237 1229 l_mg->data_line = retry_line; 1238 1230 spin_unlock(&l_mg->free_lock); 1239 1231 1240 - if (pblk_line_erase(pblk, retry_line)) { 1241 - spin_lock(&l_mg->free_lock); 1242 - l_mg->data_line = NULL; 1243 - spin_unlock(&l_mg->free_lock); 1244 - return NULL; 1245 - } 1246 - 1247 1232 pblk_rl_free_lines_dec(&pblk->rl, retry_line); 1248 1233 1234 + if (pblk_line_erase(pblk, retry_line)) 1235 + goto retry; 1236 + 1249 1237 return retry_line; 1238 + } 1239 + 1240 + static void pblk_set_space_limit(struct pblk *pblk) 1241 + { 1242 + struct pblk_rl *rl = &pblk->rl; 1243 + 1244 + atomic_set(&rl->rb_space, 0); 1250 1245 } 1251 1246 1252 1247 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) ··· 1273 1262 1274 1263 /* Allocate next line for preparation */ 1275 1264 l_mg->data_next = pblk_line_get(pblk); 1276 - if (l_mg->data_next) { 1265 + if (!l_mg->data_next) { 1266 + /* If we cannot get a new line, we need to stop the pipeline. 
1267 + * Only allow as many writes in as we can store safely and then 1268 + * fail gracefully 1269 + */ 1270 + pblk_set_space_limit(pblk); 1271 + 1272 + l_mg->data_next = NULL; 1273 + } else { 1277 1274 l_mg->data_next->seq_nr = l_mg->d_seq_nr++; 1278 1275 l_mg->data_next->type = PBLK_LINETYPE_DATA; 1279 1276 is_next = 1; 1280 1277 } 1281 1278 spin_unlock(&l_mg->free_lock); 1282 1279 1280 + if (pblk_line_erase(pblk, line)) { 1281 + line = pblk_line_retry(pblk, line); 1282 + if (!line) 1283 + return NULL; 1284 + } 1285 + 1283 1286 pblk_rl_free_lines_dec(&pblk->rl, line); 1284 1287 if (is_next) 1285 1288 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); 1286 - 1287 - if (pblk_line_erase(pblk, line)) 1288 - return NULL; 1289 1289 1290 1290 retry_setup: 1291 1291 if (!pblk_line_init_metadata(pblk, line, NULL)) { ··· 1318 1296 return line; 1319 1297 } 1320 1298 1321 - struct pblk_line *pblk_line_replace_data(struct pblk *pblk) 1299 + static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line) 1300 + { 1301 + lockdep_assert_held(&pblk->l_mg.free_lock); 1302 + 1303 + pblk_set_space_limit(pblk); 1304 + pblk->state = PBLK_STATE_STOPPING; 1305 + } 1306 + 1307 + void pblk_pipeline_stop(struct pblk *pblk) 1308 + { 1309 + struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1310 + int ret; 1311 + 1312 + spin_lock(&l_mg->free_lock); 1313 + if (pblk->state == PBLK_STATE_RECOVERING || 1314 + pblk->state == PBLK_STATE_STOPPED) { 1315 + spin_unlock(&l_mg->free_lock); 1316 + return; 1317 + } 1318 + pblk->state = PBLK_STATE_RECOVERING; 1319 + spin_unlock(&l_mg->free_lock); 1320 + 1321 + pblk_flush_writer(pblk); 1322 + pblk_wait_for_meta(pblk); 1323 + 1324 + ret = pblk_recov_pad(pblk); 1325 + if (ret) { 1326 + pr_err("pblk: could not close data on teardown(%d)\n", ret); 1327 + return; 1328 + } 1329 + 1330 + pblk_line_close_meta_sync(pblk); 1331 + 1332 + spin_lock(&l_mg->free_lock); 1333 + pblk->state = PBLK_STATE_STOPPED; 1334 + l_mg->data_line = NULL; 1335 + l_mg->data_next = 
NULL; 1336 + spin_unlock(&l_mg->free_lock); 1337 + } 1338 + 1339 + void pblk_line_replace_data(struct pblk *pblk) 1322 1340 { 1323 1341 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1324 1342 struct pblk_line *cur, *new; ··· 1368 1306 cur = l_mg->data_line; 1369 1307 new = l_mg->data_next; 1370 1308 if (!new) 1371 - return NULL; 1309 + return; 1372 1310 l_mg->data_line = new; 1373 1311 1374 - retry_line: 1375 - left_seblks = atomic_read(&new->left_seblks); 1376 - if (left_seblks) { 1377 - /* If line is not fully erased, erase it */ 1378 - if (atomic_read(&new->left_eblks)) { 1379 - if (pblk_line_erase(pblk, new)) 1380 - return NULL; 1381 - } else { 1382 - io_schedule(); 1383 - } 1384 - goto retry_line; 1385 - } 1386 - 1387 1312 spin_lock(&l_mg->free_lock); 1388 - /* Allocate next line for preparation */ 1389 - l_mg->data_next = pblk_line_get(pblk); 1390 - if (l_mg->data_next) { 1391 - l_mg->data_next->seq_nr = l_mg->d_seq_nr++; 1392 - l_mg->data_next->type = PBLK_LINETYPE_DATA; 1393 - is_next = 1; 1313 + if (pblk->state != PBLK_STATE_RUNNING) { 1314 + l_mg->data_line = NULL; 1315 + l_mg->data_next = NULL; 1316 + spin_unlock(&l_mg->free_lock); 1317 + return; 1394 1318 } 1395 1319 1396 1320 pblk_line_setup_metadata(new, l_mg, &pblk->lm); 1397 1321 spin_unlock(&l_mg->free_lock); 1398 1322 1399 - if (is_next) 1400 - pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); 1323 + retry_erase: 1324 + left_seblks = atomic_read(&new->left_seblks); 1325 + if (left_seblks) { 1326 + /* If line is not fully erased, erase it */ 1327 + if (atomic_read(&new->left_eblks)) { 1328 + if (pblk_line_erase(pblk, new)) 1329 + return; 1330 + } else { 1331 + io_schedule(); 1332 + } 1333 + goto retry_erase; 1334 + } 1401 1335 1402 1336 retry_setup: 1403 1337 if (!pblk_line_init_metadata(pblk, new, cur)) { 1404 1338 new = pblk_line_retry(pblk, new); 1405 1339 if (!new) 1406 - return NULL; 1340 + return; 1407 1341 1408 1342 goto retry_setup; 1409 1343 } ··· 1407 1349 if (!pblk_line_init_bb(pblk, new, 
1)) { 1408 1350 new = pblk_line_retry(pblk, new); 1409 1351 if (!new) 1410 - return NULL; 1352 + return; 1411 1353 1412 1354 goto retry_setup; 1413 1355 } 1414 1356 1415 - return new; 1357 + /* Allocate next line for preparation */ 1358 + spin_lock(&l_mg->free_lock); 1359 + l_mg->data_next = pblk_line_get(pblk); 1360 + if (!l_mg->data_next) { 1361 + /* If we cannot get a new line, we need to stop the pipeline. 1362 + * Only allow as many writes in as we can store safely and then 1363 + * fail gracefully 1364 + */ 1365 + pblk_stop_writes(pblk, new); 1366 + l_mg->data_next = NULL; 1367 + } else { 1368 + l_mg->data_next->seq_nr = l_mg->d_seq_nr++; 1369 + l_mg->data_next->type = PBLK_LINETYPE_DATA; 1370 + is_next = 1; 1371 + } 1372 + spin_unlock(&l_mg->free_lock); 1373 + 1374 + if (is_next) 1375 + pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); 1416 1376 } 1417 1377 1418 1378 void pblk_line_free(struct pblk *pblk, struct pblk_line *line) ··· 1514 1438 return (line->left_msecs == 0); 1515 1439 } 1516 1440 1441 + void pblk_line_close_meta_sync(struct pblk *pblk) 1442 + { 1443 + struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1444 + struct pblk_line_meta *lm = &pblk->lm; 1445 + struct pblk_line *line, *tline; 1446 + LIST_HEAD(list); 1447 + 1448 + spin_lock(&l_mg->close_lock); 1449 + if (list_empty(&l_mg->emeta_list)) { 1450 + spin_unlock(&l_mg->close_lock); 1451 + return; 1452 + } 1453 + 1454 + list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev); 1455 + spin_unlock(&l_mg->close_lock); 1456 + 1457 + list_for_each_entry_safe(line, tline, &list, list) { 1458 + struct pblk_emeta *emeta = line->emeta; 1459 + 1460 + while (emeta->mem < lm->emeta_len[0]) { 1461 + int ret; 1462 + 1463 + ret = pblk_submit_meta_io(pblk, line); 1464 + if (ret) { 1465 + pr_err("pblk: sync meta line %d failed (%d)\n", 1466 + line->id, ret); 1467 + return; 1468 + } 1469 + } 1470 + } 1471 + 1472 + pblk_wait_for_meta(pblk); 1473 + } 1474 + 1475 + static void 
pblk_line_should_sync_meta(struct pblk *pblk) 1476 + { 1477 + if (pblk_rl_is_limit(&pblk->rl)) 1478 + pblk_line_close_meta_sync(pblk); 1479 + } 1480 + 1517 1481 void pblk_line_close(struct pblk *pblk, struct pblk_line *line) 1518 1482 { 1519 1483 struct pblk_line_mgmt *l_mg = &pblk->l_mg; ··· 1593 1477 struct pblk_emeta *emeta = line->emeta; 1594 1478 struct line_emeta *emeta_buf = emeta->buf; 1595 1479 1596 - /* No need for exact vsc value; avoid a big line lock and tak aprox. */ 1480 + /* No need for exact vsc value; avoid a big line lock and take aprox. */ 1597 1481 memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len); 1598 1482 memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len); 1599 1483 ··· 1605 1489 list_add_tail(&line->list, &l_mg->emeta_list); 1606 1490 spin_unlock(&line->lock); 1607 1491 spin_unlock(&l_mg->close_lock); 1492 + 1493 + pblk_line_should_sync_meta(pblk); 1608 1494 } 1609 1495 1610 1496 void pblk_line_close_ws(struct work_struct *work)
+4 -2
drivers/lightnvm/pblk-init.c
··· 372 372 kfree(l_mg->bb_aux); 373 373 kfree(l_mg->vsc_list); 374 374 375 + spin_lock(&l_mg->free_lock); 375 376 for (i = 0; i < PBLK_DATA_LINES; i++) { 376 377 kfree(l_mg->sline_meta[i]); 377 378 pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); 378 379 kfree(l_mg->eline_meta[i]); 379 380 } 381 + spin_unlock(&l_mg->free_lock); 380 382 381 383 kfree(pblk->lines); 382 384 } ··· 861 859 862 860 static void pblk_tear_down(struct pblk *pblk) 863 861 { 864 - pblk_flush_writer(pblk); 862 + pblk_pipeline_stop(pblk); 865 863 pblk_writer_stop(pblk); 866 864 pblk_rb_sync_l2p(&pblk->rwb); 867 - pblk_recov_pad(pblk); 868 865 pblk_rwb_free(pblk); 869 866 pblk_rl_free(&pblk->rl); 870 867 ··· 909 908 910 909 pblk->dev = dev; 911 910 pblk->disk = tdisk; 911 + pblk->state = PBLK_STATE_RUNNING; 912 912 913 913 spin_lock_init(&pblk->trans_lock); 914 914 spin_lock_init(&pblk->lock);
+17 -6
drivers/lightnvm/pblk-map.c
··· 62 62 63 63 if (pblk_line_is_full(line)) { 64 64 struct pblk_line *prev_line = line; 65 - line = pblk_line_replace_data(pblk); 66 - if (!line) 67 - return; 65 + 66 + pblk_line_replace_data(pblk); 68 67 pblk_line_close_meta(pblk, prev_line); 69 68 } 70 69 ··· 105 106 pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], 106 107 lun_bitmap, &meta_list[i], map_secs); 107 108 108 - /* line can change after page map */ 109 - e_line = pblk_line_get_erase(pblk); 110 109 erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]); 110 + 111 + /* line can change after page map. We might also be writing the 112 + * last line. 113 + */ 114 + e_line = pblk_line_get_erase(pblk); 115 + if (!e_line) 116 + return pblk_map_rq(pblk, rqd, sentry, lun_bitmap, 117 + valid_secs, i + min); 111 118 112 119 spin_lock(&e_line->lock); 113 120 if (!test_bit(erase_lun, e_line->erase_bitmap)) { ··· 132 127 spin_unlock(&e_line->lock); 133 128 } 134 129 135 - e_line = pblk_line_get_erase(pblk); 136 130 d_line = pblk_line_get_data(pblk); 131 + 132 + /* line can change after page map. We might also be writing the 133 + * last line. 134 + */ 135 + e_line = pblk_line_get_erase(pblk); 136 + if (!e_line) 137 + return; 137 138 138 139 /* Erase blocks that are bad in this line but might not be in next */ 139 140 if (unlikely(ppa_empty(*erase_ppa)) &&
+20 -5
drivers/lightnvm/pblk-rb.c
··· 369 369 /* Protect syncs */ 370 370 smp_store_release(&rb->sync_point, sync_point); 371 371 372 + if (!bio) 373 + return 0; 374 + 372 375 spin_lock_irq(&rb->s_lock); 373 376 bio_list_add(&entry->w_ctx.bios, bio); 374 377 spin_unlock_irq(&rb->s_lock); ··· 410 407 return 1; 411 408 } 412 409 410 + void pblk_rb_flush(struct pblk_rb *rb) 411 + { 412 + struct pblk *pblk = container_of(rb, struct pblk, rwb); 413 + unsigned int mem = READ_ONCE(rb->mem); 414 + 415 + if (pblk_rb_sync_point_set(rb, NULL, mem)) 416 + return; 417 + 418 + pblk_write_should_kick(pblk); 419 + } 420 + 413 421 static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, 414 422 unsigned int *pos, struct bio *bio, 415 423 int *io_ret) ··· 457 443 unsigned int nr_entries, unsigned int *pos) 458 444 { 459 445 struct pblk *pblk = container_of(rb, struct pblk, rwb); 460 - int flush_done; 446 + int io_ret; 461 447 462 448 spin_lock(&rb->w_lock); 463 - if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) { 449 + io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries); 450 + if (io_ret) { 464 451 spin_unlock(&rb->w_lock); 465 - return NVM_IO_REQUEUE; 452 + return io_ret; 466 453 } 467 454 468 - if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) { 455 + if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) { 469 456 spin_unlock(&rb->w_lock); 470 457 return NVM_IO_REQUEUE; 471 458 } ··· 474 459 pblk_rl_user_in(&pblk->rl, nr_entries); 475 460 spin_unlock(&rb->w_lock); 476 461 477 - return flush_done; 462 + return io_ret; 478 463 } 479 464 480 465 /*
+3
drivers/lightnvm/pblk-read.c
··· 142 142 #endif 143 143 144 144 pblk_free_rqd(pblk, rqd, READ); 145 + atomic_dec(&pblk->inflight_io); 145 146 } 146 147 147 148 static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, ··· 348 347 bio_get(bio); 349 348 if (bitmap_full(&read_bitmap, nr_secs)) { 350 349 bio_endio(bio); 350 + atomic_inc(&pblk->inflight_io); 351 351 pblk_end_io_read(rqd); 352 352 return NVM_IO_OK; 353 353 } ··· 518 516 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 519 517 pr_err("pblk: GC read I/O timed out\n"); 520 518 } 519 + atomic_dec(&pblk->inflight_io); 521 520 522 521 if (rqd.error) { 523 522 atomic_long_inc(&pblk->read_failed_gc);
+22 -9
drivers/lightnvm/pblk-recovery.c
··· 300 300 pr_err("pblk: L2P recovery read timed out\n"); 301 301 return -EINTR; 302 302 } 303 - 303 + atomic_dec(&pblk->inflight_io); 304 304 reinit_completion(&wait); 305 305 306 306 /* At this point, the read should not fail. If it does, it is a problem ··· 415 415 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 416 416 pr_err("pblk: L2P recovery write timed out\n"); 417 417 } 418 + atomic_dec(&pblk->inflight_io); 418 419 reinit_completion(&wait); 419 420 420 421 left_line_ppas -= rq_ppas; ··· 520 519 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 521 520 pr_err("pblk: L2P recovery read timed out\n"); 522 521 } 522 + atomic_dec(&pblk->inflight_io); 523 523 reinit_completion(&wait); 524 524 525 525 /* This should not happen since the read failed during normal recovery, ··· 660 658 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 661 659 pr_err("pblk: L2P recovery read timed out\n"); 662 660 } 661 + atomic_dec(&pblk->inflight_io); 663 662 reinit_completion(&wait); 664 663 665 664 /* Reached the end of the written line */ ··· 957 954 } 958 955 959 956 /* 960 - * Pad until smeta can be read on current data line 957 + * Pad current line 961 958 */ 962 - void pblk_recov_pad(struct pblk *pblk) 959 + int pblk_recov_pad(struct pblk *pblk) 963 960 { 964 961 struct nvm_tgt_dev *dev = pblk->dev; 965 962 struct nvm_geo *geo = &dev->geo; ··· 970 967 struct ppa_addr *ppa_list; 971 968 struct pblk_sec_meta *meta_list; 972 969 void *data; 970 + int left_msecs; 971 + int ret = 0; 973 972 dma_addr_t dma_ppa_list, dma_meta_list; 974 973 975 974 spin_lock(&l_mg->free_lock); 976 975 line = l_mg->data_line; 976 + left_msecs = line->left_msecs; 977 977 spin_unlock(&l_mg->free_lock); 978 978 979 979 rqd = pblk_alloc_rqd(pblk, READ); 980 980 if (IS_ERR(rqd)) 981 - return; 981 + return PTR_ERR(rqd); 982 982 983 983 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); 984 - if (!meta_list) 984 + if (!meta_list) { 985 + ret = -ENOMEM; 985 986 goto free_rqd; 987 + } 986 988 987 
989 ppa_list = (void *)(meta_list) + pblk_dma_meta_size; 988 990 dma_ppa_list = dma_meta_list + pblk_dma_meta_size; 989 991 990 992 data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); 991 - if (!data) 993 + if (!data) { 994 + ret = -ENOMEM; 992 995 goto free_meta_list; 996 + } 993 997 994 998 p.ppa_list = ppa_list; 995 999 p.meta_list = meta_list; ··· 1005 995 p.dma_ppa_list = dma_ppa_list; 1006 996 p.dma_meta_list = dma_meta_list; 1007 997 1008 - if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) { 1009 - pr_err("pblk: Tear down padding failed\n"); 998 + ret = pblk_recov_pad_oob(pblk, line, p, left_msecs); 999 + if (ret) { 1000 + pr_err("pblk: Tear down padding failed (%d)\n", ret); 1010 1001 goto free_data; 1011 1002 } 1012 1003 1013 - pblk_line_close(pblk, line); 1004 + pblk_line_close_meta(pblk, line); 1014 1005 1015 1006 free_data: 1016 1007 kfree(data); ··· 1019 1008 nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); 1020 1009 free_rqd: 1021 1010 pblk_free_rqd(pblk, rqd, READ); 1011 + 1012 + return ret; 1022 1013 }
+28 -2
drivers/lightnvm/pblk-rl.c
··· 23 23 mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000)); 24 24 } 25 25 26 + int pblk_rl_is_limit(struct pblk_rl *rl) 27 + { 28 + int rb_space; 29 + 30 + rb_space = atomic_read(&rl->rb_space); 31 + 32 + return (rb_space == 0); 33 + } 34 + 26 35 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries) 27 36 { 28 37 int rb_user_cnt = atomic_read(&rl->rb_user_cnt); 38 + int rb_space = atomic_read(&rl->rb_space); 29 39 30 - return (!(rb_user_cnt >= rl->rb_user_max)); 40 + if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0)) 41 + return NVM_IO_ERR; 42 + 43 + if (rb_user_cnt >= rl->rb_user_max) 44 + return NVM_IO_REQUEUE; 45 + 46 + return NVM_IO_OK; 47 + } 48 + 49 + void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries) 50 + { 51 + int rb_space = atomic_read(&rl->rb_space); 52 + 53 + if (unlikely(rb_space >= 0)) 54 + atomic_sub(nr_entries, &rl->rb_space); 31 55 } 32 56 33 57 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries) ··· 214 190 /* To start with, all buffer is available to user I/O writers */ 215 191 rl->rb_budget = budget; 216 192 rl->rb_user_max = budget; 217 - atomic_set(&rl->rb_user_cnt, 0); 218 193 rl->rb_gc_max = 0; 219 194 rl->rb_state = PBLK_RL_HIGH; 195 + 196 + atomic_set(&rl->rb_user_cnt, 0); 220 197 atomic_set(&rl->rb_gc_cnt, 0); 198 + atomic_set(&rl->rb_space, -1); 221 199 222 200 setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl); 223 201
+5 -3
drivers/lightnvm/pblk-sysfs.c
··· 241 241 geo->nr_luns, lm->blk_per_line, lm->sec_per_line); 242 242 243 243 sz += snprintf(page + sz, PAGE_SIZE - sz, 244 - "lines:d:%d,l:%d-f:%d,m:%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n", 244 + "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n", 245 245 cur_data, cur_log, 246 - nr_free_lines, emeta_line_cnt, 246 + nr_free_lines, 247 + emeta_line_cnt, meta_weight, 247 248 closed_line_cnt, 248 249 bad, cor, 249 250 d_line_cnt, l_line_cnt, ··· 258 257 sz += snprintf(page + sz, PAGE_SIZE - sz, 259 258 "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n", 260 259 cur_data, cur_sec, msecs, vsc, sec_in_line, 261 - map_weight, lm->sec_per_line, meta_weight); 260 + map_weight, lm->sec_per_line, 261 + atomic_read(&pblk->inflight_io)); 262 262 263 263 return sz; 264 264 }
+4 -1
drivers/lightnvm/pblk-write.c
··· 171 171 #endif 172 172 173 173 pblk_complete_write(pblk, rqd, c_ctx); 174 + atomic_dec(&pblk->inflight_io); 174 175 } 175 176 176 177 static void pblk_end_io_write_meta(struct nvm_rq *rqd) ··· 204 203 205 204 bio_put(rqd->bio); 206 205 pblk_free_rqd(pblk, rqd, READ); 206 + 207 + atomic_dec(&pblk->inflight_io); 207 208 } 208 209 209 210 static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, ··· 257 254 return ret; 258 255 } 259 256 260 - if (likely(!atomic_read(&e_line->left_eblks) || !e_line)) 257 + if (likely(!e_line || !atomic_read(&e_line->left_eblks))) 261 258 pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0); 262 259 else 263 260 pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
+23 -4
drivers/lightnvm/pblk.h
··· 257 257 */ 258 258 int rb_budget; /* Total number of entries available for I/O */ 259 259 int rb_user_max; /* Max buffer entries available for user I/O */ 260 - atomic_t rb_user_cnt; /* User I/O buffer counter */ 261 260 int rb_gc_max; /* Max buffer entries available for GC I/O */ 262 261 int rb_gc_rsv; /* Reserved buffer entries for GC I/O */ 263 262 int rb_state; /* Rate-limiter current state */ 263 + 264 + atomic_t rb_user_cnt; /* User I/O buffer counter */ 264 265 atomic_t rb_gc_cnt; /* GC I/O buffer counter */ 266 + atomic_t rb_space; /* Space limit in case of reaching capacity */ 265 267 266 268 int rsv_blocks; /* Reserved blocks for GC */ 267 269 ··· 531 529 u8 sec_offset; 532 530 }; 533 531 532 + enum { 533 + PBLK_STATE_RUNNING = 0, 534 + PBLK_STATE_STOPPING = 1, 535 + PBLK_STATE_RECOVERING = 2, 536 + PBLK_STATE_STOPPED = 3, 537 + }; 538 + 534 539 struct pblk { 535 540 struct nvm_tgt_dev *dev; 536 541 struct gendisk *disk; ··· 554 545 struct pblk_addr_format ppaf; 555 546 556 547 struct pblk_rb rwb; 548 + 549 + int state; /* pblk line state */ 557 550 558 551 int min_write_pgs; /* Minimum amount of pages required by controller */ 559 552 int max_write_pgs; /* Maximum amount of pages supported by controller */ ··· 597 586 atomic_long_t read_failed_gc; 598 587 atomic_long_t write_failed; 599 588 atomic_long_t erase_failed; 589 + 590 + atomic_t inflight_io; /* General inflight I/O counter */ 600 591 601 592 struct task_struct *writer_ts; 602 593 ··· 653 640 struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, 654 641 unsigned int pos); 655 642 struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos); 643 + void pblk_rb_flush(struct pblk_rb *rb); 656 644 657 645 void pblk_rb_sync_l2p(struct pblk_rb *rb); 658 646 unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, ··· 689 675 int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, 690 676 struct pblk_c_ctx *c_ctx); 691 677 void pblk_free_rqd(struct pblk *pblk, struct 
nvm_rq *rqd, int rw); 692 - void pblk_flush_writer(struct pblk *pblk); 678 + void pblk_wait_for_meta(struct pblk *pblk); 693 679 struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba); 694 680 void pblk_discard(struct pblk *pblk, struct bio *bio); 695 681 void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); ··· 701 687 gfp_t gfp_mask); 702 688 struct pblk_line *pblk_line_get(struct pblk *pblk); 703 689 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); 704 - struct pblk_line *pblk_line_replace_data(struct pblk *pblk); 690 + void pblk_line_replace_data(struct pblk *pblk); 705 691 int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); 706 692 void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); 707 693 struct pblk_line *pblk_line_get_data(struct pblk *pblk); ··· 711 697 void pblk_line_free(struct pblk *pblk, struct pblk_line *line); 712 698 void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line); 713 699 void pblk_line_close(struct pblk *pblk, struct pblk_line *line); 700 + void pblk_line_close_meta_sync(struct pblk *pblk); 714 701 void pblk_line_close_ws(struct work_struct *work); 702 + void pblk_pipeline_stop(struct pblk *pblk); 715 703 void pblk_line_mark_bb(struct work_struct *work); 716 704 void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, 717 705 void (*work)(struct work_struct *), ··· 795 779 */ 796 780 void pblk_submit_rec(struct work_struct *work); 797 781 struct pblk_line *pblk_recov_l2p(struct pblk *pblk); 798 - void pblk_recov_pad(struct pblk *pblk); 782 + int pblk_recov_pad(struct pblk *pblk); 799 783 __le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta); 800 784 int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, 801 785 struct pblk_rec_ctx *recovery, u64 *comp_bits, ··· 828 812 int pblk_rl_low_thrs(struct pblk_rl *rl); 829 813 unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); 830 814 int 
pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); 815 + void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries); 831 816 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); 832 817 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); 833 818 void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); ··· 836 819 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl); 837 820 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); 838 821 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); 822 + void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left); 823 + int pblk_rl_is_limit(struct pblk_rl *rl); 839 824 840 825 /* 841 826 * pblk sysfs