Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm snapshot: fix hung bios when copy error occurs

When there is an error copying a chunk, dm-snapshot can incorrectly hold
the associated bios indefinitely, resulting in hung IO.

The function copy_callback sets pe->copy_error if there was an error
copying the chunk, and then calls complete_exception. complete_exception
calls pending_complete on error; otherwise it calls commit_exception with
commit_callback (and commit_callback calls pending_complete).

The persistent exception store (dm-snap-persistent.c) assumes that calls
to prepare_exception and commit_exception are paired.
persistent_prepare_exception increases ps->pending_count and
persistent_commit_exception decreases it.

If there is a copy error, persistent_prepare_exception is called but
persistent_commit_exception is not. This results in the variable
ps->pending_count never returning to zero and that causes some pending
exceptions (and their associated bios) to be held forever.

Fix this by unconditionally calling commit_exception regardless of
whether the copy was successful. A new "valid" parameter is added to
commit_exception -- when the copy fails this parameter is set to zero so
that the chunk that failed to copy (and all following chunks) is not
recorded in the snapshot store. Also, remove commit_callback now that
it is merely a wrapper around pending_complete.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org

Authored by Mikulas Patocka and committed by Mike Snitzer
385277bf 1c2e54e1

+12 -19
+1 -1
drivers/md/dm-exception-store.h
···
69 69 	 * Update the metadata with this exception.
70 70 	 */
71 71 	void (*commit_exception) (struct dm_exception_store *store,
72 - 				  struct dm_exception *e,
72 + 				  struct dm_exception *e, int valid,
73 73 				  void (*callback) (void *, int success),
74 74 				  void *callback_context);
75 75
+4 -1
drivers/md/dm-snap-persistent.c
···
695 695 }
696 696
697 697 static void persistent_commit_exception(struct dm_exception_store *store,
698 - 					struct dm_exception *e,
698 + 					struct dm_exception *e, int valid,
699 699 					void (*callback) (void *, int success),
700 700 					void *callback_context)
701 701 {
···
703 703 	struct pstore *ps = get_info(store);
704 704 	struct core_exception ce;
705 705 	struct commit_callback *cb;
706 +
707 + 	if (!valid)
708 + 		ps->valid = 0;
706 709
707 710 	ce.old_chunk = e->old_chunk;
708 711 	ce.new_chunk = e->new_chunk;
+2 -2
drivers/md/dm-snap-transient.c
···
52 52 }
53 53
54 54 static void transient_commit_exception(struct dm_exception_store *store,
55 - 				       struct dm_exception *e,
55 + 				       struct dm_exception *e, int valid,
56 56 				       void (*callback) (void *, int success),
57 57 				       void *callback_context)
58 58 {
59 59 	/* Just succeed */
60 - 	callback(callback_context, 1);
60 + 	callback(callback_context, valid);
61 61 }
62 62
63 63 static void transient_usage(struct dm_exception_store *store,
+5 -15
drivers/md/dm-snap.c
···
1437 1437 	dm_table_event(s->ti->table);
1438 1438 }
1439 1439
1440 - static void pending_complete(struct dm_snap_pending_exception *pe, int success)
1440 + static void pending_complete(void *context, int success)
1441 1441 {
1442 + 	struct dm_snap_pending_exception *pe = context;
1442 1443 	struct dm_exception *e;
1443 1444 	struct dm_snapshot *s = pe->snap;
1444 1445 	struct bio *origin_bios = NULL;
···
1507 1506 	free_pending_exception(pe);
1508 1507 }
1509 1508
1510 - static void commit_callback(void *context, int success)
1511 - {
1512 - 	struct dm_snap_pending_exception *pe = context;
1513 -
1514 - 	pending_complete(pe, success);
1515 - }
1516 -
1517 1509 static void complete_exception(struct dm_snap_pending_exception *pe)
1518 1510 {
1519 1511 	struct dm_snapshot *s = pe->snap;
1520 1512
1521 - 	if (unlikely(pe->copy_error))
1522 - 		pending_complete(pe, 0);
1523 -
1524 - 	else
1525 - 		/* Update the metadata if we are persistent */
1526 - 		s->store->type->commit_exception(s->store, &pe->e,
1527 - 						 commit_callback, pe);
1513 + 	/* Update the metadata if we are persistent */
1514 + 	s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
1515 + 					 pending_complete, pe);
1528 1516 }
1529 1517
1530 1518 /*