Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Tags: kernel, os, linux

[PATCH] device-mapper snapshot: fix invalidation

When a snapshot becomes invalid, s->valid is set to 0. In this state, a
snapshot can no longer be accessed.

When s->lock is acquired, before doing anything else, s->valid must be checked
to ensure the snapshot remains valid.

This patch eliminates some races (that may cause panics) by adding some
missing checks. At the same time, some unnecessary levels of indentation are
removed and snapshot invalidation is moved into a single function that always
generates a device-mapper event.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Alasdair G Kergon; committed by Linus Torvalds.
Commit 76df1c65 (parent b4b610f6).

Diffstat: +181 insertions, -128 deletions
drivers/md/dm-snap.c
··· 392 392 down_write(&s->lock); 393 393 s->valid = 0; 394 394 up_write(&s->lock); 395 + 396 + dm_table_event(s->table); 395 397 } 396 398 } 397 399 ··· 603 601 } 604 602 } 605 603 604 + static inline void error_snapshot_bios(struct pending_exception *pe) 605 + { 606 + error_bios(bio_list_get(&pe->snapshot_bios)); 607 + } 608 + 606 609 static struct bio *__flush_bios(struct pending_exception *pe) 607 610 { 608 611 /* ··· 623 616 return NULL; 624 617 } 625 618 619 + static void __invalidate_snapshot(struct dm_snapshot *s, 620 + struct pending_exception *pe, int err) 621 + { 622 + if (!s->valid) 623 + return; 624 + 625 + if (err == -EIO) 626 + DMERR("Invalidating snapshot: Error reading/writing."); 627 + else if (err == -ENOMEM) 628 + DMERR("Invalidating snapshot: Unable to allocate exception."); 629 + 630 + if (pe) 631 + remove_exception(&pe->e); 632 + 633 + if (s->store.drop_snapshot) 634 + s->store.drop_snapshot(&s->store); 635 + 636 + s->valid = 0; 637 + 638 + dm_table_event(s->table); 639 + } 640 + 626 641 static void pending_complete(struct pending_exception *pe, int success) 627 642 { 628 643 struct exception *e; ··· 652 623 struct dm_snapshot *s = pe->snap; 653 624 struct bio *flush = NULL; 654 625 655 - if (success) { 656 - e = alloc_exception(); 657 - if (!e) { 658 - DMWARN("Unable to allocate exception."); 659 - down_write(&s->lock); 660 - s->store.drop_snapshot(&s->store); 661 - s->valid = 0; 662 - flush = __flush_bios(pe); 663 - up_write(&s->lock); 664 - 665 - error_bios(bio_list_get(&pe->snapshot_bios)); 666 - goto out; 667 - } 668 - *e = pe->e; 669 - 670 - /* 671 - * Add a proper exception, and remove the 672 - * in-flight exception from the list. 
673 - */ 674 - down_write(&s->lock); 675 - insert_exception(&s->complete, e); 676 - remove_exception(&pe->e); 677 - flush = __flush_bios(pe); 678 - 679 - /* Submit any pending write bios */ 680 - up_write(&s->lock); 681 - 682 - flush_bios(bio_list_get(&pe->snapshot_bios)); 683 - } else { 626 + if (!success) { 684 627 /* Read/write error - snapshot is unusable */ 685 628 down_write(&s->lock); 686 - if (s->valid) 687 - DMERR("Error reading/writing snapshot"); 688 - s->store.drop_snapshot(&s->store); 689 - s->valid = 0; 690 - remove_exception(&pe->e); 629 + __invalidate_snapshot(s, pe, -EIO); 691 630 flush = __flush_bios(pe); 692 631 up_write(&s->lock); 693 632 694 - error_bios(bio_list_get(&pe->snapshot_bios)); 695 - 696 - dm_table_event(s->table); 633 + error_snapshot_bios(pe); 634 + goto out; 697 635 } 636 + 637 + e = alloc_exception(); 638 + if (!e) { 639 + down_write(&s->lock); 640 + __invalidate_snapshot(s, pe, -ENOMEM); 641 + flush = __flush_bios(pe); 642 + up_write(&s->lock); 643 + 644 + error_snapshot_bios(pe); 645 + goto out; 646 + } 647 + *e = pe->e; 648 + 649 + /* 650 + * Add a proper exception, and remove the 651 + * in-flight exception from the list. 652 + */ 653 + down_write(&s->lock); 654 + if (!s->valid) { 655 + flush = __flush_bios(pe); 656 + up_write(&s->lock); 657 + 658 + free_exception(e); 659 + 660 + error_snapshot_bios(pe); 661 + goto out; 662 + } 663 + 664 + insert_exception(&s->complete, e); 665 + remove_exception(&pe->e); 666 + flush = __flush_bios(pe); 667 + 668 + up_write(&s->lock); 669 + 670 + /* Submit any pending write bios */ 671 + flush_bios(bio_list_get(&pe->snapshot_bios)); 698 672 699 673 out: 700 674 primary_pe = pe->primary_pe; ··· 790 758 if (e) { 791 759 /* cast the exception to a pending exception */ 792 760 pe = container_of(e, struct pending_exception, e); 793 - 794 - } else { 795 - /* 796 - * Create a new pending exception, we don't want 797 - * to hold the lock while we do this. 
798 - */ 799 - up_write(&s->lock); 800 - pe = alloc_pending_exception(); 801 - down_write(&s->lock); 802 - 803 - e = lookup_exception(&s->pending, chunk); 804 - if (e) { 805 - free_pending_exception(pe); 806 - pe = container_of(e, struct pending_exception, e); 807 - } else { 808 - pe->e.old_chunk = chunk; 809 - bio_list_init(&pe->origin_bios); 810 - bio_list_init(&pe->snapshot_bios); 811 - pe->primary_pe = NULL; 812 - atomic_set(&pe->sibling_count, 1); 813 - pe->snap = s; 814 - pe->started = 0; 815 - 816 - if (s->store.prepare_exception(&s->store, &pe->e)) { 817 - free_pending_exception(pe); 818 - s->valid = 0; 819 - return NULL; 820 - } 821 - 822 - insert_exception(&s->pending, &pe->e); 823 - } 761 + goto out; 824 762 } 825 763 764 + /* 765 + * Create a new pending exception, we don't want 766 + * to hold the lock while we do this. 767 + */ 768 + up_write(&s->lock); 769 + pe = alloc_pending_exception(); 770 + down_write(&s->lock); 771 + 772 + if (!s->valid) { 773 + free_pending_exception(pe); 774 + return NULL; 775 + } 776 + 777 + e = lookup_exception(&s->pending, chunk); 778 + if (e) { 779 + free_pending_exception(pe); 780 + pe = container_of(e, struct pending_exception, e); 781 + goto out; 782 + } 783 + 784 + pe->e.old_chunk = chunk; 785 + bio_list_init(&pe->origin_bios); 786 + bio_list_init(&pe->snapshot_bios); 787 + pe->primary_pe = NULL; 788 + atomic_set(&pe->sibling_count, 1); 789 + pe->snap = s; 790 + pe->started = 0; 791 + 792 + if (s->store.prepare_exception(&s->store, &pe->e)) { 793 + free_pending_exception(pe); 794 + return NULL; 795 + } 796 + 797 + insert_exception(&s->pending, &pe->e); 798 + 799 + out: 826 800 return pe; 827 801 } 828 802 ··· 845 807 { 846 808 struct exception *e; 847 809 struct dm_snapshot *s = (struct dm_snapshot *) ti->private; 810 + int copy_needed = 0; 848 811 int r = 1; 849 812 chunk_t chunk; 850 - struct pending_exception *pe; 813 + struct pending_exception *pe = NULL; 851 814 852 815 chunk = sector_to_chunk(s, bio->bi_sector); 
853 816 854 817 /* Full snapshots are not usable */ 818 + /* To get here the table must be live so s->active is always set. */ 855 819 if (!s->valid) 856 820 return -EIO; 857 821 ··· 871 831 * to copy an exception */ 872 832 down_write(&s->lock); 873 833 834 + if (!s->valid) { 835 + r = -EIO; 836 + goto out_unlock; 837 + } 838 + 874 839 /* If the block is already remapped - use that, else remap it */ 875 840 e = lookup_exception(&s->complete, chunk); 876 841 if (e) { 877 842 remap_exception(s, e, bio); 878 - up_write(&s->lock); 879 - 880 - } else { 881 - pe = __find_pending_exception(s, bio); 882 - 883 - if (!pe) { 884 - if (s->store.drop_snapshot) 885 - s->store.drop_snapshot(&s->store); 886 - s->valid = 0; 887 - r = -EIO; 888 - up_write(&s->lock); 889 - } else { 890 - remap_exception(s, &pe->e, bio); 891 - bio_list_add(&pe->snapshot_bios, bio); 892 - 893 - if (!pe->started) { 894 - /* this is protected by snap->lock */ 895 - pe->started = 1; 896 - up_write(&s->lock); 897 - start_copy(pe); 898 - } else 899 - up_write(&s->lock); 900 - r = 0; 901 - } 843 + goto out_unlock; 902 844 } 903 845 846 + pe = __find_pending_exception(s, bio); 847 + if (!pe) { 848 + __invalidate_snapshot(s, pe, -ENOMEM); 849 + r = -EIO; 850 + goto out_unlock; 851 + } 852 + 853 + remap_exception(s, &pe->e, bio); 854 + bio_list_add(&pe->snapshot_bios, bio); 855 + 856 + if (!pe->started) { 857 + /* this is protected by snap->lock */ 858 + pe->started = 1; 859 + copy_needed = 1; 860 + } 861 + 862 + r = 0; 863 + 864 + out_unlock: 865 + up_write(&s->lock); 866 + 867 + if (copy_needed) 868 + start_copy(pe); 904 869 } else { 905 870 /* 906 871 * FIXME: this read path scares me because we ··· 916 871 917 872 /* Do reads */ 918 873 down_read(&s->lock); 874 + 875 + if (!s->valid) { 876 + up_read(&s->lock); 877 + return -EIO; 878 + } 919 879 920 880 /* See if it it has been remapped */ 921 881 e = lookup_exception(&s->complete, chunk); ··· 998 948 /* Do all the snapshots on this origin */ 999 949 
list_for_each_entry (snap, snapshots, list) { 1000 950 951 + down_write(&snap->lock); 952 + 1001 953 /* Only deal with valid and active snapshots */ 1002 954 if (!snap->valid || !snap->active) 1003 - continue; 955 + goto next_snapshot; 1004 956 1005 957 /* Nothing to do if writing beyond end of snapshot */ 1006 958 if (bio->bi_sector >= dm_table_get_size(snap->table)) 1007 - continue; 1008 - 1009 - down_write(&snap->lock); 959 + goto next_snapshot; 1010 960 1011 961 /* 1012 962 * Remember, different snapshots can have ··· 1023 973 * won't destroy the primary_pe while we're inside this loop. 1024 974 */ 1025 975 e = lookup_exception(&snap->complete, chunk); 1026 - if (!e) { 1027 - pe = __find_pending_exception(snap, bio); 1028 - if (!pe) { 1029 - snap->store.drop_snapshot(&snap->store); 1030 - snap->valid = 0; 976 + if (e) 977 + goto next_snapshot; 1031 978 1032 - } else { 1033 - if (!primary_pe) { 1034 - /* 1035 - * Either every pe here has same 1036 - * primary_pe or none has one yet. 1037 - */ 1038 - if (pe->primary_pe) 1039 - primary_pe = pe->primary_pe; 1040 - else { 1041 - primary_pe = pe; 1042 - first = 1; 1043 - } 1044 - 1045 - bio_list_add(&primary_pe->origin_bios, 1046 - bio); 1047 - r = 0; 1048 - } 1049 - if (!pe->primary_pe) { 1050 - atomic_inc(&primary_pe->sibling_count); 1051 - pe->primary_pe = primary_pe; 1052 - } 1053 - if (!pe->started) { 1054 - pe->started = 1; 1055 - list_add_tail(&pe->list, &pe_queue); 1056 - } 1057 - } 979 + pe = __find_pending_exception(snap, bio); 980 + if (!pe) { 981 + __invalidate_snapshot(snap, pe, ENOMEM); 982 + goto next_snapshot; 1058 983 } 1059 984 985 + if (!primary_pe) { 986 + /* 987 + * Either every pe here has same 988 + * primary_pe or none has one yet. 
989 + */ 990 + if (pe->primary_pe) 991 + primary_pe = pe->primary_pe; 992 + else { 993 + primary_pe = pe; 994 + first = 1; 995 + } 996 + 997 + bio_list_add(&primary_pe->origin_bios, bio); 998 + 999 + r = 0; 1000 + } 1001 + 1002 + if (!pe->primary_pe) { 1003 + atomic_inc(&primary_pe->sibling_count); 1004 + pe->primary_pe = primary_pe; 1005 + } 1006 + 1007 + if (!pe->started) { 1008 + pe->started = 1; 1009 + list_add_tail(&pe->list, &pe_queue); 1010 + } 1011 + 1012 + next_snapshot: 1060 1013 up_write(&snap->lock); 1061 1014 } 1062 1015