Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm mpath: add retry pg init

This patch allows a failed path group initialisation command to be retried.

It adds a generic MP_RETRY flag and a "pg_init_retries" feature to
device-mapper multipath which limits the number of retries.

The retry sequence works as follows:

1. A hardware handler sends a path group initialisation command to the
storage and the command completes with an error code indicating that the
command should be retried.

2. The hardware handler calls dm_pg_init_complete() with MP_RETRY
set in err_flags to ask the dm multipath core to retry.

3. If the retry limit has not been exceeded, pg_init() is retried.
Otherwise fail_path() is called.

If you are using the userspace multipath-tools or device-mapper-multipath
package, you can set pg_init_retries in the 'device' section of your
/etc/multipath.conf file. For example:

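features "2 pg_init_retries 7"

Here the leading "2" is the number of feature arguments that follow, and "7"
is the retry limit, which must be between 1 and 50.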

The number of times pg_init has been called for the current path group
(pg_init_count) is reported in the 'dmsetup status' output.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Acked-by: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

authored by

Dave Wysochanski and committed by
Alasdair G Kergon
c9e45581 636d5786

+68 -14
+1
drivers/md/dm-hw-handler.h
··· 58 58 #define MP_FAIL_PATH 1 59 59 #define MP_BYPASS_PG 2 60 60 #define MP_ERROR_IO 4 /* Don't retry this I/O */ 61 + #define MP_RETRY 8 61 62 62 63 #endif
+67 -14
drivers/md/dm-mpath.c
··· 75 75 unsigned queue_io; /* Must we queue all I/O? */ 76 76 unsigned queue_if_no_path; /* Queue I/O if last path fails? */ 77 77 unsigned saved_queue_if_no_path;/* Saved state during suspension */ 78 + unsigned pg_init_retries; /* Number of times to retry pg_init */ 79 + unsigned pg_init_count; /* Number of times pg_init called */ 78 80 79 81 struct work_struct process_queued_ios; 80 82 struct bio_list queued_ios; ··· 227 225 m->pg_init_required = 0; 228 226 m->queue_io = 0; 229 227 } 228 + 229 + m->pg_init_count = 0; 230 230 } 231 231 232 232 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg) ··· 428 424 must_queue = 0; 429 425 430 426 if (m->pg_init_required && !m->pg_init_in_progress) { 427 + m->pg_init_count++; 431 428 m->pg_init_required = 0; 432 429 m->pg_init_in_progress = 1; 433 430 init_required = 1; ··· 694 689 int r; 695 690 unsigned argc; 696 691 struct dm_target *ti = m->ti; 692 + const char *param_name; 697 693 698 694 static struct param _params[] = { 699 - {0, 1, "invalid number of feature args"}, 695 + {0, 3, "invalid number of feature args"}, 696 + {1, 50, "pg_init_retries must be between 1 and 50"}, 700 697 }; 701 698 702 699 r = read_param(_params, shift(as), &argc, &ti->error); ··· 708 701 if (!argc) 709 702 return 0; 710 703 711 - if (!strnicmp(shift(as), MESG_STR("queue_if_no_path"))) 712 - return queue_if_no_path(m, 1, 0); 713 - else { 704 + do { 705 + param_name = shift(as); 706 + argc--; 707 + 708 + if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) { 709 + r = queue_if_no_path(m, 1, 0); 710 + continue; 711 + } 712 + 713 + if (!strnicmp(param_name, MESG_STR("pg_init_retries")) && 714 + (argc >= 1)) { 715 + r = read_param(_params + 1, shift(as), 716 + &m->pg_init_retries, &ti->error); 717 + argc--; 718 + continue; 719 + } 720 + 714 721 ti->error = "Unrecognised multipath feature request"; 715 - return -EINVAL; 716 - } 722 + r = -EINVAL; 723 + } while (argc && !r); 724 + 725 + return r; 717 726 } 718 727 
719 728 static int multipath_ctr(struct dm_target *ti, unsigned int argc, ··· 999 976 } 1000 977 1001 978 /* 979 + * Should we retry pg_init immediately? 980 + */ 981 + static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) 982 + { 983 + unsigned long flags; 984 + int limit_reached = 0; 985 + 986 + spin_lock_irqsave(&m->lock, flags); 987 + 988 + if (m->pg_init_count <= m->pg_init_retries) 989 + m->pg_init_required = 1; 990 + else 991 + limit_reached = 1; 992 + 993 + spin_unlock_irqrestore(&m->lock, flags); 994 + 995 + return limit_reached; 996 + } 997 + 998 + /* 1002 999 * pg_init must call this when it has completed its initialisation 1003 1000 */ 1004 1001 void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) ··· 1028 985 struct multipath *m = pg->m; 1029 986 unsigned long flags; 1030 987 1031 - /* We insist on failing the path if the PG is already bypassed. */ 1032 - if (err_flags && pg->bypassed) 988 + /* 989 + * If requested, retry pg_init until maximum number of retries exceeded. 990 + * If retry not requested and PG already bypassed, always fail the path. 
991 + */ 992 + if (err_flags & MP_RETRY) { 993 + if (pg_init_limit_reached(m, pgpath)) 994 + err_flags |= MP_FAIL_PATH; 995 + } else if (err_flags && pg->bypassed) 1033 996 err_flags |= MP_FAIL_PATH; 1034 997 1035 998 if (err_flags & MP_FAIL_PATH) ··· 1045 996 bypass_pg(m, pg, 1); 1046 997 1047 998 spin_lock_irqsave(&m->lock, flags); 1048 - if (err_flags) { 999 + if (err_flags & ~MP_RETRY) { 1049 1000 m->current_pgpath = NULL; 1050 1001 m->current_pg = NULL; 1051 1002 } else if (!m->pg_init_required) ··· 1197 1148 1198 1149 /* Features */ 1199 1150 if (type == STATUSTYPE_INFO) 1200 - DMEMIT("1 %u ", m->queue_size); 1201 - else if (m->queue_if_no_path) 1202 - DMEMIT("1 queue_if_no_path "); 1203 - else 1204 - DMEMIT("0 "); 1151 + DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count); 1152 + else { 1153 + DMEMIT("%u ", m->queue_if_no_path + 1154 + (m->pg_init_retries > 0) * 2); 1155 + if (m->queue_if_no_path) 1156 + DMEMIT("queue_if_no_path "); 1157 + if (m->pg_init_retries) 1158 + DMEMIT("pg_init_retries %u ", m->pg_init_retries); 1159 + } 1205 1160 1206 1161 if (hwh->type && hwh->type->status) 1207 1162 sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);