Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

gfs2: New gfs2_withdraw_helper

Currently, when a gfs2 filesystem is withdrawn, an "offline" uevent is
triggered that invokes gfs2-utils' gfs2_withdraw_helper script. The
purpose of this script is to deactivate the filesystem's block device so
that it can be withdrawn immediately, even before all the filesystem's
caches have been discarded. The script provided by gfs2-utils never did
anything useful, and there was no way for it to report back its status
to the kernel.

To fix that, extend the gfs2_withdraw_helper mechanism so that the
script can report one of the following results by writing the
corresponding value into "/sys$DEVPATH/lock_module/withdraw":

0 - The shared block device has been marked inactive. Future write
operations will fail.

1 - The shared block device may still be active and carry out
write operations.

If the "offline" uevent isn't reacted upon within the timeout configured
in /sys$DEVPATH/tune/withdraw_helper_timeout (default 5 seconds), the
event handler is assumed to have failed.

In addition, add a new "errors=deactivate" mount option.

With these changes, if fatal errors are detected on a gfs2 filesystem
and the filesystem is mounted with the "errors=panic" option, the kernel
will panic immediately. Otherwise, an attempt will be made to
deactivate the underlying block device. If successful, the kernel will
release all cluster-wide locks immediately so that the rest of the
cluster can continue. If unsuccessful, the kernel will either panic
("errors=deactivate"), or it will purge all filesystem I/O before
releasing all cluster-wide locks ("errors=withdraw").

Note that the gfs2_withdraw_helper script still needs to be fixed to
take advantage of these improvements. It could be changed to use a
mechanism like LVM Persistent Reservations. "dmsetup suspend" is not a
suitable mechanism as it infinitely postpones I/O operations, which may
prevent withdraw from completing.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>

+90 -28
+5 -4
fs/gfs2/incore.h
··· 537 537 538 538 #define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW 539 539 #define GFS2_ERRORS_WITHDRAW 0 540 - #define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */ 541 - #define GFS2_ERRORS_RO 2 /* place holder for future feature */ 540 + #define GFS2_ERRORS_DEACTIVATE 1 542 541 #define GFS2_ERRORS_PANIC 3 543 542 544 543 struct gfs2_args { ··· 553 554 unsigned int ar_data:2; /* ordered/writeback */ 554 555 unsigned int ar_meta:1; /* mount metafs */ 555 556 unsigned int ar_discard:1; /* discard requests */ 556 - unsigned int ar_errors:2; /* errors=withdraw | panic */ 557 + unsigned int ar_errors:2; /* errors=withdraw | deactivate | panic */ 557 558 unsigned int ar_nobarrier:1; /* do not send barriers */ 558 559 unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */ 559 560 unsigned int ar_got_rgrplvb:1; /* Was the rgrplvb opt given? */ ··· 579 580 unsigned int gt_complain_secs; 580 581 unsigned int gt_statfs_quantum; 581 582 unsigned int gt_statfs_slow; 583 + unsigned int gt_withdraw_helper_timeout; 582 584 }; 583 585 584 586 enum { ··· 711 711 wait_queue_head_t sd_async_glock_wait; 712 712 atomic_t sd_glock_disposal; 713 713 struct completion sd_locking_init; 714 - struct completion sd_wdack; 714 + struct completion sd_withdraw_helper; 715 + int sd_withdraw_helper_status; 715 716 struct delayed_work sd_control_work; 716 717 717 718 /* Inode Stuff */
+6 -3
fs/gfs2/ops_fstype.c
··· 60 60 gt->gt_new_files_jdata = 0; 61 61 gt->gt_max_readahead = BIT(18); 62 62 gt->gt_complain_secs = 10; 63 + gt->gt_withdraw_helper_timeout = 5; 63 64 } 64 65 65 66 void free_sbd(struct gfs2_sbd *sdp) ··· 93 92 init_waitqueue_head(&sdp->sd_async_glock_wait); 94 93 atomic_set(&sdp->sd_glock_disposal, 0); 95 94 init_completion(&sdp->sd_locking_init); 96 - init_completion(&sdp->sd_wdack); 95 + init_completion(&sdp->sd_withdraw_helper); 97 96 spin_lock_init(&sdp->sd_statfs_spin); 98 97 99 98 spin_lock_init(&sdp->sd_rindex_spin); ··· 1396 1395 }; 1397 1396 1398 1397 enum opt_errors { 1399 - Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, 1400 - Opt_errors_panic = GFS2_ERRORS_PANIC, 1398 + Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, 1399 + Opt_errors_deactivate = GFS2_ERRORS_DEACTIVATE, 1400 + Opt_errors_panic = GFS2_ERRORS_PANIC, 1401 1401 }; 1402 1402 1403 1403 static const struct constant_table gfs2_param_errors[] = { 1404 1404 {"withdraw", Opt_errors_withdraw }, 1405 + {"deactivate", Opt_errors_deactivate }, 1405 1406 {"panic", Opt_errors_panic }, 1406 1407 {} 1407 1408 };
+3
fs/gfs2/super.c
··· 1145 1145 case GFS2_ERRORS_WITHDRAW: 1146 1146 state = "withdraw"; 1147 1147 break; 1148 + case GFS2_ERRORS_DEACTIVATE: 1149 + state = "deactivate"; 1150 + break; 1148 1151 case GFS2_ERRORS_PANIC: 1149 1152 state = "panic"; 1150 1153 break;
+10 -14
fs/gfs2/sys.c
··· 425 425 return len; 426 426 } 427 427 428 - static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf) 429 - { 430 - int val = completion_done(&sdp->sd_wdack) ? 1 : 0; 431 - 432 - return sprintf(buf, "%d\n", val); 433 - } 434 - 435 - static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 428 + static ssize_t withdraw_helper_status_store(struct gfs2_sbd *sdp, 429 + const char *buf, 430 + size_t len) 436 431 { 437 432 int ret, val; 438 433 439 434 ret = kstrtoint(buf, 0, &val); 440 435 if (ret) 441 436 return ret; 442 - 443 - if ((val == 1) && 444 - !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) 445 - complete(&sdp->sd_wdack); 446 - else 437 + if (val < 0 || val > 1) 447 438 return -EINVAL; 439 + 440 + sdp->sd_withdraw_helper_status = val; 441 + complete(&sdp->sd_withdraw_helper); 448 442 return len; 449 443 } 450 444 ··· 585 591 586 592 GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 587 593 GDLM_ATTR(block, 0644, block_show, block_store); 588 - GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store); 594 + GDLM_ATTR(withdraw, 0200, NULL, withdraw_helper_status_store); 589 595 GDLM_ATTR(jid, 0644, jid_show, jid_store); 590 596 GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); 591 597 GDLM_ATTR(first_done, 0444, first_done_show, NULL); ··· 684 690 TUNE_ATTR(new_files_jdata, 0); 685 691 TUNE_ATTR(statfs_quantum, 1); 686 692 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); 693 + TUNE_ATTR(withdraw_helper_timeout, 1); 687 694 688 695 static struct attribute *tune_attrs[] = { 689 696 &tune_attr_quota_warn_period.attr, ··· 695 700 &tune_attr_statfs_quantum.attr, 696 701 &tune_attr_quota_scale.attr, 697 702 &tune_attr_new_files_jdata.attr, 703 + &tune_attr_withdraw_helper_timeout.attr, 698 704 NULL, 699 705 }; 700 706
+66 -7
fs/gfs2/util.c
··· 171 171 va_end(args); 172 172 } 173 173 174 + /** 175 + * gfs2_offline_uevent - run gfs2_withdraw_helper 176 + * @sdp: The GFS2 superblock 177 + */ 178 + static bool gfs2_offline_uevent(struct gfs2_sbd *sdp) 179 + { 180 + struct lm_lockstruct *ls = &sdp->sd_lockstruct; 181 + long timeout; 182 + 183 + /* Skip protocol "lock_nolock" which doesn't require shared storage. */ 184 + if (!ls->ls_ops->lm_lock) 185 + return false; 186 + 187 + /* 188 + * The gfs2_withdraw_helper replies by writing one of the following 189 + * status codes to "/sys$DEVPATH/lock_module/withdraw": 190 + * 191 + * 0 - The shared block device has been marked inactive. Future write 192 + * operations will fail. 193 + * 194 + * 1 - The shared block device may still be active and carry out 195 + * write operations. 196 + * 197 + * If the "offline" uevent isn't reacted upon in time, the event 198 + * handler is assumed to have failed. 199 + */ 200 + 201 + sdp->sd_withdraw_helper_status = -1; 202 + kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 203 + timeout = gfs2_tune_get(sdp, gt_withdraw_helper_timeout) * HZ; 204 + wait_for_completion_timeout(&sdp->sd_withdraw_helper, timeout); 205 + if (sdp->sd_withdraw_helper_status == -1) { 206 + fs_err(sdp, "%s timed out\n", "gfs2_withdraw_helper"); 207 + } else { 208 + fs_err(sdp, "%s %s with status %d\n", 209 + "gfs2_withdraw_helper", 210 + sdp->sd_withdraw_helper_status == 0 ? 
211 + "succeeded" : "failed", 212 + sdp->sd_withdraw_helper_status); 213 + } 214 + return sdp->sd_withdraw_helper_status == 0; 215 + } 216 + 174 217 void gfs2_withdraw_func(struct work_struct *work) 175 218 { 176 219 struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_withdraw_work); 177 220 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 178 221 const struct lm_lockops *lm = ls->ls_ops; 222 + bool device_inactive; 179 223 180 224 if (test_bit(SDF_KILL, &sdp->sd_flags)) 181 225 return; 182 226 183 227 BUG_ON(sdp->sd_args.ar_debug); 184 228 185 - do_withdraw(sdp); 229 + /* 230 + * Try to deactivate the shared block device so that no more I/O will 231 + * go through. If successful, we can immediately trigger remote 232 + * recovery. Otherwise, we must first empty out all our local caches. 233 + */ 186 234 187 - kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 235 + device_inactive = gfs2_offline_uevent(sdp); 188 236 189 - if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) 190 - wait_for_completion(&sdp->sd_wdack); 237 + if (sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE && !device_inactive) 238 + panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); 191 239 192 - if (lm->lm_unmount) 193 - lm->lm_unmount(sdp, false); 240 + if (lm->lm_unmount) { 241 + if (device_inactive) { 242 + lm->lm_unmount(sdp, false); 243 + do_withdraw(sdp); 244 + } else { 245 + do_withdraw(sdp); 246 + lm->lm_unmount(sdp, false); 247 + } 248 + } else { 249 + do_withdraw(sdp); 250 + } 251 + 194 252 fs_err(sdp, "file system withdrawn\n"); 195 253 } 196 254 197 255 void gfs2_withdraw(struct gfs2_sbd *sdp) 198 256 { 199 - if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { 257 + if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW || 258 + sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE) { 200 259 if (test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) 201 260 return; 202 261