Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] md: increase the delay before marking metadata clean, and make it configurable

When an md array has been idle (no writes) for 20msecs it is marked as 'clean'.
This delay turns out to be too short for some real workloads. So increase it
to 200msec (the time to update the metadata should be a tiny fraction of that)
and make it sysfs-configurable.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

NeilBrown and committed by
Linus Torvalds
16f17b39 9443a1d1

+61 -2
+9
Documentation/md.txt
··· 207 207 available. It will then appear at md/dev-XXX (depending on the 208 208 name of the device) and further configuration is then possible. 209 209 210 + safe_mode_delay 211 + When an md array has seen no write requests for a certain period 212 + of time, it will be marked as 'clean'. When another write 213 + request arrives, the array is marked as 'dirty' before the write 214 + commences. This is known as 'safe_mode'. 215 + The 'certain period' is controlled by this file which stores the 216 + period as a number of seconds. The default is 200msec (0.200). 217 + Writing a value of 0 disables safemode. 218 + 210 219 sync_speed_min 211 220 sync_speed_max 212 221 These are similar to /proc/sys/dev/raid/speed_limit_{min,max}
+52 -2
drivers/md/md.c
··· 44 44 #include <linux/suspend.h> 45 45 #include <linux/poll.h> 46 46 #include <linux/mutex.h> 47 + #include <linux/ctype.h> 47 48 48 49 #include <linux/init.h> 49 50 ··· 1979 1978 } 1980 1979 1981 1980 static ssize_t 1981 + safe_delay_show(mddev_t *mddev, char *page) 1982 + { 1983 + int msec = (mddev->safemode_delay*1000)/HZ; 1984 + return sprintf(page, "%d.%03d\n", msec/1000, msec%1000); 1985 + } 1986 + static ssize_t 1987 + safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) 1988 + { 1989 + int scale=1; 1990 + int dot=0; 1991 + int i; 1992 + unsigned long msec; 1993 + char buf[30]; 1994 + char *e; 1995 + /* remove a period, and count digits after it */ 1996 + if (len >= sizeof(buf)) 1997 + return -EINVAL; 1998 + strlcpy(buf, cbuf, len); 1999 + buf[len] = 0; 2000 + for (i=0; i<len; i++) { 2001 + if (dot) { 2002 + if (isdigit(buf[i])) { 2003 + buf[i-1] = buf[i]; 2004 + scale *= 10; 2005 + } 2006 + buf[i] = 0; 2007 + } else if (buf[i] == '.') { 2008 + dot=1; 2009 + buf[i] = 0; 2010 + } 2011 + } 2012 + msec = simple_strtoul(buf, &e, 10); 2013 + if (e == buf || (*e && *e != '\n')) 2014 + return -EINVAL; 2015 + msec = (msec * 1000) / scale; 2016 + if (msec == 0) 2017 + mddev->safemode_delay = 0; 2018 + else { 2019 + mddev->safemode_delay = (msec*HZ)/1000; 2020 + if (mddev->safemode_delay == 0) 2021 + mddev->safemode_delay = 1; 2022 + } 2023 + return len; 2024 + } 2025 + static struct md_sysfs_entry md_safe_delay = 2026 + __ATTR(safe_mode_delay, 0644,safe_delay_show, safe_delay_store); 2027 + 2028 + static ssize_t 1982 2029 level_show(mddev_t *mddev, char *page) 1983 2030 { 1984 2031 struct mdk_personality *p = mddev->pers; ··· 2482 2433 &md_size.attr, 2483 2434 &md_metadata.attr, 2484 2435 &md_new_device.attr, 2436 + &md_safe_delay.attr, 2485 2437 NULL, 2486 2438 }; 2487 2439 ··· 2758 2708 mddev->safemode = 0; 2759 2709 mddev->safemode_timer.function = md_safemode_timeout; 2760 2710 mddev->safemode_timer.data = (unsigned long) mddev; 2761 - 
mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */ 2711 + mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ 2762 2712 mddev->in_sync = 1; 2763 2713 2764 2714 ITERATE_RDEV(mddev,rdev,tmp) ··· 4644 4594 if (atomic_dec_and_test(&mddev->writes_pending)) { 4645 4595 if (mddev->safemode == 2) 4646 4596 md_wakeup_thread(mddev->thread); 4647 - else 4597 + else if (mddev->safemode_delay) 4648 4598 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); 4649 4599 } 4650 4600 }