Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux

Pull module updates from Rusty Russell:
"Main excitement here is Peter Zijlstra's lockless rbtree optimization
to speed module address lookup. He found some abusers of the module
lock doing that too.

A little bit of parameter work here too; including Dan Streetman's
breaking up the big param mutex so writing a parameter can load
another module (yeah, really). Unfortunately that broke the usual
suspects, !CONFIG_MODULES and !CONFIG_SYSFS, so those fixes were
appended too"

* tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (26 commits)
modules: only use mod->param_lock if CONFIG_MODULES
param: fix module param locks when !CONFIG_SYSFS.
rcu: merge fix for Convert ACCESS_ONCE() to READ_ONCE() and WRITE_ONCE()
module: add per-module param_lock
module: make perm const
params: suppress unused variable error, warn once just in case code changes.
modules: clarify CONFIG_MODULE_COMPRESS help, suggest 'N'.
kernel/module.c: avoid ifdefs for sig_enforce declaration
kernel/workqueue.c: remove ifdefs over wq_power_efficient
kernel/params.c: export param_ops_bool_enable_only
kernel/params.c: generalize bool_enable_only
kernel/module.c: use generic module param operators for sig_enforce
kernel/params: constify struct kernel_param_ops uses
sysfs: tightened sysfs permission checks
module: Rework module_addr_{min,max}
module: Use __module_address() for module_address_lookup()
module: Make the mod_tree stuff conditional on PERF_EVENTS || TRACING
module: Optimize __module_address() using a latched RB-tree
rbtree: Implement generic latch_tree
seqlock: Introduce raw_read_seqcount_latch()
...

+887 -359
+1 -1
arch/s390/kernel/perf_cpum_sf.c
··· 1572 1572 } 1573 1573 1574 1574 #define param_check_sfb_size(name, p) __param_check(name, p, void) 1575 - static struct kernel_param_ops param_ops_sfb_size = { 1575 + static const struct kernel_param_ops param_ops_sfb_size = { 1576 1576 .set = param_set_sfb_size, 1577 1577 .get = param_get_sfb_size, 1578 1578 };
+10 -10
arch/um/drivers/hostaudio_kern.c
··· 185 185 int ret; 186 186 187 187 #ifdef DEBUG 188 - kparam_block_sysfs_write(dsp); 188 + kernel_param_lock(THIS_MODULE); 189 189 printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp); 190 - kparam_unblock_sysfs_write(dsp); 190 + kernel_param_unlock(THIS_MODULE); 191 191 #endif 192 192 193 193 state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL); ··· 199 199 if (file->f_mode & FMODE_WRITE) 200 200 w = 1; 201 201 202 - kparam_block_sysfs_write(dsp); 202 + kernel_param_lock(THIS_MODULE); 203 203 mutex_lock(&hostaudio_mutex); 204 204 ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); 205 205 mutex_unlock(&hostaudio_mutex); 206 - kparam_unblock_sysfs_write(dsp); 206 + kernel_param_unlock(THIS_MODULE); 207 207 208 208 if (ret < 0) { 209 209 kfree(state); ··· 260 260 if (file->f_mode & FMODE_WRITE) 261 261 w = 1; 262 262 263 - kparam_block_sysfs_write(mixer); 263 + kernel_param_lock(THIS_MODULE); 264 264 mutex_lock(&hostaudio_mutex); 265 265 ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); 266 266 mutex_unlock(&hostaudio_mutex); 267 - kparam_unblock_sysfs_write(mixer); 267 + kernel_param_unlock(THIS_MODULE); 268 268 269 269 if (ret < 0) { 270 - kparam_block_sysfs_write(dsp); 270 + kernel_param_lock(THIS_MODULE); 271 271 printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', " 272 272 "err = %d\n", dsp, -ret); 273 - kparam_unblock_sysfs_write(dsp); 273 + kernel_param_unlock(THIS_MODULE); 274 274 kfree(state); 275 275 return ret; 276 276 } ··· 326 326 327 327 static int __init hostaudio_init_module(void) 328 328 { 329 - __kernel_param_lock(); 329 + kernel_param_lock(THIS_MODULE); 330 330 printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", 331 331 dsp, mixer); 332 - __kernel_param_unlock(); 332 + kernel_param_unlock(THIS_MODULE); 333 333 334 334 module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); 335 335 if (module_data.dev_audio < 0) {
+1 -1
arch/x86/kvm/mmu_audit.c
··· 297 297 return 0; 298 298 } 299 299 300 - static struct kernel_param_ops audit_param_ops = { 300 + static const struct kernel_param_ops audit_param_ops = { 301 301 .set = mmu_audit_set, 302 302 .get = param_get_bool, 303 303 };
+1 -1
arch/x86/platform/uv/uv_nmi.c
··· 104 104 return 0; 105 105 } 106 106 107 - static struct kernel_param_ops param_ops_local64 = { 107 + static const struct kernel_param_ops param_ops_local64 = { 108 108 .get = param_get_local64, 109 109 .set = param_set_local64, 110 110 };
+2 -2
drivers/block/null_blk.c
··· 99 99 return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ); 100 100 } 101 101 102 - static struct kernel_param_ops null_queue_mode_param_ops = { 102 + static const struct kernel_param_ops null_queue_mode_param_ops = { 103 103 .set = null_set_queue_mode, 104 104 .get = param_get_int, 105 105 }; ··· 127 127 NULL_IRQ_TIMER); 128 128 } 129 129 130 - static struct kernel_param_ops null_irqmode_param_ops = { 130 + static const struct kernel_param_ops null_irqmode_param_ops = { 131 131 .set = null_set_irqmode, 132 132 .get = param_get_int, 133 133 };
+3 -3
drivers/char/ipmi/ipmi_watchdog.c
··· 208 208 return rv; 209 209 } 210 210 211 - static struct kernel_param_ops param_ops_timeout = { 211 + static const struct kernel_param_ops param_ops_timeout = { 212 212 .set = set_param_timeout, 213 213 .get = param_get_int, 214 214 }; ··· 270 270 return 0; 271 271 } 272 272 273 - static struct kernel_param_ops param_ops_wdog_ifnum = { 273 + static const struct kernel_param_ops param_ops_wdog_ifnum = { 274 274 .set = set_param_wdog_ifnum, 275 275 .get = param_get_int, 276 276 }; 277 277 278 278 #define param_check_wdog_ifnum param_check_int 279 279 280 - static struct kernel_param_ops param_ops_str = { 280 + static const struct kernel_param_ops param_ops_str = { 281 281 .set = set_param_str, 282 282 .get = get_param_str, 283 283 };
+2 -2
drivers/dma/dmatest.c
··· 120 120 121 121 static int dmatest_run_set(const char *val, const struct kernel_param *kp); 122 122 static int dmatest_run_get(char *val, const struct kernel_param *kp); 123 - static struct kernel_param_ops run_ops = { 123 + static const struct kernel_param_ops run_ops = { 124 124 .set = dmatest_run_set, 125 125 .get = dmatest_run_get, 126 126 }; ··· 195 195 return param_get_bool(val, kp); 196 196 } 197 197 198 - static struct kernel_param_ops wait_ops = { 198 + static const struct kernel_param_ops wait_ops = { 199 199 .get = dmatest_wait_get, 200 200 .set = param_set_bool, 201 201 };
+1 -1
drivers/ide/ide.c
··· 199 199 return 0; 200 200 } 201 201 202 - static struct kernel_param_ops param_ops_ide_dev_mask = { 202 + static const struct kernel_param_ops param_ops_ide_dev_mask = { 203 203 .set = ide_set_dev_param_mask 204 204 }; 205 205
+2 -2
drivers/infiniband/ulp/srp/ib_srp.c
··· 99 99 MODULE_PARM_DESC(register_always, 100 100 "Use memory registration even for contiguous memory regions"); 101 101 102 - static struct kernel_param_ops srp_tmo_ops; 102 + static const struct kernel_param_ops srp_tmo_ops; 103 103 104 104 static int srp_reconnect_delay = 10; 105 105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, ··· 184 184 return res; 185 185 } 186 186 187 - static struct kernel_param_ops srp_tmo_ops = { 187 + static const struct kernel_param_ops srp_tmo_ops = { 188 188 .get = srp_tmo_get, 189 189 .set = srp_tmo_set, 190 190 };
+2 -2
drivers/input/misc/ati_remote2.c
··· 94 94 95 95 static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK; 96 96 #define param_check_channel_mask(name, p) __param_check(name, p, unsigned int) 97 - static struct kernel_param_ops param_ops_channel_mask = { 97 + static const struct kernel_param_ops param_ops_channel_mask = { 98 98 .set = ati_remote2_set_channel_mask, 99 99 .get = ati_remote2_get_channel_mask, 100 100 }; ··· 103 103 104 104 static unsigned int mode_mask = ATI_REMOTE2_MAX_MODE_MASK; 105 105 #define param_check_mode_mask(name, p) __param_check(name, p, unsigned int) 106 - static struct kernel_param_ops param_ops_mode_mask = { 106 + static const struct kernel_param_ops param_ops_mode_mask = { 107 107 .set = ati_remote2_set_mode_mask, 108 108 .get = ati_remote2_get_mode_mask, 109 109 };
+1 -1
drivers/input/mouse/psmouse-base.c
··· 47 47 static unsigned int psmouse_max_proto = PSMOUSE_AUTO; 48 48 static int psmouse_set_maxproto(const char *val, const struct kernel_param *); 49 49 static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp); 50 - static struct kernel_param_ops param_ops_proto_abbrev = { 50 + static const struct kernel_param_ops param_ops_proto_abbrev = { 51 51 .set = psmouse_set_maxproto, 52 52 .get = psmouse_get_maxproto, 53 53 };
+1 -1
drivers/misc/lis3lv02d/lis3lv02d.c
··· 115 115 return ret; 116 116 } 117 117 118 - static struct kernel_param_ops param_ops_axis = { 118 + static const struct kernel_param_ops param_ops_axis = { 119 119 .set = param_set_axis, 120 120 .get = param_get_int, 121 121 };
+1 -1
drivers/mtd/ubi/block.c
··· 162 162 return 0; 163 163 } 164 164 165 - static struct kernel_param_ops ubiblock_param_ops = { 165 + static const struct kernel_param_ops ubiblock_param_ops = { 166 166 .set = ubiblock_set_param, 167 167 }; 168 168 module_param_cb(block, &ubiblock_param_ops, NULL, 0);
+3 -3
drivers/net/ethernet/myricom/myri10ge/myri10ge.c
··· 279 279 MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat"); 280 280 MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat"); 281 281 282 - /* Careful: must be accessed under kparam_block_sysfs_write */ 282 + /* Careful: must be accessed under kernel_param_lock() */ 283 283 static char *myri10ge_fw_name = NULL; 284 284 module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR); 285 285 MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name"); ··· 3427 3427 } 3428 3428 } 3429 3429 3430 - kparam_block_sysfs_write(myri10ge_fw_name); 3430 + kernel_param_lock(THIS_MODULE); 3431 3431 if (myri10ge_fw_name != NULL) { 3432 3432 char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL); 3433 3433 if (fw_name) { ··· 3435 3435 set_fw_name(mgp, fw_name, true); 3436 3436 } 3437 3437 } 3438 - kparam_unblock_sysfs_write(myri10ge_fw_name); 3438 + kernel_param_unlock(THIS_MODULE); 3439 3439 3440 3440 if (mgp->board_number < MYRI10GE_MAX_BOARDS && 3441 3441 myri10ge_fw_names[mgp->board_number] != NULL &&
+2 -2
drivers/net/wireless/ath/wil6210/main.c
··· 62 62 return ret; 63 63 } 64 64 65 - static struct kernel_param_ops mtu_max_ops = { 65 + static const struct kernel_param_ops mtu_max_ops = { 66 66 .set = mtu_max_set, 67 67 .get = param_get_uint, 68 68 }; ··· 91 91 return 0; 92 92 } 93 93 94 - static struct kernel_param_ops ring_order_ops = { 94 + static const struct kernel_param_ops ring_order_ops = { 95 95 .set = ring_order_set, 96 96 .get = param_get_uint, 97 97 };
+3 -3
drivers/net/wireless/libertas_tf/if_usb.c
··· 821 821 822 822 lbtf_deb_enter(LBTF_DEB_USB); 823 823 824 - kparam_block_sysfs_write(fw_name); 824 + kernel_param_lock(THIS_MODULE); 825 825 ret = request_firmware(&cardp->fw, lbtf_fw_name, &cardp->udev->dev); 826 826 if (ret < 0) { 827 827 pr_err("request_firmware() failed with %#x\n", ret); 828 828 pr_err("firmware %s not found\n", lbtf_fw_name); 829 - kparam_unblock_sysfs_write(fw_name); 829 + kernel_param_unlock(THIS_MODULE); 830 830 goto done; 831 831 } 832 - kparam_unblock_sysfs_write(fw_name); 832 + kernel_param_unlock(THIS_MODULE); 833 833 834 834 if (check_fwfile_format(cardp->fw->data, cardp->fw->size)) 835 835 goto release_fw;
+8 -8
drivers/power/test_power.c
··· 448 448 449 449 #define param_get_battery_voltage param_get_int 450 450 451 - static struct kernel_param_ops param_ops_ac_online = { 451 + static const struct kernel_param_ops param_ops_ac_online = { 452 452 .set = param_set_ac_online, 453 453 .get = param_get_ac_online, 454 454 }; 455 455 456 - static struct kernel_param_ops param_ops_usb_online = { 456 + static const struct kernel_param_ops param_ops_usb_online = { 457 457 .set = param_set_usb_online, 458 458 .get = param_get_usb_online, 459 459 }; 460 460 461 - static struct kernel_param_ops param_ops_battery_status = { 461 + static const struct kernel_param_ops param_ops_battery_status = { 462 462 .set = param_set_battery_status, 463 463 .get = param_get_battery_status, 464 464 }; 465 465 466 - static struct kernel_param_ops param_ops_battery_present = { 466 + static const struct kernel_param_ops param_ops_battery_present = { 467 467 .set = param_set_battery_present, 468 468 .get = param_get_battery_present, 469 469 }; 470 470 471 - static struct kernel_param_ops param_ops_battery_technology = { 471 + static const struct kernel_param_ops param_ops_battery_technology = { 472 472 .set = param_set_battery_technology, 473 473 .get = param_get_battery_technology, 474 474 }; 475 475 476 - static struct kernel_param_ops param_ops_battery_health = { 476 + static const struct kernel_param_ops param_ops_battery_health = { 477 477 .set = param_set_battery_health, 478 478 .get = param_get_battery_health, 479 479 }; 480 480 481 - static struct kernel_param_ops param_ops_battery_capacity = { 481 + static const struct kernel_param_ops param_ops_battery_capacity = { 482 482 .set = param_set_battery_capacity, 483 483 .get = param_get_battery_capacity, 484 484 }; 485 485 486 - static struct kernel_param_ops param_ops_battery_voltage = { 486 + static const struct kernel_param_ops param_ops_battery_voltage = { 487 487 .set = param_set_battery_voltage, 488 488 .get = param_get_battery_voltage, 489 489 };
+2 -2
drivers/thermal/intel_powerclamp.c
··· 119 119 return ret; 120 120 } 121 121 122 - static struct kernel_param_ops duration_ops = { 122 + static const struct kernel_param_ops duration_ops = { 123 123 .set = duration_set, 124 124 .get = param_get_int, 125 125 }; ··· 167 167 return ret; 168 168 } 169 169 170 - static struct kernel_param_ops window_size_ops = { 170 + static const struct kernel_param_ops window_size_ops = { 171 171 .set = window_size_set, 172 172 .get = param_get_int, 173 173 };
+1 -1
drivers/tty/hvc/hvc_iucv.c
··· 1345 1345 1346 1346 #define param_check_vmidfilter(name, p) __param_check(name, p, void) 1347 1347 1348 - static struct kernel_param_ops param_ops_vmidfilter = { 1348 + static const struct kernel_param_ops param_ops_vmidfilter = { 1349 1349 .set = param_set_vmidfilter, 1350 1350 .get = param_get_vmidfilter, 1351 1351 };
+1 -1
drivers/tty/sysrq.c
··· 988 988 return 0; 989 989 } 990 990 991 - static struct kernel_param_ops param_ops_sysrq_reset_seq = { 991 + static const struct kernel_param_ops param_ops_sysrq_reset_seq = { 992 992 .get = param_get_ushort, 993 993 .set = sysrq_reset_seq_param_set, 994 994 };
+2 -2
drivers/usb/atm/ueagle-atm.c
··· 1599 1599 char file_arr[] = "CMVxy.bin"; 1600 1600 char *file; 1601 1601 1602 - kparam_block_sysfs_write(cmv_file); 1602 + kernel_param_lock(THIS_MODULE); 1603 1603 /* set proper name corresponding modem version and line type */ 1604 1604 if (cmv_file[sc->modem_index] == NULL) { 1605 1605 if (UEA_CHIP_VERSION(sc) == ADI930) ··· 1618 1618 strlcat(cmv_name, file, UEA_FW_NAME_MAX); 1619 1619 if (ver == 2) 1620 1620 strlcat(cmv_name, ".v2", UEA_FW_NAME_MAX); 1621 - kparam_unblock_sysfs_write(cmv_file); 1621 + kernel_param_unlock(THIS_MODULE); 1622 1622 } 1623 1623 1624 1624 static int request_cmvs_old(struct uea_softc *sc,
+1 -1
drivers/video/fbdev/uvesafb.c
··· 1977 1977 1978 1978 return 0; 1979 1979 } 1980 - static struct kernel_param_ops param_ops_scroll = { 1980 + static const struct kernel_param_ops param_ops_scroll = { 1981 1981 .set = param_set_scroll, 1982 1982 }; 1983 1983 #define param_check_scroll(name, p) __param_check(name, p, void)
+2 -2
drivers/video/fbdev/vt8623fb.c
··· 754 754 755 755 /* Prepare startup mode */ 756 756 757 - kparam_block_sysfs_write(mode_option); 757 + kernel_param_lock(THIS_MODULE); 758 758 rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8); 759 - kparam_unblock_sysfs_write(mode_option); 759 + kernel_param_unlock(THIS_MODULE); 760 760 if (! ((rc == 1) || (rc == 2))) { 761 761 rc = -EINVAL; 762 762 dev_err(info->device, "mode %s not found\n", mode_option);
+1 -1
drivers/virtio/virtio_mmio.c
··· 691 691 return strlen(buffer) + 1; 692 692 } 693 693 694 - static struct kernel_param_ops vm_cmdline_param_ops = { 694 + static const struct kernel_param_ops vm_cmdline_param_ops = { 695 695 .set = vm_cmdline_set, 696 696 .get = vm_cmdline_get, 697 697 };
+1 -1
fs/nfs/super.c
··· 2847 2847 *((unsigned int *)kp->arg) = num; 2848 2848 return 0; 2849 2849 } 2850 - static struct kernel_param_ops param_ops_portnr = { 2850 + static const struct kernel_param_ops param_ops_portnr = { 2851 2851 .set = param_set_portnr, 2852 2852 .get = param_get_uint, 2853 2853 };
+15
include/linux/compiler.h
··· 475 475 (volatile typeof(x) *)&(x); }) 476 476 #define ACCESS_ONCE(x) (*__ACCESS_ONCE(x)) 477 477 478 + /** 479 + * lockless_dereference() - safely load a pointer for later dereference 480 + * @p: The pointer to load 481 + * 482 + * Similar to rcu_dereference(), but for situations where the pointed-to 483 + * object's lifetime is managed by something other than RCU. That 484 + * "something other" might be reference counting or simple immortality. 485 + */ 486 + #define lockless_dereference(p) \ 487 + ({ \ 488 + typeof(p) _________p1 = READ_ONCE(p); \ 489 + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ 490 + (_________p1); \ 491 + }) 492 + 478 493 /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ 479 494 #ifdef CONFIG_KPROBES 480 495 # define __kprobes __attribute__((__section__(".kprobes.text")))
+10 -8
include/linux/kernel.h
··· 813 813 #endif 814 814 815 815 /* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ 816 - #define VERIFY_OCTAL_PERMISSIONS(perms) \ 817 - (BUILD_BUG_ON_ZERO((perms) < 0) + \ 818 - BUILD_BUG_ON_ZERO((perms) > 0777) + \ 819 - /* User perms >= group perms >= other perms */ \ 820 - BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \ 821 - BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \ 822 - /* Other writable? Generally considered a bad idea. */ \ 823 - BUILD_BUG_ON_ZERO((perms) & 2) + \ 816 + #define VERIFY_OCTAL_PERMISSIONS(perms) \ 817 + (BUILD_BUG_ON_ZERO((perms) < 0) + \ 818 + BUILD_BUG_ON_ZERO((perms) > 0777) + \ 819 + /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ 820 + BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ 821 + BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ 822 + /* USER_WRITABLE >= GROUP_WRITABLE */ \ 823 + BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ 824 + /* OTHER_WRITABLE? Generally considered a bad idea. */ \ 825 + BUILD_BUG_ON_ZERO((perms) & 2) + \ 824 826 (perms)) 825 827 #endif
+41 -5
include/linux/module.h
··· 17 17 #include <linux/moduleparam.h> 18 18 #include <linux/jump_label.h> 19 19 #include <linux/export.h> 20 + #include <linux/rbtree_latch.h> 20 21 21 22 #include <linux/percpu.h> 22 23 #include <asm/module.h> ··· 211 210 MODULE_STATE_UNFORMED, /* Still setting it up. */ 212 211 }; 213 212 213 + struct module; 214 + 215 + struct mod_tree_node { 216 + struct module *mod; 217 + struct latch_tree_node node; 218 + }; 219 + 214 220 struct module { 215 221 enum module_state state; 216 222 ··· 240 232 unsigned int num_syms; 241 233 242 234 /* Kernel parameters. */ 235 + #ifdef CONFIG_SYSFS 236 + struct mutex param_lock; 237 + #endif 243 238 struct kernel_param *kp; 244 239 unsigned int num_kp; 245 240 ··· 282 271 /* Startup function. */ 283 272 int (*init)(void); 284 273 285 - /* If this is non-NULL, vfree after init() returns */ 286 - void *module_init; 274 + /* 275 + * If this is non-NULL, vfree() after init() returns. 276 + * 277 + * Cacheline align here, such that: 278 + * module_init, module_core, init_size, core_size, 279 + * init_text_size, core_text_size and mtn_core::{mod,node[0]} 280 + * are on the same cacheline. 281 + */ 282 + void *module_init ____cacheline_aligned; 287 283 288 284 /* Here is the actual code + data, vfree'd on unload. */ 289 285 void *module_core; ··· 300 282 301 283 /* The size of the executable code in each section. */ 302 284 unsigned int init_text_size, core_text_size; 285 + 286 + #ifdef CONFIG_MODULES_TREE_LOOKUP 287 + /* 288 + * We want mtn_core::{mod,node[0]} to be in the same cacheline as the 289 + * above entries such that a regular lookup will only touch one 290 + * cacheline. 
291 + */ 292 + struct mod_tree_node mtn_core; 293 + struct mod_tree_node mtn_init; 294 + #endif 303 295 304 296 /* Size of RO sections of the module (text+rodata) */ 305 297 unsigned int init_ro_size, core_ro_size; ··· 397 369 ctor_fn_t *ctors; 398 370 unsigned int num_ctors; 399 371 #endif 400 - }; 372 + } ____cacheline_aligned; 401 373 #ifndef MODULE_ARCH_INIT 402 374 #define MODULE_ARCH_INIT {} 403 375 #endif ··· 451 423 bool unused; 452 424 }; 453 425 454 - /* Search for an exported symbol by name. */ 426 + /* 427 + * Search for an exported symbol by name. 428 + * 429 + * Must be called with module_mutex held or preemption disabled. 430 + */ 455 431 const struct kernel_symbol *find_symbol(const char *name, 456 432 struct module **owner, 457 433 const unsigned long **crc, 458 434 bool gplok, 459 435 bool warn); 460 436 461 - /* Walk the exported symbol table */ 437 + /* 438 + * Walk the exported symbol table 439 + * 440 + * Must be called with module_mutex held or preemption disabled. 441 + */ 462 442 bool each_symbol_section(bool (*fn)(const struct symsearch *arr, 463 443 struct module *owner, 464 444 void *data), void *data);
+31 -68
include/linux/moduleparam.h
··· 67 67 68 68 struct kernel_param { 69 69 const char *name; 70 + struct module *mod; 70 71 const struct kernel_param_ops *ops; 71 - u16 perm; 72 + const u16 perm; 72 73 s8 level; 73 74 u8 flags; 74 75 union { ··· 109 108 * 110 109 * @perm is 0 if the the variable is not to appear in sysfs, or 0444 111 110 * for world-readable, 0644 for root-writable, etc. Note that if it 112 - * is writable, you may need to use kparam_block_sysfs_write() around 111 + * is writable, you may need to use kernel_param_lock() around 113 112 * accesses (esp. charp, which can be kfreed when it changes). 114 113 * 115 114 * The @type is simply pasted to refer to a param_ops_##type and a ··· 217 216 parameters. */ 218 217 #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ 219 218 /* Default value instead of permissions? */ \ 220 - static const char __param_str_##name[] = prefix #name; \ 219 + static const char __param_str_##name[] = prefix #name; \ 221 220 static struct kernel_param __moduleparam_const __param_##name \ 222 221 __used \ 223 222 __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ 224 - = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \ 225 - level, flags, { arg } } 223 + = { __param_str_##name, THIS_MODULE, ops, \ 224 + VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } 226 225 227 226 /* Obsolete - use module_param_cb() */ 228 227 #define module_param_call(name, set, get, arg, perm) \ 229 - static struct kernel_param_ops __param_ops_##name = \ 228 + static const struct kernel_param_ops __param_ops_##name = \ 230 229 { .flags = 0, (void *)set, (void *)get }; \ 231 230 __module_param_call(MODULE_PARAM_PREFIX, \ 232 231 name, &__param_ops_##name, arg, \ ··· 239 238 return 0; 240 239 } 241 240 242 - /** 243 - * kparam_block_sysfs_write - make sure a parameter isn't written via sysfs. 244 - * @name: the name of the parameter 245 - * 246 - * There's no point blocking write on a paramter that isn't writable via sysfs! 
247 - */ 248 - #define kparam_block_sysfs_write(name) \ 249 - do { \ 250 - BUG_ON(!(__param_##name.perm & 0222)); \ 251 - __kernel_param_lock(); \ 252 - } while (0) 253 - 254 - /** 255 - * kparam_unblock_sysfs_write - allows sysfs to write to a parameter again. 256 - * @name: the name of the parameter 257 - */ 258 - #define kparam_unblock_sysfs_write(name) \ 259 - do { \ 260 - BUG_ON(!(__param_##name.perm & 0222)); \ 261 - __kernel_param_unlock(); \ 262 - } while (0) 263 - 264 - /** 265 - * kparam_block_sysfs_read - make sure a parameter isn't read via sysfs. 266 - * @name: the name of the parameter 267 - * 268 - * This also blocks sysfs writes. 269 - */ 270 - #define kparam_block_sysfs_read(name) \ 271 - do { \ 272 - BUG_ON(!(__param_##name.perm & 0444)); \ 273 - __kernel_param_lock(); \ 274 - } while (0) 275 - 276 - /** 277 - * kparam_unblock_sysfs_read - allows sysfs to read a parameter again. 278 - * @name: the name of the parameter 279 - */ 280 - #define kparam_unblock_sysfs_read(name) \ 281 - do { \ 282 - BUG_ON(!(__param_##name.perm & 0444)); \ 283 - __kernel_param_unlock(); \ 284 - } while (0) 285 - 286 241 #ifdef CONFIG_SYSFS 287 - extern void __kernel_param_lock(void); 288 - extern void __kernel_param_unlock(void); 242 + extern void kernel_param_lock(struct module *mod); 243 + extern void kernel_param_unlock(struct module *mod); 289 244 #else 290 - static inline void __kernel_param_lock(void) 245 + static inline void kernel_param_lock(struct module *mod) 291 246 { 292 247 } 293 - static inline void __kernel_param_unlock(void) 248 + static inline void kernel_param_unlock(struct module *mod) 294 249 { 295 250 } 296 251 #endif ··· 343 386 #define __param_check(name, p, type) \ 344 387 static inline type __always_unused *__check_##name(void) { return(p); } 345 388 346 - extern struct kernel_param_ops param_ops_byte; 389 + extern const struct kernel_param_ops param_ops_byte; 347 390 extern int param_set_byte(const char *val, const struct kernel_param *kp); 348 
391 extern int param_get_byte(char *buffer, const struct kernel_param *kp); 349 392 #define param_check_byte(name, p) __param_check(name, p, unsigned char) 350 393 351 - extern struct kernel_param_ops param_ops_short; 394 + extern const struct kernel_param_ops param_ops_short; 352 395 extern int param_set_short(const char *val, const struct kernel_param *kp); 353 396 extern int param_get_short(char *buffer, const struct kernel_param *kp); 354 397 #define param_check_short(name, p) __param_check(name, p, short) 355 398 356 - extern struct kernel_param_ops param_ops_ushort; 399 + extern const struct kernel_param_ops param_ops_ushort; 357 400 extern int param_set_ushort(const char *val, const struct kernel_param *kp); 358 401 extern int param_get_ushort(char *buffer, const struct kernel_param *kp); 359 402 #define param_check_ushort(name, p) __param_check(name, p, unsigned short) 360 403 361 - extern struct kernel_param_ops param_ops_int; 404 + extern const struct kernel_param_ops param_ops_int; 362 405 extern int param_set_int(const char *val, const struct kernel_param *kp); 363 406 extern int param_get_int(char *buffer, const struct kernel_param *kp); 364 407 #define param_check_int(name, p) __param_check(name, p, int) 365 408 366 - extern struct kernel_param_ops param_ops_uint; 409 + extern const struct kernel_param_ops param_ops_uint; 367 410 extern int param_set_uint(const char *val, const struct kernel_param *kp); 368 411 extern int param_get_uint(char *buffer, const struct kernel_param *kp); 369 412 #define param_check_uint(name, p) __param_check(name, p, unsigned int) 370 413 371 - extern struct kernel_param_ops param_ops_long; 414 + extern const struct kernel_param_ops param_ops_long; 372 415 extern int param_set_long(const char *val, const struct kernel_param *kp); 373 416 extern int param_get_long(char *buffer, const struct kernel_param *kp); 374 417 #define param_check_long(name, p) __param_check(name, p, long) 375 418 376 - extern struct kernel_param_ops 
param_ops_ulong; 419 + extern const struct kernel_param_ops param_ops_ulong; 377 420 extern int param_set_ulong(const char *val, const struct kernel_param *kp); 378 421 extern int param_get_ulong(char *buffer, const struct kernel_param *kp); 379 422 #define param_check_ulong(name, p) __param_check(name, p, unsigned long) 380 423 381 - extern struct kernel_param_ops param_ops_ullong; 424 + extern const struct kernel_param_ops param_ops_ullong; 382 425 extern int param_set_ullong(const char *val, const struct kernel_param *kp); 383 426 extern int param_get_ullong(char *buffer, const struct kernel_param *kp); 384 427 #define param_check_ullong(name, p) __param_check(name, p, unsigned long long) 385 428 386 - extern struct kernel_param_ops param_ops_charp; 429 + extern const struct kernel_param_ops param_ops_charp; 387 430 extern int param_set_charp(const char *val, const struct kernel_param *kp); 388 431 extern int param_get_charp(char *buffer, const struct kernel_param *kp); 389 432 #define param_check_charp(name, p) __param_check(name, p, char *) 390 433 391 434 /* We used to allow int as well as bool. We're taking that away! 
*/ 392 - extern struct kernel_param_ops param_ops_bool; 435 + extern const struct kernel_param_ops param_ops_bool; 393 436 extern int param_set_bool(const char *val, const struct kernel_param *kp); 394 437 extern int param_get_bool(char *buffer, const struct kernel_param *kp); 395 438 #define param_check_bool(name, p) __param_check(name, p, bool) 396 439 397 - extern struct kernel_param_ops param_ops_invbool; 440 + extern const struct kernel_param_ops param_ops_bool_enable_only; 441 + extern int param_set_bool_enable_only(const char *val, 442 + const struct kernel_param *kp); 443 + /* getter is the same as for the regular bool */ 444 + #define param_check_bool_enable_only param_check_bool 445 + 446 + extern const struct kernel_param_ops param_ops_invbool; 398 447 extern int param_set_invbool(const char *val, const struct kernel_param *kp); 399 448 extern int param_get_invbool(char *buffer, const struct kernel_param *kp); 400 449 #define param_check_invbool(name, p) __param_check(name, p, bool) 401 450 402 451 /* An int, which can only be set like a bool (though it shows as an int). */ 403 - extern struct kernel_param_ops param_ops_bint; 452 + extern const struct kernel_param_ops param_ops_bint; 404 453 extern int param_set_bint(const char *val, const struct kernel_param *kp); 405 454 #define param_get_bint param_get_int 406 455 #define param_check_bint param_check_int ··· 450 487 perm, -1, 0); \ 451 488 __MODULE_PARM_TYPE(name, "array of " #type) 452 489 453 - extern struct kernel_param_ops param_array_ops; 490 + extern const struct kernel_param_ops param_array_ops; 454 491 455 - extern struct kernel_param_ops param_ops_string; 492 + extern const struct kernel_param_ops param_ops_string; 456 493 extern int param_set_copystring(const char *val, const struct kernel_param *); 457 494 extern int param_get_string(char *buffer, const struct kernel_param *kp); 458 495
+13 -3
include/linux/rbtree.h
··· 31 31 32 32 #include <linux/kernel.h> 33 33 #include <linux/stddef.h> 34 + #include <linux/rcupdate.h> 34 35 35 36 struct rb_node { 36 37 unsigned long __rb_parent_color; ··· 74 73 extern struct rb_node *rb_next_postorder(const struct rb_node *); 75 74 76 75 /* Fast replacement of a single node without remove/rebalance/add/rebalance */ 77 - extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 76 + extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 78 77 struct rb_root *root); 79 78 80 - static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, 81 - struct rb_node ** rb_link) 79 + static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, 80 + struct rb_node **rb_link) 82 81 { 83 82 node->__rb_parent_color = (unsigned long)parent; 84 83 node->rb_left = node->rb_right = NULL; 85 84 86 85 *rb_link = node; 86 + } 87 + 88 + static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, 89 + struct rb_node **rb_link) 90 + { 91 + node->__rb_parent_color = (unsigned long)parent; 92 + node->rb_left = node->rb_right = NULL; 93 + 94 + rcu_assign_pointer(*rb_link, node); 87 95 } 88 96 89 97 #define rb_entry_safe(ptr, type, member) \
+14 -7
include/linux/rbtree_augmented.h
··· 123 123 { 124 124 if (parent) { 125 125 if (parent->rb_left == old) 126 - parent->rb_left = new; 126 + WRITE_ONCE(parent->rb_left, new); 127 127 else 128 - parent->rb_right = new; 128 + WRITE_ONCE(parent->rb_right, new); 129 129 } else 130 - root->rb_node = new; 130 + WRITE_ONCE(root->rb_node, new); 131 131 } 132 132 133 133 extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, ··· 137 137 __rb_erase_augmented(struct rb_node *node, struct rb_root *root, 138 138 const struct rb_augment_callbacks *augment) 139 139 { 140 - struct rb_node *child = node->rb_right, *tmp = node->rb_left; 140 + struct rb_node *child = node->rb_right; 141 + struct rb_node *tmp = node->rb_left; 141 142 struct rb_node *parent, *rebalance; 142 143 unsigned long pc; 143 144 ··· 168 167 tmp = parent; 169 168 } else { 170 169 struct rb_node *successor = child, *child2; 170 + 171 171 tmp = child->rb_left; 172 172 if (!tmp) { 173 173 /* ··· 182 180 */ 183 181 parent = successor; 184 182 child2 = successor->rb_right; 183 + 185 184 augment->copy(node, successor); 186 185 } else { 187 186 /* ··· 204 201 successor = tmp; 205 202 tmp = tmp->rb_left; 206 203 } while (tmp); 207 - parent->rb_left = child2 = successor->rb_right; 208 - successor->rb_right = child; 204 + child2 = successor->rb_right; 205 + WRITE_ONCE(parent->rb_left, child2); 206 + WRITE_ONCE(successor->rb_right, child); 209 207 rb_set_parent(child, successor); 208 + 210 209 augment->copy(node, successor); 211 210 augment->propagate(parent, successor); 212 211 } 213 212 214 - successor->rb_left = tmp = node->rb_left; 213 + tmp = node->rb_left; 214 + WRITE_ONCE(successor->rb_left, tmp); 215 215 rb_set_parent(tmp, successor); 216 216 217 217 pc = node->__rb_parent_color; 218 218 tmp = __rb_parent(pc); 219 219 __rb_change_child(node, successor, tmp, root); 220 + 220 221 if (child2) { 221 222 successor->__rb_parent_color = pc; 222 223 rb_set_parent_color(child2, parent, RB_BLACK);
+212
include/linux/rbtree_latch.h
··· 1 + /* 2 + * Latched RB-trees 3 + * 4 + * Copyright (C) 2015 Intel Corp., Peter Zijlstra <peterz@infradead.org> 5 + * 6 + * Since RB-trees have non-atomic modifications they're not immediately suited 7 + * for RCU/lockless queries. Even though we made RB-tree lookups non-fatal for 8 + * lockless lookups; we cannot guarantee they return a correct result. 9 + * 10 + * The simplest solution is a seqlock + RB-tree, this will allow lockless 11 + * lookups; but has the constraint (inherent to the seqlock) that read sides 12 + * cannot nest in write sides. 13 + * 14 + * If we need to allow unconditional lookups (say as required for NMI context 15 + * usage) we need a more complex setup; this data structure provides this by 16 + * employing the latch technique -- see @raw_write_seqcount_latch -- to 17 + * implement a latched RB-tree which does allow for unconditional lookups by 18 + * virtue of always having (at least) one stable copy of the tree. 19 + * 20 + * However, while we have the guarantee that there is at all times one stable 21 + * copy, this does not guarantee an iteration will not observe modifications. 22 + * What might have been a stable copy at the start of the iteration, need not 23 + * remain so for the duration of the iteration. 24 + * 25 + * Therefore, this does require a lockless RB-tree iteration to be non-fatal; 26 + * see the comment in lib/rbtree.c. Note however that we only require the first 27 + * condition -- not seeing partial stores -- because the latch thing isolates 28 + * us from loops. If we were to interrupt a modification the lookup would be 29 + * pointed at the stable tree and complete while the modification was halted. 
30 + */ 31 + 32 + #ifndef RB_TREE_LATCH_H 33 + #define RB_TREE_LATCH_H 34 + 35 + #include <linux/rbtree.h> 36 + #include <linux/seqlock.h> 37 + 38 + struct latch_tree_node { 39 + struct rb_node node[2]; 40 + }; 41 + 42 + struct latch_tree_root { 43 + seqcount_t seq; 44 + struct rb_root tree[2]; 45 + }; 46 + 47 + /** 48 + * latch_tree_ops - operators to define the tree order 49 + * @less: used for insertion; provides the (partial) order between two elements. 50 + * @comp: used for lookups; provides the order between the search key and an element. 51 + * 52 + * The operators are related like: 53 + * 54 + * comp(a->key,b) < 0 := less(a,b) 55 + * comp(a->key,b) > 0 := less(b,a) 56 + * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) 57 + * 58 + * If these operators define a partial order on the elements we make no 59 + * guarantee on which of the elements matching the key is found. See 60 + * latch_tree_find(). 61 + */ 62 + struct latch_tree_ops { 63 + bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b); 64 + int (*comp)(void *key, struct latch_tree_node *b); 65 + }; 66 + 67 + static __always_inline struct latch_tree_node * 68 + __lt_from_rb(struct rb_node *node, int idx) 69 + { 70 + return container_of(node, struct latch_tree_node, node[idx]); 71 + } 72 + 73 + static __always_inline void 74 + __lt_insert(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx, 75 + bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b)) 76 + { 77 + struct rb_root *root = &ltr->tree[idx]; 78 + struct rb_node **link = &root->rb_node; 79 + struct rb_node *node = &ltn->node[idx]; 80 + struct rb_node *parent = NULL; 81 + struct latch_tree_node *ltp; 82 + 83 + while (*link) { 84 + parent = *link; 85 + ltp = __lt_from_rb(parent, idx); 86 + 87 + if (less(ltn, ltp)) 88 + link = &parent->rb_left; 89 + else 90 + link = &parent->rb_right; 91 + } 92 + 93 + rb_link_node_rcu(node, parent, link); 94 + rb_insert_color(node, root); 95 + } 96 + 97 + static 
__always_inline void 98 + __lt_erase(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx) 99 + { 100 + rb_erase(&ltn->node[idx], &ltr->tree[idx]); 101 + } 102 + 103 + static __always_inline struct latch_tree_node * 104 + __lt_find(void *key, struct latch_tree_root *ltr, int idx, 105 + int (*comp)(void *key, struct latch_tree_node *node)) 106 + { 107 + struct rb_node *node = rcu_dereference_raw(ltr->tree[idx].rb_node); 108 + struct latch_tree_node *ltn; 109 + int c; 110 + 111 + while (node) { 112 + ltn = __lt_from_rb(node, idx); 113 + c = comp(key, ltn); 114 + 115 + if (c < 0) 116 + node = rcu_dereference_raw(node->rb_left); 117 + else if (c > 0) 118 + node = rcu_dereference_raw(node->rb_right); 119 + else 120 + return ltn; 121 + } 122 + 123 + return NULL; 124 + } 125 + 126 + /** 127 + * latch_tree_insert() - insert @node into the trees @root 128 + * @node: nodes to insert 129 + * @root: trees to insert @node into 130 + * @ops: operators defining the node order 131 + * 132 + * It inserts @node into @root in an ordered fashion such that we can always 133 + * observe one complete tree. See the comment for raw_write_seqcount_latch(). 134 + * 135 + * The inserts use rcu_assign_pointer() to publish the element such that the 136 + * tree structure is stored before we can observe the new @node. 137 + * 138 + * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be 139 + * serialized. 
140 + */ 141 + static __always_inline void 142 + latch_tree_insert(struct latch_tree_node *node, 143 + struct latch_tree_root *root, 144 + const struct latch_tree_ops *ops) 145 + { 146 + raw_write_seqcount_latch(&root->seq); 147 + __lt_insert(node, root, 0, ops->less); 148 + raw_write_seqcount_latch(&root->seq); 149 + __lt_insert(node, root, 1, ops->less); 150 + } 151 + 152 + /** 153 + * latch_tree_erase() - removes @node from the trees @root 154 + * @node: nodes to remove 155 + * @root: trees to remove @node from 156 + * @ops: operators defining the node order 157 + * 158 + * Removes @node from the trees @root in an ordered fashion such that we can 159 + * always observe one complete tree. See the comment for 160 + * raw_write_seqcount_latch(). 161 + * 162 + * It is assumed that @node will observe one RCU quiescent state before being 163 + * reused or freed. 164 + * 165 + * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be 166 + * serialized. 167 + */ 168 + static __always_inline void 169 + latch_tree_erase(struct latch_tree_node *node, 170 + struct latch_tree_root *root, 171 + const struct latch_tree_ops *ops) 172 + { 173 + raw_write_seqcount_latch(&root->seq); 174 + __lt_erase(node, root, 0); 175 + raw_write_seqcount_latch(&root->seq); 176 + __lt_erase(node, root, 1); 177 + } 178 + 179 + /** 180 + * latch_tree_find() - find the node matching @key in the trees @root 181 + * @key: search key 182 + * @root: trees to search for @key 183 + * @ops: operators defining the node order 184 + * 185 + * Does a lockless lookup in the trees @root for the node matching @key. 186 + * 187 + * It is assumed that this is called while holding the appropriate RCU read 188 + * side lock. 189 + * 190 + * If the operators define a partial order on the elements (there are multiple 191 + * elements which have the same key value) it is undefined which of these 192 + * elements will be found. 
Nor is it possible to iterate the tree to find 193 + * further elements with the same key value. 194 + * 195 + * Returns: a pointer to the node matching @key or NULL. 196 + */ 197 + static __always_inline struct latch_tree_node * 198 + latch_tree_find(void *key, struct latch_tree_root *root, 199 + const struct latch_tree_ops *ops) 200 + { 201 + struct latch_tree_node *node; 202 + unsigned int seq; 203 + 204 + do { 205 + seq = raw_read_seqcount_latch(&root->seq); 206 + node = __lt_find(key, root, seq & 1, ops->comp); 207 + } while (read_seqcount_retry(&root->seq, seq)); 208 + 209 + return node; 210 + } 211 + 212 + #endif /* RB_TREE_LATCH_H */
-15
include/linux/rcupdate.h
··· 633 633 #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) 634 634 635 635 /** 636 - * lockless_dereference() - safely load a pointer for later dereference 637 - * @p: The pointer to load 638 - * 639 - * Similar to rcu_dereference(), but for situations where the pointed-to 640 - * object's lifetime is managed by something other than RCU. That 641 - * "something other" might be reference counting or simple immortality. 642 - */ 643 - #define lockless_dereference(p) \ 644 - ({ \ 645 - typeof(p) _________p1 = READ_ONCE(p); \ 646 - smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ 647 - (_________p1); \ 648 - }) 649 - 650 - /** 651 636 * rcu_assign_pointer() - assign to RCU-protected pointer 652 637 * @p: pointer to assign to 653 638 * @v: value to assign (publish)
+80 -1
include/linux/seqlock.h
··· 35 35 #include <linux/spinlock.h> 36 36 #include <linux/preempt.h> 37 37 #include <linux/lockdep.h> 38 + #include <linux/compiler.h> 38 39 #include <asm/processor.h> 39 40 40 41 /* ··· 275 274 s->sequence++; 276 275 } 277 276 278 - /* 277 + static inline int raw_read_seqcount_latch(seqcount_t *s) 278 + { 279 + return lockless_dereference(s->sequence); 280 + } 281 + 282 + /** 279 283 * raw_write_seqcount_latch - redirect readers to even/odd copy 280 284 * @s: pointer to seqcount_t 285 + * 286 + * The latch technique is a multiversion concurrency control method that allows 287 + * queries during non-atomic modifications. If you can guarantee queries never 288 + * interrupt the modification -- e.g. the concurrency is strictly between CPUs 289 + * -- you most likely do not need this. 290 + * 291 + * Where the traditional RCU/lockless data structures rely on atomic 292 + * modifications to ensure queries observe either the old or the new state the 293 + * latch allows the same for non-atomic updates. The trade-off is doubling the 294 + * cost of storage; we have to maintain two copies of the entire data 295 + * structure. 296 + * 297 + * Very simply put: we first modify one copy and then the other. This ensures 298 + * there is always one copy in a stable state, ready to give us an answer. 299 + * 300 + * The basic form is a data structure like: 301 + * 302 + * struct latch_struct { 303 + * seqcount_t seq; 304 + * struct data_struct data[2]; 305 + * }; 306 + * 307 + * Where a modification, which is assumed to be externally serialized, does the 308 + * following: 309 + * 310 + * void latch_modify(struct latch_struct *latch, ...) 
311 + * { 312 + * smp_wmb(); <- Ensure that the last data[1] update is visible 313 + * latch->seq++; 314 + * smp_wmb(); <- Ensure that the seqcount update is visible 315 + * 316 + * modify(latch->data[0], ...); 317 + * 318 + * smp_wmb(); <- Ensure that the data[0] update is visible 319 + * latch->seq++; 320 + * smp_wmb(); <- Ensure that the seqcount update is visible 321 + * 322 + * modify(latch->data[1], ...); 323 + * } 324 + * 325 + * The query will have a form like: 326 + * 327 + * struct entry *latch_query(struct latch_struct *latch, ...) 328 + * { 329 + * struct entry *entry; 330 + * unsigned seq, idx; 331 + * 332 + * do { 333 + * seq = lockless_dereference(latch->seq); 334 + * 335 + * idx = seq & 0x01; 336 + * entry = data_query(latch->data[idx], ...); 337 + * 338 + * smp_rmb(); 339 + * } while (seq != latch->seq); 340 + * 341 + * return entry; 342 + * } 343 + * 344 + * So during the modification, queries are first redirected to data[1]. Then we 345 + * modify data[0]. When that is complete, we redirect queries back to data[0] 346 + * and we can modify data[1]. 347 + * 348 + * NOTE: The non-requirement for atomic modifications does _NOT_ include 349 + * the publishing of new entries in the case where data is a dynamic 350 + * data structure. 351 + * 352 + * An iteration might start in data[0] and get suspended long enough 353 + * to miss an entire modification sequence, once it resumes it might 354 + * observe the new entry. 355 + * 356 + * NOTE: When data is a dynamic data structure; one should use regular RCU 357 + * patterns to manage the lifetimes of the objects within. 281 358 */ 282 359 static inline void raw_write_seqcount_latch(seqcount_t *s) 283 360 {
+14 -15
init/Kconfig
··· 1941 1941 bool "Compress modules on installation" 1942 1942 depends on MODULES 1943 1943 help 1944 - This option compresses the kernel modules when 'make 1945 - modules_install' is run. 1946 1944 1947 - The modules will be compressed either using gzip or xz depend on the 1948 - choice made in "Compression algorithm". 1945 + Compresses kernel modules when 'make modules_install' is run; gzip or 1946 + xz depending on "Compression algorithm" below. 1949 1947 1950 - module-init-tools has support for gzip format while kmod handle gzip 1951 - and xz compressed modules. 1948 + module-init-tools MAY support gzip, and kmod MAY support gzip and xz. 1952 1949 1953 - When a kernel module is installed from outside of the main kernel 1954 - source and uses the Kbuild system for installing modules then that 1955 - kernel module will also be compressed when it is installed. 1950 + Out-of-tree kernel modules installed using Kbuild will also be 1951 + compressed upon installation. 1956 1952 1957 - This option provides little benefit when the modules are to be used inside 1958 - an initrd or initramfs, it generally is more efficient to compress the whole 1959 - initrd or initramfs instead. 1953 + Note: for modules inside an initrd or initramfs, it's more efficient 1954 + to compress the whole initrd or initramfs instead. 1960 1955 1961 - This is fully compatible with signed modules while the signed module is 1962 - compressed. module-init-tools or kmod handles decompression and provide to 1963 - other layer the uncompressed but signed payload. 1956 + Note: This is fully compatible with signed modules. 1957 + 1958 + If in doubt, say N. 1964 1959 1965 1960 choice 1966 1961 prompt "Compression algorithm" ··· 1976 1981 endchoice 1977 1982 1978 1983 endif # MODULES 1984 + 1985 + config MODULES_TREE_LOOKUP 1986 + def_bool y 1987 + depends on PERF_EVENTS || TRACING 1979 1988 1980 1989 config INIT_ALL_POSSIBLE 1981 1990 bool
+6 -4
kernel/jump_label.c
··· 302 302 continue; 303 303 304 304 key = iterk; 305 - if (__module_address(iter->key) == mod) { 305 + if (within_module(iter->key, mod)) { 306 306 /* 307 307 * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. 308 308 */ ··· 339 339 340 340 key = (struct static_key *)(unsigned long)iter->key; 341 341 342 - if (__module_address(iter->key) == mod) 342 + if (within_module(iter->key, mod)) 343 343 continue; 344 344 345 345 prev = &key->next; ··· 443 443 { 444 444 struct jump_entry *stop = __stop___jump_table; 445 445 struct jump_entry *entry = jump_label_get_entries(key); 446 - 447 446 #ifdef CONFIG_MODULES 448 - struct module *mod = __module_address((unsigned long)key); 447 + struct module *mod; 449 448 450 449 __jump_label_mod_update(key, enable); 451 450 451 + preempt_disable(); 452 + mod = __module_address((unsigned long)key); 452 453 if (mod) 453 454 stop = mod->jump_entries + mod->num_jump_entries; 455 + preempt_enable(); 454 456 #endif 455 457 /* if there are no users, entry can be NULL */ 456 458 if (entry)
+237 -80
kernel/module.c
··· 101 101 DEFINE_MUTEX(module_mutex); 102 102 EXPORT_SYMBOL_GPL(module_mutex); 103 103 static LIST_HEAD(modules); 104 + 105 + #ifdef CONFIG_MODULES_TREE_LOOKUP 106 + 107 + /* 108 + * Use a latched RB-tree for __module_address(); this allows us to use 109 + * RCU-sched lookups of the address from any context. 110 + * 111 + * Because modules have two address ranges: init and core, we need two 112 + * latch_tree_nodes entries. Therefore we need the back-pointer from 113 + * mod_tree_node. 114 + * 115 + * Because init ranges are short lived we mark them unlikely and have placed 116 + * them outside the critical cacheline in struct module. 117 + * 118 + * This is conditional on PERF_EVENTS || TRACING because those can really hit 119 + * __module_address() hard by doing a lot of stack unwinding; potentially from 120 + * NMI context. 121 + */ 122 + 123 + static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) 124 + { 125 + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); 126 + struct module *mod = mtn->mod; 127 + 128 + if (unlikely(mtn == &mod->mtn_init)) 129 + return (unsigned long)mod->module_init; 130 + 131 + return (unsigned long)mod->module_core; 132 + } 133 + 134 + static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n) 135 + { 136 + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); 137 + struct module *mod = mtn->mod; 138 + 139 + if (unlikely(mtn == &mod->mtn_init)) 140 + return (unsigned long)mod->init_size; 141 + 142 + return (unsigned long)mod->core_size; 143 + } 144 + 145 + static __always_inline bool 146 + mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b) 147 + { 148 + return __mod_tree_val(a) < __mod_tree_val(b); 149 + } 150 + 151 + static __always_inline int 152 + mod_tree_comp(void *key, struct latch_tree_node *n) 153 + { 154 + unsigned long val = (unsigned long)key; 155 + unsigned long start, end; 156 + 157 + start = __mod_tree_val(n); 158 + if 
(val < start) 159 + return -1; 160 + 161 + end = start + __mod_tree_size(n); 162 + if (val >= end) 163 + return 1; 164 + 165 + return 0; 166 + } 167 + 168 + static const struct latch_tree_ops mod_tree_ops = { 169 + .less = mod_tree_less, 170 + .comp = mod_tree_comp, 171 + }; 172 + 173 + static struct mod_tree_root { 174 + struct latch_tree_root root; 175 + unsigned long addr_min; 176 + unsigned long addr_max; 177 + } mod_tree __cacheline_aligned = { 178 + .addr_min = -1UL, 179 + }; 180 + 181 + #define module_addr_min mod_tree.addr_min 182 + #define module_addr_max mod_tree.addr_max 183 + 184 + static noinline void __mod_tree_insert(struct mod_tree_node *node) 185 + { 186 + latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops); 187 + } 188 + 189 + static void __mod_tree_remove(struct mod_tree_node *node) 190 + { 191 + latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops); 192 + } 193 + 194 + /* 195 + * These modifications: insert, remove_init and remove; are serialized by the 196 + * module_mutex. 
197 + */ 198 + static void mod_tree_insert(struct module *mod) 199 + { 200 + mod->mtn_core.mod = mod; 201 + mod->mtn_init.mod = mod; 202 + 203 + __mod_tree_insert(&mod->mtn_core); 204 + if (mod->init_size) 205 + __mod_tree_insert(&mod->mtn_init); 206 + } 207 + 208 + static void mod_tree_remove_init(struct module *mod) 209 + { 210 + if (mod->init_size) 211 + __mod_tree_remove(&mod->mtn_init); 212 + } 213 + 214 + static void mod_tree_remove(struct module *mod) 215 + { 216 + __mod_tree_remove(&mod->mtn_core); 217 + mod_tree_remove_init(mod); 218 + } 219 + 220 + static struct module *mod_find(unsigned long addr) 221 + { 222 + struct latch_tree_node *ltn; 223 + 224 + ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops); 225 + if (!ltn) 226 + return NULL; 227 + 228 + return container_of(ltn, struct mod_tree_node, node)->mod; 229 + } 230 + 231 + #else /* MODULES_TREE_LOOKUP */ 232 + 233 + static unsigned long module_addr_min = -1UL, module_addr_max = 0; 234 + 235 + static void mod_tree_insert(struct module *mod) { } 236 + static void mod_tree_remove_init(struct module *mod) { } 237 + static void mod_tree_remove(struct module *mod) { } 238 + 239 + static struct module *mod_find(unsigned long addr) 240 + { 241 + struct module *mod; 242 + 243 + list_for_each_entry_rcu(mod, &modules, list) { 244 + if (within_module(addr, mod)) 245 + return mod; 246 + } 247 + 248 + return NULL; 249 + } 250 + 251 + #endif /* MODULES_TREE_LOOKUP */ 252 + 253 + /* 254 + * Bounds of module text, for speeding up __module_address. 255 + * Protected by module_mutex. 
256 + */ 257 + static void __mod_update_bounds(void *base, unsigned int size) 258 + { 259 + unsigned long min = (unsigned long)base; 260 + unsigned long max = min + size; 261 + 262 + if (min < module_addr_min) 263 + module_addr_min = min; 264 + if (max > module_addr_max) 265 + module_addr_max = max; 266 + } 267 + 268 + static void mod_update_bounds(struct module *mod) 269 + { 270 + __mod_update_bounds(mod->module_core, mod->core_size); 271 + if (mod->init_size) 272 + __mod_update_bounds(mod->module_init, mod->init_size); 273 + } 274 + 104 275 #ifdef CONFIG_KGDB_KDB 105 276 struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ 106 277 #endif /* CONFIG_KGDB_KDB */ 107 278 108 - #ifdef CONFIG_MODULE_SIG 109 - #ifdef CONFIG_MODULE_SIG_FORCE 110 - static bool sig_enforce = true; 111 - #else 112 - static bool sig_enforce = false; 113 - 114 - static int param_set_bool_enable_only(const char *val, 115 - const struct kernel_param *kp) 279 + static void module_assert_mutex(void) 116 280 { 117 - int err; 118 - bool test; 119 - struct kernel_param dummy_kp = *kp; 120 - 121 - dummy_kp.arg = &test; 122 - 123 - err = param_set_bool(val, &dummy_kp); 124 - if (err) 125 - return err; 126 - 127 - /* Don't let them unset it once it's set! 
*/ 128 - if (!test && sig_enforce) 129 - return -EROFS; 130 - 131 - if (test) 132 - sig_enforce = true; 133 - return 0; 281 + lockdep_assert_held(&module_mutex); 134 282 } 135 283 136 - static const struct kernel_param_ops param_ops_bool_enable_only = { 137 - .flags = KERNEL_PARAM_OPS_FL_NOARG, 138 - .set = param_set_bool_enable_only, 139 - .get = param_get_bool, 140 - }; 141 - #define param_check_bool_enable_only param_check_bool 284 + static void module_assert_mutex_or_preempt(void) 285 + { 286 + #ifdef CONFIG_LOCKDEP 287 + if (unlikely(!debug_locks)) 288 + return; 142 289 290 + WARN_ON(!rcu_read_lock_sched_held() && 291 + !lockdep_is_held(&module_mutex)); 292 + #endif 293 + } 294 + 295 + static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); 296 + #ifndef CONFIG_MODULE_SIG_FORCE 143 297 module_param(sig_enforce, bool_enable_only, 0644); 144 298 #endif /* !CONFIG_MODULE_SIG_FORCE */ 145 - #endif /* CONFIG_MODULE_SIG */ 146 299 147 300 /* Block module loading/unloading? */ 148 301 int modules_disabled = 0; ··· 305 152 static DECLARE_WAIT_QUEUE_HEAD(module_wq); 306 153 307 154 static BLOCKING_NOTIFIER_HEAD(module_notify_list); 308 - 309 - /* Bounds of module allocation, for speeding __module_address. 310 - * Protected by module_mutex. 
*/ 311 - static unsigned long module_addr_min = -1UL, module_addr_max = 0; 312 155 313 156 int register_module_notifier(struct notifier_block *nb) 314 157 { ··· 467 318 #endif 468 319 }; 469 320 321 + module_assert_mutex_or_preempt(); 322 + 470 323 if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) 471 324 return true; 472 325 ··· 607 456 bool even_unformed) 608 457 { 609 458 struct module *mod; 459 + 460 + module_assert_mutex(); 610 461 611 462 list_for_each_entry(mod, &modules, list) { 612 463 if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) ··· 1322 1169 { 1323 1170 const unsigned long *crc; 1324 1171 1325 - /* Since this should be found in kernel (which can't be removed), 1326 - * no locking is necessary. */ 1172 + /* 1173 + * Since this should be found in kernel (which can't be removed), no 1174 + * locking is necessary -- use preempt_disable() to placate lockdep. 1175 + */ 1176 + preempt_disable(); 1327 1177 if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL, 1328 - &crc, true, false)) 1178 + &crc, true, false)) { 1179 + preempt_enable(); 1329 1180 BUG(); 1181 + } 1182 + preempt_enable(); 1330 1183 return check_version(sechdrs, versindex, 1331 1184 VMLINUX_SYMBOL_STR(module_layout), mod, crc, 1332 1185 NULL); ··· 1820 1661 mod_kobject_put(mod); 1821 1662 } 1822 1663 1664 + static void init_param_lock(struct module *mod) 1665 + { 1666 + mutex_init(&mod->param_lock); 1667 + } 1823 1668 #else /* !CONFIG_SYSFS */ 1824 1669 1825 1670 static int mod_sysfs_setup(struct module *mod, ··· 1846 1683 { 1847 1684 } 1848 1685 1686 + static void init_param_lock(struct module *mod) 1687 + { 1688 + } 1849 1689 #endif /* CONFIG_SYSFS */ 1850 1690 1851 1691 static void mod_sysfs_teardown(struct module *mod) ··· 2018 1852 mutex_lock(&module_mutex); 2019 1853 /* Unlink carefully: kallsyms could be walking list. 
*/ 2020 1854 list_del_rcu(&mod->list); 1855 + mod_tree_remove(mod); 2021 1856 /* Remove this module from bug list, this uses list_del_rcu */ 2022 1857 module_bug_cleanup(mod); 2023 - /* Wait for RCU synchronizing before releasing mod->list and buglist. */ 2024 - synchronize_rcu(); 1858 + /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ 1859 + synchronize_sched(); 2025 1860 mutex_unlock(&module_mutex); 2026 1861 2027 1862 /* This may be NULL, but that's OK */ ··· 2551 2384 return vmalloc_exec(size); 2552 2385 } 2553 2386 2554 - static void *module_alloc_update_bounds(unsigned long size) 2555 - { 2556 - void *ret = module_alloc(size); 2557 - 2558 - if (ret) { 2559 - mutex_lock(&module_mutex); 2560 - /* Update module bounds. */ 2561 - if ((unsigned long)ret < module_addr_min) 2562 - module_addr_min = (unsigned long)ret; 2563 - if ((unsigned long)ret + size > module_addr_max) 2564 - module_addr_max = (unsigned long)ret + size; 2565 - mutex_unlock(&module_mutex); 2566 - } 2567 - return ret; 2568 - } 2569 - 2570 2387 #ifdef CONFIG_DEBUG_KMEMLEAK 2571 2388 static void kmemleak_load_module(const struct module *mod, 2572 2389 const struct load_info *info) ··· 2956 2805 void *ptr; 2957 2806 2958 2807 /* Do the allocs. */ 2959 - ptr = module_alloc_update_bounds(mod->core_size); 2808 + ptr = module_alloc(mod->core_size); 2960 2809 /* 2961 2810 * The pointer to this block is stored in the module structure 2962 2811 * which is inside the block. Just mark it as not being a ··· 2970 2819 mod->module_core = ptr; 2971 2820 2972 2821 if (mod->init_size) { 2973 - ptr = module_alloc_update_bounds(mod->init_size); 2822 + ptr = module_alloc(mod->init_size); 2974 2823 /* 2975 2824 * The pointer to this block is stored in the module structure 2976 2825 * which is inside the block. 
This block doesn't need to be ··· 3270 3119 mod->symtab = mod->core_symtab; 3271 3120 mod->strtab = mod->core_strtab; 3272 3121 #endif 3122 + mod_tree_remove_init(mod); 3273 3123 unset_module_init_ro_nx(mod); 3274 3124 module_arch_freeing_init(mod); 3275 3125 mod->module_init = NULL; ··· 3279 3127 mod->init_text_size = 0; 3280 3128 /* 3281 3129 * We want to free module_init, but be aware that kallsyms may be 3282 - * walking this with preempt disabled. In all the failure paths, 3283 - * we call synchronize_rcu/synchronize_sched, but we don't want 3284 - * to slow down the success path, so use actual RCU here. 3130 + * walking this with preempt disabled. In all the failure paths, we 3131 + * call synchronize_sched(), but we don't want to slow down the success 3132 + * path, so use actual RCU here. 3285 3133 */ 3286 - call_rcu(&freeinit->rcu, do_free_init); 3134 + call_rcu_sched(&freeinit->rcu, do_free_init); 3287 3135 mutex_unlock(&module_mutex); 3288 3136 wake_up_all(&module_wq); 3289 3137 ··· 3340 3188 err = -EEXIST; 3341 3189 goto out; 3342 3190 } 3191 + mod_update_bounds(mod); 3343 3192 list_add_rcu(&mod->list, &modules); 3193 + mod_tree_insert(mod); 3344 3194 err = 0; 3345 3195 3346 3196 out: ··· 3458 3304 if (err) 3459 3305 goto unlink_mod; 3460 3306 3307 + init_param_lock(mod); 3308 + 3461 3309 /* Now we've got everything in the final locations, we can 3462 3310 * find optional sections. */ 3463 3311 err = find_module_sections(mod, info); ··· 3558 3402 /* Unlink carefully: kallsyms could be walking list. */ 3559 3403 list_del_rcu(&mod->list); 3560 3404 wake_up_all(&module_wq); 3561 - /* Wait for RCU synchronizing before releasing mod->list. */ 3562 - synchronize_rcu(); 3405 + /* Wait for RCU-sched synchronizing before releasing mod->list. 
*/ 3406 + synchronize_sched(); 3563 3407 mutex_unlock(&module_mutex); 3564 3408 free_module: 3565 3409 /* Free lock-classes; relies on the preceding sync_rcu() */ ··· 3683 3527 char **modname, 3684 3528 char *namebuf) 3685 3529 { 3686 - struct module *mod; 3687 3530 const char *ret = NULL; 3531 + struct module *mod; 3688 3532 3689 3533 preempt_disable(); 3690 - list_for_each_entry_rcu(mod, &modules, list) { 3691 - if (mod->state == MODULE_STATE_UNFORMED) 3692 - continue; 3693 - if (within_module(addr, mod)) { 3694 - if (modname) 3695 - *modname = mod->name; 3696 - ret = get_ksymbol(mod, addr, size, offset); 3697 - break; 3698 - } 3534 + mod = __module_address(addr); 3535 + if (mod) { 3536 + if (modname) 3537 + *modname = mod->name; 3538 + ret = get_ksymbol(mod, addr, size, offset); 3699 3539 } 3700 3540 /* Make a copy in here where it's safe */ 3701 3541 if (ret) { ··· 3699 3547 ret = namebuf; 3700 3548 } 3701 3549 preempt_enable(); 3550 + 3702 3551 return ret; 3703 3552 } 3704 3553 ··· 3822 3669 struct module *mod; 3823 3670 unsigned int i; 3824 3671 int ret; 3672 + 3673 + module_assert_mutex(); 3825 3674 3826 3675 list_for_each_entry(mod, &modules, list) { 3827 3676 if (mod->state == MODULE_STATE_UNFORMED) ··· 3999 3844 if (addr < module_addr_min || addr > module_addr_max) 4000 3845 return NULL; 4001 3846 4002 - list_for_each_entry_rcu(mod, &modules, list) { 3847 + module_assert_mutex_or_preempt(); 3848 + 3849 + mod = mod_find(addr); 3850 + if (mod) { 3851 + BUG_ON(!within_module(addr, mod)); 4003 3852 if (mod->state == MODULE_STATE_UNFORMED) 4004 - continue; 4005 - if (within_module(addr, mod)) 4006 - return mod; 3853 + mod = NULL; 4007 3854 } 4008 - return NULL; 3855 + return mod; 4009 3856 } 4010 3857 EXPORT_SYMBOL_GPL(__module_address); 4011 3858
+86 -30
kernel/params.c
··· 25 25 #include <linux/slab.h> 26 26 #include <linux/ctype.h> 27 27 28 - /* Protects all parameters, and incidentally kmalloced_param list. */ 28 + #ifdef CONFIG_SYSFS 29 + /* Protects all built-in parameters, modules use their own param_lock */ 29 30 static DEFINE_MUTEX(param_lock); 31 + 32 + /* Use the module's mutex, or if built-in use the built-in mutex */ 33 + #ifdef CONFIG_MODULES 34 + #define KPARAM_MUTEX(mod) ((mod) ? &(mod)->param_lock : &param_lock) 35 + #else 36 + #define KPARAM_MUTEX(mod) (&param_lock) 37 + #endif 38 + 39 + static inline void check_kparam_locked(struct module *mod) 40 + { 41 + BUG_ON(!mutex_is_locked(KPARAM_MUTEX(mod))); 42 + } 43 + #else 44 + static inline void check_kparam_locked(struct module *mod) 45 + { 46 + } 47 + #endif /* !CONFIG_SYSFS */ 30 48 31 49 /* This just allows us to keep track of which parameters are kmalloced. */ 32 50 struct kmalloced_param { ··· 52 34 char val[]; 53 35 }; 54 36 static LIST_HEAD(kmalloced_params); 37 + static DEFINE_SPINLOCK(kmalloced_params_lock); 55 38 56 39 static void *kmalloc_parameter(unsigned int size) 57 40 { ··· 62 43 if (!p) 63 44 return NULL; 64 45 46 + spin_lock(&kmalloced_params_lock); 65 47 list_add(&p->list, &kmalloced_params); 48 + spin_unlock(&kmalloced_params_lock); 49 + 66 50 return p->val; 67 51 } 68 52 ··· 74 52 { 75 53 struct kmalloced_param *p; 76 54 55 + spin_lock(&kmalloced_params_lock); 77 56 list_for_each_entry(p, &kmalloced_params, list) { 78 57 if (p->val == param) { 79 58 list_del(&p->list); ··· 82 59 break; 83 60 } 84 61 } 62 + spin_unlock(&kmalloced_params_lock); 85 63 } 86 64 87 65 static char dash2underscore(char c) ··· 143 119 return -EINVAL; 144 120 pr_debug("handling %s with %p\n", param, 145 121 params[i].ops->set); 146 - mutex_lock(&param_lock); 122 + kernel_param_lock(params[i].mod); 147 123 param_check_unsafe(&params[i]); 148 124 err = params[i].ops->set(val, &params[i]); 149 - mutex_unlock(&param_lock); 125 + kernel_param_unlock(params[i].mod); 150 126 
return err; 151 127 } 152 128 } ··· 278 254 return scnprintf(buffer, PAGE_SIZE, format, \ 279 255 *((type *)kp->arg)); \ 280 256 } \ 281 - struct kernel_param_ops param_ops_##name = { \ 257 + const struct kernel_param_ops param_ops_##name = { \ 282 258 .set = param_set_##name, \ 283 259 .get = param_get_##name, \ 284 260 }; \ ··· 330 306 maybe_kfree_parameter(*((char **)arg)); 331 307 } 332 308 333 - struct kernel_param_ops param_ops_charp = { 309 + const struct kernel_param_ops param_ops_charp = { 334 310 .set = param_set_charp, 335 311 .get = param_get_charp, 336 312 .free = param_free_charp, ··· 355 331 } 356 332 EXPORT_SYMBOL(param_get_bool); 357 333 358 - struct kernel_param_ops param_ops_bool = { 334 + const struct kernel_param_ops param_ops_bool = { 359 335 .flags = KERNEL_PARAM_OPS_FL_NOARG, 360 336 .set = param_set_bool, 361 337 .get = param_get_bool, 362 338 }; 363 339 EXPORT_SYMBOL(param_ops_bool); 340 + 341 + int param_set_bool_enable_only(const char *val, const struct kernel_param *kp) 342 + { 343 + int err = 0; 344 + bool new_value; 345 + bool orig_value = *(bool *)kp->arg; 346 + struct kernel_param dummy_kp = *kp; 347 + 348 + dummy_kp.arg = &new_value; 349 + 350 + err = param_set_bool(val, &dummy_kp); 351 + if (err) 352 + return err; 353 + 354 + /* Don't let them unset it once it's set! */ 355 + if (!new_value && orig_value) 356 + return -EROFS; 357 + 358 + if (new_value) 359 + err = param_set_bool(val, kp); 360 + 361 + return err; 362 + } 363 + EXPORT_SYMBOL_GPL(param_set_bool_enable_only); 364 + 365 + const struct kernel_param_ops param_ops_bool_enable_only = { 366 + .flags = KERNEL_PARAM_OPS_FL_NOARG, 367 + .set = param_set_bool_enable_only, 368 + .get = param_get_bool, 369 + }; 370 + EXPORT_SYMBOL_GPL(param_ops_bool_enable_only); 364 371 365 372 /* This one must be bool. 
*/ 366 373 int param_set_invbool(const char *val, const struct kernel_param *kp) ··· 414 359 } 415 360 EXPORT_SYMBOL(param_get_invbool); 416 361 417 - struct kernel_param_ops param_ops_invbool = { 362 + const struct kernel_param_ops param_ops_invbool = { 418 363 .set = param_set_invbool, 419 364 .get = param_get_invbool, 420 365 }; ··· 422 367 423 368 int param_set_bint(const char *val, const struct kernel_param *kp) 424 369 { 425 - struct kernel_param boolkp; 370 + /* Match bool exactly, by re-using it. */ 371 + struct kernel_param boolkp = *kp; 426 372 bool v; 427 373 int ret; 428 374 429 - /* Match bool exactly, by re-using it. */ 430 - boolkp = *kp; 431 375 boolkp.arg = &v; 432 376 433 377 ret = param_set_bool(val, &boolkp); ··· 436 382 } 437 383 EXPORT_SYMBOL(param_set_bint); 438 384 439 - struct kernel_param_ops param_ops_bint = { 385 + const struct kernel_param_ops param_ops_bint = { 440 386 .flags = KERNEL_PARAM_OPS_FL_NOARG, 441 387 .set = param_set_bint, 442 388 .get = param_get_int, ··· 444 390 EXPORT_SYMBOL(param_ops_bint); 445 391 446 392 /* We break the rule and mangle the string. 
*/ 447 - static int param_array(const char *name, 393 + static int param_array(struct module *mod, 394 + const char *name, 448 395 const char *val, 449 396 unsigned int min, unsigned int max, 450 397 void *elem, int elemsize, ··· 476 421 /* nul-terminate and parse */ 477 422 save = val[len]; 478 423 ((char *)val)[len] = '\0'; 479 - BUG_ON(!mutex_is_locked(&param_lock)); 424 + check_kparam_locked(mod); 480 425 ret = set(val, &kp); 481 426 482 427 if (ret != 0) ··· 498 443 const struct kparam_array *arr = kp->arr; 499 444 unsigned int temp_num; 500 445 501 - return param_array(kp->name, val, 1, arr->max, arr->elem, 446 + return param_array(kp->mod, kp->name, val, 1, arr->max, arr->elem, 502 447 arr->elemsize, arr->ops->set, kp->level, 503 448 arr->num ?: &temp_num); 504 449 } ··· 507 452 { 508 453 int i, off, ret; 509 454 const struct kparam_array *arr = kp->arr; 510 - struct kernel_param p; 455 + struct kernel_param p = *kp; 511 456 512 - p = *kp; 513 457 for (i = off = 0; i < (arr->num ? 
*arr->num : arr->max); i++) { 514 458 if (i) 515 459 buffer[off++] = ','; 516 460 p.arg = arr->elem + arr->elemsize * i; 517 - BUG_ON(!mutex_is_locked(&param_lock)); 461 + check_kparam_locked(p.mod); 518 462 ret = arr->ops->get(buffer + off, &p); 519 463 if (ret < 0) 520 464 return ret; ··· 533 479 arr->ops->free(arr->elem + arr->elemsize * i); 534 480 } 535 481 536 - struct kernel_param_ops param_array_ops = { 482 + const struct kernel_param_ops param_array_ops = { 537 483 .set = param_array_set, 538 484 .get = param_array_get, 539 485 .free = param_array_free, ··· 561 507 } 562 508 EXPORT_SYMBOL(param_get_string); 563 509 564 - struct kernel_param_ops param_ops_string = { 510 + const struct kernel_param_ops param_ops_string = { 565 511 .set = param_set_copystring, 566 512 .get = param_get_string, 567 513 }; ··· 596 542 if (!attribute->param->ops->get) 597 543 return -EPERM; 598 544 599 - mutex_lock(&param_lock); 545 + kernel_param_lock(mk->mod); 600 546 count = attribute->param->ops->get(buf, attribute->param); 601 - mutex_unlock(&param_lock); 547 + kernel_param_unlock(mk->mod); 602 548 if (count > 0) { 603 549 strcat(buf, "\n"); 604 550 ++count; ··· 608 554 609 555 /* sysfs always hands a nul-terminated string in buf. We rely on that. 
*/ 610 556 static ssize_t param_attr_store(struct module_attribute *mattr, 611 - struct module_kobject *km, 557 + struct module_kobject *mk, 612 558 const char *buf, size_t len) 613 559 { 614 560 int err; ··· 617 563 if (!attribute->param->ops->set) 618 564 return -EPERM; 619 565 620 - mutex_lock(&param_lock); 566 + kernel_param_lock(mk->mod); 621 567 param_check_unsafe(attribute->param); 622 568 err = attribute->param->ops->set(buf, attribute->param); 623 - mutex_unlock(&param_lock); 569 + kernel_param_unlock(mk->mod); 624 570 if (!err) 625 571 return len; 626 572 return err; ··· 634 580 #endif 635 581 636 582 #ifdef CONFIG_SYSFS 637 - void __kernel_param_lock(void) 583 + void kernel_param_lock(struct module *mod) 638 584 { 639 - mutex_lock(&param_lock); 585 + mutex_lock(KPARAM_MUTEX(mod)); 640 586 } 641 - EXPORT_SYMBOL(__kernel_param_lock); 642 587 643 - void __kernel_param_unlock(void) 588 + void kernel_param_unlock(struct module *mod) 644 589 { 645 - mutex_unlock(&param_lock); 590 + mutex_unlock(KPARAM_MUTEX(mod)); 646 591 } 647 - EXPORT_SYMBOL(__kernel_param_unlock); 592 + 593 + EXPORT_SYMBOL(kernel_param_lock); 594 + EXPORT_SYMBOL(kernel_param_unlock); 648 595 649 596 /* 650 597 * add_sysfs_param - add a parameter to sysfs ··· 911 856 mk = locate_module_kobject(vattr->module_name); 912 857 if (mk) { 913 858 err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); 859 + WARN_ON_ONCE(err); 914 860 kobject_uevent(&mk->kobj, KOBJ_ADD); 915 861 kobject_put(&mk->kobj); 916 862 }
+2 -27
kernel/time/timekeeping.c
··· 319 319 * We want to use this from any context including NMI and tracing / 320 320 * instrumenting the timekeeping code itself. 321 321 * 322 - * So we handle this differently than the other timekeeping accessor 323 - * functions which retry when the sequence count has changed. The 324 - * update side does: 325 - * 326 - * smp_wmb(); <- Ensure that the last base[1] update is visible 327 - * tkf->seq++; 328 - * smp_wmb(); <- Ensure that the seqcount update is visible 329 - * update(tkf->base[0], tkr); 330 - * smp_wmb(); <- Ensure that the base[0] update is visible 331 - * tkf->seq++; 332 - * smp_wmb(); <- Ensure that the seqcount update is visible 333 - * update(tkf->base[1], tkr); 334 - * 335 - * The reader side does: 336 - * 337 - * do { 338 - * seq = tkf->seq; 339 - * smp_rmb(); 340 - * idx = seq & 0x01; 341 - * now = now(tkf->base[idx]); 342 - * smp_rmb(); 343 - * } while (seq != tkf->seq) 344 - * 345 - * As long as we update base[0] readers are forced off to 346 - * base[1]. Once base[0] is updated readers are redirected to base[0] 347 - * and the base[1] update takes place. 322 + * Employ the latch technique; see @raw_write_seqcount_latch. 348 323 * 349 324 * So if a NMI hits the update of base[0] then it will use base[1] 350 325 * which is still consistent. In the worst case this can result is a ··· 382 407 u64 now; 383 408 384 409 do { 385 - seq = raw_read_seqcount(&tkf->seq); 410 + seq = raw_read_seqcount_latch(&tkf->seq); 386 411 tkr = tkf->base + (seq & 0x01); 387 412 now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); 388 413 } while (read_seqcount_retry(&tkf->seq, seq));
+1 -6
kernel/workqueue.c
··· 285 285 module_param_named(disable_numa, wq_disable_numa, bool, 0444); 286 286 287 287 /* see the comment above the definition of WQ_POWER_EFFICIENT */ 288 - #ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT 289 - static bool wq_power_efficient = true; 290 - #else 291 - static bool wq_power_efficient; 292 - #endif 293 - 288 + static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT); 294 289 module_param_named(power_efficient, wq_power_efficient, bool, 0444); 295 290 296 291 static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
+5 -2
lib/bug.c
··· 66 66 struct module *mod; 67 67 const struct bug_entry *bug = NULL; 68 68 69 - rcu_read_lock(); 69 + rcu_read_lock_sched(); 70 70 list_for_each_entry_rcu(mod, &module_bug_list, bug_list) { 71 71 unsigned i; 72 72 ··· 77 77 } 78 78 bug = NULL; 79 79 out: 80 - rcu_read_unlock(); 80 + rcu_read_unlock_sched(); 81 81 82 82 return bug; 83 83 } ··· 87 87 { 88 88 char *secstrings; 89 89 unsigned int i; 90 + 91 + lockdep_assert_held(&module_mutex); 90 92 91 93 mod->bug_table = NULL; 92 94 mod->num_bugs = 0; ··· 115 113 116 114 void module_bug_cleanup(struct module *mod) 117 115 { 116 + lockdep_assert_held(&module_mutex); 118 117 list_del_rcu(&mod->bug_list); 119 118 } 120 119
+54 -22
lib/rbtree.c
··· 44 44 * parentheses and have some accompanying text comment. 45 45 */ 46 46 47 + /* 48 + * Notes on lockless lookups: 49 + * 50 + * All stores to the tree structure (rb_left and rb_right) must be done using 51 + * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the 52 + * tree structure as seen in program order. 53 + * 54 + * These two requirements will allow lockless iteration of the tree -- not 55 + * correct iteration mind you, tree rotations are not atomic so a lookup might 56 + * miss entire subtrees. 57 + * 58 + * But they do guarantee that any such traversal will only see valid elements 59 + * and that it will indeed complete -- does not get stuck in a loop. 60 + * 61 + * It also guarantees that if the lookup returns an element it is the 'correct' 62 + * one. But not returning an element does _NOT_ mean it's not present. 63 + * 64 + * NOTE: 65 + * 66 + * Stores to __rb_parent_color are not important for simple lookups so those 67 + * are left undone as of now. Nor did I check for loops involving parent 68 + * pointers. 69 + */ 70 + 47 71 static inline void rb_set_black(struct rb_node *rb) 48 72 { 49 73 rb->__rb_parent_color |= RB_BLACK; ··· 153 129 * This still leaves us in violation of 4), the 154 130 * continuation into Case 3 will fix that. 
155 131 */ 156 - parent->rb_right = tmp = node->rb_left; 157 - node->rb_left = parent; 132 + tmp = node->rb_left; 133 + WRITE_ONCE(parent->rb_right, tmp); 134 + WRITE_ONCE(node->rb_left, parent); 158 135 if (tmp) 159 136 rb_set_parent_color(tmp, parent, 160 137 RB_BLACK); ··· 174 149 * / \ 175 150 * n U 176 151 */ 177 - gparent->rb_left = tmp; /* == parent->rb_right */ 178 - parent->rb_right = gparent; 152 + WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */ 153 + WRITE_ONCE(parent->rb_right, gparent); 179 154 if (tmp) 180 155 rb_set_parent_color(tmp, gparent, RB_BLACK); 181 156 __rb_rotate_set_parents(gparent, parent, root, RB_RED); ··· 196 171 tmp = parent->rb_left; 197 172 if (node == tmp) { 198 173 /* Case 2 - right rotate at parent */ 199 - parent->rb_left = tmp = node->rb_right; 200 - node->rb_right = parent; 174 + tmp = node->rb_right; 175 + WRITE_ONCE(parent->rb_left, tmp); 176 + WRITE_ONCE(node->rb_right, parent); 201 177 if (tmp) 202 178 rb_set_parent_color(tmp, parent, 203 179 RB_BLACK); ··· 209 183 } 210 184 211 185 /* Case 3 - left rotate at gparent */ 212 - gparent->rb_right = tmp; /* == parent->rb_left */ 213 - parent->rb_left = gparent; 186 + WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */ 187 + WRITE_ONCE(parent->rb_left, gparent); 214 188 if (tmp) 215 189 rb_set_parent_color(tmp, gparent, RB_BLACK); 216 190 __rb_rotate_set_parents(gparent, parent, root, RB_RED); ··· 250 224 * / \ / \ 251 225 * Sl Sr N Sl 252 226 */ 253 - parent->rb_right = tmp1 = sibling->rb_left; 254 - sibling->rb_left = parent; 227 + tmp1 = sibling->rb_left; 228 + WRITE_ONCE(parent->rb_right, tmp1); 229 + WRITE_ONCE(sibling->rb_left, parent); 255 230 rb_set_parent_color(tmp1, parent, RB_BLACK); 256 231 __rb_rotate_set_parents(parent, sibling, root, 257 232 RB_RED); ··· 302 275 * \ 303 276 * Sr 304 277 */ 305 - sibling->rb_left = tmp1 = tmp2->rb_right; 306 - tmp2->rb_right = sibling; 307 - parent->rb_right = tmp2; 278 + tmp1 = tmp2->rb_right; 279 + 
WRITE_ONCE(sibling->rb_left, tmp1); 280 + WRITE_ONCE(tmp2->rb_right, sibling); 281 + WRITE_ONCE(parent->rb_right, tmp2); 308 282 if (tmp1) 309 283 rb_set_parent_color(tmp1, sibling, 310 284 RB_BLACK); ··· 325 297 * / \ / \ 326 298 * (sl) sr N (sl) 327 299 */ 328 - parent->rb_right = tmp2 = sibling->rb_left; 329 - sibling->rb_left = parent; 300 + tmp2 = sibling->rb_left; 301 + WRITE_ONCE(parent->rb_right, tmp2); 302 + WRITE_ONCE(sibling->rb_left, parent); 330 303 rb_set_parent_color(tmp1, sibling, RB_BLACK); 331 304 if (tmp2) 332 305 rb_set_parent(tmp2, parent); ··· 339 310 sibling = parent->rb_left; 340 311 if (rb_is_red(sibling)) { 341 312 /* Case 1 - right rotate at parent */ 342 - parent->rb_left = tmp1 = sibling->rb_right; 343 - sibling->rb_right = parent; 313 + tmp1 = sibling->rb_right; 314 + WRITE_ONCE(parent->rb_left, tmp1); 315 + WRITE_ONCE(sibling->rb_right, parent); 344 316 rb_set_parent_color(tmp1, parent, RB_BLACK); 345 317 __rb_rotate_set_parents(parent, sibling, root, 346 318 RB_RED); ··· 366 336 break; 367 337 } 368 338 /* Case 3 - right rotate at sibling */ 369 - sibling->rb_right = tmp1 = tmp2->rb_left; 370 - tmp2->rb_left = sibling; 371 - parent->rb_left = tmp2; 339 + tmp1 = tmp2->rb_left; 340 + WRITE_ONCE(sibling->rb_right, tmp1); 341 + WRITE_ONCE(tmp2->rb_left, sibling); 342 + WRITE_ONCE(parent->rb_left, tmp2); 372 343 if (tmp1) 373 344 rb_set_parent_color(tmp1, sibling, 374 345 RB_BLACK); ··· 378 347 sibling = tmp2; 379 348 } 380 349 /* Case 4 - left rotate at parent + color flips */ 381 - parent->rb_left = tmp2 = sibling->rb_right; 382 - sibling->rb_right = parent; 350 + tmp2 = sibling->rb_right; 351 + WRITE_ONCE(parent->rb_left, tmp2); 352 + WRITE_ONCE(sibling->rb_right, parent); 383 353 rb_set_parent_color(tmp1, sibling, RB_BLACK); 384 354 if (tmp2) 385 355 rb_set_parent(tmp2, parent);
+2 -2
net/mac80211/rate.c
··· 103 103 const struct rate_control_ops *ops; 104 104 const char *alg_name; 105 105 106 - kparam_block_sysfs_write(ieee80211_default_rc_algo); 106 + kernel_param_lock(THIS_MODULE); 107 107 if (!name) 108 108 alg_name = ieee80211_default_rc_algo; 109 109 else ··· 117 117 /* try built-in one if specific alg requested but not found */ 118 118 if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT)) 119 119 ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT); 120 - kparam_unblock_sysfs_write(ieee80211_default_rc_algo); 120 + kernel_param_unlock(THIS_MODULE); 121 121 122 122 return ops; 123 123 }
+1 -1
net/sunrpc/auth.c
··· 72 72 73 73 #define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int); 74 74 75 - static struct kernel_param_ops param_ops_hashtbl_sz = { 75 + static const struct kernel_param_ops param_ops_hashtbl_sz = { 76 76 .set = param_set_hashtbl_sz, 77 77 .get = param_get_hashtbl_sz, 78 78 };
+3 -3
net/sunrpc/xprtsock.c
··· 2982 2982 RPC_MAX_RESVPORT); 2983 2983 } 2984 2984 2985 - static struct kernel_param_ops param_ops_portnr = { 2985 + static const struct kernel_param_ops param_ops_portnr = { 2986 2986 .set = param_set_portnr, 2987 2987 .get = param_get_uint, 2988 2988 }; ··· 3001 3001 RPC_MAX_SLOT_TABLE); 3002 3002 } 3003 3003 3004 - static struct kernel_param_ops param_ops_slot_table_size = { 3004 + static const struct kernel_param_ops param_ops_slot_table_size = { 3005 3005 .set = param_set_slot_table_size, 3006 3006 .get = param_get_uint, 3007 3007 }; ··· 3017 3017 RPC_MAX_SLOT_TABLE_LIMIT); 3018 3018 } 3019 3019 3020 - static struct kernel_param_ops param_ops_max_slot_table_size = { 3020 + static const struct kernel_param_ops param_ops_max_slot_table_size = { 3021 3021 .set = param_set_max_slot_table_size, 3022 3022 .get = param_get_uint, 3023 3023 };
+3 -3
security/apparmor/lsm.c
··· 654 654 static int param_set_aabool(const char *val, const struct kernel_param *kp); 655 655 static int param_get_aabool(char *buffer, const struct kernel_param *kp); 656 656 #define param_check_aabool param_check_bool 657 - static struct kernel_param_ops param_ops_aabool = { 657 + static const struct kernel_param_ops param_ops_aabool = { 658 658 .flags = KERNEL_PARAM_OPS_FL_NOARG, 659 659 .set = param_set_aabool, 660 660 .get = param_get_aabool ··· 663 663 static int param_set_aauint(const char *val, const struct kernel_param *kp); 664 664 static int param_get_aauint(char *buffer, const struct kernel_param *kp); 665 665 #define param_check_aauint param_check_uint 666 - static struct kernel_param_ops param_ops_aauint = { 666 + static const struct kernel_param_ops param_ops_aauint = { 667 667 .set = param_set_aauint, 668 668 .get = param_get_aauint 669 669 }; ··· 671 671 static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp); 672 672 static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp); 673 673 #define param_check_aalockpolicy param_check_bool 674 - static struct kernel_param_ops param_ops_aalockpolicy = { 674 + static const struct kernel_param_ops param_ops_aalockpolicy = { 675 675 .flags = KERNEL_PARAM_OPS_FL_NOARG, 676 676 .set = param_set_aalockpolicy, 677 677 .get = param_get_aalockpolicy
+1 -1
security/integrity/ima/ima_crypto.c
··· 55 55 return 0; 56 56 } 57 57 58 - static struct kernel_param_ops param_ops_bufsize = { 58 + static const struct kernel_param_ops param_ops_bufsize = { 59 59 .set = param_set_bufsize, 60 60 .get = param_get_uint, 61 61 };
+1 -1
sound/pci/hda/hda_intel.c
··· 171 171 172 172 #ifdef CONFIG_PM 173 173 static int param_set_xint(const char *val, const struct kernel_param *kp); 174 - static struct kernel_param_ops param_ops_xint = { 174 + static const struct kernel_param_ops param_ops_xint = { 175 175 .set = param_set_xint, 176 176 .get = param_get_int, 177 177 };