Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

connector/cn_proc: Performance improvements

This patch adds the capability to filter messages sent by the proc
connector on the event type supplied in the message from the client
to the connector. The client can register to listen for an event type
given in struct proc_input.

This event-based filtering will greatly enhance performance - handling
8K exits takes about 70ms, whereas 8K-forks + 8K-exits takes about 150ms
& handling 8K-forks + 8K-exits + 8K-execs takes 200ms. There are currently
9 different types of events, and we need to listen to all of them. Also,
measuring the time using pidfds for monitoring 8K process exits took
much longer - 200ms, as compared to 70ms using only exit notifications of
proc connector.

We also add a new event type - PROC_EVENT_NONZERO_EXIT, which is
only sent by the kernel to a listening application when an exiting process
has a non-zero exit status. This will help clients like Oracle DB,
where a monitoring process wants notifications for non-zero process exits
so it can clean up after them.

This kind of a new event could also be useful to other applications like
Google's lmkd daemon, which needs a killed process's exit notification.

The patch takes care that existing clients using the old mechanism of not
sending the event type work without any changes.

The cn_filter function checks whether the event type being notified via
the proc connector matches the event type requested by the client, before
sending (on a match) or dropping (on a mismatch) a packet.

Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Anjali Kulkarni and committed by
David S. Miller
743acf35 2aa1f7a1

+75 -6
+56 -6
drivers/connector/cn_proc.c
··· 50 50 51 51 static int cn_filter(struct sock *dsk, struct sk_buff *skb, void *data) 52 52 { 53 + __u32 what, exit_code, *ptr; 53 54 enum proc_cn_mcast_op mc_op; 55 + uintptr_t val; 54 56 55 - if (!dsk) 57 + if (!dsk || !data) 56 58 return 0; 57 59 60 + ptr = (__u32 *)data; 61 + what = *ptr++; 62 + exit_code = *ptr; 63 + val = ((struct proc_input *)(dsk->sk_user_data))->event_type; 58 64 mc_op = ((struct proc_input *)(dsk->sk_user_data))->mcast_op; 59 65 60 66 if (mc_op == PROC_CN_MCAST_IGNORE) 61 67 return 1; 62 68 63 - return 0; 69 + if ((__u32)val == PROC_EVENT_ALL) 70 + return 0; 71 + 72 + /* 73 + * Drop packet if we have to report only non-zero exit status 74 + * (PROC_EVENT_NONZERO_EXIT) and exit status is 0 75 + */ 76 + if (((__u32)val & PROC_EVENT_NONZERO_EXIT) && 77 + (what == PROC_EVENT_EXIT)) { 78 + if (exit_code) 79 + return 0; 80 + } 81 + 82 + if ((__u32)val & what) 83 + return 0; 84 + 85 + return 1; 64 86 } 65 87 66 88 static inline void send_msg(struct cn_msg *msg) 67 89 { 90 + __u32 filter_data[2]; 91 + 68 92 local_lock(&local_event.lock); 69 93 70 94 msg->seq = __this_cpu_inc_return(local_event.count) - 1; ··· 100 76 * 101 77 * If cn_netlink_send() fails, the data is not sent. 
102 78 */ 79 + filter_data[0] = ((struct proc_event *)msg->data)->what; 80 + if (filter_data[0] == PROC_EVENT_EXIT) { 81 + filter_data[1] = 82 + ((struct proc_event *)msg->data)->event_data.exit.exit_code; 83 + } else { 84 + filter_data[1] = 0; 85 + } 86 + 103 87 cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT, 104 - cn_filter, NULL); 88 + cn_filter, (void *)filter_data); 105 89 106 90 local_unlock(&local_event.lock); 107 91 } ··· 389 357 390 358 /** 391 359 * cn_proc_mcast_ctl 392 - * @data: message sent from userspace via the connector 360 + * @msg: message sent from userspace via the connector 361 + * @nsp: NETLINK_CB of the client's socket buffer 393 362 */ 394 363 static void cn_proc_mcast_ctl(struct cn_msg *msg, 395 364 struct netlink_skb_parms *nsp) 396 365 { 397 366 enum proc_cn_mcast_op mc_op = 0, prev_mc_op = 0; 367 + struct proc_input *pinput = NULL; 368 + enum proc_cn_event ev_type = 0; 398 369 int err = 0, initial = 0; 399 370 struct sock *sk = NULL; 400 371 ··· 416 381 goto out; 417 382 } 418 383 419 - if (msg->len == sizeof(mc_op)) 384 + if (msg->len == sizeof(*pinput)) { 385 + pinput = (struct proc_input *)msg->data; 386 + mc_op = pinput->mcast_op; 387 + ev_type = pinput->event_type; 388 + } else if (msg->len == sizeof(mc_op)) { 420 389 mc_op = *((enum proc_cn_mcast_op *)msg->data); 421 - else 390 + ev_type = PROC_EVENT_ALL; 391 + } else { 422 392 return; 393 + } 394 + 395 + ev_type = valid_event((enum proc_cn_event)ev_type); 396 + 397 + if (ev_type == PROC_EVENT_NONE) 398 + ev_type = PROC_EVENT_ALL; 423 399 424 400 if (nsp->sk) { 425 401 sk = nsp->sk; ··· 446 400 prev_mc_op = 447 401 ((struct proc_input *)(sk->sk_user_data))->mcast_op; 448 402 } 403 + ((struct proc_input *)(sk->sk_user_data))->event_type = 404 + ev_type; 449 405 ((struct proc_input *)(sk->sk_user_data))->mcast_op = mc_op; 450 406 } 451 407 ··· 459 411 case PROC_CN_MCAST_IGNORE: 460 412 if (!initial && (prev_mc_op != PROC_CN_MCAST_IGNORE)) 461 413 
atomic_dec(&proc_event_num_listeners); 414 + ((struct proc_input *)(sk->sk_user_data))->event_type = 415 + PROC_EVENT_NONE; 462 416 break; 463 417 default: 464 418 err = EINVAL;
+19
include/uapi/linux/cn_proc.h
··· 30 30 PROC_CN_MCAST_IGNORE = 2 31 31 }; 32 32 33 + #define PROC_EVENT_ALL (PROC_EVENT_FORK | PROC_EVENT_EXEC | PROC_EVENT_UID | \ 34 + PROC_EVENT_GID | PROC_EVENT_SID | PROC_EVENT_PTRACE | \ 35 + PROC_EVENT_COMM | PROC_EVENT_NONZERO_EXIT | \ 36 + PROC_EVENT_COREDUMP | PROC_EVENT_EXIT) 37 + 38 + /* 39 + * If you add an entry in proc_cn_event, make sure you add it in 40 + * PROC_EVENT_ALL above as well. 41 + */ 33 42 enum proc_cn_event { 34 43 /* Use successive bits so the enums can be used to record 35 44 * sets of events as well ··· 54 45 /* "next" should be 0x00000400 */ 55 46 /* "last" is the last process event: exit, 56 47 * while "next to last" is coredumping event 48 + * before that is report only if process dies 49 + * with non-zero exit status 57 50 */ 51 + PROC_EVENT_NONZERO_EXIT = 0x20000000, 58 52 PROC_EVENT_COREDUMP = 0x40000000, 59 53 PROC_EVENT_EXIT = 0x80000000 60 54 }; 61 55 62 56 struct proc_input { 63 57 enum proc_cn_mcast_op mcast_op; 58 + enum proc_cn_event event_type; 64 59 }; 60 + 61 + static inline enum proc_cn_event valid_event(enum proc_cn_event ev_type) 62 + { 63 + ev_type &= PROC_EVENT_ALL; 64 + return ev_type; 65 + } 65 66 66 67 /* 67 68 * From the user's point of view, the process