Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.34-rc1 1312 lines 31 kB view raw
1/* 2 * linux/net/sunrpc/svc.c 3 * 4 * High-level RPC service routines 5 * 6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 7 * 8 * Multiple threads pools and NUMAisation 9 * Copyright (c) 2006 Silicon Graphics, Inc. 10 * by Greg Banks <gnb@melbourne.sgi.com> 11 */ 12 13#include <linux/linkage.h> 14#include <linux/sched.h> 15#include <linux/errno.h> 16#include <linux/net.h> 17#include <linux/in.h> 18#include <linux/mm.h> 19#include <linux/interrupt.h> 20#include <linux/module.h> 21#include <linux/kthread.h> 22 23#include <linux/sunrpc/types.h> 24#include <linux/sunrpc/xdr.h> 25#include <linux/sunrpc/stats.h> 26#include <linux/sunrpc/svcsock.h> 27#include <linux/sunrpc/clnt.h> 28#include <linux/sunrpc/bc_xprt.h> 29 30#define RPCDBG_FACILITY RPCDBG_SVCDSP 31 32static void svc_unregister(const struct svc_serv *serv); 33 34#define svc_serv_is_pooled(serv) ((serv)->sv_function) 35 36/* 37 * Mode for mapping cpus to pools. 38 */ 39enum { 40 SVC_POOL_AUTO = -1, /* choose one of the others */ 41 SVC_POOL_GLOBAL, /* no mapping, just a single global pool 42 * (legacy & UP mode) */ 43 SVC_POOL_PERCPU, /* one pool per cpu */ 44 SVC_POOL_PERNODE /* one pool per numa node */ 45}; 46#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL 47 48/* 49 * Structure for mapping cpus to pools and vice versa. 50 * Setup once during sunrpc initialisation. 51 */ 52static struct svc_pool_map { 53 int count; /* How many svc_servs use us */ 54 int mode; /* Note: int not enum to avoid 55 * warnings about "enumeration value 56 * not handled in switch" */ 57 unsigned int npools; 58 unsigned int *pool_to; /* maps pool id to cpu or node */ 59 unsigned int *to_pool; /* maps cpu or node to pool id */ 60} svc_pool_map = { 61 .count = 0, 62 .mode = SVC_POOL_DEFAULT 63}; 64static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ 65 66static int 67param_set_pool_mode(const char *val, struct kernel_param *kp) 68{ 69 int *ip = (int *)kp->arg; 70 struct svc_pool_map *m = &svc_pool_map; 71 int err; 72 73 mutex_lock(&svc_pool_map_mutex); 74 75 err = -EBUSY; 76 if (m->count) 77 goto out; 78 79 err = 0; 80 if (!strncmp(val, "auto", 4)) 81 *ip = SVC_POOL_AUTO; 82 else if (!strncmp(val, "global", 6)) 83 *ip = SVC_POOL_GLOBAL; 84 else if (!strncmp(val, "percpu", 6)) 85 *ip = SVC_POOL_PERCPU; 86 else if (!strncmp(val, "pernode", 7)) 87 *ip = SVC_POOL_PERNODE; 88 else 89 err = -EINVAL; 90 91out: 92 mutex_unlock(&svc_pool_map_mutex); 93 return err; 94} 95 96static int 97param_get_pool_mode(char *buf, struct kernel_param *kp) 98{ 99 int *ip = (int *)kp->arg; 100 101 switch (*ip) 102 { 103 case SVC_POOL_AUTO: 104 return strlcpy(buf, "auto", 20); 105 case SVC_POOL_GLOBAL: 106 return strlcpy(buf, "global", 20); 107 case SVC_POOL_PERCPU: 108 return strlcpy(buf, "percpu", 20); 109 case SVC_POOL_PERNODE: 110 return strlcpy(buf, "pernode", 20); 111 default: 112 return sprintf(buf, "%d", *ip); 113 } 114} 115 116module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode, 117 &svc_pool_map.mode, 0644); 118 119/* 120 * Detect best pool mapping mode heuristically, 121 * according to the machine's topology. 122 */ 123static int 124svc_pool_map_choose_mode(void) 125{ 126 unsigned int node; 127 128 if (nr_online_nodes > 1) { 129 /* 130 * Actually have multiple NUMA nodes, 131 * so split pools on NUMA node boundaries 132 */ 133 return SVC_POOL_PERNODE; 134 } 135 136 node = first_online_node; 137 if (nr_cpus_node(node) > 2) { 138 /* 139 * Non-trivial SMP, or CONFIG_NUMA on 140 * non-NUMA hardware, e.g. with a generic 141 * x86_64 kernel on Xeons. In this case we 142 * want to divide the pools on cpu boundaries. 143 */ 144 return SVC_POOL_PERCPU; 145 } 146 147 /* default: one global pool */ 148 return SVC_POOL_GLOBAL; 149} 150 151/* 152 * Allocate the to_pool[] and pool_to[] arrays. 153 * Returns 0 on success or an errno. 154 */ 155static int 156svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) 157{ 158 m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL); 159 if (!m->to_pool) 160 goto fail; 161 m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL); 162 if (!m->pool_to) 163 goto fail_free; 164 165 return 0; 166 167fail_free: 168 kfree(m->to_pool); 169fail: 170 return -ENOMEM; 171} 172 173/* 174 * Initialise the pool map for SVC_POOL_PERCPU mode. 175 * Returns number of pools or <0 on error. 176 */ 177static int 178svc_pool_map_init_percpu(struct svc_pool_map *m) 179{ 180 unsigned int maxpools = nr_cpu_ids; 181 unsigned int pidx = 0; 182 unsigned int cpu; 183 int err; 184 185 err = svc_pool_map_alloc_arrays(m, maxpools); 186 if (err) 187 return err; 188 189 for_each_online_cpu(cpu) { 190 BUG_ON(pidx > maxpools); 191 m->to_pool[cpu] = pidx; 192 m->pool_to[pidx] = cpu; 193 pidx++; 194 } 195 /* cpus brought online later all get mapped to pool0, sorry */ 196 197 return pidx; 198}; 199 200 201/* 202 * Initialise the pool map for SVC_POOL_PERNODE mode. 203 * Returns number of pools or <0 on error. 204 */ 205static int 206svc_pool_map_init_pernode(struct svc_pool_map *m) 207{ 208 unsigned int maxpools = nr_node_ids; 209 unsigned int pidx = 0; 210 unsigned int node; 211 int err; 212 213 err = svc_pool_map_alloc_arrays(m, maxpools); 214 if (err) 215 return err; 216 217 for_each_node_with_cpus(node) { 218 /* some architectures (e.g. SN2) have cpuless nodes */ 219 BUG_ON(pidx > maxpools); 220 m->to_pool[node] = pidx; 221 m->pool_to[pidx] = node; 222 pidx++; 223 } 224 /* nodes brought online later all get mapped to pool0, sorry */ 225 226 return pidx; 227} 228 229 230/* 231 * Add a reference to the global map of cpus to pools (and 232 * vice versa). Initialise the map if we're the first user. 233 * Returns the number of pools. 234 */ 235static unsigned int 236svc_pool_map_get(void) 237{ 238 struct svc_pool_map *m = &svc_pool_map; 239 int npools = -1; 240 241 mutex_lock(&svc_pool_map_mutex); 242 243 if (m->count++) { 244 mutex_unlock(&svc_pool_map_mutex); 245 return m->npools; 246 } 247 248 if (m->mode == SVC_POOL_AUTO) 249 m->mode = svc_pool_map_choose_mode(); 250 251 switch (m->mode) { 252 case SVC_POOL_PERCPU: 253 npools = svc_pool_map_init_percpu(m); 254 break; 255 case SVC_POOL_PERNODE: 256 npools = svc_pool_map_init_pernode(m); 257 break; 258 } 259 260 if (npools < 0) { 261 /* default, or memory allocation failure */ 262 npools = 1; 263 m->mode = SVC_POOL_GLOBAL; 264 } 265 m->npools = npools; 266 267 mutex_unlock(&svc_pool_map_mutex); 268 return m->npools; 269} 270 271 272/* 273 * Drop a reference to the global map of cpus to pools. 274 * When the last reference is dropped, the map data is 275 * freed; this allows the sysadmin to change the pool 276 * mode using the pool_mode module option without 277 * rebooting or re-loading sunrpc.ko. 278 */ 279static void 280svc_pool_map_put(void) 281{ 282 struct svc_pool_map *m = &svc_pool_map; 283 284 mutex_lock(&svc_pool_map_mutex); 285 286 if (!--m->count) { 287 m->mode = SVC_POOL_DEFAULT; 288 kfree(m->to_pool); 289 kfree(m->pool_to); 290 m->npools = 0; 291 } 292 293 mutex_unlock(&svc_pool_map_mutex); 294} 295 296 297/* 298 * Set the given thread's cpus_allowed mask so that it 299 * will only run on cpus in the given pool. 300 */ 301static inline void 302svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) 303{ 304 struct svc_pool_map *m = &svc_pool_map; 305 unsigned int node = m->pool_to[pidx]; 306 307 /* 308 * The caller checks for sv_nrpools > 1, which 309 * implies that we've been initialized. 310 */ 311 BUG_ON(m->count == 0); 312 313 switch (m->mode) { 314 case SVC_POOL_PERCPU: 315 { 316 set_cpus_allowed_ptr(task, cpumask_of(node)); 317 break; 318 } 319 case SVC_POOL_PERNODE: 320 { 321 set_cpus_allowed_ptr(task, cpumask_of_node(node)); 322 break; 323 } 324 } 325} 326 327/* 328 * Use the mapping mode to choose a pool for a given CPU. 329 * Used when enqueueing an incoming RPC. Always returns 330 * a non-NULL pool pointer. 331 */ 332struct svc_pool * 333svc_pool_for_cpu(struct svc_serv *serv, int cpu) 334{ 335 struct svc_pool_map *m = &svc_pool_map; 336 unsigned int pidx = 0; 337 338 /* 339 * An uninitialised map happens in a pure client when 340 * lockd is brought up, so silently treat it the 341 * same as SVC_POOL_GLOBAL. 342 */ 343 if (svc_serv_is_pooled(serv)) { 344 switch (m->mode) { 345 case SVC_POOL_PERCPU: 346 pidx = m->to_pool[cpu]; 347 break; 348 case SVC_POOL_PERNODE: 349 pidx = m->to_pool[cpu_to_node(cpu)]; 350 break; 351 } 352 } 353 return &serv->sv_pools[pidx % serv->sv_nrpools]; 354} 355 356 357/* 358 * Create an RPC service 359 */ 360static struct svc_serv * 361__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, 362 void (*shutdown)(struct svc_serv *serv)) 363{ 364 struct svc_serv *serv; 365 unsigned int vers; 366 unsigned int xdrsize; 367 unsigned int i; 368 369 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) 370 return NULL; 371 serv->sv_name = prog->pg_name; 372 serv->sv_program = prog; 373 serv->sv_nrthreads = 1; 374 serv->sv_stats = prog->pg_stats; 375 if (bufsize > RPCSVC_MAXPAYLOAD) 376 bufsize = RPCSVC_MAXPAYLOAD; 377 serv->sv_max_payload = bufsize? bufsize : 4096; 378 serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); 379 serv->sv_shutdown = shutdown; 380 xdrsize = 0; 381 while (prog) { 382 prog->pg_lovers = prog->pg_nvers-1; 383 for (vers=0; vers<prog->pg_nvers ; vers++) 384 if (prog->pg_vers[vers]) { 385 prog->pg_hivers = vers; 386 if (prog->pg_lovers > vers) 387 prog->pg_lovers = vers; 388 if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) 389 xdrsize = prog->pg_vers[vers]->vs_xdrsize; 390 } 391 prog = prog->pg_next; 392 } 393 serv->sv_xdrsize = xdrsize; 394 INIT_LIST_HEAD(&serv->sv_tempsocks); 395 INIT_LIST_HEAD(&serv->sv_permsocks); 396 init_timer(&serv->sv_temptimer); 397 spin_lock_init(&serv->sv_lock); 398 399 serv->sv_nrpools = npools; 400 serv->sv_pools = 401 kcalloc(serv->sv_nrpools, sizeof(struct svc_pool), 402 GFP_KERNEL); 403 if (!serv->sv_pools) { 404 kfree(serv); 405 return NULL; 406 } 407 408 for (i = 0; i < serv->sv_nrpools; i++) { 409 struct svc_pool *pool = &serv->sv_pools[i]; 410 411 dprintk("svc: initialising pool %u for %s\n", 412 i, serv->sv_name); 413 414 pool->sp_id = i; 415 INIT_LIST_HEAD(&pool->sp_threads); 416 INIT_LIST_HEAD(&pool->sp_sockets); 417 INIT_LIST_HEAD(&pool->sp_all_threads); 418 spin_lock_init(&pool->sp_lock); 419 } 420 421 /* Remove any stale portmap registrations */ 422 svc_unregister(serv); 423 424 return serv; 425} 426 427struct svc_serv * 428svc_create(struct svc_program *prog, unsigned int bufsize, 429 void (*shutdown)(struct svc_serv *serv)) 430{ 431 return __svc_create(prog, bufsize, /*npools*/1, shutdown); 432} 433EXPORT_SYMBOL_GPL(svc_create); 434 435struct svc_serv * 436svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 437 void (*shutdown)(struct svc_serv *serv), 438 svc_thread_fn func, struct module *mod) 439{ 440 struct svc_serv *serv; 441 unsigned int npools = svc_pool_map_get(); 442 443 serv = __svc_create(prog, bufsize, npools, shutdown); 444 445 if (serv != NULL) { 446 serv->sv_function = func; 447 serv->sv_module = mod; 448 } 449 450 return serv; 451} 452EXPORT_SYMBOL_GPL(svc_create_pooled); 453 454/* 455 * Destroy an RPC service. Should be called with appropriate locking to 456 * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. 457 */ 458void 459svc_destroy(struct svc_serv *serv) 460{ 461 dprintk("svc: svc_destroy(%s, %d)\n", 462 serv->sv_program->pg_name, 463 serv->sv_nrthreads); 464 465 if (serv->sv_nrthreads) { 466 if (--(serv->sv_nrthreads) != 0) { 467 svc_sock_update_bufs(serv); 468 return; 469 } 470 } else 471 printk("svc_destroy: no threads for serv=%p!\n", serv); 472 473 del_timer_sync(&serv->sv_temptimer); 474 475 svc_close_all(&serv->sv_tempsocks); 476 477 if (serv->sv_shutdown) 478 serv->sv_shutdown(serv); 479 480 svc_close_all(&serv->sv_permsocks); 481 482 BUG_ON(!list_empty(&serv->sv_permsocks)); 483 BUG_ON(!list_empty(&serv->sv_tempsocks)); 484 485 cache_clean_deferred(serv); 486 487 if (svc_serv_is_pooled(serv)) 488 svc_pool_map_put(); 489 490#if defined(CONFIG_NFS_V4_1) 491 svc_sock_destroy(serv->bc_xprt); 492#endif /* CONFIG_NFS_V4_1 */ 493 494 svc_unregister(serv); 495 kfree(serv->sv_pools); 496 kfree(serv); 497} 498EXPORT_SYMBOL_GPL(svc_destroy); 499 500/* 501 * Allocate an RPC server's buffer space. 502 * We allocate pages and place them in rq_argpages. 503 */ 504static int 505svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) 506{ 507 unsigned int pages, arghi; 508 509 /* bc_xprt uses fore channel allocated buffers */ 510 if (svc_is_backchannel(rqstp)) 511 return 1; 512 513 pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. 514 * We assume one is at most one page 515 */ 516 arghi = 0; 517 BUG_ON(pages > RPCSVC_MAXPAGES); 518 while (pages) { 519 struct page *p = alloc_page(GFP_KERNEL); 520 if (!p) 521 break; 522 rqstp->rq_pages[arghi++] = p; 523 pages--; 524 } 525 return pages == 0; 526} 527 528/* 529 * Release an RPC server buffer 530 */ 531static void 532svc_release_buffer(struct svc_rqst *rqstp) 533{ 534 unsigned int i; 535 536 for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++) 537 if (rqstp->rq_pages[i]) 538 put_page(rqstp->rq_pages[i]); 539} 540 541struct svc_rqst * 542svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool) 543{ 544 struct svc_rqst *rqstp; 545 546 rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); 547 if (!rqstp) 548 goto out_enomem; 549 550 init_waitqueue_head(&rqstp->rq_wait); 551 552 serv->sv_nrthreads++; 553 spin_lock_bh(&pool->sp_lock); 554 pool->sp_nrthreads++; 555 list_add(&rqstp->rq_all, &pool->sp_all_threads); 556 spin_unlock_bh(&pool->sp_lock); 557 rqstp->rq_server = serv; 558 rqstp->rq_pool = pool; 559 560 rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); 561 if (!rqstp->rq_argp) 562 goto out_thread; 563 564 rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); 565 if (!rqstp->rq_resp) 566 goto out_thread; 567 568 if (!svc_init_buffer(rqstp, serv->sv_max_mesg)) 569 goto out_thread; 570 571 return rqstp; 572out_thread: 573 svc_exit_thread(rqstp); 574out_enomem: 575 return ERR_PTR(-ENOMEM); 576} 577EXPORT_SYMBOL_GPL(svc_prepare_thread); 578 579/* 580 * Choose a pool in which to create a new thread, for svc_set_num_threads 581 */ 582static inline struct svc_pool * 583choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) 584{ 585 if (pool != NULL) 586 return pool; 587 588 return &serv->sv_pools[(*state)++ % serv->sv_nrpools]; 589} 590 591/* 592 * Choose a thread to kill, for svc_set_num_threads 593 */ 594static inline struct task_struct * 595choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) 596{ 597 unsigned int i; 598 struct task_struct *task = NULL; 599 600 if (pool != NULL) { 601 spin_lock_bh(&pool->sp_lock); 602 } else { 603 /* choose a pool in round-robin fashion */ 604 for (i = 0; i < serv->sv_nrpools; i++) { 605 pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; 606 spin_lock_bh(&pool->sp_lock); 607 if (!list_empty(&pool->sp_all_threads)) 608 goto found_pool; 609 spin_unlock_bh(&pool->sp_lock); 610 } 611 return NULL; 612 } 613 614found_pool: 615 if (!list_empty(&pool->sp_all_threads)) { 616 struct svc_rqst *rqstp; 617 618 /* 619 * Remove from the pool->sp_all_threads list 620 * so we don't try to kill it again. 621 */ 622 rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); 623 list_del_init(&rqstp->rq_all); 624 task = rqstp->rq_task; 625 } 626 spin_unlock_bh(&pool->sp_lock); 627 628 return task; 629} 630 631/* 632 * Create or destroy enough new threads to make the number 633 * of threads the given number. If `pool' is non-NULL, applies 634 * only to threads in that pool, otherwise round-robins between 635 * all pools. Must be called with a svc_get() reference and 636 * the BKL or another lock to protect access to svc_serv fields. 637 * 638 * Destroying threads relies on the service threads filling in 639 * rqstp->rq_task, which only the nfs ones do. Assumes the serv 640 * has been created using svc_create_pooled(). 641 * 642 * Based on code that used to be in nfsd_svc() but tweaked 643 * to be pool-aware. 644 */ 645int 646svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) 647{ 648 struct svc_rqst *rqstp; 649 struct task_struct *task; 650 struct svc_pool *chosen_pool; 651 int error = 0; 652 unsigned int state = serv->sv_nrthreads-1; 653 654 if (pool == NULL) { 655 /* The -1 assumes caller has done a svc_get() */ 656 nrservs -= (serv->sv_nrthreads-1); 657 } else { 658 spin_lock_bh(&pool->sp_lock); 659 nrservs -= pool->sp_nrthreads; 660 spin_unlock_bh(&pool->sp_lock); 661 } 662 663 /* create new threads */ 664 while (nrservs > 0) { 665 nrservs--; 666 chosen_pool = choose_pool(serv, pool, &state); 667 668 rqstp = svc_prepare_thread(serv, chosen_pool); 669 if (IS_ERR(rqstp)) { 670 error = PTR_ERR(rqstp); 671 break; 672 } 673 674 __module_get(serv->sv_module); 675 task = kthread_create(serv->sv_function, rqstp, serv->sv_name); 676 if (IS_ERR(task)) { 677 error = PTR_ERR(task); 678 module_put(serv->sv_module); 679 svc_exit_thread(rqstp); 680 break; 681 } 682 683 rqstp->rq_task = task; 684 if (serv->sv_nrpools > 1) 685 svc_pool_map_set_cpumask(task, chosen_pool->sp_id); 686 687 svc_sock_update_bufs(serv); 688 wake_up_process(task); 689 } 690 /* destroy old threads */ 691 while (nrservs < 0 && 692 (task = choose_victim(serv, pool, &state)) != NULL) { 693 send_sig(SIGINT, task, 1); 694 nrservs++; 695 } 696 697 return error; 698} 699EXPORT_SYMBOL_GPL(svc_set_num_threads); 700 701/* 702 * Called from a server thread as it's exiting. Caller must hold the BKL or 703 * the "service mutex", whichever is appropriate for the service. 704 */ 705void 706svc_exit_thread(struct svc_rqst *rqstp) 707{ 708 struct svc_serv *serv = rqstp->rq_server; 709 struct svc_pool *pool = rqstp->rq_pool; 710 711 svc_release_buffer(rqstp); 712 kfree(rqstp->rq_resp); 713 kfree(rqstp->rq_argp); 714 kfree(rqstp->rq_auth_data); 715 716 spin_lock_bh(&pool->sp_lock); 717 pool->sp_nrthreads--; 718 list_del(&rqstp->rq_all); 719 spin_unlock_bh(&pool->sp_lock); 720 721 kfree(rqstp); 722 723 /* Release the server */ 724 if (serv) 725 svc_destroy(serv); 726} 727EXPORT_SYMBOL_GPL(svc_exit_thread); 728 729/* 730 * Register an "inet" protocol family netid with the local 731 * rpcbind daemon via an rpcbind v4 SET request. 732 * 733 * No netconfig infrastructure is available in the kernel, so 734 * we map IP_ protocol numbers to netids by hand. 735 * 736 * Returns zero on success; a negative errno value is returned 737 * if any error occurs. 738 */ 739static int __svc_rpcb_register4(const u32 program, const u32 version, 740 const unsigned short protocol, 741 const unsigned short port) 742{ 743 const struct sockaddr_in sin = { 744 .sin_family = AF_INET, 745 .sin_addr.s_addr = htonl(INADDR_ANY), 746 .sin_port = htons(port), 747 }; 748 const char *netid; 749 int error; 750 751 switch (protocol) { 752 case IPPROTO_UDP: 753 netid = RPCBIND_NETID_UDP; 754 break; 755 case IPPROTO_TCP: 756 netid = RPCBIND_NETID_TCP; 757 break; 758 default: 759 return -ENOPROTOOPT; 760 } 761 762 error = rpcb_v4_register(program, version, 763 (const struct sockaddr *)&sin, netid); 764 765 /* 766 * User space didn't support rpcbind v4, so retry this 767 * registration request with the legacy rpcbind v2 protocol. 768 */ 769 if (error == -EPROTONOSUPPORT) 770 error = rpcb_register(program, version, protocol, port); 771 772 return error; 773} 774 775#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 776/* 777 * Register an "inet6" protocol family netid with the local 778 * rpcbind daemon via an rpcbind v4 SET request. 779 * 780 * No netconfig infrastructure is available in the kernel, so 781 * we map IP_ protocol numbers to netids by hand. 782 * 783 * Returns zero on success; a negative errno value is returned 784 * if any error occurs. 785 */ 786static int __svc_rpcb_register6(const u32 program, const u32 version, 787 const unsigned short protocol, 788 const unsigned short port) 789{ 790 const struct sockaddr_in6 sin6 = { 791 .sin6_family = AF_INET6, 792 .sin6_addr = IN6ADDR_ANY_INIT, 793 .sin6_port = htons(port), 794 }; 795 const char *netid; 796 int error; 797 798 switch (protocol) { 799 case IPPROTO_UDP: 800 netid = RPCBIND_NETID_UDP6; 801 break; 802 case IPPROTO_TCP: 803 netid = RPCBIND_NETID_TCP6; 804 break; 805 default: 806 return -ENOPROTOOPT; 807 } 808 809 error = rpcb_v4_register(program, version, 810 (const struct sockaddr *)&sin6, netid); 811 812 /* 813 * User space didn't support rpcbind version 4, so we won't 814 * use a PF_INET6 listener. 815 */ 816 if (error == -EPROTONOSUPPORT) 817 error = -EAFNOSUPPORT; 818 819 return error; 820} 821#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ 822 823/* 824 * Register a kernel RPC service via rpcbind version 4. 825 * 826 * Returns zero on success; a negative errno value is returned 827 * if any error occurs. 828 */ 829static int __svc_register(const char *progname, 830 const u32 program, const u32 version, 831 const int family, 832 const unsigned short protocol, 833 const unsigned short port) 834{ 835 int error = -EAFNOSUPPORT; 836 837 switch (family) { 838 case PF_INET: 839 error = __svc_rpcb_register4(program, version, 840 protocol, port); 841 break; 842#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 843 case PF_INET6: 844 error = __svc_rpcb_register6(program, version, 845 protocol, port); 846#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ 847 } 848 849 if (error < 0) 850 printk(KERN_WARNING "svc: failed to register %sv%u RPC " 851 "service (errno %d).\n", progname, version, -error); 852 return error; 853} 854 855/** 856 * svc_register - register an RPC service with the local portmapper 857 * @serv: svc_serv struct for the service to register 858 * @family: protocol family of service's listener socket 859 * @proto: transport protocol number to advertise 860 * @port: port to advertise 861 * 862 * Service is registered for any address in the passed-in protocol family 863 */ 864int svc_register(const struct svc_serv *serv, const int family, 865 const unsigned short proto, const unsigned short port) 866{ 867 struct svc_program *progp; 868 unsigned int i; 869 int error = 0; 870 871 BUG_ON(proto == 0 && port == 0); 872 873 for (progp = serv->sv_program; progp; progp = progp->pg_next) { 874 for (i = 0; i < progp->pg_nvers; i++) { 875 if (progp->pg_vers[i] == NULL) 876 continue; 877 878 dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n", 879 progp->pg_name, 880 i, 881 proto == IPPROTO_UDP? "udp" : "tcp", 882 port, 883 family, 884 progp->pg_vers[i]->vs_hidden? 885 " (but not telling portmap)" : ""); 886 887 if (progp->pg_vers[i]->vs_hidden) 888 continue; 889 890 error = __svc_register(progp->pg_name, progp->pg_prog, 891 i, family, proto, port); 892 if (error < 0) 893 break; 894 } 895 } 896 897 return error; 898} 899 900/* 901 * If user space is running rpcbind, it should take the v4 UNSET 902 * and clear everything for this [program, version]. If user space 903 * is running portmap, it will reject the v4 UNSET, but won't have 904 * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient 905 * in this case to clear all existing entries for [program, version]. 906 */ 907static void __svc_unregister(const u32 program, const u32 version, 908 const char *progname) 909{ 910 int error; 911 912 error = rpcb_v4_register(program, version, NULL, ""); 913 914 /* 915 * User space didn't support rpcbind v4, so retry this 916 * request with the legacy rpcbind v2 protocol. 917 */ 918 if (error == -EPROTONOSUPPORT) 919 error = rpcb_register(program, version, 0, 0); 920 921 dprintk("svc: %s(%sv%u), error %d\n", 922 __func__, progname, version, error); 923} 924 925/* 926 * All netids, bind addresses and ports registered for [program, version] 927 * are removed from the local rpcbind database (if the service is not 928 * hidden) to make way for a new instance of the service. 929 * 930 * The result of unregistration is reported via dprintk for those who want 931 * verification of the result, but is otherwise not important. 932 */ 933static void svc_unregister(const struct svc_serv *serv) 934{ 935 struct svc_program *progp; 936 unsigned long flags; 937 unsigned int i; 938 939 clear_thread_flag(TIF_SIGPENDING); 940 941 for (progp = serv->sv_program; progp; progp = progp->pg_next) { 942 for (i = 0; i < progp->pg_nvers; i++) { 943 if (progp->pg_vers[i] == NULL) 944 continue; 945 if (progp->pg_vers[i]->vs_hidden) 946 continue; 947 948 __svc_unregister(progp->pg_prog, i, progp->pg_name); 949 } 950 } 951 952 spin_lock_irqsave(&current->sighand->siglock, flags); 953 recalc_sigpending(); 954 spin_unlock_irqrestore(&current->sighand->siglock, flags); 955} 956 957/* 958 * Printk the given error with the address of the client that caused it. 959 */ 960static int 961__attribute__ ((format (printf, 2, 3))) 962svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) 963{ 964 va_list args; 965 int r; 966 char buf[RPC_MAX_ADDRBUFLEN]; 967 968 if (!net_ratelimit()) 969 return 0; 970 971 printk(KERN_WARNING "svc: %s: ", 972 svc_print_addr(rqstp, buf, sizeof(buf))); 973 974 va_start(args, fmt); 975 r = vprintk(fmt, args); 976 va_end(args); 977 978 return r; 979} 980 981/* 982 * Common routine for processing the RPC request. 983 */ 984static int 985svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) 986{ 987 struct svc_program *progp; 988 struct svc_version *versp = NULL; /* compiler food */ 989 struct svc_procedure *procp = NULL; 990 struct svc_serv *serv = rqstp->rq_server; 991 kxdrproc_t xdr; 992 __be32 *statp; 993 u32 prog, vers, proc; 994 __be32 auth_stat, rpc_stat; 995 int auth_res; 996 __be32 *reply_statp; 997 998 rpc_stat = rpc_success; 999 1000 if (argv->iov_len < 6*4) 1001 goto err_short_len; 1002 1003 /* Will be turned off only in gss privacy case: */ 1004 rqstp->rq_splice_ok = 1; 1005 /* Will be turned off only when NFSv4 Sessions are used */ 1006 rqstp->rq_usedeferral = 1; 1007 1008 /* Setup reply header */ 1009 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); 1010 1011 svc_putu32(resv, rqstp->rq_xid); 1012 1013 vers = svc_getnl(argv); 1014 1015 /* First words of reply: */ 1016 svc_putnl(resv, 1); /* REPLY */ 1017 1018 if (vers != 2) /* RPC version number */ 1019 goto err_bad_rpc; 1020 1021 /* Save position in case we later decide to reject: */ 1022 reply_statp = resv->iov_base + resv->iov_len; 1023 1024 svc_putnl(resv, 0); /* ACCEPT */ 1025 1026 rqstp->rq_prog = prog = svc_getnl(argv); /* program number */ 1027 rqstp->rq_vers = vers = svc_getnl(argv); /* version number */ 1028 rqstp->rq_proc = proc = svc_getnl(argv); /* procedure number */ 1029 1030 progp = serv->sv_program; 1031 1032 for (progp = serv->sv_program; progp; progp = progp->pg_next) 1033 if (prog == progp->pg_prog) 1034 break; 1035 1036 /* 1037 * Decode auth data, and add verifier to reply buffer. 1038 * We do this before anything else in order to get a decent 1039 * auth verifier. 1040 */ 1041 auth_res = svc_authenticate(rqstp, &auth_stat); 1042 /* Also give the program a chance to reject this call: */ 1043 if (auth_res == SVC_OK && progp) { 1044 auth_stat = rpc_autherr_badcred; 1045 auth_res = progp->pg_authenticate(rqstp); 1046 } 1047 switch (auth_res) { 1048 case SVC_OK: 1049 break; 1050 case SVC_GARBAGE: 1051 goto err_garbage; 1052 case SVC_SYSERR: 1053 rpc_stat = rpc_system_err; 1054 goto err_bad; 1055 case SVC_DENIED: 1056 goto err_bad_auth; 1057 case SVC_DROP: 1058 goto dropit; 1059 case SVC_COMPLETE: 1060 goto sendit; 1061 } 1062 1063 if (progp == NULL) 1064 goto err_bad_prog; 1065 1066 if (vers >= progp->pg_nvers || 1067 !(versp = progp->pg_vers[vers])) 1068 goto err_bad_vers; 1069 1070 procp = versp->vs_proc + proc; 1071 if (proc >= versp->vs_nproc || !procp->pc_func) 1072 goto err_bad_proc; 1073 rqstp->rq_procinfo = procp; 1074 1075 /* Syntactic check complete */ 1076 serv->sv_stats->rpccnt++; 1077 1078 /* Build the reply header. */ 1079 statp = resv->iov_base +resv->iov_len; 1080 svc_putnl(resv, RPC_SUCCESS); 1081 1082 /* Bump per-procedure stats counter */ 1083 procp->pc_count++; 1084 1085 /* Initialize storage for argp and resp */ 1086 memset(rqstp->rq_argp, 0, procp->pc_argsize); 1087 memset(rqstp->rq_resp, 0, procp->pc_ressize); 1088 1089 /* un-reserve some of the out-queue now that we have a 1090 * better idea of reply size 1091 */ 1092 if (procp->pc_xdrressize) 1093 svc_reserve_auth(rqstp, procp->pc_xdrressize<<2); 1094 1095 /* Call the function that processes the request. */ 1096 if (!versp->vs_dispatch) { 1097 /* Decode arguments */ 1098 xdr = procp->pc_decode; 1099 if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp)) 1100 goto err_garbage; 1101 1102 *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); 1103 1104 /* Encode reply */ 1105 if (*statp == rpc_drop_reply) { 1106 if (procp->pc_release) 1107 procp->pc_release(rqstp, NULL, rqstp->rq_resp); 1108 goto dropit; 1109 } 1110 if (*statp == rpc_success && 1111 (xdr = procp->pc_encode) && 1112 !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { 1113 dprintk("svc: failed to encode reply\n"); 1114 /* serv->sv_stats->rpcsystemerr++; */ 1115 *statp = rpc_system_err; 1116 } 1117 } else { 1118 dprintk("svc: calling dispatcher\n"); 1119 if (!versp->vs_dispatch(rqstp, statp)) { 1120 /* Release reply info */ 1121 if (procp->pc_release) 1122 procp->pc_release(rqstp, NULL, rqstp->rq_resp); 1123 goto dropit; 1124 } 1125 } 1126 1127 /* Check RPC status result */ 1128 if (*statp != rpc_success) 1129 resv->iov_len = ((void*)statp) - resv->iov_base + 4; 1130 1131 /* Release reply info */ 1132 if (procp->pc_release) 1133 procp->pc_release(rqstp, NULL, rqstp->rq_resp); 1134 1135 if (procp->pc_encode == NULL) 1136 goto dropit; 1137 1138 sendit: 1139 if (svc_authorise(rqstp)) 1140 goto dropit; 1141 return 1; /* Caller can now send it */ 1142 1143 dropit: 1144 svc_authorise(rqstp); /* doesn't hurt to call this twice */ 1145 dprintk("svc: svc_process dropit\n"); 1146 svc_drop(rqstp); 1147 return 0; 1148 1149err_short_len: 1150 svc_printk(rqstp, "short len %Zd, dropping request\n", 1151 argv->iov_len); 1152 1153 goto dropit; /* drop request */ 1154 1155err_bad_rpc: 1156 serv->sv_stats->rpcbadfmt++; 1157 svc_putnl(resv, 1); /* REJECT */ 1158 svc_putnl(resv, 0); /* RPC_MISMATCH */ 1159 svc_putnl(resv, 2); /* Only RPCv2 supported */ 1160 svc_putnl(resv, 2); 1161 goto sendit; 1162 1163err_bad_auth: 1164 dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); 1165 serv->sv_stats->rpcbadauth++; 1166 /* Restore write pointer to location of accept status: */ 1167 xdr_ressize_check(rqstp, reply_statp); 1168 svc_putnl(resv, 1); /* REJECT */ 1169 svc_putnl(resv, 1); /* AUTH_ERROR */ 1170 svc_putnl(resv, ntohl(auth_stat)); /* status */ 1171 goto sendit; 1172 1173err_bad_prog: 1174 dprintk("svc: unknown program %d\n", prog); 1175 serv->sv_stats->rpcbadfmt++; 1176 svc_putnl(resv, RPC_PROG_UNAVAIL); 1177 goto sendit; 1178 1179err_bad_vers: 1180 svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n", 1181 vers, prog, progp->pg_name); 1182 1183 serv->sv_stats->rpcbadfmt++; 1184 svc_putnl(resv, RPC_PROG_MISMATCH); 1185 svc_putnl(resv, progp->pg_lovers); 1186 svc_putnl(resv, progp->pg_hivers); 1187 goto sendit; 1188 1189err_bad_proc: 1190 svc_printk(rqstp, "unknown procedure (%d)\n", proc); 1191 1192 serv->sv_stats->rpcbadfmt++; 1193 svc_putnl(resv, RPC_PROC_UNAVAIL); 1194 goto sendit; 1195 1196err_garbage: 1197 svc_printk(rqstp, "failed to decode args\n"); 1198 1199 rpc_stat = rpc_garbage_args; 1200err_bad: 1201 serv->sv_stats->rpcbadfmt++; 1202 svc_putnl(resv, ntohl(rpc_stat)); 1203 goto sendit; 1204} 1205EXPORT_SYMBOL_GPL(svc_process); 1206 1207/* 1208 * Process the RPC request. 1209 */ 1210int 1211svc_process(struct svc_rqst *rqstp) 1212{ 1213 struct kvec *argv = &rqstp->rq_arg.head[0]; 1214 struct kvec *resv = &rqstp->rq_res.head[0]; 1215 struct svc_serv *serv = rqstp->rq_server; 1216 u32 dir; 1217 int error; 1218 1219 /* 1220 * Setup response xdr_buf. 1221 * Initially it has just one page 1222 */ 1223 rqstp->rq_resused = 1; 1224 resv->iov_base = page_address(rqstp->rq_respages[0]); 1225 resv->iov_len = 0; 1226 rqstp->rq_res.pages = rqstp->rq_respages + 1; 1227 rqstp->rq_res.len = 0; 1228 rqstp->rq_res.page_base = 0; 1229 rqstp->rq_res.page_len = 0; 1230 rqstp->rq_res.buflen = PAGE_SIZE; 1231 rqstp->rq_res.tail[0].iov_base = NULL; 1232 rqstp->rq_res.tail[0].iov_len = 0; 1233 1234 rqstp->rq_xid = svc_getu32(argv); 1235 1236 dir = svc_getnl(argv); 1237 if (dir != 0) { 1238 /* direction != CALL */ 1239 svc_printk(rqstp, "bad direction %d, dropping request\n", dir); 1240 serv->sv_stats->rpcbadfmt++; 1241 svc_drop(rqstp); 1242 return 0; 1243 } 1244 1245 error = svc_process_common(rqstp, argv, resv); 1246 if (error <= 0) 1247 return error; 1248 1249 return svc_send(rqstp); 1250} 1251 1252#if defined(CONFIG_NFS_V4_1) 1253/* 1254 * Process a backchannel RPC request that arrived over an existing 1255 * outbound connection 1256 */ 1257int 1258bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, 1259 struct svc_rqst *rqstp) 1260{ 1261 struct kvec *argv = &rqstp->rq_arg.head[0]; 1262 struct kvec *resv = &rqstp->rq_res.head[0]; 1263 int error; 1264 1265 /* Build the svc_rqst used by the common processing routine */ 1266 rqstp->rq_xprt = serv->bc_xprt; 1267 rqstp->rq_xid = req->rq_xid; 1268 rqstp->rq_prot = req->rq_xprt->prot; 1269 rqstp->rq_server = serv; 1270 1271 rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); 1272 memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); 1273 memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg)); 1274 memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res)); 1275 1276 /* reset result send buffer "put" position */ 1277 resv->iov_len = 0; 1278 1279 if (rqstp->rq_prot != IPPROTO_TCP) { 1280 printk(KERN_ERR "No support for Non-TCP transports!\n"); 1281 BUG(); 1282 } 1283 1284 /* 1285 * Skip the next two words because they've already been 1286 * processed in the trasport 1287 */ 1288 svc_getu32(argv); /* XID */ 1289 svc_getnl(argv); /* CALLDIR */ 1290 1291 error = svc_process_common(rqstp, argv, resv); 1292 if (error <= 0) 1293 return error; 1294 1295 memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); 1296 return bc_send(req); 1297} 1298EXPORT_SYMBOL(bc_svc_process); 1299#endif /* CONFIG_NFS_V4_1 */ 1300 1301/* 1302 * Return (transport-specific) limit on the rpc payload. 1303 */ 1304u32 svc_max_payload(const struct svc_rqst *rqstp) 1305{ 1306 u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload; 1307 1308 if (rqstp->rq_server->sv_max_payload < max) 1309 max = rqstp->rq_server->sv_max_payload; 1310 return max; 1311} 1312EXPORT_SYMBOL_GPL(svc_max_payload);