at v3.11 841 lines 21 kB view raw
1/* 2 * Device operations for the pnfs nfs4 file layout driver. 3 * 4 * Copyright (c) 2002 5 * The Regents of the University of Michigan 6 * All Rights Reserved 7 * 8 * Dean Hildebrand <dhildebz@umich.edu> 9 * Garth Goodson <Garth.Goodson@netapp.com> 10 * 11 * Permission is granted to use, copy, create derivative works, and 12 * redistribute this software and such derivative works for any purpose, 13 * so long as the name of the University of Michigan is not used in 14 * any advertising or publicity pertaining to the use or distribution 15 * of this software without specific, written prior authorization. If 16 * the above copyright notice or any other identification of the 17 * University of Michigan is included in any copy of any portion of 18 * this software, then the disclaimer below must also be included. 19 * 20 * This software is provided as is, without representation or warranty 21 * of any kind either express or implied, including without limitation 22 * the implied warranties of merchantability, fitness for a particular 23 * purpose, or noninfringement. The Regents of the University of 24 * Michigan shall not be liable for any damages, including special, 25 * indirect, incidental, or consequential damages, with respect to any 26 * claim arising out of or in connection with the use of the software, 27 * even if it has been or is hereafter advised of the possibility of 28 * such damages. 29 */ 30 31#include <linux/nfs_fs.h> 32#include <linux/vmalloc.h> 33#include <linux/module.h> 34#include <linux/sunrpc/addr.h> 35 36#include "internal.h" 37#include "nfs4session.h" 38#include "nfs4filelayout.h" 39 40#define NFSDBG_FACILITY NFSDBG_PNFS_LD 41 42static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; 43static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; 44 45/* 46 * Data server cache 47 * 48 * Data servers can be mapped to different device ids. 49 * nfs4_pnfs_ds reference counting 50 * - set to 1 on allocation 51 * - incremented when a device id maps a data server already in the cache. 52 * - decremented when deviceid is removed from the cache. 53 */ 54static DEFINE_SPINLOCK(nfs4_ds_cache_lock); 55static LIST_HEAD(nfs4_data_server_cache); 56 57/* Debug routines */ 58void 59print_ds(struct nfs4_pnfs_ds *ds) 60{ 61 if (ds == NULL) { 62 printk("%s NULL device\n", __func__); 63 return; 64 } 65 printk(" ds %s\n" 66 " ref count %d\n" 67 " client %p\n" 68 " cl_exchange_flags %x\n", 69 ds->ds_remotestr, 70 atomic_read(&ds->ds_count), ds->ds_clp, 71 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); 72} 73 74static bool 75same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) 76{ 77 struct sockaddr_in *a, *b; 78 struct sockaddr_in6 *a6, *b6; 79 80 if (addr1->sa_family != addr2->sa_family) 81 return false; 82 83 switch (addr1->sa_family) { 84 case AF_INET: 85 a = (struct sockaddr_in *)addr1; 86 b = (struct sockaddr_in *)addr2; 87 88 if (a->sin_addr.s_addr == b->sin_addr.s_addr && 89 a->sin_port == b->sin_port) 90 return true; 91 break; 92 93 case AF_INET6: 94 a6 = (struct sockaddr_in6 *)addr1; 95 b6 = (struct sockaddr_in6 *)addr2; 96 97 /* LINKLOCAL addresses must have matching scope_id */ 98 if (ipv6_addr_scope(&a6->sin6_addr) == 99 IPV6_ADDR_SCOPE_LINKLOCAL && 100 a6->sin6_scope_id != b6->sin6_scope_id) 101 return false; 102 103 if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && 104 a6->sin6_port == b6->sin6_port) 105 return true; 106 break; 107 108 default: 109 dprintk("%s: unhandled address family: %u\n", 110 __func__, addr1->sa_family); 111 return false; 112 } 113 114 return false; 115} 116 117static bool 118_same_data_server_addrs_locked(const struct list_head *dsaddrs1, 119 const struct list_head *dsaddrs2) 120{ 121 struct nfs4_pnfs_ds_addr *da1, *da2; 122 123 /* step through both lists, comparing as we go */ 124 for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), 125 da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); 126 da1 != NULL && da2 != NULL; 127 da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), 128 da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { 129 if (!same_sockaddr((struct sockaddr *)&da1->da_addr, 130 (struct sockaddr *)&da2->da_addr)) 131 return false; 132 } 133 if (da1 == NULL && da2 == NULL) 134 return true; 135 136 return false; 137} 138 139/* 140 * Lookup DS by addresses. nfs4_ds_cache_lock is held 141 */ 142static struct nfs4_pnfs_ds * 143_data_server_lookup_locked(const struct list_head *dsaddrs) 144{ 145 struct nfs4_pnfs_ds *ds; 146 147 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) 148 if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) 149 return ds; 150 return NULL; 151} 152 153/* 154 * Create an rpc connection to the nfs4_pnfs_ds data server 155 * Currently only supports IPv4 and IPv6 addresses 156 */ 157static int 158nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) 159{ 160 struct nfs_client *clp = ERR_PTR(-EIO); 161 struct nfs4_pnfs_ds_addr *da; 162 int status = 0; 163 164 dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, 165 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); 166 167 list_for_each_entry(da, &ds->ds_addrs, da_node) { 168 dprintk("%s: DS %s: trying address %s\n", 169 __func__, ds->ds_remotestr, da->da_remotestr); 170 171 clp = nfs4_set_ds_client(mds_srv->nfs_client, 172 (struct sockaddr *)&da->da_addr, 173 da->da_addrlen, IPPROTO_TCP, 174 dataserver_timeo, dataserver_retrans); 175 if (!IS_ERR(clp)) 176 break; 177 } 178 179 if (IS_ERR(clp)) { 180 status = PTR_ERR(clp); 181 goto out; 182 } 183 184 status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); 185 if (status) 186 goto out_put; 187 188 ds->ds_clp = clp; 189 dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); 190out: 191 return status; 192out_put: 193 nfs_put_client(clp); 194 goto out; 195} 196 197static void 198destroy_ds(struct nfs4_pnfs_ds *ds) 199{ 200 struct nfs4_pnfs_ds_addr *da; 201 202 dprintk("--> %s\n", __func__); 203 ifdebug(FACILITY) 204 print_ds(ds); 205 206 if (ds->ds_clp) 207 nfs_put_client(ds->ds_clp); 208 209 while (!list_empty(&ds->ds_addrs)) { 210 da = list_first_entry(&ds->ds_addrs, 211 struct nfs4_pnfs_ds_addr, 212 da_node); 213 list_del_init(&da->da_node); 214 kfree(da->da_remotestr); 215 kfree(da); 216 } 217 218 kfree(ds->ds_remotestr); 219 kfree(ds); 220} 221 222void 223nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) 224{ 225 struct nfs4_pnfs_ds *ds; 226 int i; 227 228 nfs4_print_deviceid(&dsaddr->id_node.deviceid); 229 230 for (i = 0; i < dsaddr->ds_num; i++) { 231 ds = dsaddr->ds_list[i]; 232 if (ds != NULL) { 233 if (atomic_dec_and_lock(&ds->ds_count, 234 &nfs4_ds_cache_lock)) { 235 list_del_init(&ds->ds_node); 236 spin_unlock(&nfs4_ds_cache_lock); 237 destroy_ds(ds); 238 } 239 } 240 } 241 kfree(dsaddr->stripe_indices); 242 kfree(dsaddr); 243} 244 245/* 246 * Create a string with a human readable address and port to avoid 247 * complicated setup around many dprinks. 248 */ 249static char * 250nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) 251{ 252 struct nfs4_pnfs_ds_addr *da; 253 char *remotestr; 254 size_t len; 255 char *p; 256 257 len = 3; /* '{', '}' and eol */ 258 list_for_each_entry(da, dsaddrs, da_node) { 259 len += strlen(da->da_remotestr) + 1; /* string plus comma */ 260 } 261 262 remotestr = kzalloc(len, gfp_flags); 263 if (!remotestr) 264 return NULL; 265 266 p = remotestr; 267 *(p++) = '{'; 268 len--; 269 list_for_each_entry(da, dsaddrs, da_node) { 270 size_t ll = strlen(da->da_remotestr); 271 272 if (ll > len) 273 goto out_err; 274 275 memcpy(p, da->da_remotestr, ll); 276 p += ll; 277 len -= ll; 278 279 if (len < 1) 280 goto out_err; 281 (*p++) = ','; 282 len--; 283 } 284 if (len < 2) 285 goto out_err; 286 *(p++) = '}'; 287 *p = '\0'; 288 return remotestr; 289out_err: 290 kfree(remotestr); 291 return NULL; 292} 293 294static struct nfs4_pnfs_ds * 295nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) 296{ 297 struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; 298 char *remotestr; 299 300 if (list_empty(dsaddrs)) { 301 dprintk("%s: no addresses defined\n", __func__); 302 goto out; 303 } 304 305 ds = kzalloc(sizeof(*ds), gfp_flags); 306 if (!ds) 307 goto out; 308 309 /* this is only used for debugging, so it's ok if its NULL */ 310 remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); 311 312 spin_lock(&nfs4_ds_cache_lock); 313 tmp_ds = _data_server_lookup_locked(dsaddrs); 314 if (tmp_ds == NULL) { 315 INIT_LIST_HEAD(&ds->ds_addrs); 316 list_splice_init(dsaddrs, &ds->ds_addrs); 317 ds->ds_remotestr = remotestr; 318 atomic_set(&ds->ds_count, 1); 319 INIT_LIST_HEAD(&ds->ds_node); 320 ds->ds_clp = NULL; 321 list_add(&ds->ds_node, &nfs4_data_server_cache); 322 dprintk("%s add new data server %s\n", __func__, 323 ds->ds_remotestr); 324 } else { 325 kfree(remotestr); 326 kfree(ds); 327 atomic_inc(&tmp_ds->ds_count); 328 dprintk("%s data server %s found, inc'ed ds_count to %d\n", 329 __func__, tmp_ds->ds_remotestr, 330 atomic_read(&tmp_ds->ds_count)); 331 ds = tmp_ds; 332 } 333 spin_unlock(&nfs4_ds_cache_lock); 334out: 335 return ds; 336} 337 338/* 339 * Currently only supports ipv4, ipv6 and one multi-path address. 340 */ 341static struct nfs4_pnfs_ds_addr * 342decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) 343{ 344 struct nfs4_pnfs_ds_addr *da = NULL; 345 char *buf, *portstr; 346 __be16 port; 347 int nlen, rlen; 348 int tmp[2]; 349 __be32 *p; 350 char *netid, *match_netid; 351 size_t len, match_netid_len; 352 char *startsep = ""; 353 char *endsep = ""; 354 355 356 /* r_netid */ 357 p = xdr_inline_decode(streamp, 4); 358 if (unlikely(!p)) 359 goto out_err; 360 nlen = be32_to_cpup(p++); 361 362 p = xdr_inline_decode(streamp, nlen); 363 if (unlikely(!p)) 364 goto out_err; 365 366 netid = kmalloc(nlen+1, gfp_flags); 367 if (unlikely(!netid)) 368 goto out_err; 369 370 netid[nlen] = '\0'; 371 memcpy(netid, p, nlen); 372 373 /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ 374 p = xdr_inline_decode(streamp, 4); 375 if (unlikely(!p)) 376 goto out_free_netid; 377 rlen = be32_to_cpup(p); 378 379 p = xdr_inline_decode(streamp, rlen); 380 if (unlikely(!p)) 381 goto out_free_netid; 382 383 /* port is ".ABC.DEF", 8 chars max */ 384 if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { 385 dprintk("%s: Invalid address, length %d\n", __func__, 386 rlen); 387 goto out_free_netid; 388 } 389 buf = kmalloc(rlen + 1, gfp_flags); 390 if (!buf) { 391 dprintk("%s: Not enough memory\n", __func__); 392 goto out_free_netid; 393 } 394 buf[rlen] = '\0'; 395 memcpy(buf, p, rlen); 396 397 /* replace port '.' with '-' */ 398 portstr = strrchr(buf, '.'); 399 if (!portstr) { 400 dprintk("%s: Failed finding expected dot in port\n", 401 __func__); 402 goto out_free_buf; 403 } 404 *portstr = '-'; 405 406 /* find '.' between address and port */ 407 portstr = strrchr(buf, '.'); 408 if (!portstr) { 409 dprintk("%s: Failed finding expected dot between address and " 410 "port\n", __func__); 411 goto out_free_buf; 412 } 413 *portstr = '\0'; 414 415 da = kzalloc(sizeof(*da), gfp_flags); 416 if (unlikely(!da)) 417 goto out_free_buf; 418 419 INIT_LIST_HEAD(&da->da_node); 420 421 if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, 422 sizeof(da->da_addr))) { 423 dprintk("%s: error parsing address %s\n", __func__, buf); 424 goto out_free_da; 425 } 426 427 portstr++; 428 sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); 429 port = htons((tmp[0] << 8) | (tmp[1])); 430 431 switch (da->da_addr.ss_family) { 432 case AF_INET: 433 ((struct sockaddr_in *)&da->da_addr)->sin_port = port; 434 da->da_addrlen = sizeof(struct sockaddr_in); 435 match_netid = "tcp"; 436 match_netid_len = 3; 437 break; 438 439 case AF_INET6: 440 ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; 441 da->da_addrlen = sizeof(struct sockaddr_in6); 442 match_netid = "tcp6"; 443 match_netid_len = 4; 444 startsep = "["; 445 endsep = "]"; 446 break; 447 448 default: 449 dprintk("%s: unsupported address family: %u\n", 450 __func__, da->da_addr.ss_family); 451 goto out_free_da; 452 } 453 454 if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { 455 dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", 456 __func__, netid, match_netid); 457 goto out_free_da; 458 } 459 460 /* save human readable address */ 461 len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; 462 da->da_remotestr = kzalloc(len, gfp_flags); 463 464 /* NULL is ok, only used for dprintk */ 465 if (da->da_remotestr) 466 snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, 467 buf, endsep, ntohs(port)); 468 469 dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); 470 kfree(buf); 471 kfree(netid); 472 return da; 473 474out_free_da: 475 kfree(da); 476out_free_buf: 477 dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); 478 kfree(buf); 479out_free_netid: 480 kfree(netid); 481out_err: 482 return NULL; 483} 484 485/* Decode opaque device data and return the result */ 486static struct nfs4_file_layout_dsaddr* 487decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) 488{ 489 int i; 490 u32 cnt, num; 491 u8 *indexp; 492 __be32 *p; 493 u8 *stripe_indices; 494 u8 max_stripe_index; 495 struct nfs4_file_layout_dsaddr *dsaddr = NULL; 496 struct xdr_stream stream; 497 struct xdr_buf buf; 498 struct page *scratch; 499 struct list_head dsaddrs; 500 struct nfs4_pnfs_ds_addr *da; 501 502 /* set up xdr stream */ 503 scratch = alloc_page(gfp_flags); 504 if (!scratch) 505 goto out_err; 506 507 xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); 508 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); 509 510 /* Get the stripe count (number of stripe index) */ 511 p = xdr_inline_decode(&stream, 4); 512 if (unlikely(!p)) 513 goto out_err_free_scratch; 514 515 cnt = be32_to_cpup(p); 516 dprintk("%s stripe count %d\n", __func__, cnt); 517 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { 518 printk(KERN_WARNING "NFS: %s: stripe count %d greater than " 519 "supported maximum %d\n", __func__, 520 cnt, NFS4_PNFS_MAX_STRIPE_CNT); 521 goto out_err_free_scratch; 522 } 523 524 /* read stripe indices */ 525 stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); 526 if (!stripe_indices) 527 goto out_err_free_scratch; 528 529 p = xdr_inline_decode(&stream, cnt << 2); 530 if (unlikely(!p)) 531 goto out_err_free_stripe_indices; 532 533 indexp = &stripe_indices[0]; 534 max_stripe_index = 0; 535 for (i = 0; i < cnt; i++) { 536 *indexp = be32_to_cpup(p++); 537 max_stripe_index = max(max_stripe_index, *indexp); 538 indexp++; 539 } 540 541 /* Check the multipath list count */ 542 p = xdr_inline_decode(&stream, 4); 543 if (unlikely(!p)) 544 goto out_err_free_stripe_indices; 545 546 num = be32_to_cpup(p); 547 dprintk("%s ds_num %u\n", __func__, num); 548 if (num > NFS4_PNFS_MAX_MULTI_CNT) { 549 printk(KERN_WARNING "NFS: %s: multipath count %d greater than " 550 "supported maximum %d\n", __func__, 551 num, NFS4_PNFS_MAX_MULTI_CNT); 552 goto out_err_free_stripe_indices; 553 } 554 555 /* validate stripe indices are all < num */ 556 if (max_stripe_index >= num) { 557 printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", 558 __func__, max_stripe_index, num); 559 goto out_err_free_stripe_indices; 560 } 561 562 dsaddr = kzalloc(sizeof(*dsaddr) + 563 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), 564 gfp_flags); 565 if (!dsaddr) 566 goto out_err_free_stripe_indices; 567 568 dsaddr->stripe_count = cnt; 569 dsaddr->stripe_indices = stripe_indices; 570 stripe_indices = NULL; 571 dsaddr->ds_num = num; 572 nfs4_init_deviceid_node(&dsaddr->id_node, 573 NFS_SERVER(ino)->pnfs_curr_ld, 574 NFS_SERVER(ino)->nfs_client, 575 &pdev->dev_id); 576 577 INIT_LIST_HEAD(&dsaddrs); 578 579 for (i = 0; i < dsaddr->ds_num; i++) { 580 int j; 581 u32 mp_count; 582 583 p = xdr_inline_decode(&stream, 4); 584 if (unlikely(!p)) 585 goto out_err_free_deviceid; 586 587 mp_count = be32_to_cpup(p); /* multipath count */ 588 for (j = 0; j < mp_count; j++) { 589 da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net, 590 &stream, gfp_flags); 591 if (da) 592 list_add_tail(&da->da_node, &dsaddrs); 593 } 594 if (list_empty(&dsaddrs)) { 595 dprintk("%s: no suitable DS addresses found\n", 596 __func__); 597 goto out_err_free_deviceid; 598 } 599 600 dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); 601 if (!dsaddr->ds_list[i]) 602 goto out_err_drain_dsaddrs; 603 604 /* If DS was already in cache, free ds addrs */ 605 while (!list_empty(&dsaddrs)) { 606 da = list_first_entry(&dsaddrs, 607 struct nfs4_pnfs_ds_addr, 608 da_node); 609 list_del_init(&da->da_node); 610 kfree(da->da_remotestr); 611 kfree(da); 612 } 613 } 614 615 __free_page(scratch); 616 return dsaddr; 617 618out_err_drain_dsaddrs: 619 while (!list_empty(&dsaddrs)) { 620 da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, 621 da_node); 622 list_del_init(&da->da_node); 623 kfree(da->da_remotestr); 624 kfree(da); 625 } 626out_err_free_deviceid: 627 nfs4_fl_free_deviceid(dsaddr); 628 /* stripe_indicies was part of dsaddr */ 629 goto out_err_free_scratch; 630out_err_free_stripe_indices: 631 kfree(stripe_indices); 632out_err_free_scratch: 633 __free_page(scratch); 634out_err: 635 dprintk("%s ERROR: returning NULL\n", __func__); 636 return NULL; 637} 638 639/* 640 * Decode the opaque device specified in 'dev' and add it to the cache of 641 * available devices. 642 */ 643static struct nfs4_file_layout_dsaddr * 644decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) 645{ 646 struct nfs4_deviceid_node *d; 647 struct nfs4_file_layout_dsaddr *n, *new; 648 649 new = decode_device(inode, dev, gfp_flags); 650 if (!new) { 651 printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", 652 __func__); 653 return NULL; 654 } 655 656 d = nfs4_insert_deviceid_node(&new->id_node); 657 n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); 658 if (n != new) { 659 nfs4_fl_free_deviceid(new); 660 return n; 661 } 662 663 return new; 664} 665 666/* 667 * Retrieve the information for dev_id, add it to the list 668 * of available devices, and return it. 669 */ 670struct nfs4_file_layout_dsaddr * 671filelayout_get_device_info(struct inode *inode, 672 struct nfs4_deviceid *dev_id, 673 struct rpc_cred *cred, 674 gfp_t gfp_flags) 675{ 676 struct pnfs_device *pdev = NULL; 677 u32 max_resp_sz; 678 int max_pages; 679 struct page **pages = NULL; 680 struct nfs4_file_layout_dsaddr *dsaddr = NULL; 681 int rc, i; 682 struct nfs_server *server = NFS_SERVER(inode); 683 684 /* 685 * Use the session max response size as the basis for setting 686 * GETDEVICEINFO's maxcount 687 */ 688 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 689 max_pages = nfs_page_array_len(0, max_resp_sz); 690 dprintk("%s inode %p max_resp_sz %u max_pages %d\n", 691 __func__, inode, max_resp_sz, max_pages); 692 693 pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); 694 if (pdev == NULL) 695 return NULL; 696 697 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); 698 if (pages == NULL) { 699 kfree(pdev); 700 return NULL; 701 } 702 for (i = 0; i < max_pages; i++) { 703 pages[i] = alloc_page(gfp_flags); 704 if (!pages[i]) 705 goto out_free; 706 } 707 708 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); 709 pdev->layout_type = LAYOUT_NFSV4_1_FILES; 710 pdev->pages = pages; 711 pdev->pgbase = 0; 712 pdev->pglen = max_resp_sz; 713 pdev->mincount = 0; 714 pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; 715 716 rc = nfs4_proc_getdeviceinfo(server, pdev, cred); 717 dprintk("%s getdevice info returns %d\n", __func__, rc); 718 if (rc) 719 goto out_free; 720 721 /* 722 * Found new device, need to decode it and then add it to the 723 * list of known devices for this mountpoint. 724 */ 725 dsaddr = decode_and_add_device(inode, pdev, gfp_flags); 726out_free: 727 for (i = 0; i < max_pages; i++) 728 __free_page(pages[i]); 729 kfree(pages); 730 kfree(pdev); 731 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); 732 return dsaddr; 733} 734 735void 736nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) 737{ 738 nfs4_put_deviceid_node(&dsaddr->id_node); 739} 740 741/* 742 * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit 743 * Then: ((res + fsi) % dsaddr->stripe_count) 744 */ 745u32 746nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset) 747{ 748 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); 749 u64 tmp; 750 751 tmp = offset - flseg->pattern_offset; 752 do_div(tmp, flseg->stripe_unit); 753 tmp += flseg->first_stripe_index; 754 return do_div(tmp, flseg->dsaddr->stripe_count); 755} 756 757u32 758nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j) 759{ 760 return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j]; 761} 762 763struct nfs_fh * 764nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) 765{ 766 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); 767 u32 i; 768 769 if (flseg->stripe_type == STRIPE_SPARSE) { 770 if (flseg->num_fh == 1) 771 i = 0; 772 else if (flseg->num_fh == 0) 773 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ 774 return NULL; 775 else 776 i = nfs4_fl_calc_ds_index(lseg, j); 777 } else 778 i = j; 779 return flseg->fh_array[i]; 780} 781 782static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) 783{ 784 might_sleep(); 785 wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, 786 nfs_wait_bit_killable, TASK_KILLABLE); 787} 788 789static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) 790{ 791 smp_mb__before_clear_bit(); 792 clear_bit(NFS4DS_CONNECTING, &ds->ds_state); 793 smp_mb__after_clear_bit(); 794 wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); 795} 796 797 798struct nfs4_pnfs_ds * 799nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) 800{ 801 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; 802 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 803 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); 804 805 if (filelayout_test_devid_unavailable(devid)) 806 return NULL; 807 808 if (ds == NULL) { 809 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 810 __func__, ds_idx); 811 filelayout_mark_devid_invalid(devid); 812 return NULL; 813 } 814 if (ds->ds_clp) 815 return ds; 816 817 if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { 818 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); 819 int err; 820 821 err = nfs4_ds_connect(s, ds); 822 if (err) { 823 nfs4_mark_deviceid_unavailable(devid); 824 ds = NULL; 825 } 826 nfs4_clear_ds_conn_bit(ds); 827 } else { 828 /* Either ds is connected, or ds is NULL */ 829 nfs4_wait_ds_connect(ds); 830 } 831 return ds; 832} 833 834module_param(dataserver_retrans, uint, 0644); 835MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " 836 "retries a request before it attempts further " 837 " recovery action."); 838module_param(dataserver_timeo, uint, 0644); 839MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " 840 "NFSv4.1 client waits for a response from a " 841 " data server before it retries an NFS request.");