/* Snapshot of this file at v2.6.32 (725 lines, 18 kB) */
1/* AFS volume location management 2 * 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12#include <linux/kernel.h> 13#include <linux/module.h> 14#include <linux/init.h> 15#include <linux/sched.h> 16#include "internal.h" 17 18static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ 19static unsigned afs_vlocation_update_timeout = 10 * 60; 20 21static void afs_vlocation_reaper(struct work_struct *); 22static void afs_vlocation_updater(struct work_struct *); 23 24static LIST_HEAD(afs_vlocation_updates); 25static LIST_HEAD(afs_vlocation_graveyard); 26static DEFINE_SPINLOCK(afs_vlocation_updates_lock); 27static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock); 28static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper); 29static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater); 30static struct workqueue_struct *afs_vlocation_update_worker; 31 32/* 33 * iterate through the VL servers in a cell until one of them admits knowing 34 * about the volume in question 35 */ 36static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, 37 struct key *key, 38 struct afs_cache_vlocation *vldb) 39{ 40 struct afs_cell *cell = vl->cell; 41 struct in_addr addr; 42 int count, ret; 43 44 _enter("%s,%s", cell->name, vl->vldb.name); 45 46 down_write(&vl->cell->vl_sem); 47 ret = -ENOMEDIUM; 48 for (count = cell->vl_naddrs; count > 0; count--) { 49 addr = cell->vl_addrs[cell->vl_curr_svix]; 50 51 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 52 53 /* attempt to access the VL server */ 54 ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb, 55 &afs_sync_call); 56 switch 
(ret) { 57 case 0: 58 goto out; 59 case -ENOMEM: 60 case -ENONET: 61 case -ENETUNREACH: 62 case -EHOSTUNREACH: 63 case -ECONNREFUSED: 64 if (ret == -ENOMEM || ret == -ENONET) 65 goto out; 66 goto rotate; 67 case -ENOMEDIUM: 68 case -EKEYREJECTED: 69 case -EKEYEXPIRED: 70 goto out; 71 default: 72 ret = -EIO; 73 goto rotate; 74 } 75 76 /* rotate the server records upon lookup failure */ 77 rotate: 78 cell->vl_curr_svix++; 79 cell->vl_curr_svix %= cell->vl_naddrs; 80 } 81 82out: 83 up_write(&vl->cell->vl_sem); 84 _leave(" = %d", ret); 85 return ret; 86} 87 88/* 89 * iterate through the VL servers in a cell until one of them admits knowing 90 * about the volume in question 91 */ 92static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, 93 struct key *key, 94 afs_volid_t volid, 95 afs_voltype_t voltype, 96 struct afs_cache_vlocation *vldb) 97{ 98 struct afs_cell *cell = vl->cell; 99 struct in_addr addr; 100 int count, ret; 101 102 _enter("%s,%x,%d,", cell->name, volid, voltype); 103 104 down_write(&vl->cell->vl_sem); 105 ret = -ENOMEDIUM; 106 for (count = cell->vl_naddrs; count > 0; count--) { 107 addr = cell->vl_addrs[cell->vl_curr_svix]; 108 109 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 110 111 /* attempt to access the VL server */ 112 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb, 113 &afs_sync_call); 114 switch (ret) { 115 case 0: 116 goto out; 117 case -ENOMEM: 118 case -ENONET: 119 case -ENETUNREACH: 120 case -EHOSTUNREACH: 121 case -ECONNREFUSED: 122 if (ret == -ENOMEM || ret == -ENONET) 123 goto out; 124 goto rotate; 125 case -EBUSY: 126 vl->upd_busy_cnt++; 127 if (vl->upd_busy_cnt <= 3) { 128 if (vl->upd_busy_cnt > 1) { 129 /* second+ BUSY - sleep a little bit */ 130 set_current_state(TASK_UNINTERRUPTIBLE); 131 schedule_timeout(1); 132 __set_current_state(TASK_RUNNING); 133 } 134 continue; 135 } 136 break; 137 case -ENOMEDIUM: 138 vl->upd_rej_cnt++; 139 goto rotate; 140 default: 141 ret = -EIO; 142 goto rotate; 
143 } 144 145 /* rotate the server records upon lookup failure */ 146 rotate: 147 cell->vl_curr_svix++; 148 cell->vl_curr_svix %= cell->vl_naddrs; 149 vl->upd_busy_cnt = 0; 150 } 151 152out: 153 if (ret < 0 && vl->upd_rej_cnt > 0) { 154 printk(KERN_NOTICE "kAFS:" 155 " Active volume no longer valid '%s'\n", 156 vl->vldb.name); 157 vl->valid = 0; 158 ret = -ENOMEDIUM; 159 } 160 161 up_write(&vl->cell->vl_sem); 162 _leave(" = %d", ret); 163 return ret; 164} 165 166/* 167 * allocate a volume location record 168 */ 169static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, 170 const char *name, 171 size_t namesz) 172{ 173 struct afs_vlocation *vl; 174 175 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); 176 if (vl) { 177 vl->cell = cell; 178 vl->state = AFS_VL_NEW; 179 atomic_set(&vl->usage, 1); 180 INIT_LIST_HEAD(&vl->link); 181 INIT_LIST_HEAD(&vl->grave); 182 INIT_LIST_HEAD(&vl->update); 183 init_waitqueue_head(&vl->waitq); 184 spin_lock_init(&vl->lock); 185 memcpy(vl->vldb.name, name, namesz); 186 } 187 188 _leave(" = %p", vl); 189 return vl; 190} 191 192/* 193 * update record if we found it in the cache 194 */ 195static int afs_vlocation_update_record(struct afs_vlocation *vl, 196 struct key *key, 197 struct afs_cache_vlocation *vldb) 198{ 199 afs_voltype_t voltype; 200 afs_volid_t vid; 201 int ret; 202 203 /* try to look up a cached volume in the cell VL databases by ID */ 204 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 205 vl->vldb.name, 206 vl->vldb.vidmask, 207 ntohl(vl->vldb.servers[0].s_addr), 208 vl->vldb.srvtmask[0], 209 ntohl(vl->vldb.servers[1].s_addr), 210 vl->vldb.srvtmask[1], 211 ntohl(vl->vldb.servers[2].s_addr), 212 vl->vldb.srvtmask[2]); 213 214 _debug("Vids: %08x %08x %08x", 215 vl->vldb.vid[0], 216 vl->vldb.vid[1], 217 vl->vldb.vid[2]); 218 219 if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { 220 vid = vl->vldb.vid[0]; 221 voltype = AFSVL_RWVOL; 222 } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { 223 vid = 
vl->vldb.vid[1]; 224 voltype = AFSVL_ROVOL; 225 } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { 226 vid = vl->vldb.vid[2]; 227 voltype = AFSVL_BACKVOL; 228 } else { 229 BUG(); 230 vid = 0; 231 voltype = 0; 232 } 233 234 /* contact the server to make sure the volume is still available 235 * - TODO: need to handle disconnected operation here 236 */ 237 ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb); 238 switch (ret) { 239 /* net error */ 240 default: 241 printk(KERN_WARNING "kAFS:" 242 " failed to update volume '%s' (%x) up in '%s': %d\n", 243 vl->vldb.name, vid, vl->cell->name, ret); 244 _leave(" = %d", ret); 245 return ret; 246 247 /* pulled from local cache into memory */ 248 case 0: 249 _leave(" = 0"); 250 return 0; 251 252 /* uh oh... looks like the volume got deleted */ 253 case -ENOMEDIUM: 254 printk(KERN_ERR "kAFS:" 255 " volume '%s' (%x) does not exist '%s'\n", 256 vl->vldb.name, vid, vl->cell->name); 257 258 /* TODO: make existing record unavailable */ 259 _leave(" = %d", ret); 260 return ret; 261 } 262} 263 264/* 265 * apply the update to a VL record 266 */ 267static void afs_vlocation_apply_update(struct afs_vlocation *vl, 268 struct afs_cache_vlocation *vldb) 269{ 270 _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 271 vldb->name, vldb->vidmask, 272 ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0], 273 ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1], 274 ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]); 275 276 _debug("Vids: %08x %08x %08x", 277 vldb->vid[0], vldb->vid[1], vldb->vid[2]); 278 279 if (strcmp(vldb->name, vl->vldb.name) != 0) 280 printk(KERN_NOTICE "kAFS:" 281 " name of volume '%s' changed to '%s' on server\n", 282 vl->vldb.name, vldb->name); 283 284 vl->vldb = *vldb; 285 286#ifdef CONFIG_AFS_FSCACHE 287 fscache_update_cookie(vl->cache); 288#endif 289} 290 291/* 292 * fill in a volume location record, consulting the cache and the VL server 293 * both 294 */ 295static int 
afs_vlocation_fill_in_record(struct afs_vlocation *vl, 296 struct key *key) 297{ 298 struct afs_cache_vlocation vldb; 299 int ret; 300 301 _enter(""); 302 303 ASSERTCMP(vl->valid, ==, 0); 304 305 memset(&vldb, 0, sizeof(vldb)); 306 307 /* see if we have an in-cache copy (will set vl->valid if there is) */ 308#ifdef CONFIG_AFS_FSCACHE 309 vl->cache = fscache_acquire_cookie(vl->cell->cache, 310 &afs_vlocation_cache_index_def, vl); 311#endif 312 313 if (vl->valid) { 314 /* try to update a known volume in the cell VL databases by 315 * ID as the name may have changed */ 316 _debug("found in cache"); 317 ret = afs_vlocation_update_record(vl, key, &vldb); 318 } else { 319 /* try to look up an unknown volume in the cell VL databases by 320 * name */ 321 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); 322 if (ret < 0) { 323 printk("kAFS: failed to locate '%s' in cell '%s'\n", 324 vl->vldb.name, vl->cell->name); 325 return ret; 326 } 327 } 328 329 afs_vlocation_apply_update(vl, &vldb); 330 _leave(" = 0"); 331 return 0; 332} 333 334/* 335 * queue a vlocation record for updates 336 */ 337static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) 338{ 339 struct afs_vlocation *xvl; 340 341 /* wait at least 10 minutes before updating... */ 342 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 343 344 spin_lock(&afs_vlocation_updates_lock); 345 346 if (!list_empty(&afs_vlocation_updates)) { 347 /* ... 
but wait at least 1 second more than the newest record 348 * already queued so that we don't spam the VL server suddenly 349 * with lots of requests 350 */ 351 xvl = list_entry(afs_vlocation_updates.prev, 352 struct afs_vlocation, update); 353 if (vl->update_at <= xvl->update_at) 354 vl->update_at = xvl->update_at + 1; 355 } else { 356 queue_delayed_work(afs_vlocation_update_worker, 357 &afs_vlocation_update, 358 afs_vlocation_update_timeout * HZ); 359 } 360 361 list_add_tail(&vl->update, &afs_vlocation_updates); 362 spin_unlock(&afs_vlocation_updates_lock); 363} 364 365/* 366 * lookup volume location 367 * - iterate through the VL servers in a cell until one of them admits knowing 368 * about the volume in question 369 * - lookup in the local cache if not able to find on the VL server 370 * - insert/update in the local cache if did get a VL response 371 */ 372struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell, 373 struct key *key, 374 const char *name, 375 size_t namesz) 376{ 377 struct afs_vlocation *vl; 378 int ret; 379 380 _enter("{%s},{%x},%*.*s,%zu", 381 cell->name, key_serial(key), 382 (int) namesz, (int) namesz, name, namesz); 383 384 if (namesz >= sizeof(vl->vldb.name)) { 385 _leave(" = -ENAMETOOLONG"); 386 return ERR_PTR(-ENAMETOOLONG); 387 } 388 389 /* see if we have an in-memory copy first */ 390 down_write(&cell->vl_sem); 391 spin_lock(&cell->vl_lock); 392 list_for_each_entry(vl, &cell->vl_list, link) { 393 if (vl->vldb.name[namesz] != '\0') 394 continue; 395 if (memcmp(vl->vldb.name, name, namesz) == 0) 396 goto found_in_memory; 397 } 398 spin_unlock(&cell->vl_lock); 399 400 /* not in the cell's in-memory lists - create a new record */ 401 vl = afs_vlocation_alloc(cell, name, namesz); 402 if (!vl) { 403 up_write(&cell->vl_sem); 404 return ERR_PTR(-ENOMEM); 405 } 406 407 afs_get_cell(cell); 408 409 list_add_tail(&vl->link, &cell->vl_list); 410 vl->state = AFS_VL_CREATING; 411 up_write(&cell->vl_sem); 412 413fill_in_record: 414 ret = 
afs_vlocation_fill_in_record(vl, key); 415 if (ret < 0) 416 goto error_abandon; 417 spin_lock(&vl->lock); 418 vl->state = AFS_VL_VALID; 419 spin_unlock(&vl->lock); 420 wake_up(&vl->waitq); 421 422 /* update volume entry in local cache */ 423#ifdef CONFIG_AFS_FSCACHE 424 fscache_update_cookie(vl->cache); 425#endif 426 427 /* schedule for regular updates */ 428 afs_vlocation_queue_for_updates(vl); 429 goto success; 430 431found_in_memory: 432 /* found in memory */ 433 _debug("found in memory"); 434 atomic_inc(&vl->usage); 435 spin_unlock(&cell->vl_lock); 436 if (!list_empty(&vl->grave)) { 437 spin_lock(&afs_vlocation_graveyard_lock); 438 list_del_init(&vl->grave); 439 spin_unlock(&afs_vlocation_graveyard_lock); 440 } 441 up_write(&cell->vl_sem); 442 443 /* see if it was an abandoned record that we might try filling in */ 444 spin_lock(&vl->lock); 445 while (vl->state != AFS_VL_VALID) { 446 afs_vlocation_state_t state = vl->state; 447 448 _debug("invalid [state %d]", state); 449 450 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) { 451 vl->state = AFS_VL_CREATING; 452 spin_unlock(&vl->lock); 453 goto fill_in_record; 454 } 455 456 /* must now wait for creation or update by someone else to 457 * complete */ 458 _debug("wait"); 459 460 spin_unlock(&vl->lock); 461 ret = wait_event_interruptible(vl->waitq, 462 vl->state == AFS_VL_NEW || 463 vl->state == AFS_VL_VALID || 464 vl->state == AFS_VL_NO_VOLUME); 465 if (ret < 0) 466 goto error; 467 spin_lock(&vl->lock); 468 } 469 spin_unlock(&vl->lock); 470 471success: 472 _leave(" = %p", vl); 473 return vl; 474 475error_abandon: 476 spin_lock(&vl->lock); 477 vl->state = AFS_VL_NEW; 478 spin_unlock(&vl->lock); 479 wake_up(&vl->waitq); 480error: 481 ASSERT(vl != NULL); 482 afs_put_vlocation(vl); 483 _leave(" = %d", ret); 484 return ERR_PTR(ret); 485} 486 487/* 488 * finish using a volume location record 489 */ 490void afs_put_vlocation(struct afs_vlocation *vl) 491{ 492 if (!vl) 493 return; 494 495 _enter("%s", 
vl->vldb.name); 496 497 ASSERTCMP(atomic_read(&vl->usage), >, 0); 498 499 if (likely(!atomic_dec_and_test(&vl->usage))) { 500 _leave(""); 501 return; 502 } 503 504 spin_lock(&afs_vlocation_graveyard_lock); 505 if (atomic_read(&vl->usage) == 0) { 506 _debug("buried"); 507 list_move_tail(&vl->grave, &afs_vlocation_graveyard); 508 vl->time_of_death = get_seconds(); 509 schedule_delayed_work(&afs_vlocation_reap, 510 afs_vlocation_timeout * HZ); 511 512 /* suspend updates on this record */ 513 if (!list_empty(&vl->update)) { 514 spin_lock(&afs_vlocation_updates_lock); 515 list_del_init(&vl->update); 516 spin_unlock(&afs_vlocation_updates_lock); 517 } 518 } 519 spin_unlock(&afs_vlocation_graveyard_lock); 520 _leave(" [killed?]"); 521} 522 523/* 524 * destroy a dead volume location record 525 */ 526static void afs_vlocation_destroy(struct afs_vlocation *vl) 527{ 528 _enter("%p", vl); 529 530#ifdef CONFIG_AFS_FSCACHE 531 fscache_relinquish_cookie(vl->cache, 0); 532#endif 533 afs_put_cell(vl->cell); 534 kfree(vl); 535} 536 537/* 538 * reap dead volume location records 539 */ 540static void afs_vlocation_reaper(struct work_struct *work) 541{ 542 LIST_HEAD(corpses); 543 struct afs_vlocation *vl; 544 unsigned long delay, expiry; 545 time_t now; 546 547 _enter(""); 548 549 now = get_seconds(); 550 spin_lock(&afs_vlocation_graveyard_lock); 551 552 while (!list_empty(&afs_vlocation_graveyard)) { 553 vl = list_entry(afs_vlocation_graveyard.next, 554 struct afs_vlocation, grave); 555 556 _debug("check %p", vl); 557 558 /* the queue is ordered most dead first */ 559 expiry = vl->time_of_death + afs_vlocation_timeout; 560 if (expiry > now) { 561 delay = (expiry - now) * HZ; 562 _debug("delay %lu", delay); 563 if (!schedule_delayed_work(&afs_vlocation_reap, 564 delay)) { 565 cancel_delayed_work(&afs_vlocation_reap); 566 schedule_delayed_work(&afs_vlocation_reap, 567 delay); 568 } 569 break; 570 } 571 572 spin_lock(&vl->cell->vl_lock); 573 if (atomic_read(&vl->usage) > 0) { 574 
_debug("no reap"); 575 list_del_init(&vl->grave); 576 } else { 577 _debug("reap"); 578 list_move_tail(&vl->grave, &corpses); 579 list_del_init(&vl->link); 580 } 581 spin_unlock(&vl->cell->vl_lock); 582 } 583 584 spin_unlock(&afs_vlocation_graveyard_lock); 585 586 /* now reap the corpses we've extracted */ 587 while (!list_empty(&corpses)) { 588 vl = list_entry(corpses.next, struct afs_vlocation, grave); 589 list_del(&vl->grave); 590 afs_vlocation_destroy(vl); 591 } 592 593 _leave(""); 594} 595 596/* 597 * initialise the VL update process 598 */ 599int __init afs_vlocation_update_init(void) 600{ 601 afs_vlocation_update_worker = 602 create_singlethread_workqueue("kafs_vlupdated"); 603 return afs_vlocation_update_worker ? 0 : -ENOMEM; 604} 605 606/* 607 * discard all the volume location records for rmmod 608 */ 609void afs_vlocation_purge(void) 610{ 611 afs_vlocation_timeout = 0; 612 613 spin_lock(&afs_vlocation_updates_lock); 614 list_del_init(&afs_vlocation_updates); 615 spin_unlock(&afs_vlocation_updates_lock); 616 cancel_delayed_work(&afs_vlocation_update); 617 queue_delayed_work(afs_vlocation_update_worker, 618 &afs_vlocation_update, 0); 619 destroy_workqueue(afs_vlocation_update_worker); 620 621 cancel_delayed_work(&afs_vlocation_reap); 622 schedule_delayed_work(&afs_vlocation_reap, 0); 623} 624 625/* 626 * update a volume location 627 */ 628static void afs_vlocation_updater(struct work_struct *work) 629{ 630 struct afs_cache_vlocation vldb; 631 struct afs_vlocation *vl, *xvl; 632 time_t now; 633 long timeout; 634 int ret; 635 636 _enter(""); 637 638 now = get_seconds(); 639 640 /* find a record to update */ 641 spin_lock(&afs_vlocation_updates_lock); 642 for (;;) { 643 if (list_empty(&afs_vlocation_updates)) { 644 spin_unlock(&afs_vlocation_updates_lock); 645 _leave(" [nothing]"); 646 return; 647 } 648 649 vl = list_entry(afs_vlocation_updates.next, 650 struct afs_vlocation, update); 651 if (atomic_read(&vl->usage) > 0) 652 break; 653 
list_del_init(&vl->update); 654 } 655 656 timeout = vl->update_at - now; 657 if (timeout > 0) { 658 queue_delayed_work(afs_vlocation_update_worker, 659 &afs_vlocation_update, timeout * HZ); 660 spin_unlock(&afs_vlocation_updates_lock); 661 _leave(" [nothing]"); 662 return; 663 } 664 665 list_del_init(&vl->update); 666 atomic_inc(&vl->usage); 667 spin_unlock(&afs_vlocation_updates_lock); 668 669 /* we can now perform the update */ 670 _debug("update %s", vl->vldb.name); 671 vl->state = AFS_VL_UPDATING; 672 vl->upd_rej_cnt = 0; 673 vl->upd_busy_cnt = 0; 674 675 ret = afs_vlocation_update_record(vl, NULL, &vldb); 676 spin_lock(&vl->lock); 677 switch (ret) { 678 case 0: 679 afs_vlocation_apply_update(vl, &vldb); 680 vl->state = AFS_VL_VALID; 681 break; 682 case -ENOMEDIUM: 683 vl->state = AFS_VL_VOLUME_DELETED; 684 break; 685 default: 686 vl->state = AFS_VL_UNCERTAIN; 687 break; 688 } 689 spin_unlock(&vl->lock); 690 wake_up(&vl->waitq); 691 692 /* and then reschedule */ 693 _debug("reschedule"); 694 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 695 696 spin_lock(&afs_vlocation_updates_lock); 697 698 if (!list_empty(&afs_vlocation_updates)) { 699 /* next update in 10 minutes, but wait at least 1 second more 700 * than the newest record already queued so that we don't spam 701 * the VL server suddenly with lots of requests 702 */ 703 xvl = list_entry(afs_vlocation_updates.prev, 704 struct afs_vlocation, update); 705 if (vl->update_at <= xvl->update_at) 706 vl->update_at = xvl->update_at + 1; 707 xvl = list_entry(afs_vlocation_updates.next, 708 struct afs_vlocation, update); 709 timeout = xvl->update_at - now; 710 if (timeout < 0) 711 timeout = 0; 712 } else { 713 timeout = afs_vlocation_update_timeout; 714 } 715 716 ASSERT(list_empty(&vl->update)); 717 718 list_add_tail(&vl->update, &afs_vlocation_updates); 719 720 _debug("timeout %ld", timeout); 721 queue_delayed_work(afs_vlocation_update_worker, 722 &afs_vlocation_update, timeout * HZ); 723 
spin_unlock(&afs_vlocation_updates_lock); 724 afs_put_vlocation(vl); 725}