/* Source revision: v2.6.30 */
1/* AFS volume location management 2 * 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12#include <linux/kernel.h> 13#include <linux/module.h> 14#include <linux/init.h> 15#include <linux/sched.h> 16#include "internal.h" 17 18static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ 19static unsigned afs_vlocation_update_timeout = 10 * 60; 20 21static void afs_vlocation_reaper(struct work_struct *); 22static void afs_vlocation_updater(struct work_struct *); 23 24static LIST_HEAD(afs_vlocation_updates); 25static LIST_HEAD(afs_vlocation_graveyard); 26static DEFINE_SPINLOCK(afs_vlocation_updates_lock); 27static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock); 28static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper); 29static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater); 30static struct workqueue_struct *afs_vlocation_update_worker; 31 32/* 33 * iterate through the VL servers in a cell until one of them admits knowing 34 * about the volume in question 35 */ 36static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, 37 struct key *key, 38 struct afs_cache_vlocation *vldb) 39{ 40 struct afs_cell *cell = vl->cell; 41 struct in_addr addr; 42 int count, ret; 43 44 _enter("%s,%s", cell->name, vl->vldb.name); 45 46 down_write(&vl->cell->vl_sem); 47 ret = -ENOMEDIUM; 48 for (count = cell->vl_naddrs; count > 0; count--) { 49 addr = cell->vl_addrs[cell->vl_curr_svix]; 50 51 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 52 53 /* attempt to access the VL server */ 54 ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb, 55 &afs_sync_call); 56 switch 
(ret) { 57 case 0: 58 goto out; 59 case -ENOMEM: 60 case -ENONET: 61 case -ENETUNREACH: 62 case -EHOSTUNREACH: 63 case -ECONNREFUSED: 64 if (ret == -ENOMEM || ret == -ENONET) 65 goto out; 66 goto rotate; 67 case -ENOMEDIUM: 68 goto out; 69 default: 70 ret = -EIO; 71 goto rotate; 72 } 73 74 /* rotate the server records upon lookup failure */ 75 rotate: 76 cell->vl_curr_svix++; 77 cell->vl_curr_svix %= cell->vl_naddrs; 78 } 79 80out: 81 up_write(&vl->cell->vl_sem); 82 _leave(" = %d", ret); 83 return ret; 84} 85 86/* 87 * iterate through the VL servers in a cell until one of them admits knowing 88 * about the volume in question 89 */ 90static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, 91 struct key *key, 92 afs_volid_t volid, 93 afs_voltype_t voltype, 94 struct afs_cache_vlocation *vldb) 95{ 96 struct afs_cell *cell = vl->cell; 97 struct in_addr addr; 98 int count, ret; 99 100 _enter("%s,%x,%d,", cell->name, volid, voltype); 101 102 down_write(&vl->cell->vl_sem); 103 ret = -ENOMEDIUM; 104 for (count = cell->vl_naddrs; count > 0; count--) { 105 addr = cell->vl_addrs[cell->vl_curr_svix]; 106 107 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 108 109 /* attempt to access the VL server */ 110 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb, 111 &afs_sync_call); 112 switch (ret) { 113 case 0: 114 goto out; 115 case -ENOMEM: 116 case -ENONET: 117 case -ENETUNREACH: 118 case -EHOSTUNREACH: 119 case -ECONNREFUSED: 120 if (ret == -ENOMEM || ret == -ENONET) 121 goto out; 122 goto rotate; 123 case -EBUSY: 124 vl->upd_busy_cnt++; 125 if (vl->upd_busy_cnt <= 3) { 126 if (vl->upd_busy_cnt > 1) { 127 /* second+ BUSY - sleep a little bit */ 128 set_current_state(TASK_UNINTERRUPTIBLE); 129 schedule_timeout(1); 130 __set_current_state(TASK_RUNNING); 131 } 132 continue; 133 } 134 break; 135 case -ENOMEDIUM: 136 vl->upd_rej_cnt++; 137 goto rotate; 138 default: 139 ret = -EIO; 140 goto rotate; 141 } 142 143 /* rotate the server records upon 
lookup failure */ 144 rotate: 145 cell->vl_curr_svix++; 146 cell->vl_curr_svix %= cell->vl_naddrs; 147 vl->upd_busy_cnt = 0; 148 } 149 150out: 151 if (ret < 0 && vl->upd_rej_cnt > 0) { 152 printk(KERN_NOTICE "kAFS:" 153 " Active volume no longer valid '%s'\n", 154 vl->vldb.name); 155 vl->valid = 0; 156 ret = -ENOMEDIUM; 157 } 158 159 up_write(&vl->cell->vl_sem); 160 _leave(" = %d", ret); 161 return ret; 162} 163 164/* 165 * allocate a volume location record 166 */ 167static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, 168 const char *name, 169 size_t namesz) 170{ 171 struct afs_vlocation *vl; 172 173 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); 174 if (vl) { 175 vl->cell = cell; 176 vl->state = AFS_VL_NEW; 177 atomic_set(&vl->usage, 1); 178 INIT_LIST_HEAD(&vl->link); 179 INIT_LIST_HEAD(&vl->grave); 180 INIT_LIST_HEAD(&vl->update); 181 init_waitqueue_head(&vl->waitq); 182 spin_lock_init(&vl->lock); 183 memcpy(vl->vldb.name, name, namesz); 184 } 185 186 _leave(" = %p", vl); 187 return vl; 188} 189 190/* 191 * update record if we found it in the cache 192 */ 193static int afs_vlocation_update_record(struct afs_vlocation *vl, 194 struct key *key, 195 struct afs_cache_vlocation *vldb) 196{ 197 afs_voltype_t voltype; 198 afs_volid_t vid; 199 int ret; 200 201 /* try to look up a cached volume in the cell VL databases by ID */ 202 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 203 vl->vldb.name, 204 vl->vldb.vidmask, 205 ntohl(vl->vldb.servers[0].s_addr), 206 vl->vldb.srvtmask[0], 207 ntohl(vl->vldb.servers[1].s_addr), 208 vl->vldb.srvtmask[1], 209 ntohl(vl->vldb.servers[2].s_addr), 210 vl->vldb.srvtmask[2]); 211 212 _debug("Vids: %08x %08x %08x", 213 vl->vldb.vid[0], 214 vl->vldb.vid[1], 215 vl->vldb.vid[2]); 216 217 if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { 218 vid = vl->vldb.vid[0]; 219 voltype = AFSVL_RWVOL; 220 } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { 221 vid = vl->vldb.vid[1]; 222 voltype = AFSVL_ROVOL; 223 } 
else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { 224 vid = vl->vldb.vid[2]; 225 voltype = AFSVL_BACKVOL; 226 } else { 227 BUG(); 228 vid = 0; 229 voltype = 0; 230 } 231 232 /* contact the server to make sure the volume is still available 233 * - TODO: need to handle disconnected operation here 234 */ 235 ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb); 236 switch (ret) { 237 /* net error */ 238 default: 239 printk(KERN_WARNING "kAFS:" 240 " failed to update volume '%s' (%x) up in '%s': %d\n", 241 vl->vldb.name, vid, vl->cell->name, ret); 242 _leave(" = %d", ret); 243 return ret; 244 245 /* pulled from local cache into memory */ 246 case 0: 247 _leave(" = 0"); 248 return 0; 249 250 /* uh oh... looks like the volume got deleted */ 251 case -ENOMEDIUM: 252 printk(KERN_ERR "kAFS:" 253 " volume '%s' (%x) does not exist '%s'\n", 254 vl->vldb.name, vid, vl->cell->name); 255 256 /* TODO: make existing record unavailable */ 257 _leave(" = %d", ret); 258 return ret; 259 } 260} 261 262/* 263 * apply the update to a VL record 264 */ 265static void afs_vlocation_apply_update(struct afs_vlocation *vl, 266 struct afs_cache_vlocation *vldb) 267{ 268 _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 269 vldb->name, vldb->vidmask, 270 ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0], 271 ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1], 272 ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]); 273 274 _debug("Vids: %08x %08x %08x", 275 vldb->vid[0], vldb->vid[1], vldb->vid[2]); 276 277 if (strcmp(vldb->name, vl->vldb.name) != 0) 278 printk(KERN_NOTICE "kAFS:" 279 " name of volume '%s' changed to '%s' on server\n", 280 vl->vldb.name, vldb->name); 281 282 vl->vldb = *vldb; 283 284#ifdef CONFIG_AFS_FSCACHE 285 fscache_update_cookie(vl->cache); 286#endif 287} 288 289/* 290 * fill in a volume location record, consulting the cache and the VL server 291 * both 292 */ 293static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, 294 struct key *key) 295{ 
296 struct afs_cache_vlocation vldb; 297 int ret; 298 299 _enter(""); 300 301 ASSERTCMP(vl->valid, ==, 0); 302 303 memset(&vldb, 0, sizeof(vldb)); 304 305 /* see if we have an in-cache copy (will set vl->valid if there is) */ 306#ifdef CONFIG_AFS_FSCACHE 307 vl->cache = fscache_acquire_cookie(vl->cell->cache, 308 &afs_vlocation_cache_index_def, vl); 309#endif 310 311 if (vl->valid) { 312 /* try to update a known volume in the cell VL databases by 313 * ID as the name may have changed */ 314 _debug("found in cache"); 315 ret = afs_vlocation_update_record(vl, key, &vldb); 316 } else { 317 /* try to look up an unknown volume in the cell VL databases by 318 * name */ 319 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); 320 if (ret < 0) { 321 printk("kAFS: failed to locate '%s' in cell '%s'\n", 322 vl->vldb.name, vl->cell->name); 323 return ret; 324 } 325 } 326 327 afs_vlocation_apply_update(vl, &vldb); 328 _leave(" = 0"); 329 return 0; 330} 331 332/* 333 * queue a vlocation record for updates 334 */ 335static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) 336{ 337 struct afs_vlocation *xvl; 338 339 /* wait at least 10 minutes before updating... */ 340 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 341 342 spin_lock(&afs_vlocation_updates_lock); 343 344 if (!list_empty(&afs_vlocation_updates)) { 345 /* ... 
but wait at least 1 second more than the newest record 346 * already queued so that we don't spam the VL server suddenly 347 * with lots of requests 348 */ 349 xvl = list_entry(afs_vlocation_updates.prev, 350 struct afs_vlocation, update); 351 if (vl->update_at <= xvl->update_at) 352 vl->update_at = xvl->update_at + 1; 353 } else { 354 queue_delayed_work(afs_vlocation_update_worker, 355 &afs_vlocation_update, 356 afs_vlocation_update_timeout * HZ); 357 } 358 359 list_add_tail(&vl->update, &afs_vlocation_updates); 360 spin_unlock(&afs_vlocation_updates_lock); 361} 362 363/* 364 * lookup volume location 365 * - iterate through the VL servers in a cell until one of them admits knowing 366 * about the volume in question 367 * - lookup in the local cache if not able to find on the VL server 368 * - insert/update in the local cache if did get a VL response 369 */ 370struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell, 371 struct key *key, 372 const char *name, 373 size_t namesz) 374{ 375 struct afs_vlocation *vl; 376 int ret; 377 378 _enter("{%s},{%x},%*.*s,%zu", 379 cell->name, key_serial(key), 380 (int) namesz, (int) namesz, name, namesz); 381 382 if (namesz >= sizeof(vl->vldb.name)) { 383 _leave(" = -ENAMETOOLONG"); 384 return ERR_PTR(-ENAMETOOLONG); 385 } 386 387 /* see if we have an in-memory copy first */ 388 down_write(&cell->vl_sem); 389 spin_lock(&cell->vl_lock); 390 list_for_each_entry(vl, &cell->vl_list, link) { 391 if (vl->vldb.name[namesz] != '\0') 392 continue; 393 if (memcmp(vl->vldb.name, name, namesz) == 0) 394 goto found_in_memory; 395 } 396 spin_unlock(&cell->vl_lock); 397 398 /* not in the cell's in-memory lists - create a new record */ 399 vl = afs_vlocation_alloc(cell, name, namesz); 400 if (!vl) { 401 up_write(&cell->vl_sem); 402 return ERR_PTR(-ENOMEM); 403 } 404 405 afs_get_cell(cell); 406 407 list_add_tail(&vl->link, &cell->vl_list); 408 vl->state = AFS_VL_CREATING; 409 up_write(&cell->vl_sem); 410 411fill_in_record: 412 ret = 
afs_vlocation_fill_in_record(vl, key); 413 if (ret < 0) 414 goto error_abandon; 415 spin_lock(&vl->lock); 416 vl->state = AFS_VL_VALID; 417 spin_unlock(&vl->lock); 418 wake_up(&vl->waitq); 419 420 /* update volume entry in local cache */ 421#ifdef CONFIG_AFS_FSCACHE 422 fscache_update_cookie(vl->cache); 423#endif 424 425 /* schedule for regular updates */ 426 afs_vlocation_queue_for_updates(vl); 427 goto success; 428 429found_in_memory: 430 /* found in memory */ 431 _debug("found in memory"); 432 atomic_inc(&vl->usage); 433 spin_unlock(&cell->vl_lock); 434 if (!list_empty(&vl->grave)) { 435 spin_lock(&afs_vlocation_graveyard_lock); 436 list_del_init(&vl->grave); 437 spin_unlock(&afs_vlocation_graveyard_lock); 438 } 439 up_write(&cell->vl_sem); 440 441 /* see if it was an abandoned record that we might try filling in */ 442 spin_lock(&vl->lock); 443 while (vl->state != AFS_VL_VALID) { 444 afs_vlocation_state_t state = vl->state; 445 446 _debug("invalid [state %d]", state); 447 448 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) { 449 vl->state = AFS_VL_CREATING; 450 spin_unlock(&vl->lock); 451 goto fill_in_record; 452 } 453 454 /* must now wait for creation or update by someone else to 455 * complete */ 456 _debug("wait"); 457 458 spin_unlock(&vl->lock); 459 ret = wait_event_interruptible(vl->waitq, 460 vl->state == AFS_VL_NEW || 461 vl->state == AFS_VL_VALID || 462 vl->state == AFS_VL_NO_VOLUME); 463 if (ret < 0) 464 goto error; 465 spin_lock(&vl->lock); 466 } 467 spin_unlock(&vl->lock); 468 469success: 470 _leave(" = %p", vl); 471 return vl; 472 473error_abandon: 474 spin_lock(&vl->lock); 475 vl->state = AFS_VL_NEW; 476 spin_unlock(&vl->lock); 477 wake_up(&vl->waitq); 478error: 479 ASSERT(vl != NULL); 480 afs_put_vlocation(vl); 481 _leave(" = %d", ret); 482 return ERR_PTR(ret); 483} 484 485/* 486 * finish using a volume location record 487 */ 488void afs_put_vlocation(struct afs_vlocation *vl) 489{ 490 if (!vl) 491 return; 492 493 _enter("%s", 
vl->vldb.name); 494 495 ASSERTCMP(atomic_read(&vl->usage), >, 0); 496 497 if (likely(!atomic_dec_and_test(&vl->usage))) { 498 _leave(""); 499 return; 500 } 501 502 spin_lock(&afs_vlocation_graveyard_lock); 503 if (atomic_read(&vl->usage) == 0) { 504 _debug("buried"); 505 list_move_tail(&vl->grave, &afs_vlocation_graveyard); 506 vl->time_of_death = get_seconds(); 507 schedule_delayed_work(&afs_vlocation_reap, 508 afs_vlocation_timeout * HZ); 509 510 /* suspend updates on this record */ 511 if (!list_empty(&vl->update)) { 512 spin_lock(&afs_vlocation_updates_lock); 513 list_del_init(&vl->update); 514 spin_unlock(&afs_vlocation_updates_lock); 515 } 516 } 517 spin_unlock(&afs_vlocation_graveyard_lock); 518 _leave(" [killed?]"); 519} 520 521/* 522 * destroy a dead volume location record 523 */ 524static void afs_vlocation_destroy(struct afs_vlocation *vl) 525{ 526 _enter("%p", vl); 527 528#ifdef CONFIG_AFS_FSCACHE 529 fscache_relinquish_cookie(vl->cache, 0); 530#endif 531 afs_put_cell(vl->cell); 532 kfree(vl); 533} 534 535/* 536 * reap dead volume location records 537 */ 538static void afs_vlocation_reaper(struct work_struct *work) 539{ 540 LIST_HEAD(corpses); 541 struct afs_vlocation *vl; 542 unsigned long delay, expiry; 543 time_t now; 544 545 _enter(""); 546 547 now = get_seconds(); 548 spin_lock(&afs_vlocation_graveyard_lock); 549 550 while (!list_empty(&afs_vlocation_graveyard)) { 551 vl = list_entry(afs_vlocation_graveyard.next, 552 struct afs_vlocation, grave); 553 554 _debug("check %p", vl); 555 556 /* the queue is ordered most dead first */ 557 expiry = vl->time_of_death + afs_vlocation_timeout; 558 if (expiry > now) { 559 delay = (expiry - now) * HZ; 560 _debug("delay %lu", delay); 561 if (!schedule_delayed_work(&afs_vlocation_reap, 562 delay)) { 563 cancel_delayed_work(&afs_vlocation_reap); 564 schedule_delayed_work(&afs_vlocation_reap, 565 delay); 566 } 567 break; 568 } 569 570 spin_lock(&vl->cell->vl_lock); 571 if (atomic_read(&vl->usage) > 0) { 572 
_debug("no reap"); 573 list_del_init(&vl->grave); 574 } else { 575 _debug("reap"); 576 list_move_tail(&vl->grave, &corpses); 577 list_del_init(&vl->link); 578 } 579 spin_unlock(&vl->cell->vl_lock); 580 } 581 582 spin_unlock(&afs_vlocation_graveyard_lock); 583 584 /* now reap the corpses we've extracted */ 585 while (!list_empty(&corpses)) { 586 vl = list_entry(corpses.next, struct afs_vlocation, grave); 587 list_del(&vl->grave); 588 afs_vlocation_destroy(vl); 589 } 590 591 _leave(""); 592} 593 594/* 595 * initialise the VL update process 596 */ 597int __init afs_vlocation_update_init(void) 598{ 599 afs_vlocation_update_worker = 600 create_singlethread_workqueue("kafs_vlupdated"); 601 return afs_vlocation_update_worker ? 0 : -ENOMEM; 602} 603 604/* 605 * discard all the volume location records for rmmod 606 */ 607void afs_vlocation_purge(void) 608{ 609 afs_vlocation_timeout = 0; 610 611 spin_lock(&afs_vlocation_updates_lock); 612 list_del_init(&afs_vlocation_updates); 613 spin_unlock(&afs_vlocation_updates_lock); 614 cancel_delayed_work(&afs_vlocation_update); 615 queue_delayed_work(afs_vlocation_update_worker, 616 &afs_vlocation_update, 0); 617 destroy_workqueue(afs_vlocation_update_worker); 618 619 cancel_delayed_work(&afs_vlocation_reap); 620 schedule_delayed_work(&afs_vlocation_reap, 0); 621} 622 623/* 624 * update a volume location 625 */ 626static void afs_vlocation_updater(struct work_struct *work) 627{ 628 struct afs_cache_vlocation vldb; 629 struct afs_vlocation *vl, *xvl; 630 time_t now; 631 long timeout; 632 int ret; 633 634 _enter(""); 635 636 now = get_seconds(); 637 638 /* find a record to update */ 639 spin_lock(&afs_vlocation_updates_lock); 640 for (;;) { 641 if (list_empty(&afs_vlocation_updates)) { 642 spin_unlock(&afs_vlocation_updates_lock); 643 _leave(" [nothing]"); 644 return; 645 } 646 647 vl = list_entry(afs_vlocation_updates.next, 648 struct afs_vlocation, update); 649 if (atomic_read(&vl->usage) > 0) 650 break; 651 
list_del_init(&vl->update); 652 } 653 654 timeout = vl->update_at - now; 655 if (timeout > 0) { 656 queue_delayed_work(afs_vlocation_update_worker, 657 &afs_vlocation_update, timeout * HZ); 658 spin_unlock(&afs_vlocation_updates_lock); 659 _leave(" [nothing]"); 660 return; 661 } 662 663 list_del_init(&vl->update); 664 atomic_inc(&vl->usage); 665 spin_unlock(&afs_vlocation_updates_lock); 666 667 /* we can now perform the update */ 668 _debug("update %s", vl->vldb.name); 669 vl->state = AFS_VL_UPDATING; 670 vl->upd_rej_cnt = 0; 671 vl->upd_busy_cnt = 0; 672 673 ret = afs_vlocation_update_record(vl, NULL, &vldb); 674 spin_lock(&vl->lock); 675 switch (ret) { 676 case 0: 677 afs_vlocation_apply_update(vl, &vldb); 678 vl->state = AFS_VL_VALID; 679 break; 680 case -ENOMEDIUM: 681 vl->state = AFS_VL_VOLUME_DELETED; 682 break; 683 default: 684 vl->state = AFS_VL_UNCERTAIN; 685 break; 686 } 687 spin_unlock(&vl->lock); 688 wake_up(&vl->waitq); 689 690 /* and then reschedule */ 691 _debug("reschedule"); 692 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 693 694 spin_lock(&afs_vlocation_updates_lock); 695 696 if (!list_empty(&afs_vlocation_updates)) { 697 /* next update in 10 minutes, but wait at least 1 second more 698 * than the newest record already queued so that we don't spam 699 * the VL server suddenly with lots of requests 700 */ 701 xvl = list_entry(afs_vlocation_updates.prev, 702 struct afs_vlocation, update); 703 if (vl->update_at <= xvl->update_at) 704 vl->update_at = xvl->update_at + 1; 705 xvl = list_entry(afs_vlocation_updates.next, 706 struct afs_vlocation, update); 707 timeout = xvl->update_at - now; 708 if (timeout < 0) 709 timeout = 0; 710 } else { 711 timeout = afs_vlocation_update_timeout; 712 } 713 714 ASSERT(list_empty(&vl->update)); 715 716 list_add_tail(&vl->update, &afs_vlocation_updates); 717 718 _debug("timeout %ld", timeout); 719 queue_delayed_work(afs_vlocation_update_worker, 720 &afs_vlocation_update, timeout * HZ); 721 
spin_unlock(&afs_vlocation_updates_lock); 722 afs_put_vlocation(vl); 723}