Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v3.8-rc2 718 lines 18 kB view raw
/* AFS volume location management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/sched.h>
#include "internal.h"

/* How long a dead record lingers in the graveyard before being reaped, and
 * how often a live record is revalidated against the VL server. */
static unsigned afs_vlocation_timeout = 10;	/* volume location timeout in seconds */
static unsigned afs_vlocation_update_timeout = 10 * 60;

static void afs_vlocation_reaper(struct work_struct *);
static void afs_vlocation_updater(struct work_struct *);

/* Records pending periodic revalidation, ordered oldest-deadline first,
 * and records awaiting destruction; each list has its own lock. */
static LIST_HEAD(afs_vlocation_updates);
static LIST_HEAD(afs_vlocation_graveyard);
static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
/* single-threaded workqueue on which all update work runs */
static struct workqueue_struct *afs_vlocation_update_worker;

/*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
 * - looks the volume up by NAME
 * - returns 0 and fills in *vldb on success, or a negative errno
 * - holds the cell's vl_sem for the duration to serialise server rotation
 */
static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
					   struct key *key,
					   struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr addr;
	int count, ret;

	_enter("%s,%s", cell->name, vl->vldb.name);

	down_write(&vl->cell->vl_sem);
	ret = -ENOMEDIUM;
	/* try each of the cell's VL server addresses at most once, starting
	 * from the current preferred server index */
	for (count = cell->vl_naddrs; count > 0; count--) {
		addr = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
					       &afs_sync_call);
		switch (ret) {
		case 0:
			goto out;
		case -ENOMEM:
		case -ENONET:
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* local OOM / no network at all: give up; otherwise
			 * this server is unreachable, so try the next one */
			if (ret == -ENOMEM || ret == -ENONET)
				goto out;
			goto rotate;
		case -ENOMEDIUM:
		case -EKEYREJECTED:
		case -EKEYEXPIRED:
			/* definitive answer (no such volume / auth failure):
			 * rotating won't help */
			goto out;
		default:
			ret = -EIO;
			goto rotate;
		}

		/* rotate the server records upon lookup failure */
	rotate:
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
	}

out:
	up_write(&vl->cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}

/*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
 * - looks the volume up by VOLUME ID and type (used when revalidating a
 *   record we already know, as the name may have changed)
 * - retries the same server up to 3 times on EBUSY with a short sleep, and
 *   counts outright rejections in vl->upd_rej_cnt
 * - marks the record invalid and returns -ENOMEDIUM if every server that
 *   answered rejected the volume
 */
static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
					 struct key *key,
					 afs_volid_t volid,
					 afs_voltype_t voltype,
					 struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr addr;
	int count, ret;

	_enter("%s,%x,%d,", cell->name, volid, voltype);

	down_write(&vl->cell->vl_sem);
	ret = -ENOMEDIUM;
	for (count = cell->vl_naddrs; count > 0; count--) {
		addr = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
					     &afs_sync_call);
		switch (ret) {
		case 0:
			goto out;
		case -ENOMEM:
		case -ENONET:
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* as in the by-name case: give up on OOM or total
			 * network loss, else move on to the next server */
			if (ret == -ENOMEM || ret == -ENONET)
				goto out;
			goto rotate;
		case -EBUSY:
			/* server is busy: retry it up to 3 times, sleeping a
			 * jiffy between the second and later attempts; after
			 * that, break out of the switch and fall through to
			 * the rotate code below */
			vl->upd_busy_cnt++;
			if (vl->upd_busy_cnt <= 3) {
				if (vl->upd_busy_cnt > 1) {
					/* second+ BUSY - sleep a little bit */
					set_current_state(TASK_UNINTERRUPTIBLE);
					schedule_timeout(1);
					__set_current_state(TASK_RUNNING);
				}
				continue;
			}
			break;
		case -ENOMEDIUM:
			/* this server positively denies the volume exists */
			vl->upd_rej_cnt++;
			goto rotate;
		default:
			ret = -EIO;
			goto rotate;
		}

		/* rotate the server records upon lookup failure */
	rotate:
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
		vl->upd_busy_cnt = 0;
	}

out:
	/* if at least one server explicitly rejected the volume and nothing
	 * succeeded, declare the volume gone */
	if (ret < 0 && vl->upd_rej_cnt > 0) {
		printk(KERN_NOTICE "kAFS:"
		       " Active volume no longer valid '%s'\n",
		       vl->vldb.name);
		vl->valid = 0;
		ret = -ENOMEDIUM;
	}

	up_write(&vl->cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}

/*
 * allocate a volume location record
 * - returns the new record (usage count 1, state AFS_VL_NEW) with the volume
 *   name copied in, or NULL on allocation failure
 * - note: does not take a reference on the cell; the caller does that
 */
static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
						 const char *name,
						 size_t namesz)
{
	struct afs_vlocation *vl;

	vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
	if (vl) {
		vl->cell = cell;
		vl->state = AFS_VL_NEW;
		atomic_set(&vl->usage, 1);
		INIT_LIST_HEAD(&vl->link);
		INIT_LIST_HEAD(&vl->grave);
		INIT_LIST_HEAD(&vl->update);
		init_waitqueue_head(&vl->waitq);
		spin_lock_init(&vl->lock);
		/* kzalloc zeroed the buffer, so the name stays NUL-terminated
		 * provided namesz < sizeof(vl->vldb.name) (checked by caller) */
		memcpy(vl->vldb.name, name, namesz);
	}

	_leave(" = %p", vl);
	return vl;
}

/*
 * update record if we found it in the cache
 * - picks a volume ID out of the cached record (preferring RW, then RO, then
 *   backup) and revalidates it against the VL servers by ID
 * - returns 0 with *vldb filled in, or a negative errno (-ENOMEDIUM means
 *   the volume appears to have been deleted)
 */
static int afs_vlocation_update_record(struct afs_vlocation *vl,
				       struct key *key,
				       struct afs_cache_vlocation *vldb)
{
	afs_voltype_t voltype;
	afs_volid_t vid;
	int ret;

	/* try to look up a cached volume in the cell VL databases by ID */
	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vl->vldb.name,
	       vl->vldb.vidmask,
	       ntohl(vl->vldb.servers[0].s_addr),
	       vl->vldb.srvtmask[0],
	       ntohl(vl->vldb.servers[1].s_addr),
	       vl->vldb.srvtmask[1],
	       ntohl(vl->vldb.servers[2].s_addr),
	       vl->vldb.srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vl->vldb.vid[0],
	       vl->vldb.vid[1],
	       vl->vldb.vid[2]);

	/* choose which of the cached volume IDs to revalidate with:
	 * RW preferred over RO over backup */
	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
		vid = vl->vldb.vid[0];
		voltype = AFSVL_RWVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
		vid = vl->vldb.vid[1];
		voltype = AFSVL_ROVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
		vid = vl->vldb.vid[2];
		voltype = AFSVL_BACKVOL;
	} else {
		/* a cached record with no type bits set should be impossible;
		 * the assignments below only silence compiler warnings */
		BUG();
		vid = 0;
		voltype = 0;
	}

	/* contact the server to make sure the volume is still available
	 * - TODO: need to handle disconnected operation here
	 */
	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
	switch (ret) {
		/* net error */
	default:
		printk(KERN_WARNING "kAFS:"
		       " failed to update volume '%s' (%x) up in '%s': %d\n",
		       vl->vldb.name, vid, vl->cell->name, ret);
		_leave(" = %d", ret);
		return ret;

		/* pulled from local cache into memory */
	case 0:
		_leave(" = 0");
		return 0;

		/* uh oh... looks like the volume got deleted */
	case -ENOMEDIUM:
		printk(KERN_ERR "kAFS:"
		       " volume '%s' (%x) does not exist '%s'\n",
		       vl->vldb.name, vid, vl->cell->name);

		/* TODO: make existing record unavailable */
		_leave(" = %d", ret);
		return ret;
	}
}

/*
 * apply the update to a VL record
 * - copies the freshly fetched database entry over the in-memory one and
 *   pokes the fscache cookie so the on-disk cache gets refreshed too
 */
static void afs_vlocation_apply_update(struct afs_vlocation *vl,
				       struct afs_cache_vlocation *vldb)
{
	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vldb->name, vldb->vidmask,
	       ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
	       ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
	       ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);

	/* a rename on the server is tolerated, but worth telling the admin */
	if (strcmp(vldb->name, vl->vldb.name) != 0)
		printk(KERN_NOTICE "kAFS:"
		       " name of volume '%s' changed to '%s' on server\n",
		       vl->vldb.name, vldb->name);

	vl->vldb = *vldb;

#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif
}

/*
 * fill in a volume location record, consulting the cache and the VL server
 * both
 * - acquiring the fscache cookie may set vl->valid if a cached copy exists;
 *   in that case we revalidate by ID, otherwise we look up by name
 * - returns 0 on success or a negative errno
 */
static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
					struct key *key)
{
	struct afs_cache_vlocation vldb;
	int ret;

	_enter("");

	ASSERTCMP(vl->valid, ==, 0);

	memset(&vldb, 0, sizeof(vldb));

	/* see if we have an in-cache copy (will set vl->valid if there is) */
#ifdef CONFIG_AFS_FSCACHE
	vl->cache = fscache_acquire_cookie(vl->cell->cache,
					   &afs_vlocation_cache_index_def, vl);
#endif

	if (vl->valid) {
		/* try to update a known volume in the cell VL databases by
		 * ID as the name may have changed */
		_debug("found in cache");
		ret = afs_vlocation_update_record(vl, key, &vldb);
	} else {
		/* try to look up an unknown volume in the cell VL databases by
		 * name */
		ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
		if (ret < 0) {
			printk("kAFS: failed to locate '%s' in cell '%s'\n",
			       vl->vldb.name, vl->cell->name);
			return ret;
		}
	}

	/* NOTE(review): when the cached-record path fails, the stale vldb
	 * (all zeroes after an update failure) is still applied below —
	 * presumably acceptable as the record was already invalidated;
	 * confirm against callers */
	afs_vlocation_apply_update(vl, &vldb);
	_leave(" = 0");
	return 0;
}

/*
 * queue a vlocation record for updates
 * - appends the record to the global updates list with a deadline at least
 *   10 minutes away, and at least 1s after the last queued record so a burst
 *   of lookups doesn't later become a burst of VL server requests
 * - kicks the updater work item only when the list was empty (otherwise it
 *   is already scheduled)
 */
static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
{
	struct afs_vlocation *xvl;

	/* wait at least 10 minutes before updating... */
	vl->update_at = get_seconds() + afs_vlocation_update_timeout;

	spin_lock(&afs_vlocation_updates_lock);

	if (!list_empty(&afs_vlocation_updates)) {
		/* ... but wait at least 1 second more than the newest record
		 * already queued so that we don't spam the VL server suddenly
		 * with lots of requests
		 */
		xvl = list_entry(afs_vlocation_updates.prev,
				 struct afs_vlocation, update);
		if (vl->update_at <= xvl->update_at)
			vl->update_at = xvl->update_at + 1;
	} else {
		queue_delayed_work(afs_vlocation_update_worker,
				   &afs_vlocation_update,
				   afs_vlocation_update_timeout * HZ);
	}

	list_add_tail(&vl->update, &afs_vlocation_updates);
	spin_unlock(&afs_vlocation_updates_lock);
}

/*
 * lookup volume location
 * - iterate through the VL servers in a cell until one of them admits knowing
 *   about the volume in question
 * - lookup in the local cache if not able to find on the VL server
 * - insert/update in the local cache if did get a VL response
 * - returns the record with an extra reference held (caller must
 *   afs_put_vlocation() it), or an ERR_PTR
 */
struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
					   struct key *key,
					   const char *name,
					   size_t namesz)
{
	struct afs_vlocation *vl;
	int ret;

	_enter("{%s},{%x},%*.*s,%zu",
	       cell->name, key_serial(key),
	       (int) namesz, (int) namesz, name, namesz);

	/* the name must fit in the record's buffer with a trailing NUL */
	if (namesz >= sizeof(vl->vldb.name)) {
		_leave(" = -ENAMETOOLONG");
		return ERR_PTR(-ENAMETOOLONG);
	}

	/* see if we have an in-memory copy first */
	down_write(&cell->vl_sem);
	spin_lock(&cell->vl_lock);
	list_for_each_entry(vl, &cell->vl_list, link) {
		/* cheap length check before the memcmp: the stored name must
		 * end exactly at namesz */
		if (vl->vldb.name[namesz] != '\0')
			continue;
		if (memcmp(vl->vldb.name, name, namesz) == 0)
			goto found_in_memory;
	}
	spin_unlock(&cell->vl_lock);

	/* not in the cell's in-memory lists - create a new record */
	vl = afs_vlocation_alloc(cell, name, namesz);
	if (!vl) {
		up_write(&cell->vl_sem);
		return ERR_PTR(-ENOMEM);
	}

	/* the record holds a reference on its cell */
	afs_get_cell(cell);

	list_add_tail(&vl->link, &cell->vl_list);
	vl->state = AFS_VL_CREATING;
	up_write(&cell->vl_sem);

fill_in_record:
	ret = afs_vlocation_fill_in_record(vl, key);
	if (ret < 0)
		goto error_abandon;
	spin_lock(&vl->lock);
	vl->state = AFS_VL_VALID;
	spin_unlock(&vl->lock);
	/* wake anyone who was waiting on this record becoming valid */
	wake_up(&vl->waitq);

	/* update volume entry in local cache */
#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif

	/* schedule for regular updates */
	afs_vlocation_queue_for_updates(vl);
	goto success;

found_in_memory:
	/* found in memory */
	_debug("found in memory");
	atomic_inc(&vl->usage);
	spin_unlock(&cell->vl_lock);
	/* pull it back out of the graveyard if it was pending destruction */
	if (!list_empty(&vl->grave)) {
		spin_lock(&afs_vlocation_graveyard_lock);
		list_del_init(&vl->grave);
		spin_unlock(&afs_vlocation_graveyard_lock);
	}
	up_write(&cell->vl_sem);

	/* see if it was an abandoned record that we might try filling in */
	spin_lock(&vl->lock);
	while (vl->state != AFS_VL_VALID) {
		afs_vlocation_state_t state = vl->state;

		_debug("invalid [state %d]", state);

		/* an abandoned or volume-less record is ours to (re)create */
		if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
			vl->state = AFS_VL_CREATING;
			spin_unlock(&vl->lock);
			goto fill_in_record;
		}

		/* must now wait for creation or update by someone else to
		 * complete */
		_debug("wait");

		spin_unlock(&vl->lock);
		ret = wait_event_interruptible(vl->waitq,
					       vl->state == AFS_VL_NEW ||
					       vl->state == AFS_VL_VALID ||
					       vl->state == AFS_VL_NO_VOLUME);
		if (ret < 0)
			goto error;
		/* re-check the state under the lock; it may have changed
		 * again between the wakeup and here */
		spin_lock(&vl->lock);
	}
	spin_unlock(&vl->lock);

success:
	_leave(" = %p", vl);
	return vl;

error_abandon:
	/* reset to NEW so another caller may retry the fill-in, and wake any
	 * waiters so they can see the abandonment */
	spin_lock(&vl->lock);
	vl->state = AFS_VL_NEW;
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);
error:
	ASSERT(vl != NULL);
	afs_put_vlocation(vl);
	_leave(" = %d", ret);
	return ERR_PTR(ret);
}

/*
 * finish using a volume location record
 * - drops a reference; when the count hits zero the record is moved to the
 *   graveyard and the reaper is scheduled rather than being freed at once,
 *   so a quick re-lookup can resurrect it cheaply
 */
void afs_put_vlocation(struct afs_vlocation *vl)
{
	if (!vl)
		return;

	_enter("%s", vl->vldb.name);

	ASSERTCMP(atomic_read(&vl->usage), >, 0);

	if (likely(!atomic_dec_and_test(&vl->usage))) {
		_leave("");
		return;
	}

	spin_lock(&afs_vlocation_graveyard_lock);
	/* re-check under the lock: a concurrent lookup may have taken a new
	 * reference between the dec-and-test and here */
	if (atomic_read(&vl->usage) == 0) {
		_debug("buried");
		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
		vl->time_of_death = get_seconds();
		queue_delayed_work(afs_wq, &afs_vlocation_reap,
				   afs_vlocation_timeout * HZ);

		/* suspend updates on this record */
		if (!list_empty(&vl->update)) {
			spin_lock(&afs_vlocation_updates_lock);
			list_del_init(&vl->update);
			spin_unlock(&afs_vlocation_updates_lock);
		}
	}
	spin_unlock(&afs_vlocation_graveyard_lock);
	_leave(" [killed?]");
}

/*
 * destroy a dead volume location record
 * - only called by the reaper for records that have sat unused in the
 *   graveyard past the timeout
 */
static void afs_vlocation_destroy(struct afs_vlocation *vl)
{
	_enter("%p", vl);

#ifdef CONFIG_AFS_FSCACHE
	fscache_relinquish_cookie(vl->cache, 0);
#endif
	/* release the cell reference taken in afs_vlocation_lookup() */
	afs_put_cell(vl->cell);
	kfree(vl);
}

/*
 * reap dead volume location records
 * - walks the graveyard (which is ordered most-dead-first), destroying
 *   records whose timeout has passed and rescuing any that gained a new
 *   reference; reschedules itself for the first record not yet expired
 */
static void afs_vlocation_reaper(struct work_struct *work)
{
	LIST_HEAD(corpses);
	struct afs_vlocation *vl;
	unsigned long delay, expiry;
	time_t now;

	_enter("");

	now = get_seconds();
	spin_lock(&afs_vlocation_graveyard_lock);

	while (!list_empty(&afs_vlocation_graveyard)) {
		vl = list_entry(afs_vlocation_graveyard.next,
				struct afs_vlocation, grave);

		_debug("check %p", vl);

		/* the queue is ordered most dead first */
		expiry = vl->time_of_death + afs_vlocation_timeout;
		if (expiry > now) {
			/* not yet expired: come back when it will be */
			delay = (expiry - now) * HZ;
			_debug("delay %lu", delay);
			mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
			break;
		}

		spin_lock(&vl->cell->vl_lock);
		if (atomic_read(&vl->usage) > 0) {
			/* resurrected since burial: just unhook from the
			 * graveyard */
			_debug("no reap");
			list_del_init(&vl->grave);
		} else {
			/* collect for destruction outside the locks */
			_debug("reap");
			list_move_tail(&vl->grave, &corpses);
			list_del_init(&vl->link);
		}
		spin_unlock(&vl->cell->vl_lock);
	}

	spin_unlock(&afs_vlocation_graveyard_lock);

	/* now reap the corpses we've extracted */
	while (!list_empty(&corpses)) {
		vl = list_entry(corpses.next, struct afs_vlocation, grave);
		list_del(&vl->grave);
		afs_vlocation_destroy(vl);
	}

	_leave("");
}

/*
 * initialise the VL update process
 * - creates the single-threaded workqueue that runs the periodic updater
 * - returns 0 or -ENOMEM
 */
int __init afs_vlocation_update_init(void)
{
	afs_vlocation_update_worker =
		create_singlethread_workqueue("kafs_vlupdated");
	return afs_vlocation_update_worker ? 0 : -ENOMEM;
}

/*
 * discard all the volume location records for rmmod
 * - zeroing the timeout makes the reaper destroy everything immediately;
 *   both work items are then forced to run at once
 */
void afs_vlocation_purge(void)
{
	afs_vlocation_timeout = 0;

	spin_lock(&afs_vlocation_updates_lock);
	list_del_init(&afs_vlocation_updates);
	spin_unlock(&afs_vlocation_updates_lock);
	mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
	destroy_workqueue(afs_vlocation_update_worker);

	mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
}

/*
 * update a volume location
 * - work function: takes the oldest due record off the updates list,
 *   revalidates it against the VL servers, re-queues it with a fresh
 *   deadline and reschedules itself for the next due record
 */
static void afs_vlocation_updater(struct work_struct *work)
{
	struct afs_cache_vlocation vldb;
	struct afs_vlocation *vl, *xvl;
	time_t now;
	long timeout;
	int ret;

	_enter("");

	now = get_seconds();

	/* find a record to update */
	spin_lock(&afs_vlocation_updates_lock);
	for (;;) {
		if (list_empty(&afs_vlocation_updates)) {
			spin_unlock(&afs_vlocation_updates_lock);
			_leave(" [nothing]");
			return;
		}

		vl = list_entry(afs_vlocation_updates.next,
				struct afs_vlocation, update);
		if (atomic_read(&vl->usage) > 0)
			break;
		/* drop dead records from the list as we come across them */
		list_del_init(&vl->update);
	}

	/* head of the list not yet due: sleep until it is */
	timeout = vl->update_at - now;
	if (timeout > 0) {
		queue_delayed_work(afs_vlocation_update_worker,
				   &afs_vlocation_update, timeout * HZ);
		spin_unlock(&afs_vlocation_updates_lock);
		_leave(" [nothing]");
		return;
	}

	/* take it off the list and pin it with a reference while we work */
	list_del_init(&vl->update);
	atomic_inc(&vl->usage);
	spin_unlock(&afs_vlocation_updates_lock);

	/* we can now perform the update */
	_debug("update %s", vl->vldb.name);
	vl->state = AFS_VL_UPDATING;
	vl->upd_rej_cnt = 0;
	vl->upd_busy_cnt = 0;

	/* anonymous revalidation (no key) */
	ret = afs_vlocation_update_record(vl, NULL, &vldb);
	spin_lock(&vl->lock);
	switch (ret) {
	case 0:
		afs_vlocation_apply_update(vl, &vldb);
		vl->state = AFS_VL_VALID;
		break;
	case -ENOMEDIUM:
		vl->state = AFS_VL_VOLUME_DELETED;
		break;
	default:
		vl->state = AFS_VL_UNCERTAIN;
		break;
	}
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);

	/* and then reschedule */
	_debug("reschedule");
	vl->update_at = get_seconds() + afs_vlocation_update_timeout;

	spin_lock(&afs_vlocation_updates_lock);

	if (!list_empty(&afs_vlocation_updates)) {
		/* next update in 10 minutes, but wait at least 1 second more
		 * than the newest record already queued so that we don't spam
		 * the VL server suddenly with lots of requests
		 */
		xvl = list_entry(afs_vlocation_updates.prev,
				 struct afs_vlocation, update);
		if (vl->update_at <= xvl->update_at)
			vl->update_at = xvl->update_at + 1;
		xvl = list_entry(afs_vlocation_updates.next,
				 struct afs_vlocation, update);
		timeout = xvl->update_at - now;
		if (timeout < 0)
			timeout = 0;
	} else {
		timeout = afs_vlocation_update_timeout;
	}

	ASSERT(list_empty(&vl->update));

	list_add_tail(&vl->update, &afs_vlocation_updates);

	_debug("timeout %ld", timeout);
	queue_delayed_work(afs_vlocation_update_worker,
			   &afs_vlocation_update, timeout * HZ);
	spin_unlock(&afs_vlocation_updates_lock);
	/* drop the reference we pinned the record with */
	afs_put_vlocation(vl);
}