Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at 77b2555b52a894a2e39a42e43d993df875c46a6a 511 lines 14 kB view raw
1/* 2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 * 33 * $Id: fmr_pool.c 2730 2005-06-28 16:43:03Z sean.hefty $ 34 */ 35 36#include <linux/errno.h> 37#include <linux/spinlock.h> 38#include <linux/slab.h> 39#include <linux/jhash.h> 40#include <linux/kthread.h> 41 42#include <rdma/ib_fmr_pool.h> 43 44#include "core_priv.h" 45 46enum { 47 IB_FMR_MAX_REMAPS = 32, 48 49 IB_FMR_HASH_BITS = 8, 50 IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS, 51 IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1 52}; 53 54/* 55 * If an FMR is not in use, then the list member will point to either 56 * its pool's free_list (if the FMR can be mapped again; that is, 57 * remap_count < IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the 58 * FMR needs to be unmapped before being remapped). In either of 59 * these cases it is a bug if the ref_count is not 0. In other words, 60 * if ref_count is > 0, then the list member must not be linked into 61 * either free_list or dirty_list. 62 * 63 * The cache_node member is used to link the FMR into a cache bucket 64 * (if caching is enabled). This is independent of the reference 65 * count of the FMR. When a valid FMR is released, its ref_count is 66 * decremented, and if ref_count reaches 0, the FMR is placed in 67 * either free_list or dirty_list as appropriate. However, it is not 68 * removed from the cache and may be "revived" if a call to 69 * ib_fmr_register_physical() occurs before the FMR is remapped. In 70 * this case we just increment the ref_count and remove the FMR from 71 * free_list/dirty_list. 72 * 73 * Before we remap an FMR from free_list, we remove it from the cache 74 * (to prevent another user from obtaining a stale FMR). When an FMR 75 * is released, we add it to the tail of the free list, so that our 76 * cache eviction policy is "least recently used." 77 * 78 * All manipulation of ref_count, list and cache_node is protected by 79 * pool_lock to maintain consistency. 80 */ 81 82struct ib_fmr_pool { 83 spinlock_t pool_lock; 84 85 int pool_size; 86 int max_pages; 87 int dirty_watermark; 88 int dirty_len; 89 struct list_head free_list; 90 struct list_head dirty_list; 91 struct hlist_head *cache_bucket; 92 93 void (*flush_function)(struct ib_fmr_pool *pool, 94 void * arg); 95 void *flush_arg; 96 97 struct task_struct *thread; 98 99 atomic_t req_ser; 100 atomic_t flush_ser; 101 102 wait_queue_head_t force_wait; 103}; 104 105static inline u32 ib_fmr_hash(u64 first_page) 106{ 107 return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) & 108 (IB_FMR_HASH_SIZE - 1); 109} 110 111/* Caller must hold pool_lock */ 112static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, 113 u64 *page_list, 114 int page_list_len, 115 u64 io_virtual_address) 116{ 117 struct hlist_head *bucket; 118 struct ib_pool_fmr *fmr; 119 struct hlist_node *pos; 120 121 if (!pool->cache_bucket) 122 return NULL; 123 124 bucket = pool->cache_bucket + ib_fmr_hash(*page_list); 125 126 hlist_for_each_entry(fmr, pos, bucket, cache_node) 127 if (io_virtual_address == fmr->io_virtual_address && 128 page_list_len == fmr->page_list_len && 129 !memcmp(page_list, fmr->page_list, 130 page_list_len * sizeof *page_list)) 131 return fmr; 132 133 return NULL; 134} 135 136static void ib_fmr_batch_release(struct ib_fmr_pool *pool) 137{ 138 int ret; 139 struct ib_pool_fmr *fmr; 140 LIST_HEAD(unmap_list); 141 LIST_HEAD(fmr_list); 142 143 spin_lock_irq(&pool->pool_lock); 144 145 list_for_each_entry(fmr, &pool->dirty_list, list) { 146 hlist_del_init(&fmr->cache_node); 147 fmr->remap_count = 0; 148 list_add_tail(&fmr->fmr->list, &fmr_list); 149 150#ifdef DEBUG 151 if (fmr->ref_count !=0) { 152 printk(KERN_WARNING "Unmapping FMR 0x%08x with ref count %d", 153 fmr, fmr->ref_count); 154 } 155#endif 156 } 157 158 list_splice(&pool->dirty_list, &unmap_list); 159 INIT_LIST_HEAD(&pool->dirty_list); 160 pool->dirty_len = 0; 161 162 spin_unlock_irq(&pool->pool_lock); 163 164 if (list_empty(&unmap_list)) { 165 return; 166 } 167 168 ret = ib_unmap_fmr(&fmr_list); 169 if (ret) 170 printk(KERN_WARNING "ib_unmap_fmr returned %d", ret); 171 172 spin_lock_irq(&pool->pool_lock); 173 list_splice(&unmap_list, &pool->free_list); 174 spin_unlock_irq(&pool->pool_lock); 175} 176 177static int ib_fmr_cleanup_thread(void *pool_ptr) 178{ 179 struct ib_fmr_pool *pool = pool_ptr; 180 181 do { 182 if (pool->dirty_len >= pool->dirty_watermark || 183 atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) { 184 ib_fmr_batch_release(pool); 185 186 atomic_inc(&pool->flush_ser); 187 wake_up_interruptible(&pool->force_wait); 188 189 if (pool->flush_function) 190 pool->flush_function(pool, pool->flush_arg); 191 } 192 193 set_current_state(TASK_INTERRUPTIBLE); 194 if (pool->dirty_len < pool->dirty_watermark && 195 atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 && 196 !kthread_should_stop()) 197 schedule(); 198 __set_current_state(TASK_RUNNING); 199 } while (!kthread_should_stop()); 200 201 return 0; 202} 203 204/** 205 * ib_create_fmr_pool - Create an FMR pool 206 * @pd:Protection domain for FMRs 207 * @params:FMR pool parameters 208 * 209 * Create a pool of FMRs. Return value is pointer to new pool or 210 * error code if creation failed. 211 */ 212struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, 213 struct ib_fmr_pool_param *params) 214{ 215 struct ib_device *device; 216 struct ib_fmr_pool *pool; 217 int i; 218 int ret; 219 220 if (!params) 221 return ERR_PTR(-EINVAL); 222 223 device = pd->device; 224 if (!device->alloc_fmr || !device->dealloc_fmr || 225 !device->map_phys_fmr || !device->unmap_fmr) { 226 printk(KERN_WARNING "Device %s does not support fast memory regions", 227 device->name); 228 return ERR_PTR(-ENOSYS); 229 } 230 231 pool = kmalloc(sizeof *pool, GFP_KERNEL); 232 if (!pool) { 233 printk(KERN_WARNING "couldn't allocate pool struct"); 234 return ERR_PTR(-ENOMEM); 235 } 236 237 pool->cache_bucket = NULL; 238 239 pool->flush_function = params->flush_function; 240 pool->flush_arg = params->flush_arg; 241 242 INIT_LIST_HEAD(&pool->free_list); 243 INIT_LIST_HEAD(&pool->dirty_list); 244 245 if (params->cache) { 246 pool->cache_bucket = 247 kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, 248 GFP_KERNEL); 249 if (!pool->cache_bucket) { 250 printk(KERN_WARNING "Failed to allocate cache in pool"); 251 ret = -ENOMEM; 252 goto out_free_pool; 253 } 254 255 for (i = 0; i < IB_FMR_HASH_SIZE; ++i) 256 INIT_HLIST_HEAD(pool->cache_bucket + i); 257 } 258 259 pool->pool_size = 0; 260 pool->max_pages = params->max_pages_per_fmr; 261 pool->dirty_watermark = params->dirty_watermark; 262 pool->dirty_len = 0; 263 spin_lock_init(&pool->pool_lock); 264 atomic_set(&pool->req_ser, 0); 265 atomic_set(&pool->flush_ser, 0); 266 init_waitqueue_head(&pool->force_wait); 267 268 pool->thread = kthread_create(ib_fmr_cleanup_thread, 269 pool, 270 "ib_fmr(%s)", 271 device->name); 272 if (IS_ERR(pool->thread)) { 273 printk(KERN_WARNING "couldn't start cleanup thread"); 274 ret = PTR_ERR(pool->thread); 275 goto out_free_pool; 276 } 277 278 { 279 struct ib_pool_fmr *fmr; 280 struct ib_fmr_attr attr = { 281 .max_pages = params->max_pages_per_fmr, 282 .max_maps = IB_FMR_MAX_REMAPS, 283 .page_size = PAGE_SHIFT 284 }; 285 286 for (i = 0; i < params->pool_size; ++i) { 287 fmr = kmalloc(sizeof *fmr + params->max_pages_per_fmr * sizeof (u64), 288 GFP_KERNEL); 289 if (!fmr) { 290 printk(KERN_WARNING "failed to allocate fmr struct " 291 "for FMR %d", i); 292 goto out_fail; 293 } 294 295 fmr->pool = pool; 296 fmr->remap_count = 0; 297 fmr->ref_count = 0; 298 INIT_HLIST_NODE(&fmr->cache_node); 299 300 fmr->fmr = ib_alloc_fmr(pd, params->access, &attr); 301 if (IS_ERR(fmr->fmr)) { 302 printk(KERN_WARNING "fmr_create failed for FMR %d", i); 303 kfree(fmr); 304 goto out_fail; 305 } 306 307 list_add_tail(&fmr->list, &pool->free_list); 308 ++pool->pool_size; 309 } 310 } 311 312 return pool; 313 314 out_free_pool: 315 kfree(pool->cache_bucket); 316 kfree(pool); 317 318 return ERR_PTR(ret); 319 320 out_fail: 321 ib_destroy_fmr_pool(pool); 322 323 return ERR_PTR(-ENOMEM); 324} 325EXPORT_SYMBOL(ib_create_fmr_pool); 326 327/** 328 * ib_destroy_fmr_pool - Free FMR pool 329 * @pool:FMR pool to free 330 * 331 * Destroy an FMR pool and free all associated resources. 332 */ 333void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) 334{ 335 struct ib_pool_fmr *fmr; 336 struct ib_pool_fmr *tmp; 337 LIST_HEAD(fmr_list); 338 int i; 339 340 kthread_stop(pool->thread); 341 ib_fmr_batch_release(pool); 342 343 i = 0; 344 list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { 345 if (fmr->remap_count) { 346 INIT_LIST_HEAD(&fmr_list); 347 list_add_tail(&fmr->fmr->list, &fmr_list); 348 ib_unmap_fmr(&fmr_list); 349 } 350 ib_dealloc_fmr(fmr->fmr); 351 list_del(&fmr->list); 352 kfree(fmr); 353 ++i; 354 } 355 356 if (i < pool->pool_size) 357 printk(KERN_WARNING "pool still has %d regions registered", 358 pool->pool_size - i); 359 360 kfree(pool->cache_bucket); 361 kfree(pool); 362} 363EXPORT_SYMBOL(ib_destroy_fmr_pool); 364 365/** 366 * ib_flush_fmr_pool - Invalidate all unmapped FMRs 367 * @pool:FMR pool to flush 368 * 369 * Ensure that all unmapped FMRs are fully invalidated. 370 */ 371int ib_flush_fmr_pool(struct ib_fmr_pool *pool) 372{ 373 int serial; 374 375 atomic_inc(&pool->req_ser); 376 /* 377 * It's OK if someone else bumps req_ser again here -- we'll 378 * just wait a little longer. 379 */ 380 serial = atomic_read(&pool->req_ser); 381 382 wake_up_process(pool->thread); 383 384 if (wait_event_interruptible(pool->force_wait, 385 atomic_read(&pool->flush_ser) - 386 atomic_read(&pool->req_ser) >= 0)) 387 return -EINTR; 388 389 return 0; 390} 391EXPORT_SYMBOL(ib_flush_fmr_pool); 392 393/** 394 * ib_fmr_pool_map_phys - 395 * @pool:FMR pool to allocate FMR from 396 * @page_list:List of pages to map 397 * @list_len:Number of pages in @page_list 398 * @io_virtual_address:I/O virtual address for new FMR 399 * 400 * Map an FMR from an FMR pool. 401 */ 402struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, 403 u64 *page_list, 404 int list_len, 405 u64 *io_virtual_address) 406{ 407 struct ib_fmr_pool *pool = pool_handle; 408 struct ib_pool_fmr *fmr; 409 unsigned long flags; 410 int result; 411 412 if (list_len < 1 || list_len > pool->max_pages) 413 return ERR_PTR(-EINVAL); 414 415 spin_lock_irqsave(&pool->pool_lock, flags); 416 fmr = ib_fmr_cache_lookup(pool, 417 page_list, 418 list_len, 419 *io_virtual_address); 420 if (fmr) { 421 /* found in cache */ 422 ++fmr->ref_count; 423 if (fmr->ref_count == 1) { 424 list_del(&fmr->list); 425 } 426 427 spin_unlock_irqrestore(&pool->pool_lock, flags); 428 429 return fmr; 430 } 431 432 if (list_empty(&pool->free_list)) { 433 spin_unlock_irqrestore(&pool->pool_lock, flags); 434 return ERR_PTR(-EAGAIN); 435 } 436 437 fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list); 438 list_del(&fmr->list); 439 hlist_del_init(&fmr->cache_node); 440 spin_unlock_irqrestore(&pool->pool_lock, flags); 441 442 result = ib_map_phys_fmr(fmr->fmr, page_list, list_len, 443 *io_virtual_address); 444 445 if (result) { 446 spin_lock_irqsave(&pool->pool_lock, flags); 447 list_add(&fmr->list, &pool->free_list); 448 spin_unlock_irqrestore(&pool->pool_lock, flags); 449 450 printk(KERN_WARNING "fmr_map returns %d\n", 451 result); 452 453 return ERR_PTR(result); 454 } 455 456 ++fmr->remap_count; 457 fmr->ref_count = 1; 458 459 if (pool->cache_bucket) { 460 fmr->io_virtual_address = *io_virtual_address; 461 fmr->page_list_len = list_len; 462 memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list)); 463 464 spin_lock_irqsave(&pool->pool_lock, flags); 465 hlist_add_head(&fmr->cache_node, 466 pool->cache_bucket + ib_fmr_hash(fmr->page_list[0])); 467 spin_unlock_irqrestore(&pool->pool_lock, flags); 468 } 469 470 return fmr; 471} 472EXPORT_SYMBOL(ib_fmr_pool_map_phys); 473 474/** 475 * ib_fmr_pool_unmap - Unmap FMR 476 * @fmr:FMR to unmap 477 * 478 * Unmap an FMR. The FMR mapping may remain valid until the FMR is 479 * reused (or until ib_flush_fmr_pool() is called). 480 */ 481int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) 482{ 483 struct ib_fmr_pool *pool; 484 unsigned long flags; 485 486 pool = fmr->pool; 487 488 spin_lock_irqsave(&pool->pool_lock, flags); 489 490 --fmr->ref_count; 491 if (!fmr->ref_count) { 492 if (fmr->remap_count < IB_FMR_MAX_REMAPS) { 493 list_add_tail(&fmr->list, &pool->free_list); 494 } else { 495 list_add_tail(&fmr->list, &pool->dirty_list); 496 ++pool->dirty_len; 497 wake_up_process(pool->thread); 498 } 499 } 500 501#ifdef DEBUG 502 if (fmr->ref_count < 0) 503 printk(KERN_WARNING "FMR %p has ref count %d < 0", 504 fmr, fmr->ref_count); 505#endif 506 507 spin_unlock_irqrestore(&pool->pool_lock, flags); 508 509 return 0; 510} 511EXPORT_SYMBOL(ib_fmr_pool_unmap);