[XFS] On-demand reaping of the MRU cache

Instead of running the MRU cache reaper periodically on a timeout regardless
of activity, run it only while the cache holds active objects. This allows
CPUs to sleep when there is no cache activity rather than being woken
repeatedly just to check whether there is anything to do.
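The idea is easiest to see in isolation. Below is a minimal, hypothetical sketch of the pattern (simplified names and locking, not the actual xfs_mru_cache code): the insert path arms the delayed work only when it is not already queued, and the reaper re-arms itself only while live entries remain, so an idle cache causes no timer wakeups.

/*
 * Hypothetical, simplified sketch of on-demand reaping -- not the actual
 * xfs_mru_cache code.  A single lock protects the count and the queued flag.
 */
#include <linux/workqueue.h>
#include <linux/spinlock.h>

struct demand_cache {
        spinlock_t              lock;
        unsigned int            queued;         /* is reap work pending? */
        unsigned long           lifetime;       /* entry lifetime in jiffies */
        unsigned long           nr_entries;     /* live entries in the cache */
        struct delayed_work     work;           /* the reaper */
        struct workqueue_struct *wq;
};

/* Insert path: arm the reaper only if it is not already queued. */
static void demand_cache_insert(struct demand_cache *c)
{
        spin_lock(&c->lock);
        c->nr_entries++;
        if (!c->queued) {
                c->queued = 1;
                queue_delayed_work(c->wq, &c->work, c->lifetime);
        }
        spin_unlock(&c->lock);
}

/* Reaper: free expired entries, then re-arm only if entries remain. */
static void demand_cache_reap(struct work_struct *work)
{
        struct demand_cache *c = container_of(work, struct demand_cache,
                                              work.work);

        spin_lock(&c->lock);
        /* ... free expired entries, decrementing nr_entries for each ... */
        if (c->nr_entries)
                queue_delayed_work(c->wq, &c->work, c->lifetime);
        else
                c->queued = 0;  /* cache empty: stay idle until next insert */
        spin_unlock(&c->lock);
}

The actual patch follows the same shape: the insert path queues the work only when mru->queued is clear, and xfs_mru_cache_reap() re-queues itself based on the next expiry time returned by _xfs_mru_cache_migrate(), dropping back to idle once nothing is left.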

SGI-PV: 968554
SGI-Modid: xfs-linux-melb:xfs-kern:29305a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Donald Douwsma <donaldd@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>

Authored by David Chinner; committed by Tim Shimmin (65de5567, c2f82897)

3 files changed, 31 insertions(+), 50 deletions(-)

fs/xfs/xfs_filestream.c (+1 -2)
 xfs_filestream_flush(
 	xfs_mount_t		*mp)
 {
-	/* point in time flush, so keep the reaper running */
-	xfs_mru_cache_flush(mp->m_filestream, 1);
+	xfs_mru_cache_flush(mp->m_filestream);
 }
 
 /*

fs/xfs/xfs_mru_cache.c (+28 -44)
  */
 	if (!_xfs_mru_cache_migrate(mru, now)) {
 		mru->time_zero = now;
-		if (!mru->next_reap)
-			mru->next_reap = mru->grp_count * mru->grp_time;
+		if (!mru->queued) {
+			mru->queued = 1;
+			queue_delayed_work(xfs_mru_reap_wq, &mru->work,
+					mru->grp_count * mru->grp_time);
+		}
 	} else {
 		grp = (now - mru->time_zero) / mru->grp_time;
 		grp = (mru->lru_grp + grp) % mru->grp_count;
···
 	struct work_struct	*work)
 {
 	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work);
-	unsigned long		now;
+	unsigned long		now, next;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
 		return;
 
 	mutex_spinlock(&mru->lock);
-	now = jiffies;
-	if (mru->reap_all ||
-	    (mru->next_reap && time_after(now, mru->next_reap))) {
-		if (mru->reap_all)
-			now += mru->grp_count * mru->grp_time * 2;
-		mru->next_reap = _xfs_mru_cache_migrate(mru, now);
-		_xfs_mru_cache_clear_reap_list(mru);
+	next = _xfs_mru_cache_migrate(mru, jiffies);
+	_xfs_mru_cache_clear_reap_list(mru);
+
+	mru->queued = next;
+	if ((mru->queued > 0)) {
+		now = jiffies;
+		if (next <= now)
+			next = 0;
+		else
+			next -= now;
+		queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
 	}
 
-	/*
-	 * the process that triggered the reap_all is responsible
-	 * for restating the periodic reap if it is required.
-	 */
-	if (!mru->reap_all)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-	mru->reap_all = 0;
 	mutex_spinunlock(&mru->lock, 0);
 }
···
 
 	/* An extra list is needed to avoid reaping up to a grp_time early. */
 	mru->grp_count = grp_count + 1;
-	mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+	mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
 
 	if (!mru->lists) {
 		err = ENOMEM;
···
 	mru->grp_time = grp_time;
 	mru->free_func = free_func;
 
-	/* start up the reaper event */
-	mru->next_reap = 0;
-	mru->reap_all = 0;
-	queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-
 	*mrup = mru;
 
 exit:
···
  * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
  * free functions as they're deleted. When this function returns, the caller is
  * guaranteed that all the free functions for all the elements have finished
- * executing.
- *
- * While we are flushing, we stop the periodic reaper event from triggering.
- * Normally, we want to restart this periodic event, but if we are shutting
- * down the cache we do not want it restarted. hence the restart parameter
- * where 0 = do not restart reaper and 1 = restart reaper.
+ * executing and the reaper is not running.
  */
 void
 xfs_mru_cache_flush(
-	xfs_mru_cache_t		*mru,
-	int			restart)
+	xfs_mru_cache_t		*mru)
 {
 	if (!mru || !mru->lists)
 		return;
 
-	cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
-
 	mutex_spinlock(&mru->lock);
-	mru->reap_all = 1;
-	mutex_spinunlock(&mru->lock, 0);
+	if (mru->queued) {
+		mutex_spinunlock(&mru->lock, 0);
+		cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+		mutex_spinlock(&mru->lock);
+	}
 
-	queue_work(xfs_mru_reap_wq, &mru->work.work);
-	flush_workqueue(xfs_mru_reap_wq);
+	_xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
+	_xfs_mru_cache_clear_reap_list(mru);
 
-	mutex_spinlock(&mru->lock);
-	WARN_ON_ONCE(mru->reap_all != 0);
-	mru->reap_all = 0;
-	if (restart)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
 	mutex_spinunlock(&mru->lock, 0);
 }
···
 	if (!mru || !mru->lists)
 		return;
 
-	/* we don't want the reaper to restart here */
-	xfs_mru_cache_flush(mru, 0);
+	xfs_mru_cache_flush(mru);
 
 	kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
 	kmem_free(mru, sizeof(*mru));

fs/xfs/xfs_mru_cache.h (+2 -4)
 	unsigned int		grp_time;  /* Time period spanned by grps.  */
 	unsigned int		lru_grp;   /* Group containing time zero.   */
 	unsigned long		time_zero; /* Time first element was added. */
-	unsigned long		next_reap; /* Time that the reaper should
-					      next do something. */
-	unsigned int		reap_all;  /* if set, reap all lists */
 	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
 	struct delayed_work	work;      /* Workqueue data for reaping.   */
+	unsigned int		queued;	   /* work has been queued */
 } xfs_mru_cache_t;
 
 int xfs_mru_cache_init(void);
···
 int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
 			     unsigned int grp_count,
 			     xfs_mru_cache_free_func_t free_func);
-void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
 				void *value);