Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915: Ratelimit i915_globals_park

When doing our global park, we like to be a good citizen and shrink our
slab caches (of which we have quite a few now), but each
kmem_cache_shrink() incurs a stop_machine() and so ends up being quite
expensive, causing machine-wide stalls. While ideally we would like to
throw away unused pages in our slab caches whenever it appears that we
are idling, doing so will require a much cheaper mechanism. In the
meantime use a delayed work item to impose a rate-limit that means we have
to have been idle for more than 2 seconds before we start shrinking.

References: https://gitlab.freedesktop.org/drm/intel/issues/848
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191218094057.3510459-1-chris@chris-wilson.co.uk

+44 -9
+44 -9
drivers/gpu/drm/i915/i915_globals.c
··· 20 20 static atomic_t active; 21 21 static atomic_t epoch; 22 22 static struct park_work { 23 - struct rcu_work work; 23 + struct delayed_work work; 24 + struct rcu_head rcu; 25 + unsigned long flags; 26 + #define PENDING 0 24 27 int epoch; 25 28 } park; 26 29 ··· 40 37 global->shrink(); 41 38 } 42 39 40 + static void __i915_globals_grace(struct rcu_head *rcu) 41 + { 42 + /* Ratelimit parking as shrinking is quite slow */ 43 + schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ)); 44 + } 45 + 46 + static void __i915_globals_queue_rcu(void) 47 + { 48 + park.epoch = atomic_inc_return(&epoch); 49 + if (!atomic_read(&active)) { 50 + init_rcu_head(&park.rcu); 51 + call_rcu(&park.rcu, __i915_globals_grace); 52 + } 53 + } 54 + 43 55 static void __i915_globals_park(struct work_struct *work) 44 56 { 57 + destroy_rcu_head(&park.rcu); 58 + 45 59 /* Confirm nothing woke up in the last grace period */ 46 - if (park.epoch == atomic_read(&epoch)) 47 - i915_globals_shrink(); 60 + if (park.epoch != atomic_read(&epoch)) { 61 + __i915_globals_queue_rcu(); 62 + return; 63 + } 64 + 65 + clear_bit(PENDING, &park.flags); 66 + i915_globals_shrink(); 48 67 } 49 68 50 69 void __init i915_global_register(struct i915_global *global) ··· 110 85 } 111 86 } 112 87 113 - INIT_RCU_WORK(&park.work, __i915_globals_park); 88 + INIT_DELAYED_WORK(&park.work, __i915_globals_park); 114 89 return 0; 115 90 } 116 91 ··· 128 103 if (!atomic_dec_and_test(&active)) 129 104 return; 130 105 131 - park.epoch = atomic_inc_return(&epoch); 132 - queue_rcu_work(system_wq, &park.work); 106 + /* Queue cleanup after the next RCU grace period has freed slabs */ 107 + if (!test_and_set_bit(PENDING, &park.flags)) 108 + __i915_globals_queue_rcu(); 133 109 } 134 110 135 111 void i915_globals_unpark(void) ··· 139 113 atomic_inc(&active); 140 114 } 141 115 116 + static void __exit __i915_globals_flush(void) 117 + { 118 + atomic_inc(&active); /* skip shrinking */ 119 + 120 + rcu_barrier(); /* wait for the work to be queued */
121 + flush_delayed_work(&park.work); 122 + 123 + atomic_dec(&active); 124 + } 125 + 142 126 void __exit i915_globals_exit(void) 143 127 { 144 - /* Flush any residual park_work */ 145 - atomic_inc(&epoch); 146 - flush_rcu_work(&park.work); 128 + GEM_BUG_ON(atomic_read(&active)); 147 129 130 + __i915_globals_flush(); 148 131 __i915_globals_cleanup(); 149 132 150 133 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */