Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/lima: Add optional devfreq and cooling device support

Most platforms with a Mali-400 or Mali-450 GPU also have support for
changing the GPU clock frequency. Add devfreq support so the GPU clock
rate is updated based on the actual GPU usage when the
"operating-points-v2" property is present in the board.dts.

The actual devfreq code is taken from panfrost_devfreq.c and modified so
it matches what the lima hardware needs:
- a call to dev_pm_opp_set_clkname() during initialization because there
are two clocks on Mali-4x0 IPs. "core" is the one that actually clocks
the GPU so we need to control it using devfreq.
- locking when reading or writing the devfreq statistics because (unlike
panfrost) we have multiple PP and GP IRQs which may finish jobs
concurrently.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200319203427.2259891-3-martin.blumenstingl@googlemail.com

authored by

Martin Blumenstingl and committed by
Qiang Yu
19969707 6bb0942e

+308 -3
+2
drivers/gpu/drm/lima/Kconfig
··· 10 10 depends on OF 11 11 select DRM_SCHED 12 12 select DRM_GEM_SHMEM_HELPER 13 + select PM_DEVFREQ 14 + select DEVFREQ_GOV_SIMPLE_ONDEMAND 13 15 help 14 16 DRM driver for ARM Mali 400/450 GPUs.
+2 -1
drivers/gpu/drm/lima/Makefile
··· 15 15 lima_ctx.o \ 16 16 lima_dlbu.o \ 17 17 lima_bcast.o \ 18 - lima_trace.o 18 + lima_trace.o \ 19 + lima_devfreq.o 19 20 20 21 obj-$(CONFIG_DRM_LIMA) += lima.o
+234
drivers/gpu/drm/lima/lima_devfreq.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright 2020 Martin Blumenstingl <martin.blumenstingl@googlemail.com> 4 + * 5 + * Based on panfrost_devfreq.c: 6 + * Copyright 2019 Collabora ltd. 7 + */ 8 + #include <linux/clk.h> 9 + #include <linux/devfreq.h> 10 + #include <linux/devfreq_cooling.h> 11 + #include <linux/device.h> 12 + #include <linux/platform_device.h> 13 + #include <linux/pm_opp.h> 14 + #include <linux/property.h> 15 + 16 + #include "lima_device.h" 17 + #include "lima_devfreq.h" 18 + 19 + static void lima_devfreq_update_utilization(struct lima_devfreq *devfreq) 20 + { 21 + ktime_t now, last; 22 + 23 + now = ktime_get(); 24 + last = devfreq->time_last_update; 25 + 26 + if (devfreq->busy_count > 0) 27 + devfreq->busy_time += ktime_sub(now, last); 28 + else 29 + devfreq->idle_time += ktime_sub(now, last); 30 + 31 + devfreq->time_last_update = now; 32 + } 33 + 34 + static int lima_devfreq_target(struct device *dev, unsigned long *freq, 35 + u32 flags) 36 + { 37 + struct dev_pm_opp *opp; 38 + int err; 39 + 40 + opp = devfreq_recommended_opp(dev, freq, flags); 41 + if (IS_ERR(opp)) 42 + return PTR_ERR(opp); 43 + dev_pm_opp_put(opp); 44 + 45 + err = dev_pm_opp_set_rate(dev, *freq); 46 + if (err) 47 + return err; 48 + 49 + return 0; 50 + } 51 + 52 + static void lima_devfreq_reset(struct lima_devfreq *devfreq) 53 + { 54 + devfreq->busy_time = 0; 55 + devfreq->idle_time = 0; 56 + devfreq->time_last_update = ktime_get(); 57 + } 58 + 59 + static int lima_devfreq_get_dev_status(struct device *dev, 60 + struct devfreq_dev_status *status) 61 + { 62 + struct lima_device *ldev = dev_get_drvdata(dev); 63 + struct lima_devfreq *devfreq = &ldev->devfreq; 64 + unsigned long irqflags; 65 + 66 + status->current_frequency = clk_get_rate(ldev->clk_gpu); 67 + 68 + spin_lock_irqsave(&devfreq->lock, irqflags); 69 + 70 + lima_devfreq_update_utilization(devfreq); 71 + 72 + status->total_time = ktime_to_ns(ktime_add(devfreq->busy_time, 73 + devfreq->idle_time)); 74 + 
status->busy_time = ktime_to_ns(devfreq->busy_time); 75 + 76 + lima_devfreq_reset(devfreq); 77 + 78 + spin_unlock_irqrestore(&devfreq->lock, irqflags); 79 + 80 + dev_dbg(ldev->dev, "busy %lu total %lu %lu %% freq %lu MHz\n", 81 + status->busy_time, status->total_time, 82 + status->busy_time / (status->total_time / 100), 83 + status->current_frequency / 1000 / 1000); 84 + 85 + return 0; 86 + } 87 + 88 + static struct devfreq_dev_profile lima_devfreq_profile = { 89 + .polling_ms = 50, /* ~3 frames */ 90 + .target = lima_devfreq_target, 91 + .get_dev_status = lima_devfreq_get_dev_status, 92 + }; 93 + 94 + void lima_devfreq_fini(struct lima_device *ldev) 95 + { 96 + struct lima_devfreq *devfreq = &ldev->devfreq; 97 + 98 + if (devfreq->cooling) { 99 + devfreq_cooling_unregister(devfreq->cooling); 100 + devfreq->cooling = NULL; 101 + } 102 + 103 + if (devfreq->devfreq) { 104 + devm_devfreq_remove_device(&ldev->pdev->dev, 105 + devfreq->devfreq); 106 + devfreq->devfreq = NULL; 107 + } 108 + 109 + if (devfreq->opp_of_table_added) { 110 + dev_pm_opp_of_remove_table(&ldev->pdev->dev); 111 + devfreq->opp_of_table_added = false; 112 + } 113 + 114 + if (devfreq->regulators_opp_table) { 115 + dev_pm_opp_put_regulators(devfreq->regulators_opp_table); 116 + devfreq->regulators_opp_table = NULL; 117 + } 118 + 119 + if (devfreq->clkname_opp_table) { 120 + dev_pm_opp_put_clkname(devfreq->clkname_opp_table); 121 + devfreq->clkname_opp_table = NULL; 122 + } 123 + } 124 + 125 + int lima_devfreq_init(struct lima_device *ldev) 126 + { 127 + struct thermal_cooling_device *cooling; 128 + struct device *dev = &ldev->pdev->dev; 129 + struct opp_table *opp_table; 130 + struct devfreq *devfreq; 131 + struct lima_devfreq *ldevfreq = &ldev->devfreq; 132 + struct dev_pm_opp *opp; 133 + unsigned long cur_freq; 134 + int ret; 135 + 136 + if (!device_property_present(dev, "operating-points-v2")) 137 + /* Optional, continue without devfreq */ 138 + return 0; 139 + 140 + 
spin_lock_init(&ldevfreq->lock); 141 + 142 + opp_table = dev_pm_opp_set_clkname(dev, "core"); 143 + if (IS_ERR(opp_table)) { 144 + ret = PTR_ERR(opp_table); 145 + goto err_fini; 146 + } 147 + 148 + ldevfreq->clkname_opp_table = opp_table; 149 + 150 + opp_table = dev_pm_opp_set_regulators(dev, 151 + (const char *[]){ "mali" }, 152 + 1); 153 + if (IS_ERR(opp_table)) { 154 + ret = PTR_ERR(opp_table); 155 + 156 + /* Continue if the optional regulator is missing */ 157 + if (ret != -ENODEV) 158 + goto err_fini; 159 + } else { 160 + ldevfreq->regulators_opp_table = opp_table; 161 + } 162 + 163 + ret = dev_pm_opp_of_add_table(dev); 164 + if (ret) 165 + goto err_fini; 166 + ldevfreq->opp_of_table_added = true; 167 + 168 + lima_devfreq_reset(ldevfreq); 169 + 170 + cur_freq = clk_get_rate(ldev->clk_gpu); 171 + 172 + opp = devfreq_recommended_opp(dev, &cur_freq, 0); 173 + if (IS_ERR(opp)) { 174 + ret = PTR_ERR(opp); 175 + goto err_fini; 176 + } 177 + 178 + lima_devfreq_profile.initial_freq = cur_freq; 179 + dev_pm_opp_put(opp); 180 + 181 + devfreq = devm_devfreq_add_device(dev, &lima_devfreq_profile, 182 + DEVFREQ_GOV_SIMPLE_ONDEMAND, NULL); 183 + if (IS_ERR(devfreq)) { 184 + dev_err(dev, "Couldn't initialize GPU devfreq\n"); 185 + ret = PTR_ERR(devfreq); 186 + goto err_fini; 187 + } 188 + 189 + ldevfreq->devfreq = devfreq; 190 + 191 + cooling = of_devfreq_cooling_register(dev->of_node, devfreq); 192 + if (IS_ERR(cooling)) 193 + dev_info(dev, "Failed to register cooling device\n"); 194 + else 195 + ldevfreq->cooling = cooling; 196 + 197 + return 0; 198 + 199 + err_fini: 200 + lima_devfreq_fini(ldev); 201 + return ret; 202 + } 203 + 204 + void lima_devfreq_record_busy(struct lima_devfreq *devfreq) 205 + { 206 + unsigned long irqflags; 207 + 208 + if (!devfreq->devfreq) 209 + return; 210 + 211 + spin_lock_irqsave(&devfreq->lock, irqflags); 212 + 213 + lima_devfreq_update_utilization(devfreq); 214 + 215 + devfreq->busy_count++; 216 + 217 + spin_unlock_irqrestore(&devfreq->lock, 
irqflags); 218 + } 219 + 220 + void lima_devfreq_record_idle(struct lima_devfreq *devfreq) 221 + { 222 + unsigned long irqflags; 223 + 224 + if (!devfreq->devfreq) 225 + return; 226 + 227 + spin_lock_irqsave(&devfreq->lock, irqflags); 228 + 229 + lima_devfreq_update_utilization(devfreq); 230 + 231 + WARN_ON(--devfreq->busy_count < 0); 232 + 233 + spin_unlock_irqrestore(&devfreq->lock, irqflags); 234 + }
+41
drivers/gpu/drm/lima/lima_devfreq.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright 2020 Martin Blumenstingl <martin.blumenstingl@googlemail.com> */ 3 + 4 + #ifndef __LIMA_DEVFREQ_H__ 5 + #define __LIMA_DEVFREQ_H__ 6 + 7 + #include <linux/spinlock.h> 8 + #include <linux/ktime.h> 9 + 10 + struct devfreq; 11 + struct opp_table; 12 + struct thermal_cooling_device; 13 + 14 + struct lima_device; 15 + 16 + struct lima_devfreq { 17 + struct devfreq *devfreq; 18 + struct opp_table *clkname_opp_table; 19 + struct opp_table *regulators_opp_table; 20 + struct thermal_cooling_device *cooling; 21 + bool opp_of_table_added; 22 + 23 + ktime_t busy_time; 24 + ktime_t idle_time; 25 + ktime_t time_last_update; 26 + int busy_count; 27 + /* 28 + * Protect busy_time, idle_time, time_last_update and busy_count 29 + * because these can be updated concurrently, for example by the GP 30 + * and PP interrupts. 31 + */ 32 + spinlock_t lock; 33 + }; 34 + 35 + int lima_devfreq_init(struct lima_device *ldev); 36 + void lima_devfreq_fini(struct lima_device *ldev); 37 + 38 + void lima_devfreq_record_busy(struct lima_devfreq *devfreq); 39 + void lima_devfreq_record_idle(struct lima_devfreq *devfreq); 40 + 41 + #endif
+4
drivers/gpu/drm/lima/lima_device.c
··· 214 214 struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp; 215 215 int err; 216 216 217 + pipe->ldev = dev; 218 + 217 219 err = lima_sched_pipe_init(pipe, "gp"); 218 220 if (err) 219 221 return err; ··· 245 243 { 246 244 struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp; 247 245 int err, i; 246 + 247 + pipe->ldev = dev; 248 248 249 249 err = lima_sched_pipe_init(pipe, "pp"); 250 250 if (err)
+3
drivers/gpu/drm/lima/lima_device.h
··· 11 11 12 12 #include "lima_sched.h" 13 13 #include "lima_dump.h" 14 + #include "lima_devfreq.h" 14 15 15 16 enum lima_gpu_id { 16 17 lima_gpu_mali400 = 0, ··· 98 97 99 98 u32 *dlbu_cpu; 100 99 dma_addr_t dlbu_dma; 100 + 101 + struct lima_devfreq devfreq; 101 102 102 103 /* debug info */ 103 104 struct lima_dump_head dump;
+12 -2
drivers/gpu/drm/lima/lima_drv.c
··· 10 10 #include <drm/drm_prime.h> 11 11 #include <drm/lima_drm.h> 12 12 13 + #include "lima_device.h" 13 14 #include "lima_drv.h" 14 15 #include "lima_gem.h" 15 16 #include "lima_vm.h" ··· 398 397 if (err) 399 398 goto err_out1; 400 399 400 + err = lima_devfreq_init(ldev); 401 + if (err) { 402 + dev_err(&pdev->dev, "Fatal error during devfreq init\n"); 403 + goto err_out2; 404 + } 405 + 401 406 /* 402 407 * Register the DRM device with the core and the connectors with 403 408 * sysfs. 404 409 */ 405 410 err = drm_dev_register(ddev, 0); 406 411 if (err < 0) 407 - goto err_out2; 412 + goto err_out3; 408 413 409 414 platform_set_drvdata(pdev, ldev); 410 415 ··· 419 412 420 413 return 0; 421 414 422 - err_out2: 415 + err_out3: 423 416 lima_device_fini(ldev); 417 + err_out2: 418 + lima_devfreq_fini(ldev); 424 419 err_out1: 425 420 drm_dev_put(ddev); 426 421 err_out0: ··· 438 429 sysfs_remove_bin_file(&ldev->dev->kobj, &lima_error_state_attr); 439 430 platform_set_drvdata(pdev, NULL); 440 431 drm_dev_unregister(ddev); 432 + lima_devfreq_fini(ldev); 441 433 lima_device_fini(ldev); 442 434 drm_dev_put(ddev); 443 435 lima_sched_slab_fini();
+7
drivers/gpu/drm/lima/lima_sched.c
··· 5 5 #include <linux/slab.h> 6 6 #include <linux/vmalloc.h> 7 7 8 + #include "lima_devfreq.h" 8 9 #include "lima_drv.h" 9 10 #include "lima_sched.h" 10 11 #include "lima_vm.h" ··· 217 216 */ 218 217 ret = dma_fence_get(task->fence); 219 218 219 + lima_devfreq_record_busy(&pipe->ldev->devfreq); 220 + 220 221 pipe->current_task = task; 221 222 222 223 /* this is needed for MMU to work correctly, otherwise GP/PP ··· 421 418 pipe->current_vm = NULL; 422 419 pipe->current_task = NULL; 423 420 421 + lima_devfreq_record_idle(&pipe->ldev->devfreq); 422 + 424 423 drm_sched_resubmit_jobs(&pipe->base); 425 424 drm_sched_start(&pipe->base, true); 426 425 } ··· 502 497 } else { 503 498 pipe->task_fini(pipe); 504 499 dma_fence_signal(task->fence); 500 + 501 + lima_devfreq_record_idle(&pipe->ldev->devfreq); 505 502 } 506 503 }
+3
drivers/gpu/drm/lima/lima_sched.h
··· 8 8 #include <linux/list.h> 9 9 #include <linux/xarray.h> 10 10 11 + struct lima_device; 11 12 struct lima_vm; 12 13 13 14 struct lima_sched_error_task { ··· 52 51 u64 fence_context; 53 52 u32 fence_seqno; 54 53 spinlock_t fence_lock; 54 + 55 + struct lima_device *ldev; 55 56 56 57 struct lima_sched_task *current_task; 57 58 struct lima_vm *current_vm;