gpu: host1x: Add channel support · tjh.dev/kernel@6579324

+12

drivers/gpu/host1x/Kconfig

··· 8 8 Tegra's graphics- and multimedia-related modules. The modules served 9 9 by host1x are referred to as clients. host1x includes some other 10 10 functionality, such as synchronization. 11 + 12 + if TEGRA_HOST1X 13 + 14 + config TEGRA_HOST1X_FIREWALL 15 + bool "Enable HOST1X security firewall" 16 + default y 17 + help 18 + Say yes if kernel should protect command streams from tampering. 19 + 20 + If unsure, choose Y. 21 + 22 + endif

+3

drivers/gpu/host1x/Makefile

··· 4 4 syncpt.o \ 5 5 dev.o \ 6 6 intr.o \ 7 + cdma.o \ 8 + channel.o \ 9 + job.o \ 7 10 hw/host1x01.o 8 11 9 12 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o

+487

drivers/gpu/host1x/cdma.c

··· 1 + /* 2 + * Tegra host1x Command DMA 3 + * 4 + * Copyright (c) 2010-2013, NVIDIA Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms and conditions of the GNU General Public License, 8 + * version 2, as published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope it will be useful, but WITHOUT 11 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 + * more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + 20 + #include <asm/cacheflush.h> 21 + #include <linux/device.h> 22 + #include <linux/dma-mapping.h> 23 + #include <linux/interrupt.h> 24 + #include <linux/kernel.h> 25 + #include <linux/kfifo.h> 26 + #include <linux/slab.h> 27 + #include <trace/events/host1x.h> 28 + 29 + #include "cdma.h" 30 + #include "channel.h" 31 + #include "dev.h" 32 + #include "debug.h" 33 + #include "host1x_bo.h" 34 + #include "job.h" 35 + 36 + /* 37 + * push_buffer 38 + * 39 + * The push buffer is a circular array of words to be fetched by command DMA. 40 + * Note that it works slightly differently to the sync queue; fence == pos 41 + * means that the push buffer is full, not empty. 
42 + */ 43 + 44 + #define HOST1X_PUSHBUFFER_SLOTS 512 45 + 46 + /* 47 + * Clean up push buffer resources 48 + */ 49 + static void host1x_pushbuffer_destroy(struct push_buffer *pb) 50 + { 51 + struct host1x_cdma *cdma = pb_to_cdma(pb); 52 + struct host1x *host1x = cdma_to_host1x(cdma); 53 + 54 + if (pb->phys != 0) 55 + dma_free_writecombine(host1x->dev, pb->size_bytes + 4, 56 + pb->mapped, pb->phys); 57 + 58 + pb->mapped = NULL; 59 + pb->phys = 0; 60 + } 61 + 62 + /* 63 + * Init push buffer resources 64 + */ 65 + static int host1x_pushbuffer_init(struct push_buffer *pb) 66 + { 67 + struct host1x_cdma *cdma = pb_to_cdma(pb); 68 + struct host1x *host1x = cdma_to_host1x(cdma); 69 + 70 + pb->mapped = NULL; 71 + pb->phys = 0; 72 + pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8; 73 + 74 + /* initialize buffer pointers */ 75 + pb->fence = pb->size_bytes - 8; 76 + pb->pos = 0; 77 + 78 + /* allocate and map pushbuffer memory */ 79 + pb->mapped = dma_alloc_writecombine(host1x->dev, pb->size_bytes + 4, 80 + &pb->phys, GFP_KERNEL); 81 + if (!pb->mapped) 82 + goto fail; 83 + 84 + host1x_hw_pushbuffer_init(host1x, pb); 85 + 86 + return 0; 87 + 88 + fail: 89 + host1x_pushbuffer_destroy(pb); 90 + return -ENOMEM; 91 + } 92 + 93 + /* 94 + * Push two words to the push buffer 95 + * Caller must ensure push buffer is not full 96 + */ 97 + static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) 98 + { 99 + u32 pos = pb->pos; 100 + u32 *p = (u32 *)((u32)pb->mapped + pos); 101 + WARN_ON(pos == pb->fence); 102 + *(p++) = op1; 103 + *(p++) = op2; 104 + pb->pos = (pos + 8) & (pb->size_bytes - 1); 105 + } 106 + 107 + /* 108 + * Pop a number of two word slots from the push buffer 109 + * Caller must ensure push buffer is not empty 110 + */ 111 + static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) 112 + { 113 + /* Advance the next write position */ 114 + pb->fence = (pb->fence + slots * 8) & (pb->size_bytes - 1); 115 + } 116 + 117 + /* 118 + * Return 
the number of two word slots free in the push buffer 119 + */ 120 + static u32 host1x_pushbuffer_space(struct push_buffer *pb) 121 + { 122 + return ((pb->fence - pb->pos) & (pb->size_bytes - 1)) / 8; 123 + } 124 + 125 + /* 126 + * Sleep (if necessary) until the requested event happens 127 + * - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty. 128 + * - Returns 1 129 + * - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer 130 + * - Return the amount of space (> 0) 131 + * Must be called with the cdma lock held. 132 + */ 133 + unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, 134 + enum cdma_event event) 135 + { 136 + for (;;) { 137 + unsigned int space; 138 + 139 + if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY) 140 + space = list_empty(&cdma->sync_queue) ? 1 : 0; 141 + else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE) { 142 + struct push_buffer *pb = &cdma->push_buffer; 143 + space = host1x_pushbuffer_space(pb); 144 + } else { 145 + WARN_ON(1); 146 + return -EINVAL; 147 + } 148 + 149 + if (space) 150 + return space; 151 + 152 + trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), 153 + event); 154 + 155 + /* If somebody has managed to already start waiting, yield */ 156 + if (cdma->event != CDMA_EVENT_NONE) { 157 + mutex_unlock(&cdma->lock); 158 + schedule(); 159 + mutex_lock(&cdma->lock); 160 + continue; 161 + } 162 + cdma->event = event; 163 + 164 + mutex_unlock(&cdma->lock); 165 + down(&cdma->sem); 166 + mutex_lock(&cdma->lock); 167 + } 168 + return 0; 169 + } 170 + 171 + /* 172 + * Start timer that tracks the time spent by the job. 173 + * Must be called with the cdma lock held. 
174 + */ 175 + static void cdma_start_timer_locked(struct host1x_cdma *cdma, 176 + struct host1x_job *job) 177 + { 178 + struct host1x *host = cdma_to_host1x(cdma); 179 + 180 + if (cdma->timeout.client) { 181 + /* timer already started */ 182 + return; 183 + } 184 + 185 + cdma->timeout.client = job->client; 186 + cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id); 187 + cdma->timeout.syncpt_val = job->syncpt_end; 188 + cdma->timeout.start_ktime = ktime_get(); 189 + 190 + schedule_delayed_work(&cdma->timeout.wq, 191 + msecs_to_jiffies(job->timeout)); 192 + } 193 + 194 + /* 195 + * Stop timer when a buffer submission completes. 196 + * Must be called with the cdma lock held. 197 + */ 198 + static void stop_cdma_timer_locked(struct host1x_cdma *cdma) 199 + { 200 + cancel_delayed_work(&cdma->timeout.wq); 201 + cdma->timeout.client = 0; 202 + } 203 + 204 + /* 205 + * For all sync queue entries that have already finished according to the 206 + * current sync point registers: 207 + * - unpin & unref their mems 208 + * - pop their push buffer slots 209 + * - remove them from the sync queue 210 + * This is normally called from the host code's worker thread, but can be 211 + * called manually if necessary. 212 + * Must be called with the cdma lock held. 
213 + */ 214 + static void update_cdma_locked(struct host1x_cdma *cdma) 215 + { 216 + bool signal = false; 217 + struct host1x *host1x = cdma_to_host1x(cdma); 218 + struct host1x_job *job, *n; 219 + 220 + /* If CDMA is stopped, queue is cleared and we can return */ 221 + if (!cdma->running) 222 + return; 223 + 224 + /* 225 + * Walk the sync queue, reading the sync point registers as necessary, 226 + * to consume as many sync queue entries as possible without blocking 227 + */ 228 + list_for_each_entry_safe(job, n, &cdma->sync_queue, list) { 229 + struct host1x_syncpt *sp = 230 + host1x_syncpt_get(host1x, job->syncpt_id); 231 + 232 + /* Check whether this syncpt has completed, and bail if not */ 233 + if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { 234 + /* Start timer on next pending syncpt */ 235 + if (job->timeout) 236 + cdma_start_timer_locked(cdma, job); 237 + break; 238 + } 239 + 240 + /* Cancel timeout, when a buffer completes */ 241 + if (cdma->timeout.client) 242 + stop_cdma_timer_locked(cdma); 243 + 244 + /* Unpin the memory */ 245 + host1x_job_unpin(job); 246 + 247 + /* Pop push buffer slots */ 248 + if (job->num_slots) { 249 + struct push_buffer *pb = &cdma->push_buffer; 250 + host1x_pushbuffer_pop(pb, job->num_slots); 251 + if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE) 252 + signal = true; 253 + } 254 + 255 + list_del(&job->list); 256 + host1x_job_put(job); 257 + } 258 + 259 + if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY && 260 + list_empty(&cdma->sync_queue)) 261 + signal = true; 262 + 263 + if (signal) { 264 + cdma->event = CDMA_EVENT_NONE; 265 + up(&cdma->sem); 266 + } 267 + } 268 + 269 + void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, 270 + struct device *dev) 271 + { 272 + u32 restart_addr; 273 + u32 syncpt_incrs; 274 + struct host1x_job *job = NULL; 275 + u32 syncpt_val; 276 + struct host1x *host1x = cdma_to_host1x(cdma); 277 + 278 + syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); 279 + 280 + dev_dbg(dev, "%s: 
starting cleanup (thresh %d)\n", 281 + __func__, syncpt_val); 282 + 283 + /* 284 + * Move the sync_queue read pointer to the first entry that hasn't 285 + * completed based on the current HW syncpt value. It's likely there 286 + * won't be any (i.e. we're still at the head), but covers the case 287 + * where a syncpt incr happens just prior/during the teardown. 288 + */ 289 + 290 + dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n", 291 + __func__); 292 + 293 + list_for_each_entry(job, &cdma->sync_queue, list) { 294 + if (syncpt_val < job->syncpt_end) 295 + break; 296 + 297 + host1x_job_dump(dev, job); 298 + } 299 + 300 + /* 301 + * Walk the sync_queue, first incrementing with the CPU syncpts that 302 + * are partially executed (the first buffer) or fully skipped while 303 + * still in the current context (slots are also NOP-ed). 304 + * 305 + * At the point contexts are interleaved, syncpt increments must be 306 + * done inline with the pushbuffer from a GATHER buffer to maintain 307 + * the order (slots are modified to be a GATHER of syncpt incrs). 308 + * 309 + * Note: save in restart_addr the location where the timed out buffer 310 + * started in the PB, so we can start the refetch from there (with the 311 + * modified NOP-ed PB slots). This lets things appear to have completed 312 + * properly for this buffer and resources are freed. 
313 + */ 314 + 315 + dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n", 316 + __func__); 317 + 318 + if (!list_empty(&cdma->sync_queue)) 319 + restart_addr = job->first_get; 320 + else 321 + restart_addr = cdma->last_pos; 322 + 323 + /* do CPU increments as long as this context continues */ 324 + list_for_each_entry_from(job, &cdma->sync_queue, list) { 325 + /* different context, gets us out of this loop */ 326 + if (job->client != cdma->timeout.client) 327 + break; 328 + 329 + /* won't need a timeout when replayed */ 330 + job->timeout = 0; 331 + 332 + syncpt_incrs = job->syncpt_end - syncpt_val; 333 + dev_dbg(dev, "%s: CPU incr (%d)\n", __func__, syncpt_incrs); 334 + 335 + host1x_job_dump(dev, job); 336 + 337 + /* safe to use CPU to incr syncpts */ 338 + host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get, 339 + syncpt_incrs, job->syncpt_end, 340 + job->num_slots); 341 + 342 + syncpt_val += syncpt_incrs; 343 + } 344 + 345 + /* The following sumbits from the same client may be dependent on the 346 + * failed submit and therefore they may fail. 
Force a small timeout 347 + * to make the queue cleanup faster */ 348 + 349 + list_for_each_entry_from(job, &cdma->sync_queue, list) 350 + if (job->client == cdma->timeout.client) 351 + job->timeout = min_t(unsigned int, job->timeout, 500); 352 + 353 + dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); 354 + 355 + /* roll back DMAGET and start up channel again */ 356 + host1x_hw_cdma_resume(host1x, cdma, restart_addr); 357 + } 358 + 359 + /* 360 + * Create a cdma 361 + */ 362 + int host1x_cdma_init(struct host1x_cdma *cdma) 363 + { 364 + int err; 365 + 366 + mutex_init(&cdma->lock); 367 + sema_init(&cdma->sem, 0); 368 + 369 + INIT_LIST_HEAD(&cdma->sync_queue); 370 + 371 + cdma->event = CDMA_EVENT_NONE; 372 + cdma->running = false; 373 + cdma->torndown = false; 374 + 375 + err = host1x_pushbuffer_init(&cdma->push_buffer); 376 + if (err) 377 + return err; 378 + return 0; 379 + } 380 + 381 + /* 382 + * Destroy a cdma 383 + */ 384 + int host1x_cdma_deinit(struct host1x_cdma *cdma) 385 + { 386 + struct push_buffer *pb = &cdma->push_buffer; 387 + struct host1x *host1x = cdma_to_host1x(cdma); 388 + 389 + if (cdma->running) { 390 + pr_warn("%s: CDMA still running\n", __func__); 391 + return -EBUSY; 392 + } 393 + 394 + host1x_pushbuffer_destroy(pb); 395 + host1x_hw_cdma_timeout_destroy(host1x, cdma); 396 + 397 + return 0; 398 + } 399 + 400 + /* 401 + * Begin a cdma submit 402 + */ 403 + int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) 404 + { 405 + struct host1x *host1x = cdma_to_host1x(cdma); 406 + 407 + mutex_lock(&cdma->lock); 408 + 409 + if (job->timeout) { 410 + /* init state on first submit with timeout value */ 411 + if (!cdma->timeout.initialized) { 412 + int err; 413 + err = host1x_hw_cdma_timeout_init(host1x, cdma, 414 + job->syncpt_id); 415 + if (err) { 416 + mutex_unlock(&cdma->lock); 417 + return err; 418 + } 419 + } 420 + } 421 + if (!cdma->running) 422 + host1x_hw_cdma_start(host1x, cdma); 423 + 424 + cdma->slots_free = 
0; 425 + cdma->slots_used = 0; 426 + cdma->first_get = cdma->push_buffer.pos; 427 + 428 + trace_host1x_cdma_begin(dev_name(job->channel->dev)); 429 + return 0; 430 + } 431 + 432 + /* 433 + * Push two words into a push buffer slot 434 + * Blocks as necessary if the push buffer is full. 435 + */ 436 + void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) 437 + { 438 + struct host1x *host1x = cdma_to_host1x(cdma); 439 + struct push_buffer *pb = &cdma->push_buffer; 440 + u32 slots_free = cdma->slots_free; 441 + 442 + if (slots_free == 0) { 443 + host1x_hw_cdma_flush(host1x, cdma); 444 + slots_free = host1x_cdma_wait_locked(cdma, 445 + CDMA_EVENT_PUSH_BUFFER_SPACE); 446 + } 447 + cdma->slots_free = slots_free - 1; 448 + cdma->slots_used++; 449 + host1x_pushbuffer_push(pb, op1, op2); 450 + } 451 + 452 + /* 453 + * End a cdma submit 454 + * Kick off DMA, add job to the sync queue, and a number of slots to be freed 455 + * from the pushbuffer. The handles for a submit must all be pinned at the same 456 + * time, but they can be unpinned in smaller chunks. 457 + */ 458 + void host1x_cdma_end(struct host1x_cdma *cdma, 459 + struct host1x_job *job) 460 + { 461 + struct host1x *host1x = cdma_to_host1x(cdma); 462 + bool idle = list_empty(&cdma->sync_queue); 463 + 464 + host1x_hw_cdma_flush(host1x, cdma); 465 + 466 + job->first_get = cdma->first_get; 467 + job->num_slots = cdma->slots_used; 468 + host1x_job_get(job); 469 + list_add_tail(&job->list, &cdma->sync_queue); 470 + 471 + /* start timer on idle -> active transitions */ 472 + if (job->timeout && idle) 473 + cdma_start_timer_locked(cdma, job); 474 + 475 + trace_host1x_cdma_end(dev_name(job->channel->dev)); 476 + mutex_unlock(&cdma->lock); 477 + } 478 + 479 + /* 480 + * Update cdma state according to current sync point values 481 + */ 482 + void host1x_cdma_update(struct host1x_cdma *cdma) 483 + { 484 + mutex_lock(&cdma->lock); 485 + update_cdma_locked(cdma); 486 + mutex_unlock(&cdma->lock); 487 + }

+100

drivers/gpu/host1x/cdma.h

··· 1 + /* 2 + * Tegra host1x Command DMA 3 + * 4 + * Copyright (c) 2010-2013, NVIDIA Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms and conditions of the GNU General Public License, 8 + * version 2, as published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope it will be useful, but WITHOUT 11 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 + * more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #ifndef __HOST1X_CDMA_H 20 + #define __HOST1X_CDMA_H 21 + 22 + #include <linux/sched.h> 23 + #include <linux/semaphore.h> 24 + #include <linux/list.h> 25 + 26 + struct host1x_syncpt; 27 + struct host1x_userctx_timeout; 28 + struct host1x_job; 29 + 30 + /* 31 + * cdma 32 + * 33 + * This is in charge of a host command DMA channel. 34 + * Sends ops to a push buffer, and takes responsibility for unpinning 35 + * (& possibly freeing) of memory after those ops have completed. 
36 + * Producer: 37 + * begin 38 + * push - send ops to the push buffer 39 + * end - start command DMA and enqueue handles to be unpinned 40 + * Consumer: 41 + * update - call to update sync queue and push buffer, unpin memory 42 + */ 43 + 44 + struct push_buffer { 45 + u32 *mapped; /* mapped pushbuffer memory */ 46 + dma_addr_t phys; /* physical address of pushbuffer */ 47 + u32 fence; /* index we've written */ 48 + u32 pos; /* index to write to */ 49 + u32 size_bytes; 50 + }; 51 + 52 + struct buffer_timeout { 53 + struct delayed_work wq; /* work queue */ 54 + bool initialized; /* timer one-time setup flag */ 55 + struct host1x_syncpt *syncpt; /* buffer completion syncpt */ 56 + u32 syncpt_val; /* syncpt value when completed */ 57 + ktime_t start_ktime; /* starting time */ 58 + /* context timeout information */ 59 + int client; 60 + }; 61 + 62 + enum cdma_event { 63 + CDMA_EVENT_NONE, /* not waiting for any event */ 64 + CDMA_EVENT_SYNC_QUEUE_EMPTY, /* wait for empty sync queue */ 65 + CDMA_EVENT_PUSH_BUFFER_SPACE /* wait for space in push buffer */ 66 + }; 67 + 68 + struct host1x_cdma { 69 + struct mutex lock; /* controls access to shared state */ 70 + struct semaphore sem; /* signalled when event occurs */ 71 + enum cdma_event event; /* event that sem is waiting for */ 72 + unsigned int slots_used; /* pb slots used in current submit */ 73 + unsigned int slots_free; /* pb slots free in current submit */ 74 + unsigned int first_get; /* DMAGET value, where submit begins */ 75 + unsigned int last_pos; /* last value written to DMAPUT */ 76 + struct push_buffer push_buffer; /* channel's push buffer */ 77 + struct list_head sync_queue; /* job queue */ 78 + struct buffer_timeout timeout; /* channel's timeout state/wq */ 79 + bool running; 80 + bool torndown; 81 + }; 82 + 83 + #define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma) 84 + #define cdma_to_host1x(cdma) dev_get_drvdata(cdma_to_channel(cdma)->dev->parent) 85 + #define pb_to_cdma(pb) 
container_of(pb, struct host1x_cdma, push_buffer) 86 + 87 + int host1x_cdma_init(struct host1x_cdma *cdma); 88 + int host1x_cdma_deinit(struct host1x_cdma *cdma); 89 + void host1x_cdma_stop(struct host1x_cdma *cdma); 90 + int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job); 91 + void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2); 92 + void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job); 93 + void host1x_cdma_update(struct host1x_cdma *cdma); 94 + void host1x_cdma_peek(struct host1x_cdma *cdma, u32 dmaget, int slot, 95 + u32 *out); 96 + unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, 97 + enum cdma_event event); 98 + void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, 99 + struct device *dev); 100 + #endif

+126

drivers/gpu/host1x/channel.c

··· 1 + /* 2 + * Tegra host1x Channel 3 + * 4 + * Copyright (c) 2010-2013, NVIDIA Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms and conditions of the GNU General Public License, 8 + * version 2, as published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope it will be useful, but WITHOUT 11 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 + * more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #include <linux/slab.h> 20 + #include <linux/module.h> 21 + 22 + #include "channel.h" 23 + #include "dev.h" 24 + #include "job.h" 25 + 26 + /* Constructor for the host1x device list */ 27 + int host1x_channel_list_init(struct host1x *host) 28 + { 29 + INIT_LIST_HEAD(&host->chlist.list); 30 + mutex_init(&host->chlist_mutex); 31 + 32 + if (host->info->nb_channels > BITS_PER_LONG) { 33 + WARN(1, "host1x hardware has more channels than supported by the driver\n"); 34 + return -ENOSYS; 35 + } 36 + 37 + return 0; 38 + } 39 + 40 + int host1x_job_submit(struct host1x_job *job) 41 + { 42 + struct host1x *host = dev_get_drvdata(job->channel->dev->parent); 43 + 44 + return host1x_hw_channel_submit(host, job); 45 + } 46 + 47 + struct host1x_channel *host1x_channel_get(struct host1x_channel *channel) 48 + { 49 + int err = 0; 50 + 51 + mutex_lock(&channel->reflock); 52 + 53 + if (channel->refcount == 0) 54 + err = host1x_cdma_init(&channel->cdma); 55 + 56 + if (!err) 57 + channel->refcount++; 58 + 59 + mutex_unlock(&channel->reflock); 60 + 61 + return err ? 
NULL : channel; 62 + } 63 + 64 + void host1x_channel_put(struct host1x_channel *channel) 65 + { 66 + mutex_lock(&channel->reflock); 67 + 68 + if (channel->refcount == 1) { 69 + struct host1x *host = dev_get_drvdata(channel->dev->parent); 70 + 71 + host1x_hw_cdma_stop(host, &channel->cdma); 72 + host1x_cdma_deinit(&channel->cdma); 73 + } 74 + 75 + channel->refcount--; 76 + 77 + mutex_unlock(&channel->reflock); 78 + } 79 + 80 + struct host1x_channel *host1x_channel_request(struct device *dev) 81 + { 82 + struct host1x *host = dev_get_drvdata(dev->parent); 83 + int max_channels = host->info->nb_channels; 84 + struct host1x_channel *channel = NULL; 85 + int index, err; 86 + 87 + mutex_lock(&host->chlist_mutex); 88 + 89 + index = find_first_zero_bit(&host->allocated_channels, max_channels); 90 + if (index >= max_channels) 91 + goto fail; 92 + 93 + channel = kzalloc(sizeof(*channel), GFP_KERNEL); 94 + if (!channel) 95 + goto fail; 96 + 97 + err = host1x_hw_channel_init(host, channel, index); 98 + if (err < 0) 99 + goto fail; 100 + 101 + /* Link device to host1x_channel */ 102 + channel->dev = dev; 103 + 104 + /* Add to channel list */ 105 + list_add_tail(&channel->list, &host->chlist.list); 106 + 107 + host->allocated_channels |= BIT(index); 108 + 109 + mutex_unlock(&host->chlist_mutex); 110 + return channel; 111 + 112 + fail: 113 + dev_err(dev, "failed to init channel\n"); 114 + kfree(channel); 115 + mutex_unlock(&host->chlist_mutex); 116 + return NULL; 117 + } 118 + 119 + void host1x_channel_free(struct host1x_channel *channel) 120 + { 121 + struct host1x *host = dev_get_drvdata(channel->dev->parent); 122 + 123 + host->allocated_channels &= ~BIT(channel->id); 124 + list_del(&channel->list); 125 + kfree(channel); 126 + }

+52

drivers/gpu/host1x/channel.h

··· 1 + /* 2 + * Tegra host1x Channel 3 + * 4 + * Copyright (c) 2010-2013, NVIDIA Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms and conditions of the GNU General Public License, 8 + * version 2, as published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope it will be useful, but WITHOUT 11 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 + * more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #ifndef __HOST1X_CHANNEL_H 20 + #define __HOST1X_CHANNEL_H 21 + 22 + #include <linux/io.h> 23 + 24 + #include "cdma.h" 25 + 26 + struct host1x; 27 + 28 + struct host1x_channel { 29 + struct list_head list; 30 + 31 + unsigned int refcount; 32 + unsigned int id; 33 + struct mutex reflock; 34 + struct mutex submitlock; 35 + void __iomem *regs; 36 + struct device *dev; 37 + struct host1x_cdma cdma; 38 + }; 39 + 40 + /* channel list operations */ 41 + int host1x_channel_list_init(struct host1x *host); 42 + 43 + struct host1x_channel *host1x_channel_request(struct device *dev); 44 + void host1x_channel_free(struct host1x_channel *channel); 45 + struct host1x_channel *host1x_channel_get(struct host1x_channel *channel); 46 + void host1x_channel_put(struct host1x_channel *channel); 47 + int host1x_job_submit(struct host1x_job *job); 48 + 49 + #define host1x_for_each_channel(host, channel) \ 50 + list_for_each_entry(channel, &host->chlist.list, list) 51 + 52 + #endif

+17

drivers/gpu/host1x/dev.c

··· 29 29 30 30 #include "dev.h" 31 31 #include "intr.h" 32 + #include "channel.h" 32 33 #include "hw/host1x01.h" 33 34 34 35 void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r) ··· 44 43 void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset; 45 44 46 45 return readl(sync_regs + r); 46 + } 47 + 48 + void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r) 49 + { 50 + writel(v, ch->regs + r); 51 + } 52 + 53 + u32 host1x_ch_readl(struct host1x_channel *ch, u32 r) 54 + { 55 + return readl(ch->regs + r); 47 56 } 48 57 49 58 static const struct host1x_info host1x01_info = { ··· 120 109 if (IS_ERR(host->clk)) { 121 110 dev_err(&pdev->dev, "failed to get clock\n"); 122 111 err = PTR_ERR(host->clk); 112 + return err; 113 + } 114 + 115 + err = host1x_channel_list_init(host); 116 + if (err) { 117 + dev_err(&pdev->dev, "failed to initialize channel list\n"); 123 118 return err; 124 119 } 125 120

+113

drivers/gpu/host1x/dev.h

··· 20 20 #include <linux/platform_device.h> 21 21 #include <linux/device.h> 22 22 23 + #include "channel.h" 23 24 #include "syncpt.h" 24 25 #include "intr.h" 26 + #include "cdma.h" 27 + #include "job.h" 25 28 26 29 struct host1x_syncpt; 30 + struct host1x_channel; 31 + struct host1x_cdma; 32 + struct host1x_job; 33 + struct push_buffer; 34 + 35 + struct host1x_channel_ops { 36 + int (*init)(struct host1x_channel *channel, struct host1x *host, 37 + unsigned int id); 38 + int (*submit)(struct host1x_job *job); 39 + }; 40 + 41 + struct host1x_cdma_ops { 42 + void (*start)(struct host1x_cdma *cdma); 43 + void (*stop)(struct host1x_cdma *cdma); 44 + void (*flush)(struct host1x_cdma *cdma); 45 + int (*timeout_init)(struct host1x_cdma *cdma, u32 syncpt_id); 46 + void (*timeout_destroy)(struct host1x_cdma *cdma); 47 + void (*freeze)(struct host1x_cdma *cdma); 48 + void (*resume)(struct host1x_cdma *cdma, u32 getptr); 49 + void (*timeout_cpu_incr)(struct host1x_cdma *cdma, u32 getptr, 50 + u32 syncpt_incrs, u32 syncval, u32 nr_slots); 51 + }; 52 + 53 + struct host1x_pushbuffer_ops { 54 + void (*init)(struct push_buffer *pb); 55 + }; 27 56 28 57 struct host1x_syncpt_ops { 29 58 void (*restore)(struct host1x_syncpt *syncpt); ··· 97 68 98 69 const struct host1x_syncpt_ops *syncpt_op; 99 70 const struct host1x_intr_ops *intr_op; 71 + const struct host1x_channel_ops *channel_op; 72 + const struct host1x_cdma_ops *cdma_op; 73 + const struct host1x_pushbuffer_ops *cdma_pb_op; 100 74 75 + struct host1x_syncpt *nop_sp; 76 + 77 + struct mutex chlist_mutex; 78 + struct host1x_channel chlist; 79 + unsigned long allocated_channels; 80 + unsigned int num_allocated_channels; 101 81 }; 102 82 103 83 void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v); 104 84 u32 host1x_sync_readl(struct host1x *host1x, u32 r); 85 + void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v); 86 + u32 host1x_ch_readl(struct host1x_channel *ch, u32 r); 105 87 106 88 static inline void 
host1x_hw_syncpt_restore(struct host1x *host, 107 89 struct host1x_syncpt *sp) ··· 184 144 { 185 145 return host->intr_op->free_syncpt_irq(host); 186 146 } 147 + 148 + static inline int host1x_hw_channel_init(struct host1x *host, 149 + struct host1x_channel *channel, 150 + int chid) 151 + { 152 + return host->channel_op->init(channel, host, chid); 153 + } 154 + 155 + static inline int host1x_hw_channel_submit(struct host1x *host, 156 + struct host1x_job *job) 157 + { 158 + return host->channel_op->submit(job); 159 + } 160 + 161 + static inline void host1x_hw_cdma_start(struct host1x *host, 162 + struct host1x_cdma *cdma) 163 + { 164 + host->cdma_op->start(cdma); 165 + } 166 + 167 + static inline void host1x_hw_cdma_stop(struct host1x *host, 168 + struct host1x_cdma *cdma) 169 + { 170 + host->cdma_op->stop(cdma); 171 + } 172 + 173 + static inline void host1x_hw_cdma_flush(struct host1x *host, 174 + struct host1x_cdma *cdma) 175 + { 176 + host->cdma_op->flush(cdma); 177 + } 178 + 179 + static inline int host1x_hw_cdma_timeout_init(struct host1x *host, 180 + struct host1x_cdma *cdma, 181 + u32 syncpt_id) 182 + { 183 + return host->cdma_op->timeout_init(cdma, syncpt_id); 184 + } 185 + 186 + static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host, 187 + struct host1x_cdma *cdma) 188 + { 189 + host->cdma_op->timeout_destroy(cdma); 190 + } 191 + 192 + static inline void host1x_hw_cdma_freeze(struct host1x *host, 193 + struct host1x_cdma *cdma) 194 + { 195 + host->cdma_op->freeze(cdma); 196 + } 197 + 198 + static inline void host1x_hw_cdma_resume(struct host1x *host, 199 + struct host1x_cdma *cdma, u32 getptr) 200 + { 201 + host->cdma_op->resume(cdma, getptr); 202 + } 203 + 204 + static inline void host1x_hw_cdma_timeout_cpu_incr(struct host1x *host, 205 + struct host1x_cdma *cdma, 206 + u32 getptr, 207 + u32 syncpt_incrs, 208 + u32 syncval, u32 nr_slots) 209 + { 210 + host->cdma_op->timeout_cpu_incr(cdma, getptr, syncpt_incrs, syncval, 211 + nr_slots); 212 + 
} 213 + 214 + static inline void host1x_hw_pushbuffer_init(struct host1x *host, 215 + struct push_buffer *pb) 216 + { 217 + host->cdma_pb_op->init(pb); 218 + } 219 + 187 220 #endif

+28

drivers/gpu/host1x/host1x.h

/*
 * Tegra host1x driver
 *
 * Copyright (c) 2009-2013, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#ifndef __LINUX_HOST1X_H
#define __LINUX_HOST1X_H

/*
 * Class identifiers known to the driver.  Only the host1x class itself is
 * defined so far.
 */
enum host1x_class {
	HOST1X_CLASS_HOST1X = 0x1,
};

#endif

+87

drivers/gpu/host1x/host1x_bo.h

··· 1 + /* 2 + * Tegra host1x Memory Management Abstraction header 3 + * 4 + * Copyright (c) 2012-2013, NVIDIA Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms and conditions of the GNU General Public License, 8 + * version 2, as published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope it will be useful, but WITHOUT 11 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 + * more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #ifndef _HOST1X_BO_H 20 + #define _HOST1X_BO_H 21 + 22 + struct host1x_bo; 23 + 24 + struct host1x_bo_ops { 25 + struct host1x_bo *(*get)(struct host1x_bo *bo); 26 + void (*put)(struct host1x_bo *bo); 27 + dma_addr_t (*pin)(struct host1x_bo *bo, struct sg_table **sgt); 28 + void (*unpin)(struct host1x_bo *bo, struct sg_table *sgt); 29 + void *(*mmap)(struct host1x_bo *bo); 30 + void (*munmap)(struct host1x_bo *bo, void *addr); 31 + void *(*kmap)(struct host1x_bo *bo, unsigned int pagenum); 32 + void (*kunmap)(struct host1x_bo *bo, unsigned int pagenum, void *addr); 33 + }; 34 + 35 + struct host1x_bo { 36 + const struct host1x_bo_ops *ops; 37 + }; 38 + 39 + static inline void host1x_bo_init(struct host1x_bo *bo, 40 + const struct host1x_bo_ops *ops) 41 + { 42 + bo->ops = ops; 43 + } 44 + 45 + static inline struct host1x_bo *host1x_bo_get(struct host1x_bo *bo) 46 + { 47 + return bo->ops->get(bo); 48 + } 49 + 50 + static inline void host1x_bo_put(struct host1x_bo *bo) 51 + { 52 + bo->ops->put(bo); 53 + } 54 + 55 + static inline dma_addr_t host1x_bo_pin(struct host1x_bo *bo, 56 + struct sg_table **sgt) 57 + { 58 + return bo->ops->pin(bo, sgt); 59 + } 60 + 61 + static inline void host1x_bo_unpin(struct 
host1x_bo *bo, struct sg_table *sgt) 62 + { 63 + bo->ops->unpin(bo, sgt); 64 + } 65 + 66 + static inline void *host1x_bo_mmap(struct host1x_bo *bo) 67 + { 68 + return bo->ops->mmap(bo); 69 + } 70 + 71 + static inline void host1x_bo_munmap(struct host1x_bo *bo, void *addr) 72 + { 73 + bo->ops->munmap(bo, addr); 74 + } 75 + 76 + static inline void *host1x_bo_kmap(struct host1x_bo *bo, unsigned int pagenum) 77 + { 78 + return bo->ops->kmap(bo, pagenum); 79 + } 80 + 81 + static inline void host1x_bo_kunmap(struct host1x_bo *bo, 82 + unsigned int pagenum, void *addr) 83 + { 84 + bo->ops->kunmap(bo, pagenum, addr); 85 + } 86 + 87 + #endif

+324

drivers/gpu/host1x/hw/cdma_hw.c

··· 1 + /* 2 + * Tegra host1x Command DMA 3 + * 4 + * Copyright (c) 2010-2013, NVIDIA Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms and conditions of the GNU General Public License, 8 + * version 2, as published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope it will be useful, but WITHOUT 11 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 + * more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #include <linux/slab.h> 20 + #include <linux/scatterlist.h> 21 + #include <linux/dma-mapping.h> 22 + 23 + #include "cdma.h" 24 + #include "channel.h" 25 + #include "dev.h" 26 + #include "debug.h" 27 + 28 + /* 29 + * Put the restart at the end of pushbuffer memor 30 + */ 31 + static void push_buffer_init(struct push_buffer *pb) 32 + { 33 + *(pb->mapped + (pb->size_bytes >> 2)) = host1x_opcode_restart(0); 34 + } 35 + 36 + /* 37 + * Increment timedout buffer's syncpt via CPU. 
38 + */ 39 + static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, 40 + u32 syncpt_incrs, u32 syncval, u32 nr_slots) 41 + { 42 + struct host1x *host1x = cdma_to_host1x(cdma); 43 + struct push_buffer *pb = &cdma->push_buffer; 44 + u32 i; 45 + 46 + for (i = 0; i < syncpt_incrs; i++) 47 + host1x_syncpt_cpu_incr(cdma->timeout.syncpt); 48 + 49 + /* after CPU incr, ensure shadow is up to date */ 50 + host1x_syncpt_load(cdma->timeout.syncpt); 51 + 52 + /* NOP all the PB slots */ 53 + while (nr_slots--) { 54 + u32 *p = (u32 *)((u32)pb->mapped + getptr); 55 + *(p++) = HOST1X_OPCODE_NOP; 56 + *(p++) = HOST1X_OPCODE_NOP; 57 + dev_dbg(host1x->dev, "%s: NOP at 0x%x\n", __func__, 58 + pb->phys + getptr); 59 + getptr = (getptr + 8) & (pb->size_bytes - 1); 60 + } 61 + wmb(); 62 + } 63 + 64 + /* 65 + * Start channel DMA 66 + */ 67 + static void cdma_start(struct host1x_cdma *cdma) 68 + { 69 + struct host1x_channel *ch = cdma_to_channel(cdma); 70 + 71 + if (cdma->running) 72 + return; 73 + 74 + cdma->last_pos = cdma->push_buffer.pos; 75 + 76 + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, 77 + HOST1X_CHANNEL_DMACTRL); 78 + 79 + /* set base, put and end pointer */ 80 + host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); 81 + host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); 82 + host1x_ch_writel(ch, cdma->push_buffer.phys + 83 + cdma->push_buffer.size_bytes + 4, 84 + HOST1X_CHANNEL_DMAEND); 85 + 86 + /* reset GET */ 87 + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP | 88 + HOST1X_CHANNEL_DMACTRL_DMAGETRST | 89 + HOST1X_CHANNEL_DMACTRL_DMAINITGET, 90 + HOST1X_CHANNEL_DMACTRL); 91 + 92 + /* start the command DMA */ 93 + host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL); 94 + 95 + cdma->running = true; 96 + } 97 + 98 + /* 99 + * Similar to cdma_start(), but rather than starting from an idle 100 + * state (where DMA GET is set to DMA PUT), on a timeout we restore 101 + * DMA GET from an explicit value (so DMA may again be 
pending). 102 + */ 103 + static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) 104 + { 105 + struct host1x *host1x = cdma_to_host1x(cdma); 106 + struct host1x_channel *ch = cdma_to_channel(cdma); 107 + 108 + if (cdma->running) 109 + return; 110 + 111 + cdma->last_pos = cdma->push_buffer.pos; 112 + 113 + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, 114 + HOST1X_CHANNEL_DMACTRL); 115 + 116 + /* set base, end pointer (all of memory) */ 117 + host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); 118 + host1x_ch_writel(ch, cdma->push_buffer.phys + 119 + cdma->push_buffer.size_bytes, 120 + HOST1X_CHANNEL_DMAEND); 121 + 122 + /* set GET, by loading the value in PUT (then reset GET) */ 123 + host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT); 124 + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP | 125 + HOST1X_CHANNEL_DMACTRL_DMAGETRST | 126 + HOST1X_CHANNEL_DMACTRL_DMAINITGET, 127 + HOST1X_CHANNEL_DMACTRL); 128 + 129 + dev_dbg(host1x->dev, 130 + "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", __func__, 131 + host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET), 132 + host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT), 133 + cdma->last_pos); 134 + 135 + /* deassert GET reset and set PUT */ 136 + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, 137 + HOST1X_CHANNEL_DMACTRL); 138 + host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); 139 + 140 + /* start the command DMA */ 141 + host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL); 142 + 143 + cdma->running = true; 144 + } 145 + 146 + /* 147 + * Kick channel DMA into action by writing its PUT offset (if it has changed) 148 + */ 149 + static void cdma_flush(struct host1x_cdma *cdma) 150 + { 151 + struct host1x_channel *ch = cdma_to_channel(cdma); 152 + 153 + if (cdma->push_buffer.pos != cdma->last_pos) { 154 + host1x_ch_writel(ch, cdma->push_buffer.pos, 155 + HOST1X_CHANNEL_DMAPUT); 156 + cdma->last_pos = cdma->push_buffer.pos; 157 + } 158 + } 159 + 160 + static void 
cdma_stop(struct host1x_cdma *cdma) 161 + { 162 + struct host1x_channel *ch = cdma_to_channel(cdma); 163 + 164 + mutex_lock(&cdma->lock); 165 + if (cdma->running) { 166 + host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY); 167 + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, 168 + HOST1X_CHANNEL_DMACTRL); 169 + cdma->running = false; 170 + } 171 + mutex_unlock(&cdma->lock); 172 + } 173 + 174 + /* 175 + * Stops both channel's command processor and CDMA immediately. 176 + * Also, tears down the channel and resets corresponding module. 177 + */ 178 + static void cdma_freeze(struct host1x_cdma *cdma) 179 + { 180 + struct host1x *host = cdma_to_host1x(cdma); 181 + struct host1x_channel *ch = cdma_to_channel(cdma); 182 + u32 cmdproc_stop; 183 + 184 + if (cdma->torndown && !cdma->running) { 185 + dev_warn(host->dev, "Already torn down\n"); 186 + return; 187 + } 188 + 189 + dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id); 190 + 191 + cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP); 192 + cmdproc_stop |= BIT(ch->id); 193 + host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); 194 + 195 + dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", 196 + __func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET), 197 + host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT), 198 + cdma->last_pos); 199 + 200 + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, 201 + HOST1X_CHANNEL_DMACTRL); 202 + 203 + host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN); 204 + 205 + cdma->running = false; 206 + cdma->torndown = true; 207 + } 208 + 209 + static void cdma_resume(struct host1x_cdma *cdma, u32 getptr) 210 + { 211 + struct host1x *host1x = cdma_to_host1x(cdma); 212 + struct host1x_channel *ch = cdma_to_channel(cdma); 213 + u32 cmdproc_stop; 214 + 215 + dev_dbg(host1x->dev, 216 + "resuming channel (id %d, DMAGET restart = 0x%x)\n", 217 + ch->id, getptr); 218 + 219 + cmdproc_stop = host1x_sync_readl(host1x, 
HOST1X_SYNC_CMDPROC_STOP); 220 + cmdproc_stop &= ~(BIT(ch->id)); 221 + host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); 222 + 223 + cdma->torndown = false; 224 + cdma_timeout_restart(cdma, getptr); 225 + } 226 + 227 + /* 228 + * If this timeout fires, it indicates the current sync_queue entry has 229 + * exceeded its TTL and the userctx should be timed out and remaining 230 + * submits already issued cleaned up (future submits return an error). 231 + */ 232 + static void cdma_timeout_handler(struct work_struct *work) 233 + { 234 + struct host1x_cdma *cdma; 235 + struct host1x *host1x; 236 + struct host1x_channel *ch; 237 + 238 + u32 syncpt_val; 239 + 240 + u32 prev_cmdproc, cmdproc_stop; 241 + 242 + cdma = container_of(to_delayed_work(work), struct host1x_cdma, 243 + timeout.wq); 244 + host1x = cdma_to_host1x(cdma); 245 + ch = cdma_to_channel(cdma); 246 + 247 + mutex_lock(&cdma->lock); 248 + 249 + if (!cdma->timeout.client) { 250 + dev_dbg(host1x->dev, 251 + "cdma_timeout: expired, but has no clientid\n"); 252 + mutex_unlock(&cdma->lock); 253 + return; 254 + } 255 + 256 + /* stop processing to get a clean snapshot */ 257 + prev_cmdproc = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP); 258 + cmdproc_stop = prev_cmdproc | BIT(ch->id); 259 + host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); 260 + 261 + dev_dbg(host1x->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n", 262 + prev_cmdproc, cmdproc_stop); 263 + 264 + syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); 265 + 266 + /* has buffer actually completed? 
*/ 267 + if ((s32)(syncpt_val - cdma->timeout.syncpt_val) >= 0) { 268 + dev_dbg(host1x->dev, 269 + "cdma_timeout: expired, but buffer had completed\n"); 270 + /* restore */ 271 + cmdproc_stop = prev_cmdproc & ~(BIT(ch->id)); 272 + host1x_sync_writel(host1x, cmdproc_stop, 273 + HOST1X_SYNC_CMDPROC_STOP); 274 + mutex_unlock(&cdma->lock); 275 + return; 276 + } 277 + 278 + dev_warn(host1x->dev, "%s: timeout: %d (%s), HW thresh %d, done %d\n", 279 + __func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name, 280 + syncpt_val, cdma->timeout.syncpt_val); 281 + 282 + /* stop HW, resetting channel/module */ 283 + host1x_hw_cdma_freeze(host1x, cdma); 284 + 285 + host1x_cdma_update_sync_queue(cdma, ch->dev); 286 + mutex_unlock(&cdma->lock); 287 + } 288 + 289 + /* 290 + * Init timeout resources 291 + */ 292 + static int cdma_timeout_init(struct host1x_cdma *cdma, u32 syncpt_id) 293 + { 294 + INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler); 295 + cdma->timeout.initialized = true; 296 + 297 + return 0; 298 + } 299 + 300 + /* 301 + * Clean up timeout resources 302 + */ 303 + static void cdma_timeout_destroy(struct host1x_cdma *cdma) 304 + { 305 + if (cdma->timeout.initialized) 306 + cancel_delayed_work(&cdma->timeout.wq); 307 + cdma->timeout.initialized = false; 308 + } 309 + 310 + static const struct host1x_cdma_ops host1x_cdma_ops = { 311 + .start = cdma_start, 312 + .stop = cdma_stop, 313 + .flush = cdma_flush, 314 + 315 + .timeout_init = cdma_timeout_init, 316 + .timeout_destroy = cdma_timeout_destroy, 317 + .freeze = cdma_freeze, 318 + .resume = cdma_resume, 319 + .timeout_cpu_incr = cdma_timeout_cpu_incr, 320 + }; 321 + 322 + static const struct host1x_pushbuffer_ops host1x_pushbuffer_ops = { 323 + .init = push_buffer_init, 324 + };

+143

drivers/gpu/host1x/hw/channel_hw.c

··· 1 + /* 2 + * Tegra host1x Channel 3 + * 4 + * Copyright (c) 2010-2013, NVIDIA Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms and conditions of the GNU General Public License, 8 + * version 2, as published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope it will be useful, but WITHOUT 11 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 + * more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #include <linux/slab.h> 20 + #include <trace/events/host1x.h> 21 + 22 + #include "host1x.h" 23 + #include "host1x_bo.h" 24 + #include "channel.h" 25 + #include "dev.h" 26 + #include "intr.h" 27 + #include "job.h" 28 + 29 + #define HOST1X_CHANNEL_SIZE 16384 30 + #define TRACE_MAX_LENGTH 128U 31 + 32 + static void submit_gathers(struct host1x_job *job) 33 + { 34 + struct host1x_cdma *cdma = &job->channel->cdma; 35 + unsigned int i; 36 + 37 + for (i = 0; i < job->num_gathers; i++) { 38 + struct host1x_job_gather *g = &job->gathers[i]; 39 + u32 op1 = host1x_opcode_gather(g->words); 40 + u32 op2 = g->base + g->offset; 41 + host1x_cdma_push(cdma, op1, op2); 42 + } 43 + } 44 + 45 + static int channel_submit(struct host1x_job *job) 46 + { 47 + struct host1x_channel *ch = job->channel; 48 + struct host1x_syncpt *sp; 49 + u32 user_syncpt_incrs = job->syncpt_incrs; 50 + u32 prev_max = 0; 51 + u32 syncval; 52 + int err; 53 + struct host1x_waitlist *completed_waiter = NULL; 54 + struct host1x *host = dev_get_drvdata(ch->dev->parent); 55 + 56 + sp = host->syncpt + job->syncpt_id; 57 + trace_host1x_channel_submit(dev_name(ch->dev), 58 + job->num_gathers, job->num_relocs, 59 + job->num_waitchk, job->syncpt_id, 60 + job->syncpt_incrs); 61 + 62 + 
/* before error checks, return current max */ 63 + prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); 64 + 65 + /* get submit lock */ 66 + err = mutex_lock_interruptible(&ch->submitlock); 67 + if (err) 68 + goto error; 69 + 70 + completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL); 71 + if (!completed_waiter) { 72 + mutex_unlock(&ch->submitlock); 73 + err = -ENOMEM; 74 + goto error; 75 + } 76 + 77 + /* begin a CDMA submit */ 78 + err = host1x_cdma_begin(&ch->cdma, job); 79 + if (err) { 80 + mutex_unlock(&ch->submitlock); 81 + goto error; 82 + } 83 + 84 + if (job->serialize) { 85 + /* 86 + * Force serialization by inserting a host wait for the 87 + * previous job to finish before this one can commence. 88 + */ 89 + host1x_cdma_push(&ch->cdma, 90 + host1x_opcode_setclass(HOST1X_CLASS_HOST1X, 91 + host1x_uclass_wait_syncpt_r(), 1), 92 + host1x_class_host_wait_syncpt(job->syncpt_id, 93 + host1x_syncpt_read_max(sp))); 94 + } 95 + 96 + syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs); 97 + 98 + job->syncpt_end = syncval; 99 + 100 + /* add a setclass for modules that require it */ 101 + if (job->class) 102 + host1x_cdma_push(&ch->cdma, 103 + host1x_opcode_setclass(job->class, 0, 0), 104 + HOST1X_OPCODE_NOP); 105 + 106 + submit_gathers(job); 107 + 108 + /* end CDMA submit & stash pinned hMems into sync queue */ 109 + host1x_cdma_end(&ch->cdma, job); 110 + 111 + trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval); 112 + 113 + /* schedule a submit complete interrupt */ 114 + err = host1x_intr_add_action(host, job->syncpt_id, syncval, 115 + HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch, 116 + completed_waiter, NULL); 117 + completed_waiter = NULL; 118 + WARN(err, "Failed to set submit complete interrupt"); 119 + 120 + mutex_unlock(&ch->submitlock); 121 + 122 + return 0; 123 + 124 + error: 125 + kfree(completed_waiter); 126 + return err; 127 + } 128 + 129 + static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, 130 + 
unsigned int index) 131 + { 132 + ch->id = index; 133 + mutex_init(&ch->reflock); 134 + mutex_init(&ch->submitlock); 135 + 136 + ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE; 137 + return 0; 138 + } 139 + 140 + static const struct host1x_channel_ops host1x_channel_ops = { 141 + .init = host1x_channel_init, 142 + .submit = channel_submit, 143 + };

+5

drivers/gpu/host1x/hw/host1x01.c

··· 21 21 #include "hw/host1x01_hardware.h" 22 22 23 23 /* include code */ 24 + #include "hw/cdma_hw.c" 25 + #include "hw/channel_hw.c" 24 26 #include "hw/intr_hw.c" 25 27 #include "hw/syncpt_hw.c" 26 28 ··· 30 28 31 29 int host1x01_init(struct host1x *host) 32 30 { 31 + host->channel_op = &host1x_channel_ops; 32 + host->cdma_op = &host1x_cdma_ops; 33 + host->cdma_pb_op = &host1x_pushbuffer_ops; 33 34 host->syncpt_op = &host1x_syncpt_ops; 34 35 host->intr_op = &host1x_intr_ops; 35 36

+116

drivers/gpu/host1x/hw/host1x01_hardware.h

··· 22 22 #include <linux/types.h> 23 23 #include <linux/bitops.h> 24 24 25 + #include "hw_host1x01_channel.h" 25 26 #include "hw_host1x01_sync.h" 27 + #include "hw_host1x01_uclass.h" 28 + 29 + static inline u32 host1x_class_host_wait_syncpt( 30 + unsigned indx, unsigned threshold) 31 + { 32 + return host1x_uclass_wait_syncpt_indx_f(indx) 33 + | host1x_uclass_wait_syncpt_thresh_f(threshold); 34 + } 35 + 36 + static inline u32 host1x_class_host_load_syncpt_base( 37 + unsigned indx, unsigned threshold) 38 + { 39 + return host1x_uclass_load_syncpt_base_base_indx_f(indx) 40 + | host1x_uclass_load_syncpt_base_value_f(threshold); 41 + } 42 + 43 + static inline u32 host1x_class_host_wait_syncpt_base( 44 + unsigned indx, unsigned base_indx, unsigned offset) 45 + { 46 + return host1x_uclass_wait_syncpt_base_indx_f(indx) 47 + | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) 48 + | host1x_uclass_wait_syncpt_base_offset_f(offset); 49 + } 50 + 51 + static inline u32 host1x_class_host_incr_syncpt_base( 52 + unsigned base_indx, unsigned offset) 53 + { 54 + return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) 55 + | host1x_uclass_incr_syncpt_base_offset_f(offset); 56 + } 57 + 58 + static inline u32 host1x_class_host_incr_syncpt( 59 + unsigned cond, unsigned indx) 60 + { 61 + return host1x_uclass_incr_syncpt_cond_f(cond) 62 + | host1x_uclass_incr_syncpt_indx_f(indx); 63 + } 64 + 65 + static inline u32 host1x_class_host_indoff_reg_write( 66 + unsigned mod_id, unsigned offset, bool auto_inc) 67 + { 68 + u32 v = host1x_uclass_indoff_indbe_f(0xf) 69 + | host1x_uclass_indoff_indmodid_f(mod_id) 70 + | host1x_uclass_indoff_indroffset_f(offset); 71 + if (auto_inc) 72 + v |= host1x_uclass_indoff_autoinc_f(1); 73 + return v; 74 + } 75 + 76 + static inline u32 host1x_class_host_indoff_reg_read( 77 + unsigned mod_id, unsigned offset, bool auto_inc) 78 + { 79 + u32 v = host1x_uclass_indoff_indmodid_f(mod_id) 80 + | host1x_uclass_indoff_indroffset_f(offset) 81 + | 
host1x_uclass_indoff_rwn_read_v(); 82 + if (auto_inc) 83 + v |= host1x_uclass_indoff_autoinc_f(1); 84 + return v; 85 + } 86 + 87 + 88 + /* cdma opcodes */ 89 + static inline u32 host1x_opcode_setclass( 90 + unsigned class_id, unsigned offset, unsigned mask) 91 + { 92 + return (0 << 28) | (offset << 16) | (class_id << 6) | mask; 93 + } 94 + 95 + static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) 96 + { 97 + return (1 << 28) | (offset << 16) | count; 98 + } 99 + 100 + static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) 101 + { 102 + return (2 << 28) | (offset << 16) | count; 103 + } 104 + 105 + static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) 106 + { 107 + return (3 << 28) | (offset << 16) | mask; 108 + } 109 + 110 + static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) 111 + { 112 + return (4 << 28) | (offset << 16) | value; 113 + } 114 + 115 + static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) 116 + { 117 + return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), 118 + host1x_class_host_incr_syncpt(cond, indx)); 119 + } 120 + 121 + static inline u32 host1x_opcode_restart(unsigned address) 122 + { 123 + return (5 << 28) | (address >> 4); 124 + } 125 + 126 + static inline u32 host1x_opcode_gather(unsigned count) 127 + { 128 + return (6 << 28) | count; 129 + } 130 + 131 + static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) 132 + { 133 + return (6 << 28) | (offset << 16) | BIT(15) | count; 134 + } 135 + 136 + static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) 137 + { 138 + return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; 139 + } 140 + 141 + #define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) 26 142 27 143 #endif

+102

drivers/gpu/host1x/hw/hw_host1x01_channel.h

/*
 * Copyright (c) 2012-2013, NVIDIA Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

 /*
  * Function naming determines intended use:
  *
  *     <x>_r(void) : Returns the offset for register <x>.
  *
  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
  *
  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
  *
  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
  *         and masked to place it at field <y> of register <x>.  This value
  *         can be |'d with others to produce a full register value for
  *         register <x>.
  *
  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
  *         value can be ~'d and then &'d to clear the value of field <y> for
  *         register <x>.
  *
  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
  *         to place it at field <y> of register <x>.  This value can be |'d
  *         with others to produce a full register value for <x>.
  *
  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
  *         <x> value 'r' after being shifted to place its LSB at bit 0.
  *         This value is suitable for direct comparison with other unshifted
  *         values appropriate for use in field <y> of register <x>.
  *
  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
  *         field <y> of register <x>.  This value is suitable for direct
  *         comparison with unshifted values appropriate for use in field <y>
  *         of register <x>.
  */

#ifndef __hw_host1x_channel_host1x_h__
#define __hw_host1x_channel_host1x_h__

/* DMASTART register offset */
static inline u32 host1x_channel_dmastart_r(void)
{
	return 0x14;
}
#define HOST1X_CHANNEL_DMASTART host1x_channel_dmastart_r()

/* DMAPUT register offset */
static inline u32 host1x_channel_dmaput_r(void)
{
	return 0x18;
}
#define HOST1X_CHANNEL_DMAPUT host1x_channel_dmaput_r()

/* DMAGET register offset */
static inline u32 host1x_channel_dmaget_r(void)
{
	return 0x1c;
}
#define HOST1X_CHANNEL_DMAGET host1x_channel_dmaget_r()

/* DMAEND register offset */
static inline u32 host1x_channel_dmaend_r(void)
{
	return 0x20;
}
#define HOST1X_CHANNEL_DMAEND host1x_channel_dmaend_r()

/* DMACTRL register offset */
static inline u32 host1x_channel_dmactrl_r(void)
{
	return 0x24;
}
#define HOST1X_CHANNEL_DMACTRL host1x_channel_dmactrl_r()

/* DMACTRL: DMASTOP is bit 0 */
static inline u32 host1x_channel_dmactrl_dmastop(void)
{
	return 0x1;
}
#define HOST1X_CHANNEL_DMACTRL_DMASTOP host1x_channel_dmactrl_dmastop()

/* DMACTRL: DMAGETRST is bit 1 */
static inline u32 host1x_channel_dmactrl_dmagetrst(void)
{
	return 0x2;
}
#define HOST1X_CHANNEL_DMACTRL_DMAGETRST host1x_channel_dmactrl_dmagetrst()

/* DMACTRL: DMAINITGET is bit 2 */
static inline u32 host1x_channel_dmactrl_dmainitget(void)
{
	return 0x4;
}
#define HOST1X_CHANNEL_DMACTRL_DMAINITGET host1x_channel_dmactrl_dmainitget()

#endif

+12

drivers/gpu/host1x/hw/hw_host1x01_sync.h

··· 77 77 } 78 78 #define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \ 79 79 host1x_sync_syncpt_thresh_int_enable_cpu0_r(id) 80 + static inline u32 host1x_sync_cmdproc_stop_r(void) 81 + { 82 + return 0xac; 83 + } 84 + #define HOST1X_SYNC_CMDPROC_STOP \ 85 + host1x_sync_cmdproc_stop_r() 86 + static inline u32 host1x_sync_ch_teardown_r(void) 87 + { 88 + return 0xb0; 89 + } 90 + #define HOST1X_SYNC_CH_TEARDOWN \ 91 + host1x_sync_ch_teardown_r() 80 92 static inline u32 host1x_sync_usec_clk_r(void) 81 93 { 82 94 return 0x1a4;

+168

drivers/gpu/host1x/hw/hw_host1x01_uclass.h

/*
 * Copyright (c) 2012-2013, NVIDIA Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

 /*
  * Function naming determines intended use:
  *
  *     <x>_r(void) : Returns the offset for register <x>.
  *
  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
  *
  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
  *
  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
  *         and masked to place it at field <y> of register <x>.  This value
  *         can be |'d with others to produce a full register value for
  *         register <x>.
  *
  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
  *         value can be ~'d and then &'d to clear the value of field <y> for
  *         register <x>.
  *
  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
  *         to place it at field <y> of register <x>.  This value can be |'d
  *         with others to produce a full register value for <x>.
  *
  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
  *         <x> value 'r' after being shifted to place its LSB at bit 0.
  *         This value is suitable for direct comparison with other unshifted
  *         values appropriate for use in field <y> of register <x>.
  *
  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
  *         field <y> of register <x>.  This value is suitable for direct
  *         comparison with unshifted values appropriate for use in field <y>
  *         of register <x>.
  */

#ifndef __hw_host1x_uclass_host1x_h__
#define __hw_host1x_uclass_host1x_h__

/* INCR_SYNCPT: register offset, cond field (bits 15:8), indx field (7:0) */
static inline u32 host1x_uclass_incr_syncpt_r(void)
{
	return 0x0;
}
#define HOST1X_UCLASS_INCR_SYNCPT \
	host1x_uclass_incr_syncpt_r()
static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
{
	return (v & 0xff) << 8;
}
#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
	host1x_uclass_incr_syncpt_cond_f(v)
static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
{
	return (v & 0xff) << 0;
}
#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
	host1x_uclass_incr_syncpt_indx_f(v)

/* WAIT_SYNCPT: register offset, indx field (31:24), thresh field (23:0) */
static inline u32 host1x_uclass_wait_syncpt_r(void)
{
	return 0x8;
}
#define HOST1X_UCLASS_WAIT_SYNCPT \
	host1x_uclass_wait_syncpt_r()
static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
{
	return (v & 0xff) << 24;
}
#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
	host1x_uclass_wait_syncpt_indx_f(v)
static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
{
	return (v & 0xffffff) << 0;
}
#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
	host1x_uclass_wait_syncpt_thresh_f(v)

/* WAIT_SYNCPT_BASE: indx (31:24), base_indx (23:16), offset (15:0) */
static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
{
	return (v & 0xff) << 24;
}
#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
	host1x_uclass_wait_syncpt_base_indx_f(v)
static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
{
	return (v & 0xff) << 16;
}
#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
	host1x_uclass_wait_syncpt_base_base_indx_f(v)
static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
{
	return (v & 0xffff) << 0;
}
#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
	host1x_uclass_wait_syncpt_base_offset_f(v)

/* LOAD_SYNCPT_BASE: base_indx (31:24), value (23:0) */
static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
{
	return (v & 0xff) << 24;
}
#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
	host1x_uclass_load_syncpt_base_base_indx_f(v)
static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
{
	return (v & 0xffffff) << 0;
}
#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
	host1x_uclass_load_syncpt_base_value_f(v)

/* INCR_SYNCPT_BASE: base_indx (31:24), offset (23:0) */
static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
{
	return (v & 0xff) << 24;
}
#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
	host1x_uclass_incr_syncpt_base_base_indx_f(v)
static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
{
	return (v & 0xffffff) << 0;
}
#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
	host1x_uclass_incr_syncpt_base_offset_f(v)

/*
 * INDOFF: register offset, indbe (31:28), autoinc (bit 27),
 * indmodid (25:18), indroffset (17:2), and the rwn "read" value.
 */
static inline u32 host1x_uclass_indoff_r(void)
{
	return 0x2d;
}
#define HOST1X_UCLASS_INDOFF \
	host1x_uclass_indoff_r()
static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
{
	return (v & 0xf) << 28;
}
#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
	host1x_uclass_indoff_indbe_f(v)
static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
{
	return (v & 0x1) << 27;
}
#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
	host1x_uclass_indoff_autoinc_f(v)
static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
{
	return (v & 0xff) << 18;
}
#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
	host1x_uclass_indoff_indmodid_f(v)
static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
{
	return (v & 0xffff) << 2;
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
	host1x_uclass_indoff_indroffset_f(v)
static inline u32 host1x_uclass_indoff_rwn_read_v(void)
{
	return 1;
}
/*
 * This alias used to be a second, identical definition of
 * HOST1X_UCLASS_INDOFF_INDROFFSET_F (copy-paste slip), which left
 * host1x_uclass_indoff_rwn_read_v() without a macro form.
 */
#define HOST1X_UCLASS_INDOFF_RWN_READ_V \
	host1x_uclass_indoff_rwn_read_v()

#endif

+11

drivers/gpu/host1x/hw/syncpt_hw.c

··· 93 93 wmb(); 94 94 } 95 95 96 + /* remove a wait pointed to by patch_addr */ 97 + static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr) 98 + { 99 + u32 override = host1x_class_host_wait_syncpt( 100 + HOST1X_SYNCPT_RESERVED, 0); 101 + 102 + *((u32 *)patch_addr) = override; 103 + return 0; 104 + } 105 + 96 106 static const struct host1x_syncpt_ops host1x_syncpt_ops = { 97 107 .restore = syncpt_restore, 98 108 .restore_wait_base = syncpt_restore_wait_base, 99 109 .load_wait_base = syncpt_read_wait_base, 100 110 .load = syncpt_load, 101 111 .cpu_incr = syncpt_cpu_incr, 112 + .patch_wait = syncpt_patch_wait, 102 113 };

+27 -1

drivers/gpu/host1x/intr.c

··· 21 21 #include <linux/slab.h> 22 22 #include <linux/irq.h> 23 23 24 + #include <trace/events/host1x.h> 25 + #include "channel.h" 24 26 #include "dev.h" 25 27 #include "intr.h" 26 28 ··· 68 66 struct list_head completed[HOST1X_INTR_ACTION_COUNT]) 69 67 { 70 68 struct list_head *dest; 71 - struct host1x_waitlist *waiter, *next; 69 + struct host1x_waitlist *waiter, *next, *prev; 72 70 73 71 list_for_each_entry_safe(waiter, next, head, list) { 74 72 if ((s32)(waiter->thresh - sync) > 0) 75 73 break; 76 74 77 75 dest = completed + waiter->action; 76 + 77 + /* consolidate submit cleanups */ 78 + if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE && 79 + !list_empty(dest)) { 80 + prev = list_entry(dest->prev, 81 + struct host1x_waitlist, list); 82 + if (prev->data == waiter->data) { 83 + prev->count++; 84 + dest = NULL; 85 + } 86 + } 78 87 79 88 /* PENDING->REMOVED or CANCELLED->HANDLED */ 80 89 if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) { ··· 107 94 host1x_hw_intr_enable_syncpt_intr(host, id); 108 95 } 109 96 97 + static void action_submit_complete(struct host1x_waitlist *waiter) 98 + { 99 + struct host1x_channel *channel = waiter->data; 100 + 101 + host1x_cdma_update(&channel->cdma); 102 + 103 + /* Add nr_completed to trace */ 104 + trace_host1x_channel_submit_complete(dev_name(channel->dev), 105 + waiter->count, waiter->thresh); 106 + 107 + } 108 + 110 109 static void action_wakeup(struct host1x_waitlist *waiter) 111 110 { 112 111 wait_queue_head_t *wq = waiter->data; ··· 134 109 typedef void (*action_handler)(struct host1x_waitlist *waiter); 135 110 136 111 static action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { 112 + action_submit_complete, 137 113 action_wakeup, 138 114 action_wakeup_interruptible, 139 115 };

+6

drivers/gpu/host1x/intr.h

··· 26 26 27 27 enum host1x_intr_action { 28 28 /* 29 + * Perform cleanup after a submit has completed. 30 + * 'data' points to a channel 31 + */ 32 + HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0, 33 + 34 + /* 29 35 * Wake up a task. 30 36 * 'data' points to a wait_queue_head_t 31 37 */

+603

drivers/gpu/host1x/job.c

··· 1 + /* 2 + * Tegra host1x Job 3 + * 4 + * Copyright (c) 2010-2013, NVIDIA Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms and conditions of the GNU General Public License, 8 + * version 2, as published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope it will be useful, but WITHOUT 11 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 + * more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #include <linux/dma-mapping.h> 20 + #include <linux/err.h> 21 + #include <linux/kref.h> 22 + #include <linux/module.h> 23 + #include <linux/scatterlist.h> 24 + #include <linux/slab.h> 25 + #include <linux/vmalloc.h> 26 + #include <trace/events/host1x.h> 27 + 28 + #include "channel.h" 29 + #include "dev.h" 30 + #include "host1x_bo.h" 31 + #include "job.h" 32 + #include "syncpt.h" 33 + 34 + struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, 35 + u32 num_cmdbufs, u32 num_relocs, 36 + u32 num_waitchks) 37 + { 38 + struct host1x_job *job = NULL; 39 + unsigned int num_unpins = num_cmdbufs + num_relocs; 40 + u64 total; 41 + void *mem; 42 + 43 + /* Check that we're not going to overflow */ 44 + total = sizeof(struct host1x_job) + 45 + num_relocs * sizeof(struct host1x_reloc) + 46 + num_unpins * sizeof(struct host1x_job_unpin_data) + 47 + num_waitchks * sizeof(struct host1x_waitchk) + 48 + num_cmdbufs * sizeof(struct host1x_job_gather) + 49 + num_unpins * sizeof(dma_addr_t) + 50 + num_unpins * sizeof(u32 *); 51 + if (total > ULONG_MAX) 52 + return NULL; 53 + 54 + mem = job = kzalloc(total, GFP_KERNEL); 55 + if (!job) 56 + return NULL; 57 + 58 + kref_init(&job->ref); 59 + job->channel = ch; 60 + 61 + /* Redistribute memory to the 
structs */ 62 + mem += sizeof(struct host1x_job); 63 + job->relocarray = num_relocs ? mem : NULL; 64 + mem += num_relocs * sizeof(struct host1x_reloc); 65 + job->unpins = num_unpins ? mem : NULL; 66 + mem += num_unpins * sizeof(struct host1x_job_unpin_data); 67 + job->waitchk = num_waitchks ? mem : NULL; 68 + mem += num_waitchks * sizeof(struct host1x_waitchk); 69 + job->gathers = num_cmdbufs ? mem : NULL; 70 + mem += num_cmdbufs * sizeof(struct host1x_job_gather); 71 + job->addr_phys = num_unpins ? mem : NULL; 72 + 73 + job->reloc_addr_phys = job->addr_phys; 74 + job->gather_addr_phys = &job->addr_phys[num_relocs]; 75 + 76 + return job; 77 + } 78 + 79 + struct host1x_job *host1x_job_get(struct host1x_job *job) 80 + { 81 + kref_get(&job->ref); 82 + return job; 83 + } 84 + 85 + static void job_free(struct kref *ref) 86 + { 87 + struct host1x_job *job = container_of(ref, struct host1x_job, ref); 88 + 89 + kfree(job); 90 + } 91 + 92 + void host1x_job_put(struct host1x_job *job) 93 + { 94 + kref_put(&job->ref, job_free); 95 + } 96 + 97 + void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, 98 + u32 words, u32 offset) 99 + { 100 + struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers]; 101 + 102 + cur_gather->words = words; 103 + cur_gather->bo = bo; 104 + cur_gather->offset = offset; 105 + job->num_gathers++; 106 + } 107 + 108 + /* 109 + * NULL an already satisfied WAIT_SYNCPT host method, by patching its 110 + * args in the command stream. The method data is changed to reference 111 + * a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt 112 + * with a matching threshold value of 0, so is guaranteed to be popped 113 + * by the host HW. 
114 + */ 115 + static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp, 116 + struct host1x_bo *h, u32 offset) 117 + { 118 + void *patch_addr = NULL; 119 + 120 + /* patch the wait */ 121 + patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT); 122 + if (patch_addr) { 123 + host1x_syncpt_patch_wait(sp, 124 + patch_addr + (offset & ~PAGE_MASK)); 125 + host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr); 126 + } else 127 + pr_err("Could not map cmdbuf for wait check\n"); 128 + } 129 + 130 + /* 131 + * Check driver supplied waitchk structs for syncpt thresholds 132 + * that have already been satisfied and NULL the comparison (to 133 + * avoid a wrap condition in the HW). 134 + */ 135 + static int do_waitchks(struct host1x_job *job, struct host1x *host, 136 + struct host1x_bo *patch) 137 + { 138 + int i; 139 + 140 + /* compare syncpt vs wait threshold */ 141 + for (i = 0; i < job->num_waitchk; i++) { 142 + struct host1x_waitchk *wait = &job->waitchk[i]; 143 + struct host1x_syncpt *sp = 144 + host1x_syncpt_get(host, wait->syncpt_id); 145 + 146 + /* validate syncpt id */ 147 + if (wait->syncpt_id > host1x_syncpt_nb_pts(host)) 148 + continue; 149 + 150 + /* skip all other gathers */ 151 + if (patch != wait->bo) 152 + continue; 153 + 154 + trace_host1x_syncpt_wait_check(wait->bo, wait->offset, 155 + wait->syncpt_id, wait->thresh, 156 + host1x_syncpt_read_min(sp)); 157 + 158 + if (host1x_syncpt_is_expired(sp, wait->thresh)) { 159 + dev_dbg(host->dev, 160 + "drop WAIT id %d (%s) thresh 0x%x, min 0x%x\n", 161 + wait->syncpt_id, sp->name, wait->thresh, 162 + host1x_syncpt_read_min(sp)); 163 + 164 + host1x_syncpt_patch_offset(sp, patch, wait->offset); 165 + } 166 + 167 + wait->bo = NULL; 168 + } 169 + 170 + return 0; 171 + } 172 + 173 + static unsigned int pin_job(struct host1x_job *job) 174 + { 175 + unsigned int i; 176 + 177 + job->num_unpins = 0; 178 + 179 + for (i = 0; i < job->num_relocs; i++) { 180 + struct host1x_reloc *reloc = &job->relocarray[i]; 181 + struct 
sg_table *sgt; 182 + dma_addr_t phys_addr; 183 + 184 + reloc->target = host1x_bo_get(reloc->target); 185 + if (!reloc->target) 186 + goto unpin; 187 + 188 + phys_addr = host1x_bo_pin(reloc->target, &sgt); 189 + if (!phys_addr) 190 + goto unpin; 191 + 192 + job->addr_phys[job->num_unpins] = phys_addr; 193 + job->unpins[job->num_unpins].bo = reloc->target; 194 + job->unpins[job->num_unpins].sgt = sgt; 195 + job->num_unpins++; 196 + } 197 + 198 + for (i = 0; i < job->num_gathers; i++) { 199 + struct host1x_job_gather *g = &job->gathers[i]; 200 + struct sg_table *sgt; 201 + dma_addr_t phys_addr; 202 + 203 + g->bo = host1x_bo_get(g->bo); 204 + if (!g->bo) 205 + goto unpin; 206 + 207 + phys_addr = host1x_bo_pin(g->bo, &sgt); 208 + if (!phys_addr) 209 + goto unpin; 210 + 211 + job->addr_phys[job->num_unpins] = phys_addr; 212 + job->unpins[job->num_unpins].bo = g->bo; 213 + job->unpins[job->num_unpins].sgt = sgt; 214 + job->num_unpins++; 215 + } 216 + 217 + return job->num_unpins; 218 + 219 + unpin: 220 + host1x_job_unpin(job); 221 + return 0; 222 + } 223 + 224 + static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) 225 + { 226 + int i = 0; 227 + u32 last_page = ~0; 228 + void *cmdbuf_page_addr = NULL; 229 + 230 + /* pin & patch the relocs for one gather */ 231 + while (i < job->num_relocs) { 232 + struct host1x_reloc *reloc = &job->relocarray[i]; 233 + u32 reloc_addr = (job->reloc_addr_phys[i] + 234 + reloc->target_offset) >> reloc->shift; 235 + u32 *target; 236 + 237 + /* skip all other gathers */ 238 + if (!(reloc->cmdbuf && cmdbuf == reloc->cmdbuf)) { 239 + i++; 240 + continue; 241 + } 242 + 243 + if (last_page != reloc->cmdbuf_offset >> PAGE_SHIFT) { 244 + if (cmdbuf_page_addr) 245 + host1x_bo_kunmap(cmdbuf, last_page, 246 + cmdbuf_page_addr); 247 + 248 + cmdbuf_page_addr = host1x_bo_kmap(cmdbuf, 249 + reloc->cmdbuf_offset >> PAGE_SHIFT); 250 + last_page = reloc->cmdbuf_offset >> PAGE_SHIFT; 251 + 252 + if (unlikely(!cmdbuf_page_addr)) { 253 
+ pr_err("Could not map cmdbuf for relocation\n"); 254 + return -ENOMEM; 255 + } 256 + } 257 + 258 + target = cmdbuf_page_addr + (reloc->cmdbuf_offset & ~PAGE_MASK); 259 + *target = reloc_addr; 260 + 261 + /* mark this gather as handled */ 262 + reloc->cmdbuf = 0; 263 + } 264 + 265 + if (cmdbuf_page_addr) 266 + host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr); 267 + 268 + return 0; 269 + } 270 + 271 + static int check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, 272 + unsigned int offset) 273 + { 274 + offset *= sizeof(u32); 275 + 276 + if (reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset) 277 + return -EINVAL; 278 + 279 + return 0; 280 + } 281 + 282 + struct host1x_firewall { 283 + struct host1x_job *job; 284 + struct device *dev; 285 + 286 + unsigned int num_relocs; 287 + struct host1x_reloc *reloc; 288 + 289 + struct host1x_bo *cmdbuf_id; 290 + unsigned int offset; 291 + 292 + u32 words; 293 + u32 class; 294 + u32 reg; 295 + u32 mask; 296 + u32 count; 297 + }; 298 + 299 + static int check_mask(struct host1x_firewall *fw) 300 + { 301 + u32 mask = fw->mask; 302 + u32 reg = fw->reg; 303 + 304 + while (mask) { 305 + if (fw->words == 0) 306 + return -EINVAL; 307 + 308 + if (mask & 1) { 309 + if (fw->job->is_addr_reg(fw->dev, fw->class, reg)) { 310 + bool bad_reloc = check_reloc(fw->reloc, 311 + fw->cmdbuf_id, 312 + fw->offset); 313 + if (!fw->num_relocs || bad_reloc) 314 + return -EINVAL; 315 + fw->reloc++; 316 + fw->num_relocs--; 317 + } 318 + fw->words--; 319 + fw->offset++; 320 + } 321 + mask >>= 1; 322 + reg++; 323 + } 324 + 325 + return 0; 326 + } 327 + 328 + static int check_incr(struct host1x_firewall *fw) 329 + { 330 + u32 count = fw->count; 331 + u32 reg = fw->reg; 332 + 333 + while (fw) { 334 + if (fw->words == 0) 335 + return -EINVAL; 336 + 337 + if (fw->job->is_addr_reg(fw->dev, fw->class, reg)) { 338 + bool bad_reloc = check_reloc(fw->reloc, fw->cmdbuf_id, 339 + fw->offset); 340 + if (!fw->num_relocs || bad_reloc) 341 + 
return -EINVAL; 342 + fw->reloc++; 343 + fw->num_relocs--; 344 + } 345 + reg++; 346 + fw->words--; 347 + fw->offset++; 348 + count--; 349 + } 350 + 351 + return 0; 352 + } 353 + 354 + static int check_nonincr(struct host1x_firewall *fw) 355 + { 356 + int is_addr_reg = fw->job->is_addr_reg(fw->dev, fw->class, fw->reg); 357 + u32 count = fw->count; 358 + 359 + while (count) { 360 + if (fw->words == 0) 361 + return -EINVAL; 362 + 363 + if (is_addr_reg) { 364 + bool bad_reloc = check_reloc(fw->reloc, fw->cmdbuf_id, 365 + fw->offset); 366 + if (!fw->num_relocs || bad_reloc) 367 + return -EINVAL; 368 + fw->reloc++; 369 + fw->num_relocs--; 370 + } 371 + fw->words--; 372 + fw->offset++; 373 + count--; 374 + } 375 + 376 + return 0; 377 + } 378 + 379 + static int validate(struct host1x_job *job, struct device *dev, 380 + struct host1x_job_gather *g) 381 + { 382 + u32 *cmdbuf_base; 383 + int err = 0; 384 + struct host1x_firewall fw; 385 + 386 + fw.job = job; 387 + fw.dev = dev; 388 + fw.reloc = job->relocarray; 389 + fw.num_relocs = job->num_relocs; 390 + fw.cmdbuf_id = g->bo; 391 + 392 + fw.offset = 0; 393 + fw.class = 0; 394 + 395 + if (!job->is_addr_reg) 396 + return 0; 397 + 398 + cmdbuf_base = host1x_bo_mmap(g->bo); 399 + if (!cmdbuf_base) 400 + return -ENOMEM; 401 + 402 + fw.words = g->words; 403 + while (fw.words && !err) { 404 + u32 word = cmdbuf_base[fw.offset]; 405 + u32 opcode = (word & 0xf0000000) >> 28; 406 + 407 + fw.mask = 0; 408 + fw.reg = 0; 409 + fw.count = 0; 410 + fw.words--; 411 + fw.offset++; 412 + 413 + switch (opcode) { 414 + case 0: 415 + fw.class = word >> 6 & 0x3ff; 416 + fw.mask = word & 0x3f; 417 + fw.reg = word >> 16 & 0xfff; 418 + err = check_mask(&fw); 419 + if (err) 420 + goto out; 421 + break; 422 + case 1: 423 + fw.reg = word >> 16 & 0xfff; 424 + fw.count = word & 0xffff; 425 + err = check_incr(&fw); 426 + if (err) 427 + goto out; 428 + break; 429 + 430 + case 2: 431 + fw.reg = word >> 16 & 0xfff; 432 + fw.count = word & 0xffff; 433 + err = 
check_nonincr(&fw); 434 + if (err) 435 + goto out; 436 + break; 437 + 438 + case 3: 439 + fw.mask = word & 0xffff; 440 + fw.reg = word >> 16 & 0xfff; 441 + err = check_mask(&fw); 442 + if (err) 443 + goto out; 444 + break; 445 + case 4: 446 + case 5: 447 + case 14: 448 + break; 449 + default: 450 + err = -EINVAL; 451 + break; 452 + } 453 + } 454 + 455 + /* No relocs should remain at this point */ 456 + if (fw.num_relocs) 457 + err = -EINVAL; 458 + 459 + out: 460 + host1x_bo_munmap(g->bo, cmdbuf_base); 461 + 462 + return err; 463 + } 464 + 465 + static inline int copy_gathers(struct host1x_job *job, struct device *dev) 466 + { 467 + size_t size = 0; 468 + size_t offset = 0; 469 + int i; 470 + 471 + for (i = 0; i < job->num_gathers; i++) { 472 + struct host1x_job_gather *g = &job->gathers[i]; 473 + size += g->words * sizeof(u32); 474 + } 475 + 476 + job->gather_copy_mapped = dma_alloc_writecombine(dev, size, 477 + &job->gather_copy, 478 + GFP_KERNEL); 479 + if (!job->gather_copy_mapped) { 480 + int err = PTR_ERR(job->gather_copy_mapped); 481 + job->gather_copy_mapped = NULL; 482 + return err; 483 + } 484 + 485 + job->gather_copy_size = size; 486 + 487 + for (i = 0; i < job->num_gathers; i++) { 488 + struct host1x_job_gather *g = &job->gathers[i]; 489 + void *gather; 490 + 491 + gather = host1x_bo_mmap(g->bo); 492 + memcpy(job->gather_copy_mapped + offset, gather + g->offset, 493 + g->words * sizeof(u32)); 494 + host1x_bo_munmap(g->bo, gather); 495 + 496 + g->base = job->gather_copy; 497 + g->offset = offset; 498 + g->bo = NULL; 499 + 500 + offset += g->words * sizeof(u32); 501 + } 502 + 503 + return 0; 504 + } 505 + 506 + int host1x_job_pin(struct host1x_job *job, struct device *dev) 507 + { 508 + int err; 509 + unsigned int i, j; 510 + struct host1x *host = dev_get_drvdata(dev->parent); 511 + DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host)); 512 + 513 + bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host)); 514 + for (i = 0; i < job->num_waitchk; i++) { 515 
+ u32 syncpt_id = job->waitchk[i].syncpt_id; 516 + if (syncpt_id < host1x_syncpt_nb_pts(host)) 517 + set_bit(syncpt_id, waitchk_mask); 518 + } 519 + 520 + /* get current syncpt values for waitchk */ 521 + for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host)) 522 + host1x_syncpt_load(host->syncpt + i); 523 + 524 + /* pin memory */ 525 + err = pin_job(job); 526 + if (!err) 527 + goto out; 528 + 529 + /* patch gathers */ 530 + for (i = 0; i < job->num_gathers; i++) { 531 + struct host1x_job_gather *g = &job->gathers[i]; 532 + 533 + /* process each gather mem only once */ 534 + if (g->handled) 535 + continue; 536 + 537 + g->base = job->gather_addr_phys[i]; 538 + 539 + for (j = 0; j < job->num_gathers; j++) 540 + if (job->gathers[j].bo == g->bo) 541 + job->gathers[j].handled = true; 542 + 543 + err = 0; 544 + 545 + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) 546 + err = validate(job, dev, g); 547 + 548 + if (err) 549 + dev_err(dev, "Job invalid (err=%d)\n", err); 550 + 551 + if (!err) 552 + err = do_relocs(job, g->bo); 553 + 554 + if (!err) 555 + err = do_waitchks(job, host, g->bo); 556 + 557 + if (err) 558 + break; 559 + } 560 + 561 + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !err) { 562 + err = copy_gathers(job, dev); 563 + if (err) { 564 + host1x_job_unpin(job); 565 + return err; 566 + } 567 + } 568 + 569 + out: 570 + wmb(); 571 + 572 + return err; 573 + } 574 + 575 + void host1x_job_unpin(struct host1x_job *job) 576 + { 577 + unsigned int i; 578 + 579 + for (i = 0; i < job->num_unpins; i++) { 580 + struct host1x_job_unpin_data *unpin = &job->unpins[i]; 581 + host1x_bo_unpin(unpin->bo, unpin->sgt); 582 + host1x_bo_put(unpin->bo); 583 + } 584 + job->num_unpins = 0; 585 + 586 + if (job->gather_copy_size) 587 + dma_free_writecombine(job->channel->dev, job->gather_copy_size, 588 + job->gather_copy_mapped, 589 + job->gather_copy); 590 + } 591 + 592 + /* 593 + * Debug routine used to dump job entries 594 + */ 595 + void host1x_job_dump(struct device *dev, 
struct host1x_job *job) 596 + { 597 + dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt_id); 598 + dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end); 599 + dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get); 600 + dev_dbg(dev, " TIMEOUT %d\n", job->timeout); 601 + dev_dbg(dev, " NUM_SLOTS %d\n", job->num_slots); 602 + dev_dbg(dev, " NUM_HANDLES %d\n", job->num_unpins); 603 + }

+162

drivers/gpu/host1x/job.h

··· 1 + /* 2 + * Tegra host1x Job 3 + * 4 + * Copyright (c) 2011-2013, NVIDIA Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms and conditions of the GNU General Public License, 8 + * version 2, as published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope it will be useful, but WITHOUT 11 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 + * more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #ifndef __HOST1X_JOB_H 20 + #define __HOST1X_JOB_H 21 + 22 + struct host1x_job_gather { 23 + u32 words; 24 + dma_addr_t base; 25 + struct host1x_bo *bo; 26 + int offset; 27 + bool handled; 28 + }; 29 + 30 + struct host1x_cmdbuf { 31 + u32 handle; 32 + u32 offset; 33 + u32 words; 34 + u32 pad; 35 + }; 36 + 37 + struct host1x_reloc { 38 + struct host1x_bo *cmdbuf; 39 + u32 cmdbuf_offset; 40 + struct host1x_bo *target; 41 + u32 target_offset; 42 + u32 shift; 43 + u32 pad; 44 + }; 45 + 46 + struct host1x_waitchk { 47 + struct host1x_bo *bo; 48 + u32 offset; 49 + u32 syncpt_id; 50 + u32 thresh; 51 + }; 52 + 53 + struct host1x_job_unpin_data { 54 + struct host1x_bo *bo; 55 + struct sg_table *sgt; 56 + }; 57 + 58 + /* 59 + * Each submit is tracked as a host1x_job. 
60 + */ 61 + struct host1x_job { 62 + /* When refcount goes to zero, job can be freed */ 63 + struct kref ref; 64 + 65 + /* List entry */ 66 + struct list_head list; 67 + 68 + /* Channel where job is submitted to */ 69 + struct host1x_channel *channel; 70 + 71 + u32 client; 72 + 73 + /* Gathers and their memory */ 74 + struct host1x_job_gather *gathers; 75 + unsigned int num_gathers; 76 + 77 + /* Wait checks to be processed at submit time */ 78 + struct host1x_waitchk *waitchk; 79 + unsigned int num_waitchk; 80 + u32 waitchk_mask; 81 + 82 + /* Array of handles to be pinned & unpinned */ 83 + struct host1x_reloc *relocarray; 84 + unsigned int num_relocs; 85 + struct host1x_job_unpin_data *unpins; 86 + unsigned int num_unpins; 87 + 88 + dma_addr_t *addr_phys; 89 + dma_addr_t *gather_addr_phys; 90 + dma_addr_t *reloc_addr_phys; 91 + 92 + /* Sync point id, number of increments and end related to the submit */ 93 + u32 syncpt_id; 94 + u32 syncpt_incrs; 95 + u32 syncpt_end; 96 + 97 + /* Maximum time to wait for this job */ 98 + unsigned int timeout; 99 + 100 + /* Index and number of slots used in the push buffer */ 101 + unsigned int first_get; 102 + unsigned int num_slots; 103 + 104 + /* Copy of gathers */ 105 + size_t gather_copy_size; 106 + dma_addr_t gather_copy; 107 + u8 *gather_copy_mapped; 108 + 109 + /* Check if register is marked as an address reg */ 110 + int (*is_addr_reg)(struct device *dev, u32 reg, u32 class); 111 + 112 + /* Request a SETCLASS to this class */ 113 + u32 class; 114 + 115 + /* Add a channel wait for previous ops to complete */ 116 + bool serialize; 117 + }; 118 + /* 119 + * Allocate memory for a job. Just enough memory will be allocated to 120 + * accomodate the submit. 121 + */ 122 + struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, 123 + u32 num_cmdbufs, u32 num_relocs, 124 + u32 num_waitchks); 125 + 126 + /* 127 + * Add a gather to a job. 
128 + */ 129 + void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *mem_id, 130 + u32 words, u32 offset); 131 + 132 + /* 133 + * Increment reference going to host1x_job. 134 + */ 135 + struct host1x_job *host1x_job_get(struct host1x_job *job); 136 + 137 + /* 138 + * Decrement reference job, free if goes to zero. 139 + */ 140 + void host1x_job_put(struct host1x_job *job); 141 + 142 + /* 143 + * Pin memory related to job. This handles relocation of addresses to the 144 + * host1x address space. Handles both the gather memory and any other memory 145 + * referred to from the gather buffers. 146 + * 147 + * Handles also patching out host waits that would wait for an expired sync 148 + * point value. 149 + */ 150 + int host1x_job_pin(struct host1x_job *job, struct device *dev); 151 + 152 + /* 153 + * Unpin memory related to job. 154 + */ 155 + void host1x_job_unpin(struct host1x_job *job); 156 + 157 + /* 158 + * Dump contents of job to debug output. 159 + */ 160 + void host1x_job_dump(struct device *dev, struct host1x_job *job); 161 + 162 + #endif

+11

drivers/gpu/host1x/syncpt.c

··· 300 300 return (s32)(current_val - thresh) >= 0; 301 301 } 302 302 303 + /* remove a wait pointed to by patch_addr */ 304 + int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr) 305 + { 306 + return host1x_hw_syncpt_patch_wait(sp->host, sp, patch_addr); 307 + } 308 + 303 309 int host1x_syncpt_init(struct host1x *host) 304 310 { 305 311 struct host1x_syncpt *syncpt; ··· 324 318 host->syncpt = syncpt; 325 319 326 320 host1x_syncpt_restore(host); 321 + 322 + /* Allocate sync point to use for clearing waits for expired fences */ 323 + host->nop_sp = _host1x_syncpt_alloc(host, NULL, 0); 324 + if (!host->nop_sp) 325 + return -ENOMEM; 327 326 328 327 return 0; 329 328 }

+6

drivers/gpu/host1x/syncpt.h

··· 27 27 28 28 struct host1x; 29 29 30 + /* Reserved for replacing an expired wait with a NOP */ 31 + #define HOST1X_SYNCPT_RESERVED 0 32 + 30 33 struct host1x_syncpt { 31 34 int id; 32 35 atomic_t min_val; ··· 148 145 { 149 146 return sp->id < host1x_syncpt_nb_pts(sp->host); 150 147 } 148 + 149 + /* Patch a wait by replacing it with a wait for syncpt 0 value 0 */ 150 + int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr); 151 151 152 152 /* Return id of the sync point */ 153 153 u32 host1x_syncpt_id(struct host1x_syncpt *sp);

+192

include/trace/events/host1x.h

··· 37 37 TP_printk("name=%s", __entry->name) 38 38 ); 39 39 40 + DEFINE_EVENT(host1x, host1x_channel_open, 41 + TP_PROTO(const char *name), 42 + TP_ARGS(name) 43 + ); 44 + 45 + DEFINE_EVENT(host1x, host1x_channel_release, 46 + TP_PROTO(const char *name), 47 + TP_ARGS(name) 48 + ); 49 + 50 + DEFINE_EVENT(host1x, host1x_cdma_begin, 51 + TP_PROTO(const char *name), 52 + TP_ARGS(name) 53 + ); 54 + 55 + DEFINE_EVENT(host1x, host1x_cdma_end, 56 + TP_PROTO(const char *name), 57 + TP_ARGS(name) 58 + ); 59 + 60 + TRACE_EVENT(host1x_cdma_push, 61 + TP_PROTO(const char *name, u32 op1, u32 op2), 62 + 63 + TP_ARGS(name, op1, op2), 64 + 65 + TP_STRUCT__entry( 66 + __field(const char *, name) 67 + __field(u32, op1) 68 + __field(u32, op2) 69 + ), 70 + 71 + TP_fast_assign( 72 + __entry->name = name; 73 + __entry->op1 = op1; 74 + __entry->op2 = op2; 75 + ), 76 + 77 + TP_printk("name=%s, op1=%08x, op2=%08x", 78 + __entry->name, __entry->op1, __entry->op2) 79 + ); 80 + 81 + TRACE_EVENT(host1x_cdma_push_gather, 82 + TP_PROTO(const char *name, u32 mem_id, 83 + u32 words, u32 offset, void *cmdbuf), 84 + 85 + TP_ARGS(name, mem_id, words, offset, cmdbuf), 86 + 87 + TP_STRUCT__entry( 88 + __field(const char *, name) 89 + __field(u32, mem_id) 90 + __field(u32, words) 91 + __field(u32, offset) 92 + __field(bool, cmdbuf) 93 + __dynamic_array(u32, cmdbuf, words) 94 + ), 95 + 96 + TP_fast_assign( 97 + if (cmdbuf) { 98 + memcpy(__get_dynamic_array(cmdbuf), cmdbuf+offset, 99 + words * sizeof(u32)); 100 + } 101 + __entry->cmdbuf = cmdbuf; 102 + __entry->name = name; 103 + __entry->mem_id = mem_id; 104 + __entry->words = words; 105 + __entry->offset = offset; 106 + ), 107 + 108 + TP_printk("name=%s, mem_id=%08x, words=%u, offset=%d, contents=[%s]", 109 + __entry->name, __entry->mem_id, 110 + __entry->words, __entry->offset, 111 + __print_hex(__get_dynamic_array(cmdbuf), 112 + __entry->cmdbuf ? 
__entry->words * 4 : 0)) 113 + ); 114 + 115 + TRACE_EVENT(host1x_channel_submit, 116 + TP_PROTO(const char *name, u32 cmdbufs, u32 relocs, u32 waitchks, 117 + u32 syncpt_id, u32 syncpt_incrs), 118 + 119 + TP_ARGS(name, cmdbufs, relocs, waitchks, syncpt_id, syncpt_incrs), 120 + 121 + TP_STRUCT__entry( 122 + __field(const char *, name) 123 + __field(u32, cmdbufs) 124 + __field(u32, relocs) 125 + __field(u32, waitchks) 126 + __field(u32, syncpt_id) 127 + __field(u32, syncpt_incrs) 128 + ), 129 + 130 + TP_fast_assign( 131 + __entry->name = name; 132 + __entry->cmdbufs = cmdbufs; 133 + __entry->relocs = relocs; 134 + __entry->waitchks = waitchks; 135 + __entry->syncpt_id = syncpt_id; 136 + __entry->syncpt_incrs = syncpt_incrs; 137 + ), 138 + 139 + TP_printk("name=%s, cmdbufs=%u, relocs=%u, waitchks=%d," 140 + "syncpt_id=%u, syncpt_incrs=%u", 141 + __entry->name, __entry->cmdbufs, __entry->relocs, __entry->waitchks, 142 + __entry->syncpt_id, __entry->syncpt_incrs) 143 + ); 144 + 145 + TRACE_EVENT(host1x_channel_submitted, 146 + TP_PROTO(const char *name, u32 syncpt_base, u32 syncpt_max), 147 + 148 + TP_ARGS(name, syncpt_base, syncpt_max), 149 + 150 + TP_STRUCT__entry( 151 + __field(const char *, name) 152 + __field(u32, syncpt_base) 153 + __field(u32, syncpt_max) 154 + ), 155 + 156 + TP_fast_assign( 157 + __entry->name = name; 158 + __entry->syncpt_base = syncpt_base; 159 + __entry->syncpt_max = syncpt_max; 160 + ), 161 + 162 + TP_printk("name=%s, syncpt_base=%d, syncpt_max=%d", 163 + __entry->name, __entry->syncpt_base, __entry->syncpt_max) 164 + ); 165 + 166 + TRACE_EVENT(host1x_channel_submit_complete, 167 + TP_PROTO(const char *name, int count, u32 thresh), 168 + 169 + TP_ARGS(name, count, thresh), 170 + 171 + TP_STRUCT__entry( 172 + __field(const char *, name) 173 + __field(int, count) 174 + __field(u32, thresh) 175 + ), 176 + 177 + TP_fast_assign( 178 + __entry->name = name; 179 + __entry->count = count; 180 + __entry->thresh = thresh; 181 + ), 182 + 183 + 
TP_printk("name=%s, count=%d, thresh=%d", 184 + __entry->name, __entry->count, __entry->thresh) 185 + ); 186 + 187 + TRACE_EVENT(host1x_wait_cdma, 188 + TP_PROTO(const char *name, u32 eventid), 189 + 190 + TP_ARGS(name, eventid), 191 + 192 + TP_STRUCT__entry( 193 + __field(const char *, name) 194 + __field(u32, eventid) 195 + ), 196 + 197 + TP_fast_assign( 198 + __entry->name = name; 199 + __entry->eventid = eventid; 200 + ), 201 + 202 + TP_printk("name=%s, event=%d", __entry->name, __entry->eventid) 203 + ); 204 + 40 205 TRACE_EVENT(host1x_syncpt_load_min, 41 206 TP_PROTO(u32 id, u32 val), 42 207 ··· 218 53 ), 219 54 220 55 TP_printk("id=%d, val=%d", __entry->id, __entry->val) 56 + ); 57 + 58 + TRACE_EVENT(host1x_syncpt_wait_check, 59 + TP_PROTO(void *mem_id, u32 offset, u32 syncpt_id, u32 thresh, u32 min), 60 + 61 + TP_ARGS(mem_id, offset, syncpt_id, thresh, min), 62 + 63 + TP_STRUCT__entry( 64 + __field(void *, mem_id) 65 + __field(u32, offset) 66 + __field(u32, syncpt_id) 67 + __field(u32, thresh) 68 + __field(u32, min) 69 + ), 70 + 71 + TP_fast_assign( 72 + __entry->mem_id = mem_id; 73 + __entry->offset = offset; 74 + __entry->syncpt_id = syncpt_id; 75 + __entry->thresh = thresh; 76 + __entry->min = min; 77 + ), 78 + 79 + TP_printk("mem_id=%p, offset=%05x, id=%d, thresh=%d, current=%d", 80 + __entry->mem_id, __entry->offset, 81 + __entry->syncpt_id, __entry->thresh, 82 + __entry->min) 221 83 ); 222 84 223 85 #endif /* _TRACE_HOST1X_H */