Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

coresight: tmc: implementing TMC-ETF AUX space API

This patch implements the AUX area interfaces required to
use the TMC (configured as an ETF) from the Perf sub-system.

The heuristic is heavily borrowed from the ETB10 implementation.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Authored by: Mathieu Poirier
Committed by: Greg Kroah-Hartman
2e499bbc a02e81f7

+200
+199
drivers/hwtracing/coresight/coresight-tmc-etf.c
··· 15 15 * this program. If not, see <http://www.gnu.org/licenses/>. 16 16 */ 17 17 18 + #include <linux/circ_buf.h> 18 19 #include <linux/coresight.h> 20 + #include <linux/perf_event.h> 19 21 #include <linux/slab.h> 20 22 #include "coresight-priv.h" 21 23 #include "coresight-tmc.h" ··· 284 282 dev_info(drvdata->dev, "TMC disabled\n"); 285 283 } 286 284 285 + static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu, 286 + void **pages, int nr_pages, bool overwrite) 287 + { 288 + int node; 289 + struct cs_buffers *buf; 290 + 291 + if (cpu == -1) 292 + cpu = smp_processor_id(); 293 + node = cpu_to_node(cpu); 294 + 295 + /* Allocate memory structure for interaction with Perf */ 296 + buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node); 297 + if (!buf) 298 + return NULL; 299 + 300 + buf->snapshot = overwrite; 301 + buf->nr_pages = nr_pages; 302 + buf->data_pages = pages; 303 + 304 + return buf; 305 + } 306 + 307 + static void tmc_free_etf_buffer(void *config) 308 + { 309 + struct cs_buffers *buf = config; 310 + 311 + kfree(buf); 312 + } 313 + 314 + static int tmc_set_etf_buffer(struct coresight_device *csdev, 315 + struct perf_output_handle *handle, 316 + void *sink_config) 317 + { 318 + int ret = 0; 319 + unsigned long head; 320 + struct cs_buffers *buf = sink_config; 321 + 322 + /* wrap head around to the amount of space we have */ 323 + head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); 324 + 325 + /* find the page to write to */ 326 + buf->cur = head / PAGE_SIZE; 327 + 328 + /* and offset within that page */ 329 + buf->offset = head % PAGE_SIZE; 330 + 331 + local_set(&buf->data_size, 0); 332 + 333 + return ret; 334 + } 335 + 336 + static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, 337 + struct perf_output_handle *handle, 338 + void *sink_config, bool *lost) 339 + { 340 + long size = 0; 341 + struct cs_buffers *buf = sink_config; 342 + 343 + if (buf) { 344 + /* 345 + * In snapshot mode ->data_size holds 
the new address of the 346 + * ring buffer's head. The size itself is the whole address 347 + * range since we want the latest information. 348 + */ 349 + if (buf->snapshot) 350 + handle->head = local_xchg(&buf->data_size, 351 + buf->nr_pages << PAGE_SHIFT); 352 + /* 353 + * Tell the tracer PMU how much we got in this run and if 354 + * something went wrong along the way. Nobody else can use 355 + * this cs_buffers instance until we are done. As such 356 + * resetting parameters here and squaring off with the ring 357 + * buffer API in the tracer PMU is fine. 358 + */ 359 + *lost = !!local_xchg(&buf->lost, 0); 360 + size = local_xchg(&buf->data_size, 0); 361 + } 362 + 363 + return size; 364 + } 365 + 366 + static void tmc_update_etf_buffer(struct coresight_device *csdev, 367 + struct perf_output_handle *handle, 368 + void *sink_config) 369 + { 370 + int i, cur; 371 + u32 *buf_ptr; 372 + u32 read_ptr, write_ptr; 373 + u32 status, to_read; 374 + unsigned long offset; 375 + struct cs_buffers *buf = sink_config; 376 + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); 377 + 378 + if (!buf) 379 + return; 380 + 381 + /* This shouldn't happen */ 382 + if (WARN_ON_ONCE(local_read(&drvdata->mode) != CS_MODE_PERF)) 383 + return; 384 + 385 + CS_UNLOCK(drvdata->base); 386 + 387 + tmc_flush_and_stop(drvdata); 388 + 389 + read_ptr = readl_relaxed(drvdata->base + TMC_RRP); 390 + write_ptr = readl_relaxed(drvdata->base + TMC_RWP); 391 + 392 + /* 393 + * Get a hold of the status register and see if a wrap around 394 + * has occurred. If so adjust things accordingly. 395 + */ 396 + status = readl_relaxed(drvdata->base + TMC_STS); 397 + if (status & TMC_STS_FULL) { 398 + local_inc(&buf->lost); 399 + to_read = drvdata->size; 400 + } else { 401 + to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); 402 + } 403 + 404 + /* 405 + * The TMC RAM buffer may be bigger than the space available in the 406 + * perf ring buffer (handle->size). 
If so advance the RRP so that we 407 + * get the latest trace data. 408 + */ 409 + if (to_read > handle->size) { 410 + u32 mask = 0; 411 + 412 + /* 413 + * The value written to RRP must be byte-address aligned to 414 + * the width of the trace memory databus _and_ to a frame 415 + * boundary (16 byte), whichever is the biggest. For example, 416 + * for 32-bit, 64-bit and 128-bit wide trace memory, the four 417 + * LSBs must be 0s. For 256-bit wide trace memory, the five 418 + * LSBs must be 0s. 419 + */ 420 + switch (drvdata->memwidth) { 421 + case TMC_MEM_INTF_WIDTH_32BITS: 422 + case TMC_MEM_INTF_WIDTH_64BITS: 423 + case TMC_MEM_INTF_WIDTH_128BITS: 424 + mask = GENMASK(31, 5); 425 + break; 426 + case TMC_MEM_INTF_WIDTH_256BITS: 427 + mask = GENMASK(31, 6); 428 + break; 429 + } 430 + 431 + /* 432 + * Make sure the new size is aligned in accordance with the 433 + * requirement explained above. 434 + */ 435 + to_read = handle->size & mask; 436 + /* Move the RAM read pointer up */ 437 + read_ptr = (write_ptr + drvdata->size) - to_read; 438 + /* Make sure we are still within our limits */ 439 + if (read_ptr > (drvdata->size - 1)) 440 + read_ptr -= drvdata->size; 441 + /* Tell the HW */ 442 + writel_relaxed(read_ptr, drvdata->base + TMC_RRP); 443 + local_inc(&buf->lost); 444 + } 445 + 446 + cur = buf->cur; 447 + offset = buf->offset; 448 + 449 + /* for every byte to read */ 450 + for (i = 0; i < to_read; i += 4) { 451 + buf_ptr = buf->data_pages[cur] + offset; 452 + *buf_ptr = readl_relaxed(drvdata->base + TMC_RRD); 453 + 454 + offset += 4; 455 + if (offset >= PAGE_SIZE) { 456 + offset = 0; 457 + cur++; 458 + /* wrap around at the end of the buffer */ 459 + cur &= buf->nr_pages - 1; 460 + } 461 + } 462 + 463 + /* 464 + * In snapshot mode all we have to do is communicate to 465 + * perf_aux_output_end() the address of the current head. In full 466 + * trace mode the same function expects a size to move rb->aux_head 467 + * forward. 
468 + */ 469 + if (buf->snapshot) 470 + local_set(&buf->data_size, (cur * PAGE_SIZE) + offset); 471 + else 472 + local_add(to_read, &buf->data_size); 473 + 474 + CS_LOCK(drvdata->base); 475 + } 476 + 287 477 static const struct coresight_ops_sink tmc_etf_sink_ops = { 288 478 .enable = tmc_enable_etf_sink, 289 479 .disable = tmc_disable_etf_sink, 480 + .alloc_buffer = tmc_alloc_etf_buffer, 481 + .free_buffer = tmc_free_etf_buffer, 482 + .set_buffer = tmc_set_etf_buffer, 483 + .reset_buffer = tmc_reset_etf_buffer, 484 + .update_buffer = tmc_update_etf_buffer, 290 485 }; 291 486 292 487 static const struct coresight_ops_link tmc_etf_link_ops = {
+1
drivers/hwtracing/coresight/coresight-tmc.h
··· 52 52 #define TMC_CTL_CAPT_EN BIT(0) 53 53 /* TMC_STS - 0x00C */ 54 54 #define TMC_STS_TMCREADY_BIT 2 55 + #define TMC_STS_FULL BIT(0) 55 56 #define TMC_STS_TRIGGERED BIT(1) 56 57 /* TMC_AXICTL - 0x110 */ 57 58 #define TMC_AXICTL_PROT_CTL_B0 BIT(0)