Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Add address watch operation to debugger

The address watch operation gives the ability to specify watch points
which will generate a shader breakpoint, based on a specified single
address or range of addresses.

There is support for read/write/any access modes.

Signed-off-by: Yair Shachar <yair.shachar@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

authored by

Yair Shachar and committed by
Oded Gabbay
e2e9afc4 788bf83d

+295
+274
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
··· 236 236 return status; 237 237 } 238 238 239 + static void dbgdev_address_watch_set_registers( 240 + const struct dbg_address_watch_info *adw_info, 241 + union TCP_WATCH_ADDR_H_BITS *addrHi, 242 + union TCP_WATCH_ADDR_L_BITS *addrLo, 243 + union TCP_WATCH_CNTL_BITS *cntl, 244 + unsigned int index, unsigned int vmid) 245 + { 246 + union ULARGE_INTEGER addr; 247 + 248 + BUG_ON(!adw_info || !addrHi || !addrLo || !cntl); 249 + 250 + addr.quad_part = 0; 251 + addrHi->u32All = 0; 252 + addrLo->u32All = 0; 253 + cntl->u32All = 0; 254 + 255 + if (adw_info->watch_mask != NULL) 256 + cntl->bitfields.mask = 257 + (uint32_t) (adw_info->watch_mask[index] & 258 + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); 259 + else 260 + cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 261 + 262 + addr.quad_part = (unsigned long long) adw_info->watch_address[index]; 263 + 264 + addrHi->bitfields.addr = addr.u.high_part & 265 + ADDRESS_WATCH_REG_ADDHIGH_MASK; 266 + addrLo->bitfields.addr = 267 + (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); 268 + 269 + cntl->bitfields.mode = adw_info->watch_mode[index]; 270 + cntl->bitfields.vmid = (uint32_t) vmid; 271 + /* for now assume it is an ATC address */ 272 + cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; 273 + 274 + pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); 275 + pr_debug("\t\t%20s %08x\n", "set reg add high :", 276 + addrHi->bitfields.addr); 277 + pr_debug("\t\t%20s %08x\n", "set reg add low :", 278 + addrLo->bitfields.addr); 279 + } 280 + 281 + static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, 282 + struct dbg_address_watch_info *adw_info) 283 + { 284 + union TCP_WATCH_ADDR_H_BITS addrHi; 285 + union TCP_WATCH_ADDR_L_BITS addrLo; 286 + union TCP_WATCH_CNTL_BITS cntl; 287 + struct kfd_process_device *pdd; 288 + unsigned int i; 289 + 290 + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); 291 + 292 + /* taking the vmid for that process on the safe way using pdd */ 293 + pdd = kfd_get_process_device_data(dbgdev->dev, 294 + adw_info->process); 295 + if (!pdd) { 296 + pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); 297 + return -EFAULT; 298 + } 299 + 300 + addrHi.u32All = 0; 301 + addrLo.u32All = 0; 302 + cntl.u32All = 0; 303 + 304 + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 305 + (adw_info->num_watch_points == 0)) { 306 + pr_err("amdkfd: num_watch_points is invalid\n"); 307 + return -EINVAL; 308 + } 309 + 310 + if ((adw_info->watch_mode == NULL) || 311 + (adw_info->watch_address == NULL)) { 312 + pr_err("amdkfd: adw_info fields are not valid\n"); 313 + return -EINVAL; 314 + } 315 + 316 + for (i = 0 ; i < adw_info->num_watch_points ; i++) { 317 + dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, 318 + &cntl, i, pdd->qpd.vmid); 319 + 320 + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 321 + pr_debug("\t\t%20s %08x\n", "register index :", i); 322 + pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); 323 + pr_debug("\t\t%20s %08x\n", "Address Low is :", 324 + addrLo.bitfields.addr); 325 + pr_debug("\t\t%20s %08x\n", "Address high is :", 326 + addrHi.bitfields.addr); 327 + pr_debug("\t\t%20s %08x\n", "Address high is :", 328 + addrHi.bitfields.addr); 329 + pr_debug("\t\t%20s %08x\n", "Control Mask is :", 330 + cntl.bitfields.mask); 331 + pr_debug("\t\t%20s %08x\n", "Control Mode is :", 332 + cntl.bitfields.mode); 333 + pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 334 + cntl.bitfields.vmid); 335 + pr_debug("\t\t%20s %08x\n", "Control atc is :", 336 + cntl.bitfields.atc); 337 + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 338 + 339 + pdd->dev->kfd2kgd->address_watch_execute( 340 + dbgdev->dev->kgd, 341 + i, 342 + cntl.u32All, 343 + addrHi.u32All, 344 + addrLo.u32All); 345 + } 346 + 347 + return 0; 348 + } 349 + 350 + static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, 351 + struct dbg_address_watch_info *adw_info) 352 + { 353 + struct pm4__set_config_reg *packets_vec; 354 + union TCP_WATCH_ADDR_H_BITS addrHi; 355 + union TCP_WATCH_ADDR_L_BITS addrLo; 356 + union TCP_WATCH_CNTL_BITS cntl; 357 + struct kfd_mem_obj *mem_obj; 358 + unsigned int aw_reg_add_dword; 359 + uint32_t *packet_buff_uint; 360 + unsigned int i; 361 + int status; 362 + size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; 363 + /* we do not control the vmid in DIQ mode, just a place holder */ 364 + unsigned int vmid = 0; 365 + 366 + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); 367 + 368 + addrHi.u32All = 0; 369 + addrLo.u32All = 0; 370 + cntl.u32All = 0; 371 + 372 + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 373 + (adw_info->num_watch_points == 0)) { 374 + pr_err("amdkfd: num_watch_points is invalid\n"); 375 + return -EINVAL; 376 + } 377 + 378 + if ((NULL == adw_info->watch_mode) || 379 + (NULL == adw_info->watch_address)) { 380 + pr_err("amdkfd: adw_info fields are not valid\n"); 381 + return -EINVAL; 382 + } 383 + 384 + status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 385 + 386 + if (status != 0) { 387 + pr_err("amdkfd: Failed to allocate GART memory\n"); 388 + return status; 389 + } 390 + 391 + packet_buff_uint = mem_obj->cpu_ptr; 392 + 393 + memset(packet_buff_uint, 0, ib_size); 394 + 395 + packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); 396 + 397 + packets_vec[0].header.count = 1; 398 + packets_vec[0].header.opcode = IT_SET_CONFIG_REG; 399 + packets_vec[0].header.type = PM4_TYPE_3; 400 + packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 401 + packets_vec[0].bitfields2.insert_vmid = 1; 402 + packets_vec[1].ordinal1 = packets_vec[0].ordinal1; 403 + packets_vec[1].bitfields2.insert_vmid = 0; 404 + packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 405 + packets_vec[2].bitfields2.insert_vmid = 0; 406 + packets_vec[3].ordinal1 = packets_vec[0].ordinal1; 407 + packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 408 + packets_vec[3].bitfields2.insert_vmid = 1; 409 + 410 + for (i = 0; i < adw_info->num_watch_points; i++) { 411 + dbgdev_address_watch_set_registers(adw_info, 412 + &addrHi, 413 + &addrLo, 414 + &cntl, 415 + i, 416 + vmid); 417 + 418 + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 419 + pr_debug("\t\t%20s %08x\n", "register index :", i); 420 + pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); 421 + pr_debug("\t\t%20s %p\n", "Add ptr is :", 422 + adw_info->watch_address); 423 + pr_debug("\t\t%20s %08llx\n", "Add is :", 424 + adw_info->watch_address[i]); 425 + pr_debug("\t\t%20s %08x\n", "Address Low is :", 426 + addrLo.bitfields.addr); 427 + pr_debug("\t\t%20s %08x\n", "Address high is :", 428 + addrHi.bitfields.addr); 429 + pr_debug("\t\t%20s %08x\n", "Control Mask is :", 430 + cntl.bitfields.mask); 431 + pr_debug("\t\t%20s %08x\n", "Control Mode is :", 432 + cntl.bitfields.mode); 433 + pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 434 + cntl.bitfields.vmid); 435 + pr_debug("\t\t%20s %08x\n", "Control atc is :", 436 + cntl.bitfields.atc); 437 + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 438 + 439 + aw_reg_add_dword = 440 + dbgdev->dev->kfd2kgd->address_watch_get_offset( 441 + dbgdev->dev->kgd, 442 + i, 443 + ADDRESS_WATCH_REG_CNTL); 444 + 445 + aw_reg_add_dword /= sizeof(uint32_t); 446 + 447 + packets_vec[0].bitfields2.reg_offset = 448 + aw_reg_add_dword - CONFIG_REG_BASE; 449 + 450 + packets_vec[0].reg_data[0] = cntl.u32All; 451 + 452 + aw_reg_add_dword = 453 + dbgdev->dev->kfd2kgd->address_watch_get_offset( 454 + dbgdev->dev->kgd, 455 + i, 456 + ADDRESS_WATCH_REG_ADDR_HI); 457 + 458 + aw_reg_add_dword /= sizeof(uint32_t); 459 + 460 + packets_vec[1].bitfields2.reg_offset = 461 + aw_reg_add_dword - CONFIG_REG_BASE; 462 + packets_vec[1].reg_data[0] = addrHi.u32All; 463 + 464 + aw_reg_add_dword = 465 + dbgdev->dev->kfd2kgd->address_watch_get_offset( 466 + dbgdev->dev->kgd, 467 + i, 468 + ADDRESS_WATCH_REG_ADDR_LO); 469 + 470 + aw_reg_add_dword /= sizeof(uint32_t); 471 + 472 + packets_vec[2].bitfields2.reg_offset = 473 + aw_reg_add_dword - CONFIG_REG_BASE; 474 + packets_vec[2].reg_data[0] = addrLo.u32All; 475 + 476 + /* enable watch flag if address is not zero*/ 477 + if (adw_info->watch_address[i] > 0) 478 + cntl.bitfields.valid = 1; 479 + else 480 + cntl.bitfields.valid = 0; 481 + 482 + aw_reg_add_dword = 483 + dbgdev->dev->kfd2kgd->address_watch_get_offset( 484 + dbgdev->dev->kgd, 485 + i, 486 + ADDRESS_WATCH_REG_CNTL); 487 + 488 + aw_reg_add_dword /= sizeof(uint32_t); 489 + 490 + packets_vec[3].bitfields2.reg_offset = 491 + aw_reg_add_dword - CONFIG_REG_BASE; 492 + packets_vec[3].reg_data[0] = cntl.u32All; 493 + 494 + status = dbgdev_diq_submit_ib( 495 + dbgdev, 496 + adw_info->process->pasid, 497 + mem_obj->gpu_addr, 498 + packet_buff_uint, 499 + ib_size); 500 + 501 + if (status != 0) { 502 + pr_err("amdkfd: Failed to submit IB to DIQ\n"); 503 + break; 504 + } 505 + } 506 + 507 + kfd_gtt_sa_free(dbgdev->dev, mem_obj); 508 + return status; 509 + } 510 + 239 511 static int dbgdev_wave_control_set_registers( 240 512 struct dbg_wave_control_info *wac_info, 241 513 union SQ_CMD_BITS *in_reg_sq_cmd, ··· 807 535 pdbgdev->dbgdev_register = dbgdev_register_nodiq; 808 536 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; 809 537 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; 538 + pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; 810 539 break; 811 540 case DBGDEV_TYPE_DIQ: 812 541 default: 813 542 pdbgdev->dbgdev_register = dbgdev_register_diq; 814 543 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; 815 544 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; 545 + pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; 816 546 break; 817 547 } 818 548
+17
drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
··· 149 149 return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info); 150 150 } 151 151 152 + long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, 153 + struct dbg_address_watch_info *adw_info) 154 + { 155 + BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info); 156 + 157 + 158 + /* Is the requests coming from the already registered process? */ 159 + if (pmgr->pasid != adw_info->process->pasid) { 160 + pr_debug("H/W debugger support was not registered for requester pasid %d\n", 161 + adw_info->process->pasid); 162 + return -EINVAL; 163 + } 164 + 165 + return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, 166 + adw_info); 167 + } 168 +
+4
drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
··· 268 268 /* virtualized function pointers to device dbg */ 269 269 int (*dbgdev_register)(struct kfd_dbgdev *dbgdev); 270 270 int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev); 271 + int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, 272 + struct dbg_address_watch_info *adw_info); 271 273 int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, 272 274 struct dbg_wave_control_info *wac_info); 273 275 ··· 289 287 long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p); 290 288 long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, 291 289 struct dbg_wave_control_info *wac_info); 290 + long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, 291 + struct dbg_address_watch_info *adw_info); 292 292 #endif /* KFD_DBGMGR_H_ */