
Merge tag 'drm-xe-next-2025-09-19' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

UAPI Changes:
- Drop L3 bank mask reporting from the media GT on Xe3 and later; only
  report it for the primary GT. No userspace needs or uses it for media,
  and some platforms may report bogus values.
- Add SLPC power_profile sysfs interface with support for base and
power_saving modes (Vinay Belgaumkar, Rodrigo Vivi)
- Add configfs attributes to add post/mid context-switch commands
(Lucas De Marchi)

Cross-subsystem Changes:
- Fix hmm_pfn_to_map_order() usage in gpusvm and refactor APIs to
  align with pieces previously handled by xe_hmm (Matthew Auld)

Core Changes:
- Add MEI driver for Late Binding Firmware Update/Upload
(Alexander Usyskin)

Driver Changes:
- Fix GuC CT teardown wrt TLB invalidation (Satyanarayana)
- Fix CCS save/restore on VF (Satyanarayana)
- Increase default GuC crash buffer size (Zhanjun)
- Allow to clear GT stats in debugfs to aid debugging (Matthew Brost)
- Add more SVM GT stats to debugfs (Matthew Brost)
- Fix error handling in VMA attr query (Himal)
- Move sa_info in debugfs to be per tile (Michal Wajdeczko)
- Limit number of retries upon receiving NO_RESPONSE_RETRY from GuC to
avoid endless loop (Michal Wajdeczko)
- Fix configfs handling for survivability_mode undoing user choice when
unbinding the module (Michal Wajdeczko)
- Refactor configfs attribute visibility to future-proof it and stop
exposing survivability_mode if not applicable (Michal Wajdeczko)
- Constify some functions (Harish Chegondi, Michal Wajdeczko)
- Add/extend more HW workarounds for Xe2 and Xe3
(Harish Chegondi, Tangudu Tilak Tirumalesh)
- Replace xe_hmm with gpusvm (Matthew Auld)
- Improve fake pci and WA kunit handling for testing new platforms
(Michal Wajdeczko)
- Reduce unnecessary PTE writes when migrating (Sanjay Yadav)
- Cleanup GuC interface definitions and log message (John Harrison)
- Small improvements around VF CCS (Michal Wajdeczko)
- Enable bus mastering for the I2C controller (Raag Jadav)
- Prefer devm_mutex over hand rolling it (Christophe JAILLET)
- Drop sysfs and debugfs attributes not available for VF (Michal Wajdeczko)
- GuC CT devm actions improvements (Michal Wajdeczko)
- Recommend new GuC versions for PTL and BMG (Julia Filipchuk)
- Improve driver handling for exhaustive eviction using new
  xe_validation wrapper around drm_exec (Thomas Hellström)
- Add and use printk wrappers for tile and device (Michal Wajdeczko)
- Better document workaround handling in Xe (Lucas De Marchi)
- Improvements on ARRAY_SIZE and ERR_CAST usage (Lucas De Marchi,
Fushuai Wang)
- Align CSS firmware headers with the GuC APIs (John Harrison)
- Test GuC to GuC (G2G) communication to aid debug in pre-production
firmware (John Harrison)
- Bail out driver probing if GuC fails to load (John Harrison)
- Allow error injection in xe_pxp_exec_queue_add()
(Daniele Ceraolo Spurio)
- Minor refactors in xe_svm (Shuicheng Lin)
- Fix madvise ioctl error handling (Shuicheng Lin)
- Use attribute groups to simplify sysfs registration
(Michal Wajdeczko)
- Add Late Binding Firmware implementation in Xe to work together with
the MEI component (Badal Nilawar, Daniele Ceraolo Spurio, Rodrigo
Vivi)
- Fix build with CONFIG_MODULES=n (Lucas De Marchi)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/c2et6dnkst2apsgt46dklej4nprqdukjosb55grpaknf3pvcxy@t7gtn3hqtp6n

+6884 -2355
+214 -83
drivers/gpu/drm/drm_gpusvm.c
···
  *
  * This function initializes the GPU SVM.
  *
+ * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free),
+ * then only @gpusvm, @name, and @drm are expected. However, the same base
+ * @gpusvm can also be used with both modes together in which case the full
+ * setup is needed, where the core drm_gpusvm_pages API will simply never use
+ * the other fields.
+ *
  * Return: 0 on success, a negative error code on failure.
  */
 int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
···
		    const struct drm_gpusvm_ops *ops,
		    const unsigned long *chunk_sizes, int num_chunks)
 {
-	if (!ops->invalidate || !num_chunks)
-		return -EINVAL;
+	if (mm) {
+		if (!ops->invalidate || !num_chunks)
+			return -EINVAL;
+		mmgrab(mm);
+	} else {
+		/* No full SVM mode, only core drm_gpusvm_pages API. */
+		if (ops || num_chunks || mm_range || notifier_size ||
+		    device_private_page_owner)
+			return -EINVAL;
+	}
 
 	gpusvm->name = name;
 	gpusvm->drm = drm;
···
 	gpusvm->chunk_sizes = chunk_sizes;
 	gpusvm->num_chunks = num_chunks;
 
-	mmgrab(mm);
 	gpusvm->root = RB_ROOT_CACHED;
 	INIT_LIST_HEAD(&gpusvm->notifier_list);
···
 		drm_gpusvm_range_remove(gpusvm, range);
 	}
 
-	mmdrop(gpusvm->mm);
+	if (gpusvm->mm)
+		mmdrop(gpusvm->mm);
 	WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
 }
 EXPORT_SYMBOL_GPL(drm_gpusvm_fini);
···
 	range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
 	range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
 	INIT_LIST_HEAD(&range->entry);
-	range->notifier_seq = LONG_MAX;
-	range->flags.migrate_devmem = migrate_devmem ? 1 : 0;
+	range->pages.notifier_seq = LONG_MAX;
+	range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0;
 
 	return range;
+}
+
+/**
+ * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order.
+ * @hmm_pfn: The current hmm_pfn.
+ * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array.
+ * @npages: Number of pages within the pfn array i.e the hmm range size.
+ *
+ * To allow skipping PFNs with the same flags (like when they belong to
+ * the same huge PTE) when looping over the pfn array, take a given a hmm_pfn,
+ * and return the largest order that will fit inside the CPU PTE, but also
+ * crucially accounting for the original hmm range boundaries.
+ *
+ * Return: The largest order that will safely fit within the size of the hmm_pfn
+ * CPU PTE.
+ */
+static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn,
+						unsigned long hmm_pfn_index,
+						unsigned long npages)
+{
+	unsigned long size;
+
+	size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
+	size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1);
+	hmm_pfn_index += size;
+	if (hmm_pfn_index > npages)
+		size -= (hmm_pfn_index - npages);
+
+	return ilog2(size);
 }
 
 /**
···
 			err = -EFAULT;
 			goto err_free;
 		}
-		i += 0x1 << hmm_pfn_to_map_order(pfns[i]);
+		i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
 	}
 
 err_free:
···
 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);
 
 /**
- * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal)
+ * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal)
  * @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
  * @npages: Number of pages to unmap
  *
- * This function unmap pages associated with a GPU SVM range. Assumes and
+ * This function unmap pages associated with a GPU SVM pages struct. Assumes and
  * asserts correct locking is in place when called.
  */
-static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
-					   struct drm_gpusvm_range *range,
-					   unsigned long npages)
+static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
+				     struct drm_gpusvm_pages *svm_pages,
+				     unsigned long npages)
 {
-	unsigned long i, j;
-	struct drm_pagemap *dpagemap = range->dpagemap;
+	struct drm_pagemap *dpagemap = svm_pages->dpagemap;
 	struct device *dev = gpusvm->drm->dev;
+	unsigned long i, j;
 
 	lockdep_assert_held(&gpusvm->notifier_lock);
 
-	if (range->flags.has_dma_mapping) {
-		struct drm_gpusvm_range_flags flags = {
-			.__flags = range->flags.__flags,
+	if (svm_pages->flags.has_dma_mapping) {
+		struct drm_gpusvm_pages_flags flags = {
+			.__flags = svm_pages->flags.__flags,
 		};
 
 		for (i = 0, j = 0; i < npages; j++) {
-			struct drm_pagemap_addr *addr = &range->dma_addr[j];
+			struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j];
 
 			if (addr->proto == DRM_INTERCONNECT_SYSTEM)
 				dma_unmap_page(dev,
···
 		/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
 		flags.has_devmem_pages = false;
 		flags.has_dma_mapping = false;
-		WRITE_ONCE(range->flags.__flags, flags.__flags);
+		WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
 
-		range->dpagemap = NULL;
+		svm_pages->dpagemap = NULL;
 	}
 }
 
 /**
- * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range
+ * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages
  * @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
  *
  * This function frees the dma address array associated with a GPU SVM range.
  */
-static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm,
-					struct drm_gpusvm_range *range)
+static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
+				    struct drm_gpusvm_pages *svm_pages)
 {
 	lockdep_assert_held(&gpusvm->notifier_lock);
 
-	if (range->dma_addr) {
-		kvfree(range->dma_addr);
-		range->dma_addr = NULL;
+	if (svm_pages->dma_addr) {
+		kvfree(svm_pages->dma_addr);
+		svm_pages->dma_addr = NULL;
 	}
 }
+
+/**
+ * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages
+ * struct
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ * @npages: Number of mapped pages
+ *
+ * This function unmaps and frees the dma address array associated with a GPU
+ * SVM pages struct.
+ */
+void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
+			   struct drm_gpusvm_pages *svm_pages,
+			   unsigned long npages)
+{
+	drm_gpusvm_notifier_lock(gpusvm);
+	__drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
+	__drm_gpusvm_free_pages(gpusvm, svm_pages);
+	drm_gpusvm_notifier_unlock(gpusvm);
+}
+EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages);
 
 /**
  * drm_gpusvm_range_remove() - Remove GPU SVM range
···
 		return;
 
 	drm_gpusvm_notifier_lock(gpusvm);
-	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
-	drm_gpusvm_range_free_pages(gpusvm, range);
+	__drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages);
+	__drm_gpusvm_free_pages(gpusvm, &range->pages);
 	__drm_gpusvm_range_remove(notifier, range);
 	drm_gpusvm_notifier_unlock(gpusvm);
···
 EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
 
 /**
+ * drm_gpusvm_pages_valid() - GPU SVM range pages valid
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ *
+ * This function determines if a GPU SVM range pages are valid. Expected be
+ * called holding gpusvm->notifier_lock and as the last step before committing a
+ * GPU binding. This is akin to a notifier seqno check in the HMM documentation
+ * but due to wider notifiers (i.e., notifiers which span multiple ranges) this
+ * function is required for finer grained checking (i.e., per range) if pages
+ * are valid.
+ *
+ * Return: True if GPU SVM range has valid pages, False otherwise
+ */
+static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm,
+				   struct drm_gpusvm_pages *svm_pages)
+{
+	lockdep_assert_held(&gpusvm->notifier_lock);
+
+	return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping;
+}
+
+/**
  * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
  * @gpusvm: Pointer to the GPU SVM structure
  * @range: Pointer to the GPU SVM range structure
···
 bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
 				  struct drm_gpusvm_range *range)
 {
-	lockdep_assert_held(&gpusvm->notifier_lock);
-
-	return range->flags.has_devmem_pages || range->flags.has_dma_mapping;
+	return drm_gpusvm_pages_valid(gpusvm, &range->pages);
 }
 EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
···
  *
  * Return: True if GPU SVM range has valid pages, False otherwise
  */
-static bool
-drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
-				      struct drm_gpusvm_range *range)
+static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
+					    struct drm_gpusvm_pages *svm_pages)
 {
 	bool pages_valid;
 
-	if (!range->dma_addr)
+	if (!svm_pages->dma_addr)
 		return false;
 
 	drm_gpusvm_notifier_lock(gpusvm);
-	pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range);
+	pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages);
 	if (!pages_valid)
-		drm_gpusvm_range_free_pages(gpusvm, range);
+		__drm_gpusvm_free_pages(gpusvm, svm_pages);
 	drm_gpusvm_notifier_unlock(gpusvm);
 
 	return pages_valid;
 }
 
 /**
- * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
+ * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct
  * @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: The SVM pages to populate. This will contain the dma-addresses
+ * @mm: The mm corresponding to the CPU range
+ * @notifier: The corresponding notifier for the given CPU range
+ * @pages_start: Start CPU address for the pages
+ * @pages_end: End CPU address for the pages (exclusive)
  * @ctx: GPU SVM context
  *
- * This function gets pages for a GPU SVM range and ensures they are mapped for
- * DMA access.
+ * This function gets and maps pages for CPU range and ensures they are
+ * mapped for DMA access.
  *
  * Return: 0 on success, negative error code on failure.
  */
-int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
-			       struct drm_gpusvm_range *range,
-			       const struct drm_gpusvm_ctx *ctx)
+int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
+			 struct drm_gpusvm_pages *svm_pages,
+			 struct mm_struct *mm,
+			 struct mmu_interval_notifier *notifier,
+			 unsigned long pages_start, unsigned long pages_end,
+			 const struct drm_gpusvm_ctx *ctx)
 {
-	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
 	struct hmm_range hmm_range = {
 		.default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
 						      HMM_PFN_REQ_WRITE),
 		.notifier = notifier,
-		.start = drm_gpusvm_range_start(range),
-		.end = drm_gpusvm_range_end(range),
+		.start = pages_start,
+		.end = pages_end,
 		.dev_private_owner = gpusvm->device_private_page_owner,
 	};
-	struct mm_struct *mm = gpusvm->mm;
 	void *zdd;
 	unsigned long timeout =
 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 	unsigned long i, j;
-	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
-					       drm_gpusvm_range_end(range));
+	unsigned long npages = npages_in_range(pages_start, pages_end);
 	unsigned long num_dma_mapped;
 	unsigned int order = 0;
 	unsigned long *pfns;
 	int err = 0;
 	struct dev_pagemap *pagemap;
 	struct drm_pagemap *dpagemap;
-	struct drm_gpusvm_range_flags flags;
+	struct drm_gpusvm_pages_flags flags;
+	enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE :
+							   DMA_BIDIRECTIONAL;
 
 retry:
 	hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
-	if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range))
+	if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages))
 		goto set_seqno;
 
 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
···
 	 */
 	drm_gpusvm_notifier_lock(gpusvm);
 
-	flags.__flags = range->flags.__flags;
+	flags.__flags = svm_pages->flags.__flags;
 	if (flags.unmapped) {
 		drm_gpusvm_notifier_unlock(gpusvm);
 		err = -EFAULT;
···
 		goto retry;
 	}
 
-	if (!range->dma_addr) {
+	if (!svm_pages->dma_addr) {
 		/* Unlock and restart mapping to allocate memory. */
 		drm_gpusvm_notifier_unlock(gpusvm);
-		range->dma_addr = kvmalloc_array(npages,
-						 sizeof(*range->dma_addr),
-						 GFP_KERNEL);
-		if (!range->dma_addr) {
+		svm_pages->dma_addr =
+			kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL);
+		if (!svm_pages->dma_addr) {
 			err = -ENOMEM;
 			goto err_free;
 		}
···
 	for (i = 0, j = 0; i < npages; ++j) {
 		struct page *page = hmm_pfn_to_page(pfns[i]);
 
-		order = hmm_pfn_to_map_order(pfns[i]);
+		order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
 		if (is_device_private_page(page) ||
 		    is_device_coherent_page(page)) {
 			if (zdd != page->zone_device_data && i > 0) {
···
 				goto err_unmap;
 			}
 		}
-		range->dma_addr[j] =
+		svm_pages->dma_addr[j] =
 			dpagemap->ops->device_map(dpagemap,
 						  gpusvm->drm->dev,
 						  page, order,
-						  DMA_BIDIRECTIONAL);
+						  dma_dir);
 		if (dma_mapping_error(gpusvm->drm->dev,
-				      range->dma_addr[j].addr)) {
+				      svm_pages->dma_addr[j].addr)) {
 			err = -EFAULT;
 			goto err_unmap;
 		}
···
 			addr = dma_map_page(gpusvm->drm->dev,
 					    page, 0,
 					    PAGE_SIZE << order,
-					    DMA_BIDIRECTIONAL);
+					    dma_dir);
 			if (dma_mapping_error(gpusvm->drm->dev, addr)) {
 				err = -EFAULT;
 				goto err_unmap;
 			}
 
-			range->dma_addr[j] = drm_pagemap_addr_encode
+			svm_pages->dma_addr[j] = drm_pagemap_addr_encode
 				(addr, DRM_INTERCONNECT_SYSTEM, order,
-				 DMA_BIDIRECTIONAL);
+				 dma_dir);
 		}
 		i += 1 << order;
 		num_dma_mapped = i;
···
 
 	if (pagemap) {
 		flags.has_devmem_pages = true;
-		range->dpagemap = dpagemap;
+		svm_pages->dpagemap = dpagemap;
 	}
 
 	/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
-	WRITE_ONCE(range->flags.__flags, flags.__flags);
+	WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
 
 	drm_gpusvm_notifier_unlock(gpusvm);
 	kvfree(pfns);
 set_seqno:
-	range->notifier_seq = hmm_range.notifier_seq;
+	svm_pages->notifier_seq = hmm_range.notifier_seq;
 
 	return 0;
 
 err_unmap:
-	__drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped);
+	__drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped);
 	drm_gpusvm_notifier_unlock(gpusvm);
 err_free:
 	kvfree(pfns);
···
 		goto retry;
 	return err;
 }
+EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages);
+
+/**
+ * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @range: Pointer to the GPU SVM range structure
+ * @ctx: GPU SVM context
+ *
+ * This function gets pages for a GPU SVM range and ensures they are mapped for
+ * DMA access.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
+			       struct drm_gpusvm_range *range,
+			       const struct drm_gpusvm_ctx *ctx)
+{
+	return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm,
+				    &range->notifier->notifier,
+				    drm_gpusvm_range_start(range),
+				    drm_gpusvm_range_end(range), ctx);
+}
 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);
 
 /**
+ * drm_gpusvm_unmap_pages() - Unmap GPU svm pages
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ * @npages: Number of pages in @svm_pages.
+ * @ctx: GPU SVM context
+ *
+ * This function unmaps pages associated with a GPU SVM pages struct. If
+ * @in_notifier is set, it is assumed that gpusvm->notifier_lock is held in
+ * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode.
+ * Must be called in the invalidate() callback of the corresponding notifier for
+ * IOMMU security model.
+ */
+void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
+			    struct drm_gpusvm_pages *svm_pages,
+			    unsigned long npages,
+			    const struct drm_gpusvm_ctx *ctx)
+{
+	if (ctx->in_notifier)
+		lockdep_assert_held_write(&gpusvm->notifier_lock);
+	else
+		drm_gpusvm_notifier_lock(gpusvm);
+
+	__drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
+
+	if (!ctx->in_notifier)
+		drm_gpusvm_notifier_unlock(gpusvm);
+}
+EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages);
+
+/**
  * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
- * drm_gpusvm_range_evict() - Evict GPU SVM range
  * @gpusvm: Pointer to the GPU SVM structure
  * @range: Pointer to the GPU SVM range structure
  * @ctx: GPU SVM context
···
 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
 					       drm_gpusvm_range_end(range));
 
-	if (ctx->in_notifier)
-		lockdep_assert_held_write(&gpusvm->notifier_lock);
-	else
-		drm_gpusvm_notifier_lock(gpusvm);
-
-	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
-
-	if (!ctx->in_notifier)
-		drm_gpusvm_notifier_unlock(gpusvm);
+	return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx);
 }
 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);
···
 {
 	lockdep_assert_held_write(&range->gpusvm->notifier_lock);
 
-	range->flags.unmapped = true;
+	range->pages.flags.unmapped = true;
 	if (drm_gpusvm_range_start(range) < mmu_range->start ||
 	    drm_gpusvm_range_end(range) > mmu_range->end)
-		range->flags.partial_unmap = true;
+		range->pages.flags.partial_unmap = true;
 }
 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);
+1 -1
drivers/gpu/drm/xe/Kconfig
···
 	select DRM_TTM
 	select DRM_TTM_HELPER
 	select DRM_EXEC
+	select DRM_GPUSVM if !UML && DEVICE_PRIVATE
 	select DRM_GPUVM
 	select DRM_SCHED
 	select MMU_NOTIFIER
 	select WANT_DEV_COREDUMP
 	select AUXILIARY_BUS
-	select HMM_MIRROR
 	select REGMAP if I2C
 	help
 	  Driver for Intel Xe2 series GPUs and later. Experimental support
+1
drivers/gpu/drm/xe/Kconfig.debug
···
 
 config DRM_XE_USERPTR_INVAL_INJECT
 	bool "Inject userptr invalidation -EINVAL errors"
+	depends on DRM_GPUSVM
 	default n
 	help
 	  Choose this option when debugging error paths that
+4 -1
drivers/gpu/drm/xe/Makefile
···
 	xe_hw_error.o \
 	xe_hw_fence.o \
 	xe_irq.o \
+	xe_late_bind_fw.o \
 	xe_lrc.o \
 	xe_migrate.o \
 	xe_mmio.o \
···
 	xe_tuning.o \
 	xe_uc.o \
 	xe_uc_fw.o \
+	xe_validation.o \
 	xe_vm.o \
 	xe_vm_madvise.o \
 	xe_vram.o \
···
 	xe_wopcm.o
 
 xe-$(CONFIG_I2C) += xe_i2c.o
-xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o
 xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o
+xe-$(CONFIG_DRM_GPUSVM) += xe_userptr.o
 
 # graphics hardware monitoring (HWMON) support
 xe-$(CONFIG_HWMON) += xe_hwmon.o
···
 	xe_gt_stats.o \
 	xe_guc_debugfs.o \
 	xe_huc_debugfs.o \
+	xe_tile_debugfs.o \
 	xe_uc_debugfs.o
 
 xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o
+3
drivers/gpu/drm/xe/abi/guc_actions_abi.h
···
 	XE_GUC_ACTION_ENTER_S_STATE = 0x501,
 	XE_GUC_ACTION_EXIT_S_STATE = 0x502,
 	XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+	XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509,
 	XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
 	XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
 	XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
···
 	XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
 	XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
 	XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+	XE_GUC_ACTION_TEST_G2G_SEND = 0xF001,
+	XE_GUC_ACTION_TEST_G2G_RECV = 0xF002,
 	XE_GUC_ACTION_LIMIT
 };
+5
drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
···
 	u8 reserved_mode_definition[4096];
 } __packed;
 
+enum slpc_power_profile {
+	SLPC_POWER_PROFILE_BASE = 0x0,
+	SLPC_POWER_PROFILE_POWER_SAVING = 0x1
+};
+
 /**
  * DOC: SLPC H2G MESSAGE FORMAT
  *
+25
drivers/gpu/drm/xe/abi/guc_klvs_abi.h
···
  * | 0 | 31:16 | **KEY** - KLV key identifier        |
  * |   |       |   - `GuC Self Config KLVs`_         |
  * |   |       |   - `GuC Opt In Feature KLVs`_      |
+ * |   |       |   - `GuC Scheduling Policies KLVs`_ |
  * |   |       |   - `GuC VGT Policy KLVs`_          |
  * |   |       |   - `GuC VF Configuration KLVs`_    |
  * |   |       |                                     |
···
 
 #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY	0x4003
 #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN	0u
+
+/**
+ * DOC: GuC Scheduling Policies KLVs
+ *
+ * `GuC KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV.
+ *
+ * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001
+ *      Some platforms do not allow concurrent execution of RCS and CCS
+ *      workloads from different address spaces. By default, the GuC prioritizes
+ *      RCS submissions over CCS ones, which can lead to CCS workloads being
+ *      significantly (or completely) starved of execution time. This KLV allows
+ *      the driver to specify a quantum (in ms) and a ratio (percentage value
+ *      between 0 and 100), and the GuC will prioritize the CCS for that
+ *      percentage of each quantum. For example, specifying 100ms and 30% will
+ *      make the GuC prioritize the CCS for 30ms of every 100ms.
+ *      Note that this does not necessarly mean that RCS and CCS engines will
+ *      only be active for their percentage of the quantum, as the restriction
+ *      only kicks in if both classes are fully busy with non-compatible address
+ *      spaces; i.e., if one engine is idle or running the same address space,
+ *      a pending job on the other engine will still be submitted to the HW no
+ *      matter what the ratio is
+ */
+#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY	0x1001
+#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN	2u
 
 /**
  * DOC: GuC VGT Policy KLVs
+4 -11
drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
···
 
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_res_cursor.h"
+#include "xe_validation.h"
 
 struct xe_bo;
 
···
 						u32 start, u32 end)
 {
 	struct xe_bo *bo;
-	int err;
+	int err = 0;
 	u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN;
 
 	if (start < SZ_4K)
···
 		start = ALIGN(start, align);
 	}
 
-	bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
-				       NULL, size, start, end,
-				       ttm_bo_type_kernel, flags, 0);
+	bo = xe_bo_create_pin_range_novm(xe, xe_device_get_root_tile(xe),
+					 size, start, end, ttm_bo_type_kernel, flags);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		bo = NULL;
 		return err;
-	}
-	err = xe_bo_pin(bo);
-	xe_bo_unlock_vm_held(bo);
-
-	if (err) {
-		xe_bo_put(fb->bo);
-		bo = NULL;
 	}
 
 	fb->bo = bo;
+9 -9
drivers/gpu/drm/xe/display/intel_fbdev_fb.c
···
 	obj = ERR_PTR(-ENODEV);
 
 	if (!IS_DGFX(xe) && !XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) {
-		obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
-					   NULL, size,
-					   ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
-					   XE_BO_FLAG_STOLEN |
-					   XE_BO_FLAG_GGTT);
+		obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe),
+						size,
+						ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+						XE_BO_FLAG_STOLEN |
+						XE_BO_FLAG_GGTT, false);
 		if (!IS_ERR(obj))
 			drm_info(&xe->drm, "Allocated fbdev into stolen\n");
 		else
···
 	}
 
 	if (IS_ERR(obj)) {
-		obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size,
-					   ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
-					   XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
-					   XE_BO_FLAG_GGTT);
+		obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size,
+						ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+						XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+						XE_BO_FLAG_GGTT, false);
 	}
 
 	if (IS_ERR(obj)) {
+5 -5
drivers/gpu/drm/xe/display/xe_dsb_buffer.c
···
 		return false;
 
 	/* Set scanout flag for WC mapping */
-	obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
-				   NULL, PAGE_ALIGN(size),
-				   ttm_bo_type_kernel,
-				   XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
-				   XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
+	obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe),
+					PAGE_ALIGN(size),
+					ttm_bo_type_kernel,
+					XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+					XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT, false);
 	if (IS_ERR(obj)) {
 		kfree(vma);
 		return false;
+39 -32
drivers/gpu/drm/xe/display/xe_fb_pin.c
···
 			    XE_PAGE_SIZE);
 
 	if (IS_DGFX(xe))
-		dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
-						      dpt_size, ~0ull,
-						      ttm_bo_type_kernel,
-						      XE_BO_FLAG_VRAM0 |
-						      XE_BO_FLAG_GGTT |
-						      XE_BO_FLAG_PAGETABLE,
-						      alignment);
+		dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+						   dpt_size, ~0ull,
+						   ttm_bo_type_kernel,
+						   XE_BO_FLAG_VRAM0 |
+						   XE_BO_FLAG_GGTT |
+						   XE_BO_FLAG_PAGETABLE,
+						   alignment, false);
 	else
-		dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
-						      dpt_size, ~0ull,
-						      ttm_bo_type_kernel,
-						      XE_BO_FLAG_STOLEN |
-						      XE_BO_FLAG_GGTT |
-						      XE_BO_FLAG_PAGETABLE,
-						      alignment);
+		dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+						   dpt_size, ~0ull,
+						   ttm_bo_type_kernel,
+						   XE_BO_FLAG_STOLEN |
+						   XE_BO_FLAG_GGTT |
+						   XE_BO_FLAG_PAGETABLE,
+						   alignment, false);
 	if (IS_ERR(dpt))
-		dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
-						      dpt_size, ~0ull,
-						      ttm_bo_type_kernel,
-						      XE_BO_FLAG_SYSTEM |
-						      XE_BO_FLAG_GGTT |
-						      XE_BO_FLAG_PAGETABLE,
-						      alignment);
+		dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+						   dpt_size, ~0ull,
+						   ttm_bo_type_kernel,
+						   XE_BO_FLAG_SYSTEM |
+						   XE_BO_FLAG_GGTT |
+						   XE_BO_FLAG_PAGETABLE,
+						   alignment, false);
 	if (IS_ERR(dpt))
 		return PTR_ERR(dpt);
 
···
 	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
 	struct drm_gem_object *obj = intel_fb_bo(&fb->base);
 	struct xe_bo *bo = gem_to_xe_bo(obj);
-	int ret;
+	struct xe_validation_ctx ctx;
+	struct drm_exec exec;
+	int ret = 0;
 
 	if (!vma)
 		return ERR_PTR(-ENODEV);
···
 	 * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the
 	 * assumptions are incorrect for framebuffers
 	 */
-	ret = ttm_bo_reserve(&bo->ttm, false, false, NULL);
-	if (ret)
-		goto err;
+	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+			    ret) {
+		ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+		drm_exec_retry_on_contention(&exec);
+		if (ret)
+			break;
 
-	if (IS_DGFX(xe))
-		ret = xe_bo_migrate(bo, XE_PL_VRAM0);
-	else
-		ret = xe_bo_validate(bo, NULL, true);
-	if (!ret)
-		ttm_bo_pin(&bo->ttm);
-	ttm_bo_unreserve(&bo->ttm);
+		if (IS_DGFX(xe))
+			ret = xe_bo_migrate(bo, XE_PL_VRAM0, NULL, &exec);
+		else
+			ret = xe_bo_validate(bo, NULL, true, &exec);
+		drm_exec_retry_on_contention(&exec);
+		xe_validation_retry_on_oom(&ctx, &ret);
+		if (!ret)
+			ttm_bo_pin(&bo->ttm);
+	}
 	if (ret)
 		goto err;
 
+4 -4
drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
··· 72 72 int ret = 0; 73 73 74 74 /* allocate object of two page for HDCP command memory and store it */ 75 - bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2, 76 - ttm_bo_type_kernel, 77 - XE_BO_FLAG_SYSTEM | 78 - XE_BO_FLAG_GGTT); 75 + bo = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), PAGE_SIZE * 2, 76 + ttm_bo_type_kernel, 77 + XE_BO_FLAG_SYSTEM | 78 + XE_BO_FLAG_GGTT, false); 79 79 80 80 if (IS_ERR(bo)) { 81 81 drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n");
+2 -2
drivers/gpu/drm/xe/display/xe_plane_initial.c
··· 140 140 page_size); 141 141 size -= base; 142 142 143 - bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base, 144 - ttm_bo_type_kernel, flags); 143 + bo = xe_bo_create_pin_map_at_novm(xe, tile0, size, phys_base, 144 + ttm_bo_type_kernel, flags, 0, false); 145 145 if (IS_ERR(bo)) { 146 146 drm_dbg(&xe->drm, 147 147 "Failed to create bo phys_base=%pa size %u with flags %x: %li\n",
+1
drivers/gpu/drm/xe/regs/xe_gt_regs.h
··· 522 522 523 523 #define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED) 524 524 #define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12) 525 + #define EUSTALL_PERF_SAMPLING_DISABLE REG_BIT(5) 525 526 526 527 #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) 527 528 #define DISABLE_D8_D16_COASLESCE REG_BIT(30)
-3
drivers/gpu/drm/xe/regs/xe_lrc_layout.h
··· 40 40 #define INDIRECT_CTX_RING_START_UDW (0x08 + 1) 41 41 #define INDIRECT_CTX_RING_CTL (0x0a + 1) 42 42 43 - #define CTX_INDIRECT_CTX_OFFSET_MASK REG_GENMASK(15, 6) 44 - #define CTX_INDIRECT_CTX_OFFSET_DEFAULT REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd) 45 - 46 43 #endif
+19 -17
drivers/gpu/drm/xe/tests/xe_bo.c
··· 23 23 24 24 static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, 25 25 bool clear, u64 get_val, u64 assign_val, 26 - struct kunit *test) 26 + struct kunit *test, struct drm_exec *exec) 27 27 { 28 28 struct dma_fence *fence; 29 29 struct ttm_tt *ttm; ··· 35 35 u32 offset; 36 36 37 37 /* Move bo to VRAM if not already there. */ 38 - ret = xe_bo_validate(bo, NULL, false); 38 + ret = xe_bo_validate(bo, NULL, false, exec); 39 39 if (ret) { 40 40 KUNIT_FAIL(test, "Failed to validate bo.\n"); 41 41 return ret; ··· 60 60 } 61 61 62 62 /* Evict to system. CCS data should be copied. */ 63 - ret = xe_bo_evict(bo); 63 + ret = xe_bo_evict(bo, exec); 64 64 if (ret) { 65 65 KUNIT_FAIL(test, "Failed to evict bo.\n"); 66 66 return ret; ··· 132 132 133 133 /* TODO: Sanity check */ 134 134 unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); 135 + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; 135 136 136 137 if (IS_DGFX(xe)) 137 138 kunit_info(test, "Testing vram id %u\n", tile->id); 138 139 else 139 140 kunit_info(test, "Testing system memory\n"); 140 141 141 - bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, 142 - bo_flags); 142 + bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, 143 + bo_flags, exec); 143 144 if (IS_ERR(bo)) { 144 145 KUNIT_FAIL(test, "Failed to create bo.\n"); 145 146 return; ··· 150 149 151 150 kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); 152 151 ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, 153 - test); 152 + test, exec); 154 153 if (ret) 155 154 goto out_unlock; 156 155 157 156 kunit_info(test, "Verifying that CCS data survives migration.\n"); 158 157 ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL, 159 - 0xdeadbeefdeadbeefULL, test); 158 + 0xdeadbeefdeadbeefULL, test, exec); 160 159 if (ret) 161 160 goto out_unlock; 162 161 163 162 kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); 164 - ret = 
ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test); 163 + ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test, exec); 165 164 166 165 out_unlock: 167 166 xe_bo_unlock(bo); ··· 211 210 struct xe_bo *bo, *external; 212 211 unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); 213 212 struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); 213 + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; 214 214 struct xe_gt *__gt; 215 215 int err, i, id; 216 216 ··· 220 218 221 219 for (i = 0; i < 2; ++i) { 222 220 xe_vm_lock(vm, false); 223 - bo = xe_bo_create_user(xe, NULL, vm, 0x10000, 221 + bo = xe_bo_create_user(xe, vm, 0x10000, 224 222 DRM_XE_GEM_CPU_CACHING_WC, 225 - bo_flags); 223 + bo_flags, exec); 226 224 xe_vm_unlock(vm); 227 225 if (IS_ERR(bo)) { 228 226 KUNIT_FAIL(test, "bo create err=%pe\n", bo); 229 227 break; 230 228 } 231 229 232 - external = xe_bo_create_user(xe, NULL, NULL, 0x10000, 230 + external = xe_bo_create_user(xe, NULL, 0x10000, 233 231 DRM_XE_GEM_CPU_CACHING_WC, 234 - bo_flags); 232 + bo_flags, NULL); 235 233 if (IS_ERR(external)) { 236 234 KUNIT_FAIL(test, "external bo create err=%pe\n", external); 237 235 goto cleanup_bo; 238 236 } 239 237 240 238 xe_bo_lock(external, false); 241 - err = xe_bo_pin_external(external, false); 239 + err = xe_bo_pin_external(external, false, exec); 242 240 xe_bo_unlock(external); 243 241 if (err) { 244 242 KUNIT_FAIL(test, "external bo pin err=%pe\n", ··· 296 294 if (i) { 297 295 down_read(&vm->lock); 298 296 xe_vm_lock(vm, false); 299 - err = xe_bo_validate(bo, bo->vm, false); 297 + err = xe_bo_validate(bo, bo->vm, false, exec); 300 298 xe_vm_unlock(vm); 301 299 up_read(&vm->lock); 302 300 if (err) { ··· 305 303 goto cleanup_all; 306 304 } 307 305 xe_bo_lock(external, false); 308 - err = xe_bo_validate(external, NULL, false); 306 + err = xe_bo_validate(external, NULL, false, exec); 309 307 xe_bo_unlock(external); 310 308 if (err) { 311 309 KUNIT_FAIL(test, "external bo valid err=%pe\n", ··· 497 
495 INIT_LIST_HEAD(&link->link); 498 496 499 497 /* We can create bos using WC caching here. But it is slower. */ 500 - bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE, 498 + bo = xe_bo_create_user(xe, NULL, XE_BO_SHRINK_SIZE, 501 499 DRM_XE_GEM_CPU_CACHING_WB, 502 - XE_BO_FLAG_SYSTEM); 500 + XE_BO_FLAG_SYSTEM, NULL); 503 501 if (IS_ERR(bo)) { 504 502 if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) && 505 503 bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
+9 -7
drivers/gpu/drm/xe/tests/xe_dma_buf.c
··· 27 27 } 28 28 29 29 static void check_residency(struct kunit *test, struct xe_bo *exported, 30 - struct xe_bo *imported, struct dma_buf *dmabuf) 30 + struct xe_bo *imported, struct dma_buf *dmabuf, 31 + struct drm_exec *exec) 31 32 { 32 33 struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); 33 34 u32 mem_type; ··· 63 62 * importer is on a different device. If they're on the same device, 64 63 * the exporter and the importer should be the same bo. 65 64 */ 66 - ret = xe_bo_evict(exported); 65 + ret = xe_bo_evict(exported, exec); 67 66 if (ret) { 68 67 if (ret != -EINTR && ret != -ERESTARTSYS) 69 68 KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", ··· 78 77 } 79 78 80 79 /* Re-validate the importer. This should move also exporter in. */ 81 - ret = xe_bo_validate(imported, NULL, false); 80 + ret = xe_bo_validate(imported, NULL, false, exec); 82 81 if (ret) { 83 82 if (ret != -EINTR && ret != -ERESTARTSYS) 84 83 KUNIT_FAIL(test, "Validating importer failed with err=%d.\n", ··· 114 113 size = SZ_64K; 115 114 116 115 kunit_info(test, "running %s\n", __func__); 117 - bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, 118 - params->mem_mask); 116 + bo = xe_bo_create_user(xe, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, 117 + params->mem_mask, NULL); 119 118 if (IS_ERR(bo)) { 120 119 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", 121 120 PTR_ERR(bo)); ··· 143 142 KUNIT_FAIL(test, 144 143 "xe_gem_prime_import() succeeded when it shouldn't have\n"); 145 144 } else { 145 + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; 146 146 int err; 147 147 148 148 /* Is everything where we expect it to be? */ 149 149 xe_bo_lock(import_bo, false); 150 - err = xe_bo_validate(import_bo, NULL, false); 150 + err = xe_bo_validate(import_bo, NULL, false, exec); 151 151 152 152 /* Pinning in VRAM is not allowed. 
*/ 153 153 if (!is_dynamic(params) && ··· 161 159 err == -ERESTARTSYS); 162 160 163 161 if (!err) 164 - check_residency(test, bo, import_bo, dmabuf); 162 + check_residency(test, bo, import_bo, dmabuf, exec); 165 163 xe_bo_unlock(import_bo); 166 164 } 167 165 drm_gem_object_put(import);
+776
drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 AND MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include <linux/delay.h> 7 + 8 + #include <kunit/test.h> 9 + #include <kunit/visibility.h> 10 + 11 + #include "tests/xe_kunit_helpers.h" 12 + #include "tests/xe_pci_test.h" 13 + #include "tests/xe_test.h" 14 + 15 + #include "xe_bo.h" 16 + #include "xe_device.h" 17 + #include "xe_pm.h" 18 + 19 + /* 20 + * There are different ways to allocate the G2G buffers. The plan for this test 21 + * is to make sure that all the possible options work. The particular option 22 + * chosen by the driver may vary from one platform to another, it may also change 23 + * with time. So to ensure consistency of testing, the relevant driver code is 24 + * replicated here to guarantee it won't change without the test being updated 25 + * to keep testing the other options. 26 + * 27 + * In order to test the actual code being used by the driver, there is also the 28 + * 'default' scheme. That will use the official driver routines to test whatever 29 + * method the driver is using on the current platform at the current time. 30 + */ 31 + enum { 32 + /* Driver defined allocation scheme */ 33 + G2G_CTB_TYPE_DEFAULT, 34 + /* Single buffer in host memory */ 35 + G2G_CTB_TYPE_HOST, 36 + /* Single buffer in a specific tile, loops across all tiles */ 37 + G2G_CTB_TYPE_TILE, 38 + }; 39 + 40 + /* 41 + * Payload is opaque to GuC. So KMD can define any structure or size it wants. 
42 + */ 43 + struct g2g_test_payload { 44 + u32 tx_dev; 45 + u32 tx_tile; 46 + u32 rx_dev; 47 + u32 rx_tile; 48 + u32 seqno; 49 + }; 50 + 51 + static void g2g_test_send(struct kunit *test, struct xe_guc *guc, 52 + u32 far_tile, u32 far_dev, 53 + struct g2g_test_payload *payload) 54 + { 55 + struct xe_device *xe = guc_to_xe(guc); 56 + struct xe_gt *gt = guc_to_gt(guc); 57 + u32 *action, total; 58 + size_t payload_len; 59 + int ret; 60 + 61 + static_assert(IS_ALIGNED(sizeof(*payload), sizeof(u32))); 62 + payload_len = sizeof(*payload) / sizeof(u32); 63 + 64 + total = 4 + payload_len; 65 + action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL); 66 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action); 67 + 68 + action[0] = XE_GUC_ACTION_TEST_G2G_SEND; 69 + action[1] = far_tile; 70 + action[2] = far_dev; 71 + action[3] = payload_len; 72 + memcpy(action + 4, payload, payload_len * sizeof(u32)); 73 + 74 + atomic_inc(&xe->g2g_test_count); 75 + 76 + /* 77 + * Should specify the expected response notification here. Problem is that 78 + * the response will be coming from a different GuC. By the end, it should 79 + * all add up as long as an equal number of messages are sent from each GuC 80 + * and to each GuC. However, in the middle, negative reservation space errors 81 + * and the like can occur. Rather than add intrusive changes to the CT layer 82 + * it is simpler to just not bother counting it at all. The system should be 83 + * idle when running the selftest, and the selftest's notification total size 84 + * is well within the G2H allocation size. So there should be no issues with 85 + * needing to block for space, which is all the tracking code is really for. 
86 + */ 87 + ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0); 88 + kunit_kfree(test, action); 89 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret, 90 + gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev); 91 + } 92 + 93 + /* 94 + * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously 95 + * from the G2H notification handler. Need that to actually complete rather than 96 + * thread-abort in order to keep the rest of the driver alive! 97 + */ 98 + int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len) 99 + { 100 + struct xe_device *xe = guc_to_xe(guc); 101 + struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL; 102 + u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len; 103 + struct g2g_test_payload *payload; 104 + size_t payload_len; 105 + int ret = 0, i; 106 + 107 + payload_len = sizeof(*payload) / sizeof(u32); 108 + 109 + if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) { 110 + xe_gt_err(rx_gt, "G2G test notification invalid length %u", len); 111 + ret = -EPROTO; 112 + goto done; 113 + } 114 + 115 + tx_tile = msg[0]; 116 + tx_dev = msg[1]; 117 + got_len = msg[2]; 118 + payload = (struct g2g_test_payload *)(msg + 3); 119 + 120 + rx_tile = gt_to_tile(rx_gt)->id; 121 + rx_dev = G2G_DEV(rx_gt); 122 + 123 + if (got_len != payload_len) { 124 + xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len); 125 + ret = -EPROTO; 126 + goto done; 127 + } 128 + 129 + if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile || 130 + payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) { 131 + xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! 
[%d]\n", 132 + payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev, 133 + tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno); 134 + ret = -EPROTO; 135 + goto done; 136 + } 137 + 138 + if (!xe->g2g_test_array) { 139 + xe_gt_err(rx_gt, "G2G: Missing test array!\n"); 140 + ret = -ENOMEM; 141 + goto done; 142 + } 143 + 144 + for_each_gt(test_gt, xe, i) { 145 + if (gt_to_tile(test_gt)->id != tx_tile) 146 + continue; 147 + 148 + if (G2G_DEV(test_gt) != tx_dev) 149 + continue; 150 + 151 + if (tx_gt) { 152 + xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n", 153 + tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev); 154 + ret = -EINVAL; 155 + goto done; 156 + } 157 + 158 + tx_gt = test_gt; 159 + } 160 + if (!tx_gt) { 161 + xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev); 162 + ret = -EINVAL; 163 + goto done; 164 + } 165 + 166 + idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id; 167 + 168 + if (xe->g2g_test_array[idx] != payload->seqno - 1) { 169 + xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n", 170 + xe->g2g_test_array[idx], payload->seqno - 1, 171 + tx_tile, tx_dev, rx_tile, rx_dev); 172 + ret = -EINVAL; 173 + goto done; 174 + } 175 + 176 + xe->g2g_test_array[idx] = payload->seqno; 177 + 178 + done: 179 + atomic_dec(&xe->g2g_test_count); 180 + return ret; 181 + } 182 + 183 + /* 184 + * Send the given seqno from all GuCs to all other GuCs in tile/GT order 185 + */ 186 + static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno) 187 + { 188 + struct xe_gt *near_gt, *far_gt; 189 + int i, j; 190 + 191 + for_each_gt(near_gt, xe, i) { 192 + u32 near_tile = gt_to_tile(near_gt)->id; 193 + u32 near_dev = G2G_DEV(near_gt); 194 + 195 + for_each_gt(far_gt, xe, j) { 196 + u32 far_tile = gt_to_tile(far_gt)->id; 197 + u32 far_dev = G2G_DEV(far_gt); 198 + struct g2g_test_payload payload; 199 + 200 + if (far_gt->info.id == near_gt->info.id) 201 + continue; 202 + 203 + 
payload.tx_dev = near_dev; 204 + payload.tx_tile = near_tile; 205 + payload.rx_dev = far_dev; 206 + payload.rx_tile = far_tile; 207 + payload.seqno = seqno; 208 + g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload); 209 + } 210 + } 211 + } 212 + 213 + #define WAIT_TIME_MS 100 214 + #define WAIT_COUNT (1000 / WAIT_TIME_MS) 215 + 216 + static void g2g_wait_for_complete(void *_xe) 217 + { 218 + struct xe_device *xe = (struct xe_device *)_xe; 219 + struct kunit *test = kunit_get_current_test(); 220 + int wait = 0; 221 + 222 + /* Wait for all G2H messages to be received */ 223 + while (atomic_read(&xe->g2g_test_count)) { 224 + if (++wait > WAIT_COUNT) 225 + break; 226 + 227 + msleep(WAIT_TIME_MS); 228 + } 229 + 230 + KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count), 231 + "Timed out waiting for notifications\n"); 232 + kunit_info(test, "Got all notifications back\n"); 233 + } 234 + 235 + #undef WAIT_TIME_MS 236 + #undef WAIT_COUNT 237 + 238 + static void g2g_clean_array(void *_xe) 239 + { 240 + struct xe_device *xe = (struct xe_device *)_xe; 241 + 242 + xe->g2g_test_array = NULL; 243 + } 244 + 245 + #define NUM_LOOPS 16 246 + 247 + static void g2g_run_test(struct kunit *test, struct xe_device *xe) 248 + { 249 + u32 seqno, max_array; 250 + int ret, i, j; 251 + 252 + max_array = xe->info.gt_count * xe->info.gt_count; 253 + xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL); 254 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array); 255 + 256 + ret = kunit_add_action_or_reset(test, g2g_clean_array, xe); 257 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); 258 + 259 + /* 260 + * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order. 261 + * Tile/GT order doesn't really mean anything to the hardware but it is going 262 + * to be a fixed sequence every time. 263 + * 264 + * Verify that each one comes back having taken the correct route. 
265 + */ 266 + ret = kunit_add_action(test, g2g_wait_for_complete, xe); 267 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); 268 + for (seqno = 1; seqno < NUM_LOOPS; seqno++) 269 + g2g_test_in_order(test, xe, seqno); 270 + seqno--; 271 + 272 + kunit_release_action(test, &g2g_wait_for_complete, xe); 273 + 274 + /* Check for the final seqno in each slot */ 275 + for (i = 0; i < xe->info.gt_count; i++) { 276 + for (j = 0; j < xe->info.gt_count; j++) { 277 + u32 idx = (j * xe->info.gt_count) + i; 278 + 279 + if (i == j) 280 + KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx], 281 + "identity seqno modified: %d for %dx%d!\n", 282 + xe->g2g_test_array[idx], i, j); 283 + else 284 + KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx], 285 + "invalid seqno: %d vs %d for %dx%d!\n", 286 + xe->g2g_test_array[idx], seqno, i, j); 287 + } 288 + } 289 + 290 + kunit_kfree(test, xe->g2g_test_array); 291 + kunit_release_action(test, &g2g_clean_array, xe); 292 + 293 + kunit_info(test, "Test passed\n"); 294 + } 295 + 296 + #undef NUM_LOOPS 297 + 298 + static void g2g_ct_stop(struct xe_guc *guc) 299 + { 300 + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); 301 + struct xe_device *xe = gt_to_xe(gt); 302 + int i, t; 303 + 304 + for_each_gt(remote_gt, xe, i) { 305 + u32 tile, dev; 306 + 307 + if (remote_gt->info.id == gt->info.id) 308 + continue; 309 + 310 + tile = gt_to_tile(remote_gt)->id; 311 + dev = G2G_DEV(remote_gt); 312 + 313 + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) 314 + guc_g2g_deregister(guc, tile, dev, t); 315 + } 316 + } 317 + 318 + /* Size of a single allocation that contains all G2G CTBs across all GTs */ 319 + static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe) 320 + { 321 + unsigned int count = xe->info.gt_count; 322 + u32 num_channels = (count * (count - 1)) / 2; 323 + 324 + kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n", 325 + count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, 
G2G_DESC_AREA_SIZE, 326 + num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE, 327 + num_channels * XE_G2G_TYPE_LIMIT); 328 + 329 + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; 330 + } 331 + 332 + /* 333 + * Use the driver's regular CTB allocation scheme. 334 + */ 335 + static void g2g_alloc_default(struct kunit *test, struct xe_device *xe) 336 + { 337 + struct xe_gt *gt; 338 + int i; 339 + 340 + kunit_info(test, "Default [tiles = %d, GTs = %d]\n", 341 + xe->info.tile_count, xe->info.gt_count); 342 + 343 + for_each_gt(gt, xe, i) { 344 + struct xe_guc *guc = &gt->uc.guc; 345 + int ret; 346 + 347 + ret = guc_g2g_alloc(guc); 348 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret)); 349 + continue; 350 + } 351 + } 352 + 353 + static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo) 354 + { 355 + struct xe_gt *root_gt, *gt; 356 + int i; 357 + 358 + root_gt = xe_device_get_gt(xe, 0); 359 + root_gt->uc.guc.g2g.bo = bo; 360 + root_gt->uc.guc.g2g.owned = true; 361 + kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo); 362 + 363 + for_each_gt(gt, xe, i) { 364 + if (gt->info.id != 0) { 365 + gt->uc.guc.g2g.owned = false; 366 + gt->uc.guc.g2g.bo = xe_bo_get(bo); 367 + kunit_info(test, "[%d.%d] Pinned 0x%p\n", 368 + gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo); 369 + } 370 + 371 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo); 372 + } 373 + } 374 + 375 + /* 376 + * Allocate a single blob on the host and split between all G2G CTBs. 
377 + */ 378 + static void g2g_alloc_host(struct kunit *test, struct xe_device *xe) 379 + { 380 + struct xe_bo *bo; 381 + u32 g2g_size; 382 + 383 + kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count); 384 + 385 + g2g_size = g2g_ctb_size(test, xe); 386 + bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size, 387 + XE_BO_FLAG_SYSTEM | 388 + XE_BO_FLAG_GGTT | 389 + XE_BO_FLAG_GGTT_ALL | 390 + XE_BO_FLAG_GGTT_INVALIDATE); 391 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); 392 + kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo); 393 + 394 + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); 395 + 396 + g2g_distribute(test, xe, bo); 397 + } 398 + 399 + /* 400 + * Allocate a single blob on the given tile and split between all G2G CTBs. 401 + */ 402 + static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile) 403 + { 404 + struct xe_bo *bo; 405 + u32 g2g_size; 406 + 407 + KUNIT_ASSERT_TRUE(test, IS_DGFX(xe)); 408 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile); 409 + 410 + kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n", 411 + tile->id, xe->info.tile_count, xe->info.gt_count); 412 + 413 + g2g_size = g2g_ctb_size(test, xe); 414 + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size, 415 + XE_BO_FLAG_VRAM_IF_DGFX(tile) | 416 + XE_BO_FLAG_GGTT | 417 + XE_BO_FLAG_GGTT_ALL | 418 + XE_BO_FLAG_GGTT_INVALIDATE); 419 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); 420 + kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo); 421 + 422 + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); 423 + 424 + g2g_distribute(test, xe, bo); 425 + } 426 + 427 + static void g2g_free(struct kunit *test, struct xe_device *xe) 428 + { 429 + struct xe_gt *gt; 430 + struct xe_bo *bo; 431 + int i; 432 + 433 + for_each_gt(gt, xe, i) { 434 + bo = gt->uc.guc.g2g.bo; 435 + if (!bo) 436 + continue; 437 + 438 + if (gt->uc.guc.g2g.owned) { 439 + xe_managed_bo_unpin_map_no_vm(bo); 440 + kunit_info(test, "[%d.%d] Unmapped 
0x%p\n", 441 + gt_to_tile(gt)->id, gt->info.id, bo); 442 + } else { 443 + xe_bo_put(bo); 444 + kunit_info(test, "[%d.%d] Unpinned 0x%p\n", 445 + gt_to_tile(gt)->id, gt->info.id, bo); 446 + } 447 + 448 + gt->uc.guc.g2g.bo = NULL; 449 + } 450 + } 451 + 452 + static void g2g_stop(struct kunit *test, struct xe_device *xe) 453 + { 454 + struct xe_gt *gt; 455 + int i; 456 + 457 + for_each_gt(gt, xe, i) { 458 + struct xe_guc *guc = &gt->uc.guc; 459 + 460 + if (!guc->g2g.bo) 461 + continue; 462 + 463 + g2g_ct_stop(guc); 464 + } 465 + 466 + g2g_free(test, xe); 467 + } 468 + 469 + /* 470 + * Generate a unique id for each bi-directional CTB for each pair of 471 + * near and far tiles/devices. The id can then be used as an index into 472 + * a single allocation that is sub-divided into multiple CTBs. 473 + * 474 + * For example, with two devices per tile and two tiles, the table should 475 + * look like: 476 + * Far <tile>.<dev> 477 + * 0.0 0.1 1.0 1.1 478 + * N 0.0 --/-- 00/01 02/03 04/05 479 + * e 0.1 01/00 --/-- 06/07 08/09 480 + * a 1.0 03/02 07/06 --/-- 10/11 481 + * r 1.1 05/04 09/08 11/10 --/-- 482 + * 483 + * Where each entry is Rx/Tx channel id. 484 + * 485 + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would 486 + * be reading from channel #11 and writing to channel #10. Whereas, 487 + * GuC #2 talking to GuC #3 would be read on #10 and write to #11. 
488 + */ 489 + static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev, 490 + u32 type, u32 max_inst, bool have_dev) 491 + { 492 + u32 near = near_tile, far = far_tile; 493 + u32 idx = 0, x, y, direction; 494 + int i; 495 + 496 + if (have_dev) { 497 + near = (near << 1) | near_dev; 498 + far = (far << 1) | far_dev; 499 + } 500 + 501 + /* No need to send to one's self */ 502 + if (far == near) 503 + return -1; 504 + 505 + if (far > near) { 506 + /* Top right table half */ 507 + x = far; 508 + y = near; 509 + 510 + /* T/R is 'forwards' direction */ 511 + direction = type; 512 + } else { 513 + /* Bottom left table half */ 514 + x = near; 515 + y = far; 516 + 517 + /* B/L is 'backwards' direction */ 518 + direction = (1 - type); 519 + } 520 + 521 + /* Count the rows prior to the target */ 522 + for (i = y; i > 0; i--) 523 + idx += max_inst - i; 524 + 525 + /* Count this row up to the target */ 526 + idx += (x - 1 - y); 527 + 528 + /* Slots are in Rx/Tx pairs */ 529 + idx *= 2; 530 + 531 + /* Pick Rx/Tx direction */ 532 + idx += direction; 533 + 534 + return idx; 535 + } 536 + 537 + static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev) 538 + { 539 + struct xe_gt *gt = guc_to_gt(guc); 540 + struct xe_device *xe = gt_to_xe(gt); 541 + u32 near_tile = gt_to_tile(gt)->id; 542 + u32 near_dev = G2G_DEV(gt); 543 + u32 max = xe->info.gt_count; 544 + int idx; 545 + u32 base, desc, buf; 546 + 547 + if (!guc->g2g.bo) 548 + return -ENODEV; 549 + 550 + idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev); 551 + xe_assert(xe, idx >= 0); 552 + 553 + base = guc_bo_ggtt_addr(guc, guc->g2g.bo); 554 + desc = base + idx * G2G_DESC_SIZE; 555 + buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; 556 + 557 + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); 558 + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo)); 559 + 560 + return 
guc_action_register_g2g_buffer(guc, type, far_tile, far_dev, 561 + desc, buf, G2G_BUFFER_SIZE); 562 + } 563 + 564 + static void g2g_start(struct kunit *test, struct xe_guc *guc) 565 + { 566 + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); 567 + struct xe_device *xe = gt_to_xe(gt); 568 + unsigned int i; 569 + int t, ret; 570 + bool have_dev; 571 + 572 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo); 573 + 574 + /* GuC interface will need extending if more GT device types are ever created. */ 575 + KUNIT_ASSERT_TRUE(test, 576 + (gt->info.type == XE_GT_TYPE_MAIN) || 577 + (gt->info.type == XE_GT_TYPE_MEDIA)); 578 + 579 + /* Channel numbering depends on whether there are multiple GTs per tile */ 580 + have_dev = xe->info.gt_count > xe->info.tile_count; 581 + 582 + for_each_gt(remote_gt, xe, i) { 583 + u32 tile, dev; 584 + 585 + if (remote_gt->info.id == gt->info.id) 586 + continue; 587 + 588 + tile = gt_to_tile(remote_gt)->id; 589 + dev = G2G_DEV(remote_gt); 590 + 591 + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) { 592 + ret = g2g_register_flat(guc, tile, dev, t, have_dev); 593 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret)); 594 + } 595 + } 596 + } 597 + 598 + static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile) 599 + { 600 + struct xe_gt *gt; 601 + int i, found = 0; 602 + 603 + g2g_stop(test, xe); 604 + 605 + for_each_gt(gt, xe, i) { 606 + struct xe_guc *guc = &gt->uc.guc; 607 + 608 + KUNIT_ASSERT_NULL(test, guc->g2g.bo); 609 + } 610 + 611 + switch (ctb_type) { 612 + case G2G_CTB_TYPE_DEFAULT: 613 + g2g_alloc_default(test, xe); 614 + break; 615 + 616 + case G2G_CTB_TYPE_HOST: 617 + g2g_alloc_host(test, xe); 618 + break; 619 + 620 + case G2G_CTB_TYPE_TILE: 621 + g2g_alloc_tile(test, xe, tile); 622 + break; 623 + 624 + default: 625 + KUNIT_ASSERT_TRUE(test, false); 626 + } 627 + 628 + for_each_gt(gt, xe, i) { 629 + struct xe_guc *guc = &gt->uc.guc; 630 + 631 + if (!guc->g2g.bo) 632 + continue; 
633 + 634 + if (ctb_type == G2G_CTB_TYPE_DEFAULT) 635 + guc_g2g_start(guc); 636 + else 637 + g2g_start(test, guc); 638 + found++; 639 + } 640 + 641 + KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found); 642 + 643 + kunit_info(test, "Testing across %d GTs\n", found); 644 + } 645 + 646 + static void g2g_recreate_ctb(void *_xe) 647 + { 648 + struct xe_device *xe = (struct xe_device *)_xe; 649 + struct kunit *test = kunit_get_current_test(); 650 + 651 + g2g_stop(test, xe); 652 + 653 + if (xe_guc_g2g_wanted(xe)) 654 + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); 655 + } 656 + 657 + static void g2g_pm_runtime_put(void *_xe) 658 + { 659 + struct xe_device *xe = (struct xe_device *)_xe; 660 + 661 + xe_pm_runtime_put(xe); 662 + } 663 + 664 + static void g2g_pm_runtime_get(struct kunit *test) 665 + { 666 + struct xe_device *xe = test->priv; 667 + int ret; 668 + 669 + xe_pm_runtime_get(xe); 670 + ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe); 671 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n"); 672 + } 673 + 674 + static void g2g_check_skip(struct kunit *test) 675 + { 676 + struct xe_device *xe = test->priv; 677 + struct xe_gt *gt; 678 + int i; 679 + 680 + if (IS_SRIOV_VF(xe)) 681 + kunit_skip(test, "not supported from a VF"); 682 + 683 + if (xe->info.gt_count <= 1) 684 + kunit_skip(test, "not enough GTs"); 685 + 686 + for_each_gt(gt, xe, i) { 687 + struct xe_guc *guc = &gt->uc.guc; 688 + 689 + if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD) 690 + kunit_skip(test, 691 + "G2G test interface not available in production firmware builds\n"); 692 + } 693 + } 694 + 695 + /* 696 + * Simple test that does not try to recreate the CTBs. 697 + * Requires that the platform already enables G2G comms 698 + * but has no risk of leaving the system in a broken state 699 + * afterwards. 
700 + */ 701 + static void xe_live_guc_g2g_kunit_default(struct kunit *test) 702 + { 703 + struct xe_device *xe = test->priv; 704 + 705 + if (!xe_guc_g2g_wanted(xe)) 706 + kunit_skip(test, "G2G not enabled"); 707 + 708 + g2g_check_skip(test); 709 + 710 + g2g_pm_runtime_get(test); 711 + 712 + kunit_info(test, "Testing default CTBs\n"); 713 + g2g_run_test(test, xe); 714 + 715 + kunit_release_action(test, &g2g_pm_runtime_put, xe); 716 + } 717 + 718 + /* 719 + * More complex test that re-creates the CTBs in various location to 720 + * test access to each location from each GuC. Can be run even on 721 + * systems that do not enable G2G by default. On the other hand, 722 + * because it recreates the CTBs, if something goes wrong it could 723 + * leave the system with broken G2G comms. 724 + */ 725 + static void xe_live_guc_g2g_kunit_allmem(struct kunit *test) 726 + { 727 + struct xe_device *xe = test->priv; 728 + int ret; 729 + 730 + g2g_check_skip(test); 731 + 732 + g2g_pm_runtime_get(test); 733 + 734 + /* Make sure to leave the system as we found it */ 735 + ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe); 736 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n"); 737 + 738 + kunit_info(test, "Testing CTB type 'default'...\n"); 739 + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); 740 + g2g_run_test(test, xe); 741 + 742 + kunit_info(test, "Testing CTB type 'host'...\n"); 743 + g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL); 744 + g2g_run_test(test, xe); 745 + 746 + if (IS_DGFX(xe)) { 747 + struct xe_tile *tile; 748 + int id; 749 + 750 + for_each_tile(tile, xe, id) { 751 + kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id); 752 + 753 + g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile); 754 + g2g_run_test(test, xe); 755 + } 756 + } else { 757 + kunit_info(test, "Skipping local memory on integrated platform\n"); 758 + } 759 + 760 + kunit_release_action(test, g2g_recreate_ctb, xe); 761 + kunit_release_action(test, 
g2g_pm_runtime_put, xe); 762 + } 763 + 764 + static struct kunit_case xe_guc_g2g_tests[] = { 765 + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param), 766 + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param), 767 + {} 768 + }; 769 + 770 + VISIBLE_IF_KUNIT 771 + struct kunit_suite xe_guc_g2g_test_suite = { 772 + .name = "xe_guc_g2g", 773 + .test_cases = xe_guc_g2g_tests, 774 + .init = xe_kunit_helper_xe_device_live_test_init, 775 + }; 776 + EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite);
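The G2G tests lean on KUnit's deferred-action API: `g2g_pm_runtime_get()` takes a runtime-PM reference and immediately registers `g2g_pm_runtime_put` via `kunit_add_action_or_reset()`, so the reference is dropped even if a later assertion aborts the test, while the success path releases it eagerly with `kunit_release_action()`. A minimal userspace sketch of that register/release pattern follows; the `action_stack` type and its helpers are illustrative stand-ins, not the KUnit API:

```c
#include <assert.h>
#include <stddef.h>

typedef void (*action_fn)(void *ctx);

struct action {
	action_fn fn;
	void *ctx;
};

#define MAX_ACTIONS 8

/* LIFO stack of deferred cleanup actions, like KUnit's per-test action list. */
struct action_stack {
	struct action slots[MAX_ACTIONS];
	int count;
};

/* Register a cleanup to run at teardown; 0 on success, -1 when full. */
static int add_action(struct action_stack *s, action_fn fn, void *ctx)
{
	if (s->count == MAX_ACTIONS)
		return -1;
	s->slots[s->count].fn = fn;
	s->slots[s->count].ctx = ctx;
	s->count++;
	return 0;
}

/* Run one matching action now and unregister it (cf. kunit_release_action). */
static void release_action(struct action_stack *s, action_fn fn, void *ctx)
{
	for (int i = s->count - 1; i >= 0; i--) {
		if (s->slots[i].fn == fn && s->slots[i].ctx == ctx) {
			s->slots[i].fn(ctx);
			for (int j = i; j < s->count - 1; j++)
				s->slots[j] = s->slots[j + 1];
			s->count--;
			return;
		}
	}
}

/* Run all remaining actions in reverse registration order (test teardown). */
static void run_all_actions(struct action_stack *s)
{
	while (s->count) {
		s->count--;
		s->slots[s->count].fn(s->slots[s->count].ctx);
	}
}
```

The eager release on the success path keeps resources scoped tightly while still guaranteeing cleanup on every early-exit path, which is why the tests can `kunit_skip()` or assert freely after `g2g_pm_runtime_get()`.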
+2
drivers/gpu/drm/xe/tests/xe_live_test_mod.c
··· 10 10 extern struct kunit_suite xe_dma_buf_test_suite; 11 11 extern struct kunit_suite xe_migrate_test_suite; 12 12 extern struct kunit_suite xe_mocs_test_suite; 13 + extern struct kunit_suite xe_guc_g2g_test_suite; 13 14 14 15 kunit_test_suite(xe_bo_test_suite); 15 16 kunit_test_suite(xe_bo_shrink_test_suite); 16 17 kunit_test_suite(xe_dma_buf_test_suite); 17 18 kunit_test_suite(xe_migrate_test_suite); 18 19 kunit_test_suite(xe_mocs_test_suite); 20 + kunit_test_suite(xe_guc_g2g_test_suite); 19 21 20 22 MODULE_AUTHOR("Intel Corporation"); 21 23 MODULE_LICENSE("GPL");
+37 -29
drivers/gpu/drm/xe/tests/xe_migrate.c
··· 70 70 } } while (0) 71 71 72 72 static void test_copy(struct xe_migrate *m, struct xe_bo *bo, 73 - struct kunit *test, u32 region) 73 + struct kunit *test, u32 region, struct drm_exec *exec) 74 74 { 75 75 struct xe_device *xe = tile_to_xe(m->tile); 76 76 u64 retval, expected = 0; ··· 84 84 ttm_bo_type_kernel, 85 85 region | 86 86 XE_BO_FLAG_NEEDS_CPU_ACCESS | 87 - XE_BO_FLAG_PINNED); 87 + XE_BO_FLAG_PINNED, 88 + exec); 88 89 if (IS_ERR(remote)) { 89 90 KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", 90 91 str, remote); 91 92 return; 92 93 } 93 94 94 - err = xe_bo_validate(remote, NULL, false); 95 + err = xe_bo_validate(remote, NULL, false, exec); 95 96 if (err) { 96 97 KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n", 97 98 str, err); ··· 162 161 } 163 162 164 163 static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo, 165 - struct kunit *test) 164 + struct drm_exec *exec, struct kunit *test) 166 165 { 167 - test_copy(m, bo, test, XE_BO_FLAG_SYSTEM); 166 + test_copy(m, bo, test, XE_BO_FLAG_SYSTEM, exec); 168 167 } 169 168 170 169 static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, 171 - struct kunit *test) 170 + struct drm_exec *exec, struct kunit *test) 172 171 { 173 172 u32 region; 174 173 ··· 179 178 region = XE_BO_FLAG_VRAM1; 180 179 else 181 180 region = XE_BO_FLAG_VRAM0; 182 - test_copy(m, bo, test, region); 181 + test_copy(m, bo, test, region, exec); 183 182 } 184 183 185 - static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) 184 + static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test, 185 + struct drm_exec *exec) 186 186 { 187 187 struct xe_tile *tile = m->tile; 188 188 struct xe_device *xe = tile_to_xe(tile); ··· 204 202 205 203 big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M, 206 204 ttm_bo_type_kernel, 207 - XE_BO_FLAG_VRAM_IF_DGFX(tile)); 205 + XE_BO_FLAG_VRAM_IF_DGFX(tile), 206 + exec); 208 207 if (IS_ERR(big)) { 209 208 KUNIT_FAIL(test, 
"Failed to allocate bo: %li\n", PTR_ERR(big)); 210 209 goto vunmap; ··· 213 210 214 211 pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, 215 212 ttm_bo_type_kernel, 216 - XE_BO_FLAG_VRAM_IF_DGFX(tile)); 213 + XE_BO_FLAG_VRAM_IF_DGFX(tile), 214 + exec); 217 215 if (IS_ERR(pt)) { 218 216 KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", 219 217 PTR_ERR(pt)); ··· 224 220 tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 225 221 2 * SZ_4K, 226 222 ttm_bo_type_kernel, 227 - XE_BO_FLAG_VRAM_IF_DGFX(tile)); 223 + XE_BO_FLAG_VRAM_IF_DGFX(tile), 224 + exec); 228 225 if (IS_ERR(tiny)) { 229 226 KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", 230 227 PTR_ERR(tiny)); ··· 295 290 check(retval, expected, "Command clear small last value", test); 296 291 297 292 kunit_info(test, "Copying small buffer object to system\n"); 298 - test_copy_sysmem(m, tiny, test); 293 + test_copy_sysmem(m, tiny, exec, test); 299 294 if (xe->info.tile_count > 1) { 300 295 kunit_info(test, "Copying small buffer object to other vram\n"); 301 - test_copy_vram(m, tiny, test); 296 + test_copy_vram(m, tiny, exec, test); 302 297 } 303 298 304 299 /* Clear a big bo */ ··· 317 312 check(retval, expected, "Command clear big last value", test); 318 313 319 314 kunit_info(test, "Copying big buffer object to system\n"); 320 - test_copy_sysmem(m, big, test); 315 + test_copy_sysmem(m, big, exec, test); 321 316 if (xe->info.tile_count > 1) { 322 317 kunit_info(test, "Copying big buffer object to other vram\n"); 323 - test_copy_vram(m, big, test); 318 + test_copy_vram(m, big, exec, test); 324 319 } 325 320 326 321 out: ··· 348 343 349 344 for_each_tile(tile, xe, id) { 350 345 struct xe_migrate *m = tile->migrate; 346 + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; 351 347 352 348 kunit_info(test, "Testing tile id %d.\n", id); 353 349 xe_vm_lock(m->q->vm, false); 354 - xe_migrate_sanity_test(m, test); 350 + xe_migrate_sanity_test(m, test, exec); 355 351 xe_vm_unlock(m->q->vm); 356 352 } 357 353 
··· 496 490 497 491 static void test_migrate(struct xe_device *xe, struct xe_tile *tile, 498 492 struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo, 499 - struct kunit *test) 493 + struct drm_exec *exec, struct kunit *test) 500 494 { 501 495 struct dma_fence *fence; 502 496 u64 expected, retval; ··· 515 509 dma_fence_put(fence); 516 510 517 511 kunit_info(test, "Evict vram buffer object\n"); 518 - ret = xe_bo_evict(vram_bo); 512 + ret = xe_bo_evict(vram_bo, exec); 519 513 if (ret) { 520 514 KUNIT_FAIL(test, "Failed to evict bo.\n"); 521 515 return; ··· 544 538 dma_fence_put(fence); 545 539 546 540 kunit_info(test, "Restore vram buffer object\n"); 547 - ret = xe_bo_validate(vram_bo, NULL, false); 541 + ret = xe_bo_validate(vram_bo, NULL, false, exec); 548 542 if (ret) { 549 543 KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); 550 544 return; ··· 642 636 { 643 637 struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL; 644 638 unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); 639 + struct drm_exec *exec; 645 640 long ret; 646 641 647 - sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 642 + sys_bo = xe_bo_create_user(xe, NULL, SZ_4M, 648 643 DRM_XE_GEM_CPU_CACHING_WC, 649 644 XE_BO_FLAG_SYSTEM | 650 645 XE_BO_FLAG_NEEDS_CPU_ACCESS | 651 - XE_BO_FLAG_PINNED); 646 + XE_BO_FLAG_PINNED, NULL); 652 647 653 648 if (IS_ERR(sys_bo)) { 654 649 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", ··· 657 650 return; 658 651 } 659 652 653 + exec = XE_VALIDATION_OPT_OUT; 660 654 xe_bo_lock(sys_bo, false); 661 - ret = xe_bo_validate(sys_bo, NULL, false); 655 + ret = xe_bo_validate(sys_bo, NULL, false, exec); 662 656 if (ret) { 663 657 KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); 664 658 goto free_sysbo; ··· 672 664 } 673 665 xe_bo_unlock(sys_bo); 674 666 675 - ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 667 + ccs_bo = xe_bo_create_user(xe, NULL, SZ_4M, 676 668 DRM_XE_GEM_CPU_CACHING_WC, 677 669 bo_flags | 
XE_BO_FLAG_NEEDS_CPU_ACCESS | 678 - XE_BO_FLAG_PINNED); 670 + XE_BO_FLAG_PINNED, NULL); 679 671 680 672 if (IS_ERR(ccs_bo)) { 681 673 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", ··· 684 676 } 685 677 686 678 xe_bo_lock(ccs_bo, false); 687 - ret = xe_bo_validate(ccs_bo, NULL, false); 679 + ret = xe_bo_validate(ccs_bo, NULL, false, exec); 688 680 if (ret) { 689 681 KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); 690 682 goto free_ccsbo; ··· 697 689 } 698 690 xe_bo_unlock(ccs_bo); 699 691 700 - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 692 + vram_bo = xe_bo_create_user(xe, NULL, SZ_4M, 701 693 DRM_XE_GEM_CPU_CACHING_WC, 702 694 bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | 703 - XE_BO_FLAG_PINNED); 695 + XE_BO_FLAG_PINNED, NULL); 704 696 if (IS_ERR(vram_bo)) { 705 697 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", 706 698 PTR_ERR(vram_bo)); ··· 708 700 } 709 701 710 702 xe_bo_lock(vram_bo, false); 711 - ret = xe_bo_validate(vram_bo, NULL, false); 703 + ret = xe_bo_validate(vram_bo, NULL, false, exec); 712 704 if (ret) { 713 705 KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); 714 706 goto free_vrambo; ··· 721 713 } 722 714 723 715 test_clear(xe, tile, sys_bo, vram_bo, test); 724 - test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test); 716 + test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, exec, test); 725 717 xe_bo_unlock(vram_bo); 726 718 727 719 xe_bo_lock(vram_bo, false);
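Threading `struct drm_exec *exec` through the migrate-test helpers (with `XE_VALIDATION_OPT_OUT` as the "caller already holds the locks" sentinel) lets callers take part in the driver's drm_exec-style locking transactions, where a ww-mutex backoff unwinds every held lock and restarts the whole sequence instead of deadlocking. A self-contained sketch of that retry loop with simulated contention; the types and the contention model are illustrative, only the `-EDEADLK` backoff convention mirrors the real pattern:

```c
#include <assert.h>
#include <errno.h>

struct exec_ctx {
	int attempts;      /* completed locking attempts */
	int contend_until; /* simulate contention for the first N attempts */
};

/* One attempt to lock every object; backs off with -EDEADLK while contended. */
static int try_lock_all(struct exec_ctx *e)
{
	e->attempts++;
	if (e->attempts <= e->contend_until)
		return -EDEADLK;
	return 0;
}

/*
 * drm_exec-style transaction: on -EDEADLK drop everything and retry from
 * the top, the moral equivalent of drm_exec_retry_on_contention().
 */
static int run_transaction(struct exec_ctx *e)
{
	int ret;

	do {
		ret = try_lock_all(e);
	} while (ret == -EDEADLK);

	return ret;
}
```

The key property is that backoff never blocks while holding other locks: each retry re-acquires from scratch in a consistent order, so the sanity test can hand `XE_VALIDATION_OPT_OUT` down when the VM lock is already held and skip the transaction machinery entirely.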
+223 -2
drivers/gpu/drm/xe/tests/xe_pci.c
··· 12 12 #include <kunit/test-bug.h> 13 13 #include <kunit/visibility.h> 14 14 15 + #define PLATFORM_CASE(platform__, graphics_step__) \ 16 + { \ 17 + .platform = XE_ ## platform__, \ 18 + .subplatform = XE_SUBPLATFORM_NONE, \ 19 + .step = { .graphics = STEP_ ## graphics_step__ } \ 20 + } 21 + 22 + #define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ 23 + { \ 24 + .platform = XE_ ## platform__, \ 25 + .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ 26 + .step = { .graphics = STEP_ ## graphics_step__ } \ 27 + } 28 + 29 + #define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ 30 + media_verx100__, media_step__) \ 31 + { \ 32 + .platform = XE_ ## platform__, \ 33 + .subplatform = XE_SUBPLATFORM_NONE, \ 34 + .graphics_verx100 = graphics_verx100__, \ 35 + .media_verx100 = media_verx100__, \ 36 + .step = { .graphics = STEP_ ## graphics_step__, \ 37 + .media = STEP_ ## media_step__ } \ 38 + } 39 + 40 + static const struct xe_pci_fake_data cases[] = { 41 + PLATFORM_CASE(TIGERLAKE, B0), 42 + PLATFORM_CASE(DG1, A0), 43 + PLATFORM_CASE(DG1, B0), 44 + PLATFORM_CASE(ALDERLAKE_S, A0), 45 + PLATFORM_CASE(ALDERLAKE_S, B0), 46 + PLATFORM_CASE(ALDERLAKE_S, C0), 47 + PLATFORM_CASE(ALDERLAKE_S, D0), 48 + PLATFORM_CASE(ALDERLAKE_P, A0), 49 + PLATFORM_CASE(ALDERLAKE_P, B0), 50 + PLATFORM_CASE(ALDERLAKE_P, C0), 51 + SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), 52 + SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), 53 + SUBPLATFORM_CASE(DG2, G10, C0), 54 + SUBPLATFORM_CASE(DG2, G11, B1), 55 + SUBPLATFORM_CASE(DG2, G12, A1), 56 + GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), 57 + GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), 58 + GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), 59 + GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), 60 + GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), 61 + GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), 62 + GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0), 63 + }; 64 + 65 + KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc); 66 + 67 + 
/** 68 + * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters 69 + * @prev: the pointer to the previous parameter to iterate from or NULL 70 + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE 71 + * 72 + * This function prepares struct xe_pci_fake_data parameter. 73 + * 74 + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. 75 + * 76 + * Return: pointer to the next parameter or NULL if no more parameters 77 + */ 78 + const void *xe_pci_fake_data_gen_params(const void *prev, char *desc) 79 + { 80 + return platform_gen_params(prev, desc); 81 + } 82 + EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_gen_params); 83 + 84 + static const struct xe_device_desc *lookup_desc(enum xe_platform p) 85 + { 86 + const struct xe_device_desc *desc; 87 + const struct pci_device_id *ids; 88 + 89 + for (ids = pciidlist; ids->driver_data; ids++) { 90 + desc = (const void *)ids->driver_data; 91 + if (desc->platform == p) 92 + return desc; 93 + } 94 + return NULL; 95 + } 96 + 97 + static const struct xe_subplatform_desc *lookup_sub_desc(enum xe_platform p, enum xe_subplatform s) 98 + { 99 + const struct xe_device_desc *desc = lookup_desc(p); 100 + const struct xe_subplatform_desc *spd; 101 + 102 + if (desc && desc->subplatforms) 103 + for (spd = desc->subplatforms; spd->subplatform; spd++) 104 + if (spd->subplatform == s) 105 + return spd; 106 + return NULL; 107 + } 108 + 109 + static const char *lookup_platform_name(enum xe_platform p) 110 + { 111 + const struct xe_device_desc *desc = lookup_desc(p); 112 + 113 + return desc ? desc->platform_name : "INVALID"; 114 + } 115 + 116 + static const char *__lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s) 117 + { 118 + const struct xe_subplatform_desc *desc = lookup_sub_desc(p, s); 119 + 120 + return desc ? 
desc->name : "INVALID"; 121 + } 122 + 123 + static const char *lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s) 124 + { 125 + return s == XE_SUBPLATFORM_NONE ? "" : __lookup_subplatform_name(p, s); 126 + } 127 + 128 + static const char *subplatform_prefix(enum xe_subplatform s) 129 + { 130 + return s == XE_SUBPLATFORM_NONE ? "" : " "; 131 + } 132 + 133 + static const char *step_prefix(enum xe_step step) 134 + { 135 + return step == STEP_NONE ? "" : " "; 136 + } 137 + 138 + static const char *step_name(enum xe_step step) 139 + { 140 + return step == STEP_NONE ? "" : xe_step_name(step); 141 + } 142 + 143 + static const char *sriov_prefix(enum xe_sriov_mode mode) 144 + { 145 + return mode <= XE_SRIOV_MODE_NONE ? "" : " "; 146 + } 147 + 148 + static const char *sriov_name(enum xe_sriov_mode mode) 149 + { 150 + return mode <= XE_SRIOV_MODE_NONE ? "" : xe_sriov_mode_to_string(mode); 151 + } 152 + 153 + static const char *lookup_graphics_name(unsigned int verx100) 154 + { 155 + const struct xe_ip *ip = find_graphics_ip(verx100); 156 + 157 + return ip ? ip->name : ""; 158 + } 159 + 160 + static const char *lookup_media_name(unsigned int verx100) 161 + { 162 + const struct xe_ip *ip = find_media_ip(verx100); 163 + 164 + return ip ? ip->name : ""; 165 + } 166 + 167 + /** 168 + * xe_pci_fake_data_desc - Describe struct xe_pci_fake_data parameter 169 + * @param: the &struct xe_pci_fake_data parameter to describe 170 + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE 171 + * 172 + * This function prepares description of the struct xe_pci_fake_data parameter. 173 + * 174 + * It is tailored for use in parameterized KUnit tests where parameter generator 175 + * is based on the struct xe_pci_fake_data arrays. 
176 + */ 177 + void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc) 178 + { 179 + if (param->graphics_verx100 || param->media_verx100) 180 + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s %u.%02u(%s)%s%s %u.%02u(%s)%s%s%s%s", 181 + lookup_platform_name(param->platform), 182 + subplatform_prefix(param->subplatform), 183 + lookup_subplatform_name(param->platform, param->subplatform), 184 + param->graphics_verx100 / 100, param->graphics_verx100 % 100, 185 + lookup_graphics_name(param->graphics_verx100), 186 + step_prefix(param->step.graphics), step_name(param->step.graphics), 187 + param->media_verx100 / 100, param->media_verx100 % 100, 188 + lookup_media_name(param->media_verx100), 189 + step_prefix(param->step.media), step_name(param->step.media), 190 + sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode)); 191 + else 192 + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s%s%s%s%s", 193 + lookup_platform_name(param->platform), 194 + subplatform_prefix(param->subplatform), 195 + lookup_subplatform_name(param->platform, param->subplatform), 196 + step_prefix(param->step.graphics), step_name(param->step.graphics), 197 + sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode)); 198 + } 199 + EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_desc); 200 + 15 201 static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) 16 202 { 17 203 snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s", 18 204 param->verx100 / 100, param->verx100 % 100, param->name); 19 205 } 206 + 207 + /* 208 + * Pre-GMDID Graphics and Media IPs definitions. 
209 + * 210 + * Mimic the way GMDID IPs are declared so the same 211 + * param generator can be used for both 212 + */ 213 + static const struct xe_ip pre_gmdid_graphics_ips[] = { 214 + graphics_ip_xelp, 215 + graphics_ip_xelpp, 216 + graphics_ip_xehpg, 217 + graphics_ip_xehpc, 218 + }; 219 + 220 + static const struct xe_ip pre_gmdid_media_ips[] = { 221 + media_ip_xem, 222 + media_ip_xehpm, 223 + }; 224 + 225 + KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc); 226 + KUNIT_ARRAY_PARAM(pre_gmdid_media_ip, pre_gmdid_media_ips, xe_ip_kunit_desc); 20 227 21 228 KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc); 22 229 KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); ··· 253 46 */ 254 47 const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc) 255 48 { 49 + const void *next = pre_gmdid_graphics_ip_gen_params(prev, desc); 50 + 51 + if (next) 52 + return next; 53 + if (is_insidevar(prev, pre_gmdid_graphics_ips)) 54 + prev = NULL; 55 + 256 56 return graphics_ip_gen_params(prev, desc); 257 57 } 258 58 EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); ··· 277 63 */ 278 64 const void *xe_pci_media_ip_gen_param(const void *prev, char *desc) 279 65 { 66 + const void *next = pre_gmdid_media_ip_gen_params(prev, desc); 67 + 68 + if (next) 69 + return next; 70 + if (is_insidevar(prev, pre_gmdid_media_ips)) 71 + prev = NULL; 72 + 280 73 return media_ip_gen_params(prev, desc); 281 74 } 282 75 EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); ··· 315 94 316 95 if (type == GMDID_MEDIA) { 317 96 *ver = data->media_verx100; 318 - *revid = xe_step_to_gmdid(data->media_step); 97 + *revid = xe_step_to_gmdid(data->step.media); 319 98 } else { 320 99 *ver = data->graphics_verx100; 321 - *revid = xe_step_to_gmdid(data->graphics_step); 100 + *revid = xe_step_to_gmdid(data->step.graphics); 322 101 } 323 102 } 324 103
+4 -2
drivers/gpu/drm/xe/tests/xe_pci_test.h
··· 10 10 11 11 #include "xe_platform_types.h" 12 12 #include "xe_sriov_types.h" 13 + #include "xe_step_types.h" 13 14 14 15 struct xe_device; 15 16 ··· 18 17 enum xe_sriov_mode sriov_mode; 19 18 enum xe_platform platform; 20 19 enum xe_subplatform subplatform; 20 + struct xe_step_info step; 21 21 u32 graphics_verx100; 22 22 u32 media_verx100; 23 - u32 graphics_step; 24 - u32 media_step; 25 23 }; 26 24 27 25 int xe_pci_fake_device_init(struct xe_device *xe); 26 + const void *xe_pci_fake_data_gen_params(const void *prev, char *desc); 27 + void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc); 28 28 29 29 const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc); 30 30 const void *xe_pci_media_ip_gen_param(const void *prev, char *desc);
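The new `xe_pci_fake_data_desc()` builds each description with one `snprintf()` call, using helper pairs like `step_prefix()`/`step_name()` that both return "" when the field is absent, so optional parts of the string simply vanish without branching in the format string. A small sketch of that pattern with an invented two-field example (the enum and names are illustrative):

```c
#include <assert.h>
#include <stdio.h>
#include <string.h>

enum step { STEP_NONE, STEP_A0, STEP_B0 };

/* Separator that collapses to "" when the optional field is absent. */
static const char *step_prefix(enum step s)
{
	return s == STEP_NONE ? "" : " ";
}

static const char *step_name(enum step s)
{
	switch (s) {
	case STEP_A0: return "A0";
	case STEP_B0: return "B0";
	default:      return "";
	}
}

/* Produces "NAME" or "NAME A0": optional parts collapse instead of branching. */
static void describe(char *buf, size_t len, const char *platform, enum step s)
{
	snprintf(buf, len, "%s%s%s", platform, step_prefix(s), step_name(s));
}
```

Pairing a prefix helper with a name helper keeps the format string fixed no matter how many optional fields there are, which is what lets the real function fold platform, subplatform, steppings, and SR-IOV mode into two `snprintf()` variants.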
+3 -88
drivers/gpu/drm/xe/tests/xe_wa_test.c
··· 15 15 #include "xe_tuning.h" 16 16 #include "xe_wa.h" 17 17 18 - struct platform_test_case { 19 - const char *name; 20 - enum xe_platform platform; 21 - enum xe_subplatform subplatform; 22 - u32 graphics_verx100; 23 - u32 media_verx100; 24 - struct xe_step_info step; 25 - }; 26 - 27 - #define PLATFORM_CASE(platform__, graphics_step__) \ 28 - { \ 29 - .name = #platform__ " (" #graphics_step__ ")", \ 30 - .platform = XE_ ## platform__, \ 31 - .subplatform = XE_SUBPLATFORM_NONE, \ 32 - .step = { .graphics = STEP_ ## graphics_step__ } \ 33 - } 34 - 35 - 36 - #define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ 37 - { \ 38 - .name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")", \ 39 - .platform = XE_ ## platform__, \ 40 - .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ 41 - .step = { .graphics = STEP_ ## graphics_step__ } \ 42 - } 43 - 44 - #define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ 45 - media_verx100__, media_step__) \ 46 - { \ 47 - .name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\ 48 - .platform = XE_ ## platform__, \ 49 - .subplatform = XE_SUBPLATFORM_NONE, \ 50 - .graphics_verx100 = graphics_verx100__, \ 51 - .media_verx100 = media_verx100__, \ 52 - .step = { .graphics = STEP_ ## graphics_step__, \ 53 - .media = STEP_ ## media_step__ } \ 54 - } 55 - 56 - static const struct platform_test_case cases[] = { 57 - PLATFORM_CASE(TIGERLAKE, B0), 58 - PLATFORM_CASE(DG1, A0), 59 - PLATFORM_CASE(DG1, B0), 60 - PLATFORM_CASE(ALDERLAKE_S, A0), 61 - PLATFORM_CASE(ALDERLAKE_S, B0), 62 - PLATFORM_CASE(ALDERLAKE_S, C0), 63 - PLATFORM_CASE(ALDERLAKE_S, D0), 64 - PLATFORM_CASE(ALDERLAKE_P, A0), 65 - PLATFORM_CASE(ALDERLAKE_P, B0), 66 - PLATFORM_CASE(ALDERLAKE_P, C0), 67 - SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), 68 - SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), 69 - SUBPLATFORM_CASE(DG2, G10, C0), 70 - SUBPLATFORM_CASE(DG2, G11, B1), 71 - SUBPLATFORM_CASE(DG2, G12, A1), 72 - 
GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), 73 - GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), 74 - GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), 75 - GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), 76 - GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), 77 - GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), 78 - GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0), 79 - }; 80 - 81 - static void platform_desc(const struct platform_test_case *t, char *desc) 82 - { 83 - strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); 84 - } 85 - 86 - KUNIT_ARRAY_PARAM(platform, cases, platform_desc); 87 - 88 18 static int xe_wa_test_init(struct kunit *test) 89 19 { 90 - const struct platform_test_case *param = test->param_value; 91 - struct xe_pci_fake_data data = { 92 - .platform = param->platform, 93 - .subplatform = param->subplatform, 94 - .graphics_verx100 = param->graphics_verx100, 95 - .media_verx100 = param->media_verx100, 96 - .graphics_step = param->step.graphics, 97 - .media_step = param->step.media, 98 - }; 20 + const struct xe_pci_fake_data *param = test->param_value; 21 + struct xe_pci_fake_data data = *param; 99 22 struct xe_device *xe; 100 23 struct device *dev; 101 24 int ret; ··· 43 120 return 0; 44 121 } 45 122 46 - static void xe_wa_test_exit(struct kunit *test) 47 - { 48 - struct xe_device *xe = test->priv; 49 - 50 - drm_kunit_helper_free_device(test, xe->drm.dev); 51 - } 52 - 53 123 static void xe_wa_gt(struct kunit *test) 54 124 { 55 125 struct xe_device *xe = test->priv; ··· 60 144 } 61 145 62 146 static struct kunit_case xe_wa_tests[] = { 63 - KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params), 147 + KUNIT_CASE_PARAM(xe_wa_gt, xe_pci_fake_data_gen_params), 64 148 {} 65 149 }; 66 150 67 151 static struct kunit_suite xe_rtp_test_suite = { 68 152 .name = "xe_wa", 69 153 .init = xe_wa_test_init, 70 - .exit = xe_wa_test_exit, 71 154 .test_cases = xe_wa_tests, 72 155 }; 73 156
+2 -2
drivers/gpu/drm/xe/xe_bb.c
··· 64 64 enum xe_sriov_vf_ccs_rw_ctxs ctx_id) 65 65 { 66 66 struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); 67 - struct xe_tile *tile = gt_to_tile(gt); 67 + struct xe_device *xe = gt_to_xe(gt); 68 68 struct xe_sa_manager *bb_pool; 69 69 int err; 70 70 ··· 78 78 * So, this extra DW acts as a guard here. 79 79 */ 80 80 81 - bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool; 81 + bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; 82 82 bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1)); 83 83 84 84 if (IS_ERR(bb->bo)) {
+651 -246
drivers/gpu/drm/xe/xe_bo.c
··· 974 974 * CCS meta data is migrated from TT -> SMEM. So, let us detach the 975 975 * BBs from BO as it is no longer needed. 976 976 */ 977 - if (IS_VF_CCS_BB_VALID(xe, bo) && old_mem_type == XE_PL_TT && 977 + if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT && 978 978 new_mem->mem_type == XE_PL_SYSTEM) 979 979 xe_sriov_vf_ccs_detach_bo(bo); 980 980 981 - if (IS_SRIOV_VF(xe) && 981 + if (IS_VF_CCS_READY(xe) && 982 982 ((move_lacks_source && new_mem->mem_type == XE_PL_TT) || 983 983 (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) && 984 984 handle_system_ccs) ··· 994 994 if (timeout < 0) 995 995 ret = timeout; 996 996 997 - if (IS_VF_CCS_BB_VALID(xe, bo)) 997 + if (IS_VF_CCS_READY(xe)) 998 998 xe_sriov_vf_ccs_detach_bo(bo); 999 999 1000 1000 xe_tt_unmap_sg(xe, ttm_bo->ttm); ··· 1141 1141 int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) 1142 1142 { 1143 1143 struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); 1144 + struct xe_validation_ctx ctx; 1145 + struct drm_exec exec; 1144 1146 struct xe_bo *backup; 1145 1147 int ret = 0; 1146 1148 1147 - xe_bo_lock(bo, false); 1149 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) { 1150 + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); 1151 + drm_exec_retry_on_contention(&exec); 1152 + xe_assert(xe, !ret); 1153 + xe_assert(xe, !bo->backup_obj); 1148 1154 1149 - xe_assert(xe, !bo->backup_obj); 1155 + /* 1156 + * Since this is called from the PM notifier we might have raced with 1157 + * someone unpinning this after we dropped the pinned list lock and 1158 + * grabbing the above bo lock. 1159 + */ 1160 + if (!xe_bo_is_pinned(bo)) 1161 + break; 1150 1162 1151 - /* 1152 - * Since this is called from the PM notifier we might have raced with 1153 - * someone unpinning this after we dropped the pinned list lock and 1154 - * grabbing the above bo lock. 
1155 - */ 1156 - if (!xe_bo_is_pinned(bo)) 1157 - goto out_unlock_bo; 1163 + if (!xe_bo_is_vram(bo)) 1164 + break; 1158 1165 1159 - if (!xe_bo_is_vram(bo)) 1160 - goto out_unlock_bo; 1166 + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) 1167 + break; 1161 1168 1162 - if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) 1163 - goto out_unlock_bo; 1169 + backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), 1170 + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, 1171 + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | 1172 + XE_BO_FLAG_PINNED, &exec); 1173 + if (IS_ERR(backup)) { 1174 + drm_exec_retry_on_contention(&exec); 1175 + ret = PTR_ERR(backup); 1176 + xe_validation_retry_on_oom(&ctx, &ret); 1177 + break; 1178 + } 1164 1179 1165 - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), 1166 - DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, 1167 - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | 1168 - XE_BO_FLAG_PINNED); 1169 - if (IS_ERR(backup)) { 1170 - ret = PTR_ERR(backup); 1171 - goto out_unlock_bo; 1180 + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ 1181 + ttm_bo_pin(&backup->ttm); 1182 + bo->backup_obj = backup; 1172 1183 } 1173 1184 1174 - backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ 1175 - ttm_bo_pin(&backup->ttm); 1176 - bo->backup_obj = backup; 1177 - 1178 - out_unlock_bo: 1179 - xe_bo_unlock(bo); 1180 1185 return ret; 1181 1186 } 1182 1187 ··· 1207 1202 return 0; 1208 1203 } 1209 1204 1205 + static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup) 1206 + { 1207 + struct xe_device *xe = xe_bo_device(bo); 1208 + bool unmap = false; 1209 + int ret = 0; 1210 + 1211 + if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { 1212 + struct xe_migrate *migrate; 1213 + struct dma_fence *fence; 1214 + 1215 + if (bo->tile) 1216 + migrate = bo->tile->migrate; 1217 + else 1218 + migrate = mem_type_to_migrate(xe, 
bo->ttm.resource->mem_type); 1219 + 1220 + xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv); 1221 + ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); 1222 + if (ret) 1223 + goto out_backup; 1224 + 1225 + fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource, 1226 + backup->ttm.resource, false); 1227 + if (IS_ERR(fence)) { 1228 + ret = PTR_ERR(fence); 1229 + goto out_backup; 1230 + } 1231 + 1232 + dma_resv_add_fence(bo->ttm.base.resv, fence, 1233 + DMA_RESV_USAGE_KERNEL); 1234 + dma_fence_put(fence); 1235 + } else { 1236 + ret = xe_bo_vmap(backup); 1237 + if (ret) 1238 + goto out_backup; 1239 + 1240 + if (iosys_map_is_null(&bo->vmap)) { 1241 + ret = xe_bo_vmap(bo); 1242 + if (ret) 1243 + goto out_vunmap; 1244 + unmap = true; 1245 + } 1246 + 1247 + xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, 1248 + xe_bo_size(bo)); 1249 + } 1250 + 1251 + if (!bo->backup_obj) 1252 + bo->backup_obj = backup; 1253 + out_vunmap: 1254 + xe_bo_vunmap(backup); 1255 + out_backup: 1256 + if (unmap) 1257 + xe_bo_vunmap(bo); 1258 + 1259 + return ret; 1260 + } 1261 + 1210 1262 /** 1211 1263 * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory 1212 1264 * @bo: The buffer object to move. 
··· 1278 1216 int xe_bo_evict_pinned(struct xe_bo *bo) 1279 1217 { 1280 1218 struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); 1219 + struct xe_validation_ctx ctx; 1220 + struct drm_exec exec; 1281 1221 struct xe_bo *backup = bo->backup_obj; 1282 1222 bool backup_created = false; 1283 - bool unmap = false; 1284 1223 int ret = 0; 1285 1224 1286 - xe_bo_lock(bo, false); 1225 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) { 1226 + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); 1227 + drm_exec_retry_on_contention(&exec); 1228 + xe_assert(xe, !ret); 1287 1229 1288 - if (WARN_ON(!bo->ttm.resource)) { 1289 - ret = -EINVAL; 1290 - goto out_unlock_bo; 1291 - } 1292 - 1293 - if (WARN_ON(!xe_bo_is_pinned(bo))) { 1294 - ret = -EINVAL; 1295 - goto out_unlock_bo; 1296 - } 1297 - 1298 - if (!xe_bo_is_vram(bo)) 1299 - goto out_unlock_bo; 1300 - 1301 - if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) 1302 - goto out_unlock_bo; 1303 - 1304 - if (!backup) { 1305 - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, 1306 - NULL, xe_bo_size(bo), 1307 - DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, 1308 - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | 1309 - XE_BO_FLAG_PINNED); 1310 - if (IS_ERR(backup)) { 1311 - ret = PTR_ERR(backup); 1312 - goto out_unlock_bo; 1313 - } 1314 - backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ 1315 - backup_created = true; 1316 - } 1317 - 1318 - if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { 1319 - struct xe_migrate *migrate; 1320 - struct dma_fence *fence; 1321 - 1322 - if (bo->tile) 1323 - migrate = bo->tile->migrate; 1324 - else 1325 - migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type); 1326 - 1327 - ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); 1328 - if (ret) 1329 - goto out_backup; 1330 - 1331 - ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); 1332 - if (ret) 1333 - goto out_backup; 1334 - 1335 - fence = 
xe_migrate_copy(migrate, bo, backup, bo->ttm.resource, 1336 - backup->ttm.resource, false); 1337 - if (IS_ERR(fence)) { 1338 - ret = PTR_ERR(fence); 1339 - goto out_backup; 1230 + if (WARN_ON(!bo->ttm.resource)) { 1231 + ret = -EINVAL; 1232 + break; 1340 1233 } 1341 1234 1342 - dma_resv_add_fence(bo->ttm.base.resv, fence, 1343 - DMA_RESV_USAGE_KERNEL); 1344 - dma_resv_add_fence(backup->ttm.base.resv, fence, 1345 - DMA_RESV_USAGE_KERNEL); 1346 - dma_fence_put(fence); 1347 - } else { 1348 - ret = xe_bo_vmap(backup); 1349 - if (ret) 1350 - goto out_backup; 1351 - 1352 - if (iosys_map_is_null(&bo->vmap)) { 1353 - ret = xe_bo_vmap(bo); 1354 - if (ret) 1355 - goto out_backup; 1356 - unmap = true; 1235 + if (WARN_ON(!xe_bo_is_pinned(bo))) { 1236 + ret = -EINVAL; 1237 + break; 1357 1238 } 1358 1239 1359 - xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, 1360 - xe_bo_size(bo)); 1240 + if (!xe_bo_is_vram(bo)) 1241 + break; 1242 + 1243 + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) 1244 + break; 1245 + 1246 + if (!backup) { 1247 + backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, 1248 + xe_bo_size(bo), 1249 + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, 1250 + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | 1251 + XE_BO_FLAG_PINNED, &exec); 1252 + if (IS_ERR(backup)) { 1253 + drm_exec_retry_on_contention(&exec); 1254 + ret = PTR_ERR(backup); 1255 + xe_validation_retry_on_oom(&ctx, &ret); 1256 + break; 1257 + } 1258 + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ 1259 + backup_created = true; 1260 + } 1261 + 1262 + ret = xe_bo_evict_pinned_copy(bo, backup); 1361 1263 } 1362 1264 1363 - if (!bo->backup_obj) 1364 - bo->backup_obj = backup; 1365 - 1366 - out_backup: 1367 - xe_bo_vunmap(backup); 1368 1265 if (ret && backup_created) 1369 1266 xe_bo_put(backup); 1370 - out_unlock_bo: 1371 - if (unmap) 1372 - xe_bo_vunmap(bo); 1373 - xe_bo_unlock(bo); 1267 + 1374 1268 return ret; 1375 1269 } 1376 1270 ··· 1376 1358 if (ret) 1377 1359 
goto out_unlock_bo; 1378 1360 1379 - ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); 1380 - if (ret) 1381 - goto out_unlock_bo; 1382 - 1383 1361 fence = xe_migrate_copy(migrate, backup, bo, 1384 1362 backup->ttm.resource, bo->ttm.resource, 1385 1363 false); ··· 1385 1371 } 1386 1372 1387 1373 dma_resv_add_fence(bo->ttm.base.resv, fence, 1388 - DMA_RESV_USAGE_KERNEL); 1389 - dma_resv_add_fence(backup->ttm.base.resv, fence, 1390 1374 DMA_RESV_USAGE_KERNEL); 1391 1375 dma_fence_put(fence); 1392 1376 } else { ··· 1541 1529 if (!xe_bo_is_xe_bo(ttm_bo)) 1542 1530 return; 1543 1531 1544 - if (IS_VF_CCS_BB_VALID(ttm_to_xe_device(ttm_bo->bdev), bo)) 1532 + if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev))) 1545 1533 xe_sriov_vf_ccs_detach_bo(bo); 1546 1534 1547 1535 /* ··· 1737 1725 bo->attr.atomic_access == DRM_XE_ATOMIC_CPU; 1738 1726 } 1739 1727 1740 - static vm_fault_t xe_gem_fault(struct vm_fault *vmf) 1728 + /* Populate the bo if swapped out, or migrate if the access mode requires that. */ 1729 + static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx, 1730 + struct drm_exec *exec) 1731 + { 1732 + struct ttm_buffer_object *tbo = &bo->ttm; 1733 + int err = 0; 1734 + 1735 + if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) { 1736 + xe_assert(xe_bo_device(bo), 1737 + dma_resv_test_signaled(tbo->base.resv, DMA_RESV_USAGE_KERNEL) || 1738 + (tbo->ttm && ttm_tt_is_populated(tbo->ttm))); 1739 + err = ttm_bo_populate(&bo->ttm, ctx); 1740 + } else if (should_migrate_to_smem(bo)) { 1741 + xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM); 1742 + err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec); 1743 + } 1744 + 1745 + return err; 1746 + } 1747 + 1748 + /* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. 
*/ 1749 + static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo) 1750 + { 1751 + vm_fault_t ret; 1752 + 1753 + trace_xe_bo_cpu_fault(bo); 1754 + 1755 + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, 1756 + TTM_BO_VM_NUM_PREFAULT); 1757 + /* 1758 + * When TTM is actually called to insert PTEs, ensure no blocking conditions 1759 + * remain, in which case TTM may drop locks and return VM_FAULT_RETRY. 1760 + */ 1761 + xe_assert(xe, ret != VM_FAULT_RETRY); 1762 + 1763 + if (ret == VM_FAULT_NOPAGE && 1764 + mem_type_is_vram(bo->ttm.resource->mem_type)) { 1765 + mutex_lock(&xe->mem_access.vram_userfault.lock); 1766 + if (list_empty(&bo->vram_userfault_link)) 1767 + list_add(&bo->vram_userfault_link, 1768 + &xe->mem_access.vram_userfault.list); 1769 + mutex_unlock(&xe->mem_access.vram_userfault.lock); 1770 + } 1771 + 1772 + return ret; 1773 + } 1774 + 1775 + static vm_fault_t xe_err_to_fault_t(int err) 1776 + { 1777 + switch (err) { 1778 + case 0: 1779 + case -EINTR: 1780 + case -ERESTARTSYS: 1781 + case -EAGAIN: 1782 + return VM_FAULT_NOPAGE; 1783 + case -ENOMEM: 1784 + case -ENOSPC: 1785 + return VM_FAULT_OOM; 1786 + default: 1787 + break; 1788 + } 1789 + return VM_FAULT_SIGBUS; 1790 + } 1791 + 1792 + static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo) 1793 + { 1794 + dma_resv_assert_held(tbo->base.resv); 1795 + 1796 + return tbo->ttm && 1797 + (tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) == 1798 + TTM_TT_FLAG_EXTERNAL; 1799 + } 1800 + 1801 + static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe, 1802 + struct xe_bo *bo, bool needs_rpm) 1803 + { 1804 + struct ttm_buffer_object *tbo = &bo->ttm; 1805 + vm_fault_t ret = VM_FAULT_RETRY; 1806 + struct xe_validation_ctx ctx; 1807 + struct ttm_operation_ctx tctx = { 1808 + .interruptible = true, 1809 + .no_wait_gpu = true, 1810 + .gfp_retry_mayfail = true, 1811 + 1812 + }; 1813 + int err; 1814 
+ 1815 + if (needs_rpm && !xe_pm_runtime_get_if_active(xe)) 1816 + return VM_FAULT_RETRY; 1817 + 1818 + err = xe_validation_ctx_init(&ctx, &xe->val, NULL, 1819 + (struct xe_val_flags) { 1820 + .interruptible = true, 1821 + .no_block = true 1822 + }); 1823 + if (err) 1824 + goto out_pm; 1825 + 1826 + if (!dma_resv_trylock(tbo->base.resv)) 1827 + goto out_validation; 1828 + 1829 + if (xe_ttm_bo_is_imported(tbo)) { 1830 + ret = VM_FAULT_SIGBUS; 1831 + drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); 1832 + goto out_unlock; 1833 + } 1834 + 1835 + err = xe_bo_fault_migrate(bo, &tctx, NULL); 1836 + if (err) { 1837 + /* Return VM_FAULT_RETRY on these errors. */ 1838 + if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY) 1839 + ret = xe_err_to_fault_t(err); 1840 + goto out_unlock; 1841 + } 1842 + 1843 + if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) 1844 + ret = __xe_bo_cpu_fault(vmf, xe, bo); 1845 + 1846 + out_unlock: 1847 + dma_resv_unlock(tbo->base.resv); 1848 + out_validation: 1849 + xe_validation_ctx_fini(&ctx); 1850 + out_pm: 1851 + if (needs_rpm) 1852 + xe_pm_runtime_put(xe); 1853 + 1854 + return ret; 1855 + } 1856 + 1857 + static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf) 1741 1858 { 1742 1859 struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; 1743 1860 struct drm_device *ddev = tbo->base.dev; 1744 1861 struct xe_device *xe = to_xe_device(ddev); 1745 1862 struct xe_bo *bo = ttm_to_xe_bo(tbo); 1746 1863 bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK; 1864 + bool retry_after_wait = false; 1865 + struct xe_validation_ctx ctx; 1866 + struct drm_exec exec; 1747 1867 vm_fault_t ret; 1748 - int idx, r = 0; 1868 + int err = 0; 1869 + int idx; 1870 + 1871 + if (!drm_dev_enter(&xe->drm, &idx)) 1872 + return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); 1873 + 1874 + ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm); 1875 + if (ret != VM_FAULT_RETRY) 1876 + goto out; 1877 + 1878 + if 
(fault_flag_allow_retry_first(vmf->flags)) { 1879 + if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) 1880 + goto out; 1881 + retry_after_wait = true; 1882 + xe_bo_get(bo); 1883 + mmap_read_unlock(vmf->vma->vm_mm); 1884 + } else { 1885 + ret = VM_FAULT_NOPAGE; 1886 + } 1887 + 1888 + /* 1889 + * The fastpath failed and we were not required to return and retry immediately. 1890 + * We're now running in one of two modes: 1891 + * 1892 + * 1) retry_after_wait == true: The mmap_read_lock() is dropped, and we're trying 1893 + * to resolve blocking waits. But we can't resolve the fault since the 1894 + * mmap_read_lock() is dropped. After retrying the fault, the aim is that the fastpath 1895 + * should succeed. But it may fail since we drop the bo lock. 1896 + * 1897 + * 2) retry_after_wait == false: The fastpath failed, typically even after 1898 + * a retry. Do whatever's necessary to resolve the fault. 1899 + * 1900 + * This construct is recommended to avoid excessive waits under the mmap_lock. 1901 + */ 1749 1902 1750 1903 if (needs_rpm) 1751 1904 xe_pm_runtime_get(xe); 1752 1905 1753 - ret = ttm_bo_vm_reserve(tbo, vmf); 1754 - if (ret) 1755 - goto out; 1906 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, 1907 + err) { 1908 + struct ttm_operation_ctx tctx = { 1909 + .interruptible = true, 1910 + .no_wait_gpu = false, 1911 + .gfp_retry_mayfail = retry_after_wait, 1912 + }; 1913 + long lerr; 1756 1914 1757 - if (drm_dev_enter(ddev, &idx)) { 1758 - trace_xe_bo_cpu_fault(bo); 1915 + err = drm_exec_lock_obj(&exec, &tbo->base); 1916 + drm_exec_retry_on_contention(&exec); 1917 + if (err) 1918 + break; 1759 1919 1760 - if (should_migrate_to_smem(bo)) { 1761 - xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM); 1762 - 1763 - r = xe_bo_migrate(bo, XE_PL_TT); 1764 - if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) 1765 - ret = VM_FAULT_NOPAGE; 1766 - else if (r) 1767 - ret = VM_FAULT_SIGBUS; 1920 + if (xe_ttm_bo_is_imported(tbo)) { 1921 + err = 
-EFAULT; 1922 + drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); 1923 + break; 1768 1924 } 1769 - if (!ret) 1770 - ret = ttm_bo_vm_fault_reserved(vmf, 1771 - vmf->vma->vm_page_prot, 1772 - TTM_BO_VM_NUM_PREFAULT); 1773 - drm_dev_exit(idx); 1774 1925 1775 - if (ret == VM_FAULT_RETRY && 1776 - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) 1777 - goto out; 1778 - 1779 - /* 1780 - * ttm_bo_vm_reserve() already has dma_resv_lock. 1781 - */ 1782 - if (ret == VM_FAULT_NOPAGE && 1783 - mem_type_is_vram(tbo->resource->mem_type)) { 1784 - mutex_lock(&xe->mem_access.vram_userfault.lock); 1785 - if (list_empty(&bo->vram_userfault_link)) 1786 - list_add(&bo->vram_userfault_link, 1787 - &xe->mem_access.vram_userfault.list); 1788 - mutex_unlock(&xe->mem_access.vram_userfault.lock); 1926 + err = xe_bo_fault_migrate(bo, &tctx, &exec); 1927 + if (err) { 1928 + drm_exec_retry_on_contention(&exec); 1929 + xe_validation_retry_on_oom(&ctx, &err); 1930 + break; 1789 1931 } 1790 - } else { 1791 - ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); 1932 + 1933 + lerr = dma_resv_wait_timeout(tbo->base.resv, 1934 + DMA_RESV_USAGE_KERNEL, true, 1935 + MAX_SCHEDULE_TIMEOUT); 1936 + if (lerr < 0) { 1937 + err = lerr; 1938 + break; 1939 + } 1940 + 1941 + if (!retry_after_wait) 1942 + ret = __xe_bo_cpu_fault(vmf, xe, bo); 1792 1943 } 1944 + /* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. 
*/ 1945 + if (err && !retry_after_wait) 1946 + ret = xe_err_to_fault_t(err); 1793 1947 1794 - dma_resv_unlock(tbo->base.resv); 1795 - out: 1796 1948 if (needs_rpm) 1797 1949 xe_pm_runtime_put(xe); 1950 + 1951 + if (retry_after_wait) 1952 + xe_bo_put(bo); 1953 + out: 1954 + drm_dev_exit(idx); 1798 1955 1799 1956 return ret; 1800 1957 } ··· 2008 1827 } 2009 1828 2010 1829 static const struct vm_operations_struct xe_gem_vm_ops = { 2011 - .fault = xe_gem_fault, 1830 + .fault = xe_bo_cpu_fault, 2012 1831 .open = ttm_bo_vm_open, 2013 1832 .close = ttm_bo_vm_close, 2014 1833 .access = xe_bo_vm_access, ··· 2056 1875 kfree(bo); 2057 1876 } 2058 1877 2059 - struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, 2060 - struct xe_tile *tile, struct dma_resv *resv, 2061 - struct ttm_lru_bulk_move *bulk, size_t size, 2062 - u16 cpu_caching, enum ttm_bo_type type, 2063 - u32 flags) 1878 + /** 1879 + * xe_bo_init_locked() - Initialize or create an xe_bo. 1880 + * @xe: The xe device. 1881 + * @bo: An already allocated buffer object or NULL 1882 + * if the function should allocate a new one. 1883 + * @tile: The tile to select for migration of this bo, and the tile used for 1884 + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. 1885 + * @resv: Pointer to a locked shared reservation object to use for this bo, 1886 + * or NULL for the xe_bo to use its own. 1887 + * @bulk: The bulk move to use for LRU bumping, or NULL for external bos. 1888 + * @size: The storage size to use for the bo. 1889 + * @cpu_caching: The cpu caching used for system memory backing store. 1890 + * @type: The TTM buffer object type. 1891 + * @flags: XE_BO_FLAG_ flags. 1892 + * @exec: The drm_exec transaction to use for exhaustive eviction. 1893 + * 1894 + * Initialize or create an xe buffer object. On failure, any allocated buffer 1895 + * object passed in @bo will have been unreferenced. 1896 + * 1897 + * Return: The buffer object on success.
Negative error pointer on failure. 1898 + */ 1899 + struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, 1900 + struct xe_tile *tile, struct dma_resv *resv, 1901 + struct ttm_lru_bulk_move *bulk, size_t size, 1902 + u16 cpu_caching, enum ttm_bo_type type, 1903 + u32 flags, struct drm_exec *exec) 2064 1904 { 2065 1905 struct ttm_operation_ctx ctx = { 2066 1906 .interruptible = true, ··· 2150 1948 ctx.resv = resv; 2151 1949 } 2152 1950 1951 + xe_validation_assert_exec(xe, exec, &bo->ttm.base); 2153 1952 if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) { 2154 1953 err = __xe_bo_placement_for_flags(xe, bo, bo->flags); 2155 1954 if (WARN_ON(err)) { ··· 2252 2049 struct xe_tile *tile, struct xe_vm *vm, 2253 2050 size_t size, u64 start, u64 end, 2254 2051 u16 cpu_caching, enum ttm_bo_type type, u32 flags, 2255 - u64 alignment) 2052 + u64 alignment, struct drm_exec *exec) 2256 2053 { 2257 2054 struct xe_bo *bo = NULL; 2258 2055 int err; ··· 2273 2070 } 2274 2071 } 2275 2072 2276 - bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, 2277 - vm && !xe_vm_in_fault_mode(vm) && 2278 - flags & XE_BO_FLAG_USER ? 2279 - &vm->lru_bulk_move : NULL, size, 2280 - cpu_caching, type, flags); 2073 + bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, 2074 + vm && !xe_vm_in_fault_mode(vm) && 2075 + flags & XE_BO_FLAG_USER ? 
2076 + &vm->lru_bulk_move : NULL, size, 2077 + cpu_caching, type, flags, exec); 2281 2078 if (IS_ERR(bo)) 2282 2079 return bo; 2283 2080 ··· 2311 2108 2312 2109 if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { 2313 2110 err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, 2314 - start + xe_bo_size(bo), U64_MAX); 2111 + start + xe_bo_size(bo), U64_MAX, 2112 + exec); 2315 2113 } else { 2316 - err = xe_ggtt_insert_bo(t->mem.ggtt, bo); 2114 + err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec); 2317 2115 } 2318 2116 if (err) 2319 2117 goto err_unlock_put_bo; ··· 2331 2127 return ERR_PTR(err); 2332 2128 } 2333 2129 2334 - struct xe_bo * 2335 - xe_bo_create_locked_range(struct xe_device *xe, 2336 - struct xe_tile *tile, struct xe_vm *vm, 2337 - size_t size, u64 start, u64 end, 2338 - enum ttm_bo_type type, u32 flags, u64 alignment) 2339 - { 2340 - return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, 2341 - flags, alignment); 2342 - } 2343 - 2130 + /** 2131 + * xe_bo_create_locked() - Create a BO 2132 + * @xe: The xe device. 2133 + * @tile: The tile to select for migration of this bo, and the tile used for 2134 + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. 2135 + * @vm: The local vm or NULL for external objects. 2136 + * @size: The storage size to use for the bo. 2137 + * @type: The TTM buffer object type. 2138 + * @flags: XE_BO_FLAG_ flags. 2139 + * @exec: The drm_exec transaction to use for exhaustive eviction. 2140 + * 2141 + * Create a locked xe BO with no range- nor alignment restrictions. 2142 + * 2143 + * Return: The buffer object on success. Negative error pointer on failure. 
2144 + */ 2344 2145 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, 2345 2146 struct xe_vm *vm, size_t size, 2346 - enum ttm_bo_type type, u32 flags) 2147 + enum ttm_bo_type type, u32 flags, 2148 + struct drm_exec *exec) 2347 2149 { 2348 2150 return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, 2349 - flags, 0); 2151 + flags, 0, exec); 2350 2152 } 2351 2153 2352 - struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, 2154 + static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile, 2155 + size_t size, u16 cpu_caching, 2156 + enum ttm_bo_type type, u32 flags, 2157 + u64 alignment, bool intr) 2158 + { 2159 + struct xe_validation_ctx ctx; 2160 + struct drm_exec exec; 2161 + struct xe_bo *bo; 2162 + int ret = 0; 2163 + 2164 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr}, 2165 + ret) { 2166 + bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL, 2167 + cpu_caching, type, flags, alignment, &exec); 2168 + drm_exec_retry_on_contention(&exec); 2169 + if (IS_ERR(bo)) { 2170 + ret = PTR_ERR(bo); 2171 + xe_validation_retry_on_oom(&ctx, &ret); 2172 + } else { 2173 + xe_bo_unlock(bo); 2174 + } 2175 + } 2176 + 2177 + return ret ? ERR_PTR(ret) : bo; 2178 + } 2179 + 2180 + /** 2181 + * xe_bo_create_user() - Create a user BO 2182 + * @xe: The xe device. 2183 + * @vm: The local vm or NULL for external objects. 2184 + * @size: The storage size to use for the bo. 2185 + * @cpu_caching: The caching mode to be used for system backing store. 2186 + * @flags: XE_BO_FLAG_ flags. 2187 + * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL 2188 + * if such a transaction should be initiated by the call. 2189 + * 2190 + * Create a bo on behalf of user-space. 2191 + * 2192 + * Return: The buffer object on success. Negative error pointer on failure. 
2193 + */ 2194 + struct xe_bo *xe_bo_create_user(struct xe_device *xe, 2353 2195 struct xe_vm *vm, size_t size, 2354 2196 u16 cpu_caching, 2355 - u32 flags) 2197 + u32 flags, struct drm_exec *exec) 2356 2198 { 2357 - struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 2358 - cpu_caching, ttm_bo_type_device, 2359 - flags | XE_BO_FLAG_USER, 0); 2360 - if (!IS_ERR(bo)) 2361 - xe_bo_unlock_vm_held(bo); 2199 + struct xe_bo *bo; 2200 + 2201 + flags |= XE_BO_FLAG_USER; 2202 + 2203 + if (vm || exec) { 2204 + xe_assert(xe, exec); 2205 + bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL, 2206 + cpu_caching, ttm_bo_type_device, 2207 + flags, 0, exec); 2208 + if (!IS_ERR(bo)) 2209 + xe_bo_unlock_vm_held(bo); 2210 + } else { 2211 + bo = xe_bo_create_novm(xe, NULL, size, cpu_caching, 2212 + ttm_bo_type_device, flags, 0, true); 2213 + } 2362 2214 2363 2215 return bo; 2364 2216 } 2365 2217 2366 - struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, 2367 - struct xe_vm *vm, size_t size, 2368 - enum ttm_bo_type type, u32 flags) 2218 + /** 2219 + * xe_bo_create_pin_range_novm() - Create and pin a BO with range options. 2220 + * @xe: The xe device. 2221 + * @tile: The tile to select for migration of this bo, and the tile used for 2222 + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. 2223 + * @size: The storage size to use for the bo. 2224 + * @start: Start of fixed VRAM range or 0. 2225 + * @end: End of fixed VRAM range or ~0ULL. 2226 + * @type: The TTM buffer object type. 2227 + * @flags: XE_BO_FLAG_ flags. 2228 + * 2229 + * Create an Xe BO with range- and options. If @start and @end indicate 2230 + * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM placement 2231 + * only. 2232 + * 2233 + * Return: The buffer object on success. Negative error pointer on failure. 
2234 + */ 2235 + struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile, 2236 + size_t size, u64 start, u64 end, 2237 + enum ttm_bo_type type, u32 flags) 2369 2238 { 2370 - struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags); 2239 + struct xe_validation_ctx ctx; 2240 + struct drm_exec exec; 2241 + struct xe_bo *bo; 2242 + int err = 0; 2371 2243 2372 - if (!IS_ERR(bo)) 2373 - xe_bo_unlock_vm_held(bo); 2244 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { 2245 + bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end, 2246 + 0, type, flags, 0, &exec); 2247 + if (IS_ERR(bo)) { 2248 + drm_exec_retry_on_contention(&exec); 2249 + err = PTR_ERR(bo); 2250 + xe_validation_retry_on_oom(&ctx, &err); 2251 + break; 2252 + } 2374 2253 2375 - return bo; 2254 + err = xe_bo_pin(bo, &exec); 2255 + xe_bo_unlock(bo); 2256 + if (err) { 2257 + xe_bo_put(bo); 2258 + drm_exec_retry_on_contention(&exec); 2259 + xe_validation_retry_on_oom(&ctx, &err); 2260 + break; 2261 + } 2262 + } 2263 + 2264 + return err ? 
ERR_PTR(err) : bo; 2376 2265 } 2377 2266 2378 - struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, 2379 - struct xe_vm *vm, 2380 - size_t size, u64 offset, 2381 - enum ttm_bo_type type, u32 flags) 2382 - { 2383 - return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset, 2384 - type, flags, 0); 2385 - } 2386 - 2387 - struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, 2388 - struct xe_tile *tile, 2389 - struct xe_vm *vm, 2390 - size_t size, u64 offset, 2391 - enum ttm_bo_type type, u32 flags, 2392 - u64 alignment) 2267 + static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, 2268 + struct xe_tile *tile, 2269 + struct xe_vm *vm, 2270 + size_t size, u64 offset, 2271 + enum ttm_bo_type type, u32 flags, 2272 + u64 alignment, struct drm_exec *exec) 2393 2273 { 2394 2274 struct xe_bo *bo; 2395 2275 int err; 2396 2276 u64 start = offset == ~0ull ? 0 : offset; 2397 - u64 end = offset == ~0ull ? offset : start + size; 2277 + u64 end = offset == ~0ull ? ~0ull : start + size; 2398 2278 2399 2279 if (flags & XE_BO_FLAG_STOLEN && 2400 2280 xe_ttm_stolen_cpu_access_needs_ggtt(xe)) 2401 2281 flags |= XE_BO_FLAG_GGTT; 2402 2282 2403 - bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, 2404 - flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, 2405 - alignment); 2283 + bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, 2284 + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, 2285 + alignment, exec); 2406 2286 if (IS_ERR(bo)) 2407 2287 return bo; 2408 2288 2409 - err = xe_bo_pin(bo); 2289 + err = xe_bo_pin(bo, exec); 2410 2290 if (err) 2411 2291 goto err_put; 2412 2292 ··· 2510 2222 return ERR_PTR(err); 2511 2223 } 2512 2224 2225 + /** 2226 + * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset 2227 + * @xe: The xe device. 
2228 + * @tile: The tile to select for migration of this bo, and the tile used for 2229 + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. 2230 + * @size: The storage size to use for the bo. 2231 + * @offset: Optional VRAM offset or %~0ull for don't care. 2232 + * @type: The TTM buffer object type. 2233 + * @flags: XE_BO_FLAG_ flags. 2234 + * @alignment: GGTT alignment. 2235 + * @intr: Whether to execute any waits for backing store interruptibly. 2236 + * 2237 + * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment 2238 + * options. The bo will be external and not associated with a VM. 2239 + * 2240 + * Return: The buffer object on success. Negative error pointer on failure. 2241 + * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set 2242 + * to true on entry. 2243 + */ 2244 + struct xe_bo * 2245 + xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile, 2246 + size_t size, u64 offset, enum ttm_bo_type type, u32 flags, 2247 + u64 alignment, bool intr) 2248 + { 2249 + struct xe_validation_ctx ctx; 2250 + struct drm_exec exec; 2251 + struct xe_bo *bo; 2252 + int ret = 0; 2253 + 2254 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr}, 2255 + ret) { 2256 + bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset, 2257 + type, flags, alignment, &exec); 2258 + if (IS_ERR(bo)) { 2259 + drm_exec_retry_on_contention(&exec); 2260 + ret = PTR_ERR(bo); 2261 + xe_validation_retry_on_oom(&ctx, &ret); 2262 + } 2263 + } 2264 + 2265 + return ret ? ERR_PTR(ret) : bo; 2266 + } 2267 + 2268 + /** 2269 + * xe_bo_create_pin_map() - Create pinned and mapped bo 2270 + * @xe: The xe device. 2271 + * @tile: The tile to select for migration of this bo, and the tile used for 2272 + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. 2273 + * @vm: The vm to associate the buffer object with. The vm's resv must be locked 2274 + * with the transaction represented by @exec. 2275 + * @size: The storage size to use for the bo. 2276 + * @type: The TTM buffer object type. 2277 + * @flags: XE_BO_FLAG_ flags. 2278 + * @exec: The drm_exec transaction to use for exhaustive eviction, and 2279 + * previously used for locking @vm's resv. 2280 + * 2281 + * Create a pinned and mapped bo, associated with @vm's reservation object 2282 + * if @vm is non-NULL. 2283 + * 2284 + * Return: The buffer object on success. Negative error pointer on failure. 2285 + * In particular, the function may return ERR_PTR(%-EINTR) if @exec was 2286 + * configured for interruptible locking. 2287 + */ 2513 2288 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, 2514 2289 struct xe_vm *vm, size_t size, 2515 - enum ttm_bo_type type, u32 flags) 2290 + enum ttm_bo_type type, u32 flags, 2291 + struct drm_exec *exec) 2516 2292 { 2517 - return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags); 2293 + return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags, 2294 + 0, exec); 2295 + } 2296 + 2297 + /** 2298 + * xe_bo_create_pin_map_novm() - Create pinned and mapped bo 2299 + * @xe: The xe device. 2300 + * @tile: The tile to select for migration of this bo, and the tile used for 2301 + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. 2302 + * @size: The storage size to use for the bo. 2303 + * @type: The TTM buffer object type. 2304 + * @flags: XE_BO_FLAG_ flags. 2305 + * @intr: Whether to execute any waits for backing store interruptibly. 2306 + * 2307 + * Create a pinned and mapped bo. The bo will be external and not associated 2308 + * with a VM. 2309 + * 2310 + * Return: The buffer object on success. Negative error pointer on failure. 2311 + * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set 2312 + * to true on entry.
2313 + */ 2314 + struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile, 2315 + size_t size, enum ttm_bo_type type, u32 flags, 2316 + bool intr) 2317 + { 2318 + return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr); 2518 2319 } 2519 2320 2520 2321 static void __xe_bo_unpin_map_no_vm(void *arg) ··· 2618 2241 int ret; 2619 2242 2620 2243 KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags); 2621 - 2622 - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags); 2244 + bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true); 2623 2245 if (IS_ERR(bo)) 2624 2246 return bo; 2625 2247 ··· 2627 2251 return ERR_PTR(ret); 2628 2252 2629 2253 return bo; 2254 + } 2255 + 2256 + void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo) 2257 + { 2258 + devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo); 2630 2259 } 2631 2260 2632 2261 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, ··· 2706 2325 * xe_bo_pin_external - pin an external BO 2707 2326 * @bo: buffer object to be pinned 2708 2327 * @in_place: Pin in current placement, don't attempt to migrate. 2328 + * @exec: The drm_exec transaction to use for exhaustive eviction. 2709 2329 * 2710 2330 * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD) 2711 2331 * BO. Unique call compared to xe_bo_pin as this function has it own set of ··· 2714 2332 * 2715 2333 * Returns 0 for success, negative error code otherwise. 
2716 2334 */ 2717 - int xe_bo_pin_external(struct xe_bo *bo, bool in_place) 2335 + int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec) 2718 2336 { 2719 2337 struct xe_device *xe = xe_bo_device(bo); 2720 2338 int err; ··· 2724 2342 2725 2343 if (!xe_bo_is_pinned(bo)) { 2726 2344 if (!in_place) { 2727 - err = xe_bo_validate(bo, NULL, false); 2345 + err = xe_bo_validate(bo, NULL, false, exec); 2728 2346 if (err) 2729 2347 return err; 2730 2348 } ··· 2747 2365 return 0; 2748 2366 } 2749 2367 2750 - int xe_bo_pin(struct xe_bo *bo) 2368 + /** 2369 + * xe_bo_pin() - Pin a kernel bo after potentially migrating it 2370 + * @bo: The kernel bo to pin. 2371 + * @exec: The drm_exec transaction to use for exhaustive eviction. 2372 + * 2373 + * Attempts to migrate a bo to @bo->placement. If that succeeds, 2374 + * pins the bo. 2375 + * 2376 + * Return: %0 on success, negative error code on migration failure. 2377 + */ 2378 + int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec) 2751 2379 { 2752 2380 struct ttm_place *place = &bo->placements[0]; 2753 2381 struct xe_device *xe = xe_bo_device(bo); ··· 2779 2387 /* We only expect at most 1 pin */ 2780 2388 xe_assert(xe, !xe_bo_is_pinned(bo)); 2781 2389 2782 - err = xe_bo_validate(bo, NULL, false); 2390 + err = xe_bo_validate(bo, NULL, false, exec); 2783 2391 if (err) 2784 2392 return err; 2785 2393 ··· 2872 2480 * NULL. Used together with @allow_res_evict. 2873 2481 * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's 2874 2482 * reservation object. 2483 + * @exec: The drm_exec transaction to use for exhaustive eviction. 2875 2484 * 2876 2485 * Make sure the bo is in allowed placement, migrating it if necessary. If 2877 2486 * needed, other bos will be evicted. If bos selected for eviction share ··· 2882 2489 * Return: 0 on success, negative error code on failure. May return 2883 2490 * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2884 2491 */ 2885 - int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) 2492 + int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict, 2493 + struct drm_exec *exec) 2886 2494 { 2887 2495 struct ttm_operation_ctx ctx = { 2888 2496 .interruptible = true, ··· 2905 2511 2906 2512 xe_vm_set_validating(vm, allow_res_evict); 2907 2513 trace_xe_bo_validate(bo); 2514 + xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base); 2908 2515 ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); 2909 2516 xe_vm_clear_validating(vm, allow_res_evict); 2910 2517 ··· 3101 2706 struct xe_device *xe = to_xe_device(dev); 3102 2707 struct xe_file *xef = to_xe_file(file); 3103 2708 struct drm_xe_gem_create *args = data; 2709 + struct xe_validation_ctx ctx; 2710 + struct drm_exec exec; 3104 2711 struct xe_vm *vm = NULL; 3105 - ktime_t end = 0; 3106 2712 struct xe_bo *bo; 3107 2713 unsigned int bo_flags; 3108 2714 u32 handle; ··· 3177 2781 return -ENOENT; 3178 2782 } 3179 2783 3180 - retry: 3181 - if (vm) { 3182 - err = xe_vm_lock(vm, true); 3183 - if (err) 3184 - goto out_vm; 2784 + err = 0; 2785 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, 2786 + err) { 2787 + if (vm) { 2788 + err = xe_vm_drm_exec_lock(vm, &exec); 2789 + drm_exec_retry_on_contention(&exec); 2790 + if (err) 2791 + break; 2792 + } 2793 + bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching, 2794 + bo_flags, &exec); 2795 + drm_exec_retry_on_contention(&exec); 2796 + if (IS_ERR(bo)) { 2797 + err = PTR_ERR(bo); 2798 + xe_validation_retry_on_oom(&ctx, &err); 2799 + break; 2800 + } 3185 2801 } 3186 - 3187 - bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, 3188 - bo_flags); 3189 - 3190 - if (vm) 3191 - xe_vm_unlock(vm); 3192 - 3193 - if (IS_ERR(bo)) { 3194 - err = PTR_ERR(bo); 3195 - if (xe_vm_validate_should_retry(NULL, err, &end)) 3196 - goto retry; 2802 + if (err) 3197 2803 goto out_vm; 3198 - } 3199 
2804 3200 2805 if (args->extensions) { 3201 2806 err = gem_create_user_extensions(xe, bo, args->extensions, 0); ··· 3345 2948 * xe_bo_migrate - Migrate an object to the desired region id 3346 2949 * @bo: The buffer object to migrate. 3347 2950 * @mem_type: The TTM region type to migrate to. 2951 + * @tctx: A pointer to a struct ttm_operation_ctx or NULL if 2952 + * a default interruptible ctx is to be used. 2953 + * @exec: The drm_exec transaction to use for exhaustive eviction. 3348 2954 * 3349 2955 * Attempt to migrate the buffer object to the desired memory region. The 3350 2956 * buffer object may not be pinned, and must be locked. ··· 3359 2959 * Return: 0 on success. Negative error code on failure. In particular may 3360 2960 * return -EINTR or -ERESTARTSYS if signal pending. 3361 2961 */ 3362 - int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) 2962 + int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx, 2963 + struct drm_exec *exec) 3363 2964 { 3364 2965 struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); 3365 2966 struct ttm_operation_ctx ctx = { ··· 3372 2971 struct ttm_place requested; 3373 2972 3374 2973 xe_bo_assert_held(bo); 2974 + tctx = tctx ? tctx : &ctx; 3375 2975 3376 2976 if (bo->ttm.resource->mem_type == mem_type) 3377 2977 return 0; ··· 3399 2997 add_vram(xe, bo, &requested, bo->flags, mem_type, &c); 3400 2998 } 3401 2999 3402 - return ttm_bo_validate(&bo->ttm, &placement, &ctx); 3000 + if (!tctx->no_wait_gpu) 3001 + xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base); 3002 + return ttm_bo_validate(&bo->ttm, &placement, tctx); 3403 3003 } 3404 3004 3405 3005 /** 3406 3006 * xe_bo_evict - Evict an object to evict placement 3407 3007 * @bo: The buffer object to migrate. 3008 + * @exec: The drm_exec transaction to use for exhaustive eviction. 3408 3009 * 3409 3010 * On successful completion, the object memory will be moved to evict 3410 3011 * placement.
This function blocks until the object has been fully moved. 3411 3012 * 3412 3013 * Return: 0 on success. Negative error code on failure. 3413 3014 */ 3414 - int xe_bo_evict(struct xe_bo *bo) 3015 + int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec) 3415 3016 { 3416 3017 struct ttm_operation_ctx ctx = { 3417 3018 .interruptible = false, ··· 3574 3169 args->size = ALIGN(mul_u32_u32(args->pitch, args->height), 3575 3170 page_size); 3576 3171 3577 - bo = xe_bo_create_user(xe, NULL, NULL, args->size, 3172 + bo = xe_bo_create_user(xe, NULL, args->size, 3578 3173 DRM_XE_GEM_CPU_CACHING_WC, 3579 3174 XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | 3580 3175 XE_BO_FLAG_SCANOUT | 3581 - XE_BO_FLAG_NEEDS_CPU_ACCESS); 3176 + XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL); 3582 3177 if (IS_ERR(bo)) 3583 3178 return PTR_ERR(bo); 3584 3179
+45 -33
drivers/gpu/drm/xe/xe_bo.h
··· 10 10 11 11 #include "xe_bo_types.h" 12 12 #include "xe_macros.h" 13 + #include "xe_validation.h" 13 14 #include "xe_vm_types.h" 14 15 #include "xe_vm.h" 15 16 #include "xe_vram_types.h" ··· 89 88 struct xe_bo *xe_bo_alloc(void); 90 89 void xe_bo_free(struct xe_bo *bo); 91 90 92 - struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, 93 - struct xe_tile *tile, struct dma_resv *resv, 94 - struct ttm_lru_bulk_move *bulk, size_t size, 95 - u16 cpu_caching, enum ttm_bo_type type, 96 - u32 flags); 97 - struct xe_bo * 98 - xe_bo_create_locked_range(struct xe_device *xe, 99 - struct xe_tile *tile, struct xe_vm *vm, 100 - size_t size, u64 start, u64 end, 101 - enum ttm_bo_type type, u32 flags, u64 alignment); 91 + struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, 92 + struct xe_tile *tile, struct dma_resv *resv, 93 + struct ttm_lru_bulk_move *bulk, size_t size, 94 + u16 cpu_caching, enum ttm_bo_type type, 95 + u32 flags, struct drm_exec *exec); 102 96 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, 103 97 struct xe_vm *vm, size_t size, 104 - enum ttm_bo_type type, u32 flags); 105 - struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, 106 - struct xe_vm *vm, size_t size, 107 - enum ttm_bo_type type, u32 flags); 108 - struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, 109 - struct xe_vm *vm, size_t size, 110 - u16 cpu_caching, 111 - u32 flags); 98 + enum ttm_bo_type type, u32 flags, 99 + struct drm_exec *exec); 100 + struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_vm *vm, size_t size, 101 + u16 cpu_caching, u32 flags, struct drm_exec *exec); 112 102 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, 113 103 struct xe_vm *vm, size_t size, 114 - enum ttm_bo_type type, u32 flags); 115 - struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, 116 - struct xe_vm *vm, size_t size, u64 offset, 
117 - enum ttm_bo_type type, u32 flags); 118 - struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, 119 - struct xe_tile *tile, 120 - struct xe_vm *vm, 121 - size_t size, u64 offset, 122 - enum ttm_bo_type type, u32 flags, 123 - u64 alignment); 104 + enum ttm_bo_type type, u32 flags, 105 + struct drm_exec *exec); 106 + struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile, 107 + size_t size, enum ttm_bo_type type, u32 flags, 108 + bool intr); 109 + struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile, 110 + size_t size, u64 start, u64 end, 111 + enum ttm_bo_type type, u32 flags); 112 + struct xe_bo * 113 + xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile, 114 + size_t size, u64 offset, enum ttm_bo_type type, 115 + u32 flags, u64 alignment, bool intr); 124 116 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, 125 117 size_t size, u32 flags); 118 + void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo); 126 119 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, 127 120 const void *data, size_t size, u32 flags); 128 121 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src); ··· 195 200 } 196 201 } 197 202 198 - int xe_bo_pin_external(struct xe_bo *bo, bool in_place); 199 - int xe_bo_pin(struct xe_bo *bo); 203 + int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec); 204 + int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec); 200 205 void xe_bo_unpin_external(struct xe_bo *bo); 201 206 void xe_bo_unpin(struct xe_bo *bo); 202 - int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict); 207 + int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict, 208 + struct drm_exec *exec); 203 209 204 210 static inline bool xe_bo_is_pinned(struct xe_bo *bo) 205 211 { ··· 281 285 282 286 bool 
xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); 283 287 284 - int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); 285 - int xe_bo_evict(struct xe_bo *bo); 288 + int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx, 289 + struct drm_exec *exec); 290 + int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec); 286 291 287 292 int xe_bo_evict_pinned(struct xe_bo *bo); 288 293 int xe_bo_notifier_prepare_pinned(struct xe_bo *bo); ··· 310 313 static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) 311 314 { 312 315 return PAGE_ALIGN(xe_bo_size(bo)); 316 + } 317 + 318 + /** 319 + * xe_bo_has_valid_ccs_bb - Check if CCS's BBs were setup for the BO. 320 + * @bo: the &xe_bo to check 321 + * 322 + * The CCS's BBs should only be setup by the VF driver, but it is safe 323 + * to call this function also from a non-VF driver. 324 + * 325 + * Return: true iff the CCS's BBs are setup, false otherwise. 326 + */ 327 + static inline bool xe_bo_has_valid_ccs_bb(struct xe_bo *bo) 328 + { 329 + return bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && 330 + bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; 313 331 } 314 332 315 333 static inline bool xe_bo_has_pages(struct xe_bo *bo)
+9 -6
drivers/gpu/drm/xe/xe_bo_types.h
··· 25 25 /* TODO: To be selected with VM_MADVISE */ 26 26 #define XE_BO_PRIORITY_NORMAL 1 27 27 28 - /** @xe_bo: XE buffer object */ 28 + /** 29 + * struct xe_bo - Xe buffer object 30 + */ 29 31 struct xe_bo { 30 32 /** @ttm: TTM base buffer object */ 31 33 struct ttm_buffer_object ttm; ··· 49 47 struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE]; 50 48 /** @vmap: iosys map of this buffer */ 51 49 struct iosys_map vmap; 52 - /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ 50 + /** @kmap: TTM bo kmap object for internal use only. Keep off. */ 53 51 struct ttm_bo_kmap_obj kmap; 54 52 /** @pinned_link: link to present / evicted list of pinned BO */ 55 53 struct list_head pinned_link; ··· 84 82 /** @created: Whether the bo has passed initial creation */ 85 83 bool created; 86 84 87 - /** @ccs_cleared */ 85 + /** @ccs_cleared: true means that CCS region of BO is already cleared */ 88 86 bool ccs_cleared; 89 87 90 - /** @bb_ccs_rw: BB instructions of CCS read/write. Valid only for VF */ 88 + /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */ 91 89 struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; 92 90 93 91 /** ··· 101 99 struct drm_pagemap_devmem devmem_allocation; 102 100 103 101 /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ 104 - struct list_head vram_userfault_link; 102 + struct list_head vram_userfault_link; 105 103 106 - /** @min_align: minimum alignment needed for this BO if different 104 + /** 105 + * @min_align: minimum alignment needed for this BO if different 107 106 * from default 108 107 */ 109 108 u64 min_align;
+422 -46
drivers/gpu/drm/xe/xe_configfs.c
··· 4 4 */ 5 5 6 6 #include <linux/bitops.h> 7 + #include <linux/ctype.h> 7 8 #include <linux/configfs.h> 8 9 #include <linux/cleanup.h> 9 10 #include <linux/find.h> ··· 13 12 #include <linux/pci.h> 14 13 #include <linux/string.h> 15 14 15 + #include "instructions/xe_mi_commands.h" 16 16 #include "xe_configfs.h" 17 17 #include "xe_hw_engine_types.h" 18 18 #include "xe_module.h" ··· 23 21 * DOC: Xe Configfs 24 22 * 25 23 * Overview 26 - * ========= 24 + * ======== 27 25 * 28 26 * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a 29 27 * configfs subsystem called ``xe`` that creates a directory in the mounted ··· 36 34 * 37 35 * To create a device, the ``xe`` module should already be loaded, but some 38 36 * attributes can only be set before binding the device. It can be accomplished 39 - * by blocking the driver autoprobe: 37 + * by blocking the driver autoprobe:: 40 38 * 41 39 * # echo 0 > /sys/bus/pci/drivers_autoprobe 42 40 * # modprobe xe ··· 117 115 * 118 116 * This attribute can only be set before binding to the device. 119 117 * 118 + * Context restore BB 119 + * ------------------ 120 + * 121 + * Allow to execute a batch buffer during any context switches. When the 122 + * GPU is restoring the context, it executes additional commands. It's useful 123 + * for testing additional workarounds and validating certain HW behaviors: it's 124 + * not intended for normal execution and will taint the kernel with TAINT_TEST 125 + * when used. 126 + * 127 + * Currently this is implemented only for post and mid context restore. 128 + * Examples: 129 + * 130 + * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 after the 131 + * normal context restore:: 132 + * 133 + * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \ 134 + * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb 135 + * 136 + * #. 
Execute a LRI command to write 0xDEADBEEF to register 0x4f10 at the 137 + * beginning of the context restore:: 138 + * 139 + * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \ 140 + * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_mid_bb 141 + 142 + * #. Load certain values in a couple of registers (it can be used as a simpler 143 + * alternative to the `cmd`) action:: 144 + * 145 + * # cat > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb <<EOF 146 + * rcs reg 4F100 DEADBEEF 147 + * rcs reg 4F104 FFFFFFFF 148 + * EOF 149 + * 150 + * .. note:: 151 + * 152 + * When using multiple lines, make sure to use a command that is 153 + * implemented with a single write syscall, like HEREDOC. 154 + * 155 + * These attributes can only be set before binding to the device. 156 + * 120 157 * Remove devices 121 158 * ============== 122 159 * ··· 164 123 * # rmdir /sys/kernel/config/xe/0000:03:00.0/ 165 124 */ 166 125 126 + /* Similar to struct xe_bb, but not tied to HW (yet) */ 127 + struct wa_bb { 128 + u32 *cs; 129 + u32 len; /* in dwords */ 130 + }; 131 + 167 132 struct xe_config_group_device { 168 133 struct config_group group; 169 134 170 135 struct xe_config_device { 171 136 u64 engines_allowed; 137 + struct wa_bb ctx_restore_post_bb[XE_ENGINE_CLASS_MAX]; 138 + struct wa_bb ctx_restore_mid_bb[XE_ENGINE_CLASS_MAX]; 172 139 bool survivability_mode; 173 140 bool enable_psmi; 174 141 } config; 175 142 176 143 /* protects attributes */ 177 144 struct mutex lock; 145 + /* matching descriptor */ 146 + const struct xe_device_desc *desc; 178 147 }; 179 148 180 149 static const struct xe_config_device device_defaults = { ··· 201 150 struct engine_info { 202 151 const char *cls; 203 152 u64 mask; 153 + enum xe_engine_class engine_class; 204 154 }; 205 155 206 156 /* Some helpful macros to aid on the sizing of buffer allocation when parsing */ ··· 209 157 #define MAX_ENGINE_INSTANCE_CHARS 2 210 158 211 159 static const struct engine_info engine_info[] = { 212 - { .cls = "rcs", .mask 
= XE_HW_ENGINE_RCS_MASK }, 213 - { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK }, 214 - { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK }, 215 - { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK }, 216 - { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK }, 217 - { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK }, 160 + { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER }, 161 + { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK, .engine_class = XE_ENGINE_CLASS_COPY }, 162 + { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_DECODE }, 163 + { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_ENHANCE }, 164 + { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK, .engine_class = XE_ENGINE_CLASS_COMPUTE }, 165 + { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK, .engine_class = XE_ENGINE_CLASS_OTHER }, 218 166 }; 219 167 220 168 static struct xe_config_group_device *to_xe_config_group_device(struct config_item *item) ··· 303 251 return p - page; 304 252 } 305 253 306 - static bool lookup_engine_mask(const char *pattern, u64 *mask) 254 + /* 255 + * Lookup engine_info. If @mask is not NULL, reduce the mask according to the 256 + * instance in @pattern. 
257 + * 258 + * Examples of inputs: 259 + * - lookup_engine_info("rcs0", &mask): return "rcs" entry from @engine_info and 260 + * mask == BIT_ULL(XE_HW_ENGINE_RCS0) 261 + * - lookup_engine_info("rcs*", &mask): return "rcs" entry from @engine_info and 262 + * mask == XE_HW_ENGINE_RCS_MASK 263 + * - lookup_engine_info("rcs", NULL): return "rcs" entry from @engine_info 264 + */ 265 + static const struct engine_info *lookup_engine_info(const char *pattern, u64 *mask) 307 266 { 308 267 for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { 309 268 u8 instance; ··· 324 261 continue; 325 262 326 263 pattern += strlen(engine_info[i].cls); 264 + if (!mask && !*pattern) 265 + return &engine_info[i]; 327 266 328 267 if (!strcmp(pattern, "*")) { 329 268 *mask = engine_info[i].mask; 330 - return true; 269 + return &engine_info[i]; 331 270 } 332 271 333 272 if (kstrtou8(pattern, 10, &instance)) 334 - return false; 273 + return NULL; 335 274 336 275 bit = __ffs64(engine_info[i].mask) + instance; 337 276 if (bit >= fls64(engine_info[i].mask)) 338 - return false; 277 + return NULL; 339 278 340 279 *mask = BIT_ULL(bit); 341 - return true; 280 + return &engine_info[i]; 342 281 } 343 282 344 - return false; 283 + return NULL; 284 + } 285 + 286 + static int parse_engine(const char *s, const char *end_chars, u64 *mask, 287 + const struct engine_info **pinfo) 288 + { 289 + char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; 290 + const struct engine_info *info; 291 + size_t len; 292 + 293 + len = strcspn(s, end_chars); 294 + if (len >= sizeof(buf)) 295 + return -EINVAL; 296 + 297 + memcpy(buf, s, len); 298 + buf[len] = '\0'; 299 + 300 + info = lookup_engine_info(buf, mask); 301 + if (!info) 302 + return -ENOENT; 303 + 304 + if (pinfo) 305 + *pinfo = info; 306 + 307 + return len; 345 308 } 346 309 347 310 static ssize_t engines_allowed_store(struct config_item *item, const char *page, 348 311 size_t len) 349 312 { 350 313 struct xe_config_group_device *dev = 
to_xe_config_group_device(item); 351 - size_t patternlen, p; 314 + ssize_t patternlen, p; 352 315 u64 mask, val = 0; 353 316 354 317 for (p = 0; p < len; p += patternlen + 1) { 355 - char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; 356 - 357 - patternlen = strcspn(page + p, ",\n"); 358 - if (patternlen >= sizeof(buf)) 359 - return -EINVAL; 360 - 361 - memcpy(buf, page + p, patternlen); 362 - buf[patternlen] = '\0'; 363 - 364 - if (!lookup_engine_mask(buf, &mask)) 318 + patternlen = parse_engine(page + p, ",\n", &mask, NULL); 319 + if (patternlen < 0) 365 320 return -EINVAL; 366 321 367 322 val |= mask; ··· 420 339 return len; 421 340 } 422 341 342 + static bool wa_bb_read_advance(bool dereference, char **p, 343 + const char *append, size_t len, 344 + size_t *max_size) 345 + { 346 + if (dereference) { 347 + if (len >= *max_size) 348 + return false; 349 + *max_size -= len; 350 + if (append) 351 + memcpy(*p, append, len); 352 + } 353 + 354 + *p += len; 355 + 356 + return true; 357 + } 358 + 359 + static ssize_t wa_bb_show(struct xe_config_group_device *dev, 360 + struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX], 361 + char *data, size_t sz) 362 + { 363 + char *p = data; 364 + 365 + guard(mutex)(&dev->lock); 366 + 367 + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { 368 + enum xe_engine_class ec = engine_info[i].engine_class; 369 + size_t len; 370 + 371 + if (!wa_bb[ec].len) 372 + continue; 373 + 374 + len = snprintf(p, sz, "%s:", engine_info[i].cls); 375 + if (!wa_bb_read_advance(data, &p, NULL, len, &sz)) 376 + return -ENOBUFS; 377 + 378 + for (size_t j = 0; j < wa_bb[ec].len; j++) { 379 + len = snprintf(p, sz, " %08x", wa_bb[ec].cs[j]); 380 + if (!wa_bb_read_advance(data, &p, NULL, len, &sz)) 381 + return -ENOBUFS; 382 + } 383 + 384 + if (!wa_bb_read_advance(data, &p, "\n", 1, &sz)) 385 + return -ENOBUFS; 386 + } 387 + 388 + if (!wa_bb_read_advance(data, &p, "", 1, &sz)) 389 + return -ENOBUFS; 390 + 391 + /* Reserve one more to match check for 
'\0' */ 392 + if (!data) 393 + p++; 394 + 395 + return p - data; 396 + } 397 + 398 + static ssize_t ctx_restore_mid_bb_show(struct config_item *item, char *page) 399 + { 400 + struct xe_config_group_device *dev = to_xe_config_group_device(item); 401 + 402 + return wa_bb_show(dev, dev->config.ctx_restore_mid_bb, page, SZ_4K); 403 + } 404 + 405 + static ssize_t ctx_restore_post_bb_show(struct config_item *item, char *page) 406 + { 407 + struct xe_config_group_device *dev = to_xe_config_group_device(item); 408 + 409 + return wa_bb_show(dev, dev->config.ctx_restore_post_bb, page, SZ_4K); 410 + } 411 + 412 + static void wa_bb_append(struct wa_bb *wa_bb, u32 val) 413 + { 414 + if (wa_bb->cs) 415 + wa_bb->cs[wa_bb->len] = val; 416 + 417 + wa_bb->len++; 418 + } 419 + 420 + static ssize_t parse_hex(const char *line, u32 *pval) 421 + { 422 + char numstr[12]; 423 + const char *p; 424 + ssize_t numlen; 425 + 426 + p = line + strspn(line, " \t"); 427 + if (!*p || *p == '\n') 428 + return 0; 429 + 430 + numlen = strcspn(p, " \t\n"); 431 + if (!numlen || numlen >= sizeof(numstr) - 1) 432 + return -EINVAL; 433 + 434 + memcpy(numstr, p, numlen); 435 + numstr[numlen] = '\0'; 436 + p += numlen; 437 + 438 + if (kstrtou32(numstr, 16, pval)) 439 + return -EINVAL; 440 + 441 + return p - line; 442 + } 443 + 444 + /* 445 + * Parse lines with the format 446 + * 447 + * <engine-class> cmd <u32> <u32...> 448 + * <engine-class> reg <u32_addr> <u32_val> 449 + * 450 + * and optionally save them in @wa_bb if @wa_bb[i].cs is non-NULL. 451 + * 452 + * Return the number of dwords parsed. 
453 + */ 454 + static ssize_t parse_wa_bb_lines(const char *lines, 455 + struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX]) 456 + { 457 + ssize_t dwords = 0, ret; 458 + const char *p; 459 + 460 + for (p = lines; *p; p++) { 461 + const struct engine_info *info = NULL; 462 + u32 val, val2; 463 + 464 + /* Also allow empty lines */ 465 + p += strspn(p, " \t\n"); 466 + if (!*p) 467 + break; 468 + 469 + ret = parse_engine(p, " \t\n", NULL, &info); 470 + if (ret < 0) 471 + return ret; 472 + 473 + p += ret; 474 + p += strspn(p, " \t"); 475 + 476 + if (str_has_prefix(p, "cmd")) { 477 + for (p += strlen("cmd"); *p;) { 478 + ret = parse_hex(p, &val); 479 + if (ret < 0) 480 + return -EINVAL; 481 + if (!ret) 482 + break; 483 + 484 + p += ret; 485 + dwords++; 486 + wa_bb_append(&wa_bb[info->engine_class], val); 487 + } 488 + } else if (str_has_prefix(p, "reg")) { 489 + p += strlen("reg"); 490 + ret = parse_hex(p, &val); 491 + if (ret <= 0) 492 + return -EINVAL; 493 + 494 + p += ret; 495 + ret = parse_hex(p, &val2); 496 + if (ret <= 0) 497 + return -EINVAL; 498 + 499 + p += ret; 500 + dwords += 3; 501 + wa_bb_append(&wa_bb[info->engine_class], 502 + MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1)); 503 + wa_bb_append(&wa_bb[info->engine_class], val); 504 + wa_bb_append(&wa_bb[info->engine_class], val2); 505 + } else { 506 + return -EINVAL; 507 + } 508 + } 509 + 510 + return dwords; 511 + } 512 + 513 + static ssize_t wa_bb_store(struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX], 514 + struct xe_config_group_device *dev, 515 + const char *page, size_t len) 516 + { 517 + /* tmp_wa_bb must match wa_bb's size */ 518 + struct wa_bb tmp_wa_bb[XE_ENGINE_CLASS_MAX] = { }; 519 + ssize_t count, class; 520 + u32 *tmp; 521 + 522 + /* 1. Count dwords - wa_bb[i].cs is NULL for all classes */ 523 + count = parse_wa_bb_lines(page, tmp_wa_bb); 524 + if (count < 0) 525 + return count; 526 + 527 + guard(mutex)(&dev->lock); 528 + 529 + if (is_bound(dev)) 530 + return -EBUSY; 531 + 532 + /* 533 + * 2. 
Allocate a u32 array and set the pointers to the right positions 534 + * according to the length of each class' wa_bb 535 + */ 536 + tmp = krealloc(wa_bb[0].cs, count * sizeof(u32), GFP_KERNEL); 537 + if (!tmp) 538 + return -ENOMEM; 539 + 540 + if (!count) { 541 + memset(wa_bb, 0, sizeof(tmp_wa_bb)); 542 + return len; 543 + } 544 + 545 + for (class = 0, count = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 546 + tmp_wa_bb[class].cs = tmp + count; 547 + count += tmp_wa_bb[class].len; 548 + tmp_wa_bb[class].len = 0; 549 + } 550 + 551 + /* 3. Parse wa_bb lines again, this time saving the values */ 552 + count = parse_wa_bb_lines(page, tmp_wa_bb); 553 + if (count < 0) 554 + return count; 555 + 556 + memcpy(wa_bb, tmp_wa_bb, sizeof(tmp_wa_bb)); 557 + 558 + return len; 559 + } 560 + 561 + static ssize_t ctx_restore_mid_bb_store(struct config_item *item, 562 + const char *data, size_t sz) 563 + { 564 + struct xe_config_group_device *dev = to_xe_config_group_device(item); 565 + 566 + return wa_bb_store(dev->config.ctx_restore_mid_bb, dev, data, sz); 567 + } 568 + 569 + static ssize_t ctx_restore_post_bb_store(struct config_item *item, 570 + const char *data, size_t sz) 571 + { 572 + struct xe_config_group_device *dev = to_xe_config_group_device(item); 573 + 574 + return wa_bb_store(dev->config.ctx_restore_post_bb, dev, data, sz); 575 + } 576 + 577 + CONFIGFS_ATTR(, ctx_restore_mid_bb); 578 + CONFIGFS_ATTR(, ctx_restore_post_bb); 423 579 CONFIGFS_ATTR(, enable_psmi); 424 580 CONFIGFS_ATTR(, engines_allowed); 425 581 CONFIGFS_ATTR(, survivability_mode); 426 582 427 583 static struct configfs_attribute *xe_config_device_attrs[] = { 584 + &attr_ctx_restore_mid_bb, 585 + &attr_ctx_restore_post_bb, 428 586 &attr_enable_psmi, 429 587 &attr_engines_allowed, 430 588 &attr_survivability_mode, ··· 675 355 struct xe_config_group_device *dev = to_xe_config_group_device(item); 676 356 677 357 mutex_destroy(&dev->lock); 358 + 359 + kfree(dev->config.ctx_restore_post_bb[0].cs); 678 360 
kfree(dev); 679 361 } 680 362 ··· 684 362 .release = xe_config_device_release, 685 363 }; 686 364 365 + static bool xe_config_device_is_visible(struct config_item *item, 366 + struct configfs_attribute *attr, int n) 367 + { 368 + struct xe_config_group_device *dev = to_xe_config_group_device(item); 369 + 370 + if (attr == &attr_survivability_mode) { 371 + if (!dev->desc->is_dgfx || dev->desc->platform < XE_BATTLEMAGE) 372 + return false; 373 + } 374 + 375 + return true; 376 + } 377 + 378 + static struct configfs_group_operations xe_config_device_group_ops = { 379 + .is_visible = xe_config_device_is_visible, 380 + }; 381 + 687 382 static const struct config_item_type xe_config_device_type = { 688 383 .ct_item_ops = &xe_config_device_ops, 384 + .ct_group_ops = &xe_config_device_group_ops, 689 385 .ct_attrs = xe_config_device_attrs, 690 386 .ct_owner = THIS_MODULE, 691 387 }; ··· 782 442 if (!dev) 783 443 return ERR_PTR(-ENOMEM); 784 444 445 + dev->desc = match; 785 446 set_device_defaults(&dev->config); 786 447 787 448 config_group_init_type_name(&dev->group, name, &xe_config_device_type); ··· 792 451 return &dev->group; 793 452 } 794 453 795 - static struct configfs_group_operations xe_config_device_group_ops = { 454 + static struct configfs_group_operations xe_config_group_ops = { 796 455 .make_group = xe_config_make_device_group, 797 456 }; 798 457 799 458 static const struct config_item_type xe_configfs_type = { 800 - .ct_group_ops = &xe_config_device_group_ops, 459 + .ct_group_ops = &xe_config_group_ops, 801 460 .ct_owner = THIS_MODULE, 802 461 }; 803 462 ··· 884 543 } 885 544 886 545 /** 887 - * xe_configfs_clear_survivability_mode - clear configfs survivability mode 888 - * @pdev: pci device 889 - */ 890 - void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) 891 - { 892 - struct xe_config_group_device *dev = find_xe_config_group_device(pdev); 893 - 894 - if (!dev) 895 - return; 896 - 897 - guard(mutex)(&dev->lock); 898 - 
dev->config.survivability_mode = 0; 899 - 900 - config_group_put(&dev->group); 901 - } 902 - 903 - /** 904 546 * xe_configfs_get_engines_allowed - get engine allowed mask from configfs 905 547 * @pdev: pci device 906 548 * ··· 918 594 return false; 919 595 920 596 ret = dev->config.enable_psmi; 921 - config_item_put(&dev->group.cg_item); 597 + config_group_put(&dev->group); 922 598 923 599 return ret; 600 + } 601 + 602 + /** 603 + * xe_configfs_get_ctx_restore_mid_bb - get configfs ctx_restore_mid_bb setting 604 + * @pdev: pci device 605 + * @class: hw engine class 606 + * @cs: pointer to the bb to use - only valid during probe 607 + * 608 + * Return: Number of dwords used in the mid_ctx_restore setting in configfs 609 + */ 610 + u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, 611 + enum xe_engine_class class, 612 + const u32 **cs) 613 + { 614 + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); 615 + u32 len; 616 + 617 + if (!dev) 618 + return 0; 619 + 620 + if (cs) 621 + *cs = dev->config.ctx_restore_mid_bb[class].cs; 622 + 623 + len = dev->config.ctx_restore_mid_bb[class].len; 624 + config_group_put(&dev->group); 625 + 626 + return len; 627 + } 628 + 629 + /** 630 + * xe_configfs_get_ctx_restore_post_bb - get configfs ctx_restore_post_bb setting 631 + * @pdev: pci device 632 + * @class: hw engine class 633 + * @cs: pointer to the bb to use - only valid during probe 634 + * 635 + * Return: Number of dwords used in the post_ctx_restore setting in configfs 636 + */ 637 + u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, 638 + enum xe_engine_class class, 639 + const u32 **cs) 640 + { 641 + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); 642 + u32 len; 643 + 644 + if (!dev) 645 + return 0; 646 + 647 + *cs = dev->config.ctx_restore_post_bb[class].cs; 648 + len = dev->config.ctx_restore_post_bb[class].len; 649 + config_group_put(&dev->group); 650 + 651 + return len; 924 652 } 925 653 926 654 int 
__init xe_configfs_init(void) ··· 990 614 return 0; 991 615 } 992 616 993 - void __exit xe_configfs_exit(void) 617 + void xe_configfs_exit(void) 994 618 { 995 619 configfs_unregister_subsystem(&xe_configfs); 996 620 mutex_destroy(&xe_configfs.su_mutex);
+10 -2
drivers/gpu/drm/xe/xe_configfs.h
··· 8 8 #include <linux/limits.h> 9 9 #include <linux/types.h> 10 10 11 + #include <xe_hw_engine_types.h> 12 + 11 13 struct pci_dev; 12 14 13 15 #if IS_ENABLED(CONFIG_CONFIGFS_FS) ··· 17 15 void xe_configfs_exit(void); 18 16 void xe_configfs_check_device(struct pci_dev *pdev); 19 17 bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); 20 - void xe_configfs_clear_survivability_mode(struct pci_dev *pdev); 21 18 u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); 22 19 bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev); 20 + u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, 21 + const u32 **cs); 22 + u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, 23 + const u32 **cs); 23 24 #else 24 25 static inline int xe_configfs_init(void) { return 0; } 25 26 static inline void xe_configfs_exit(void) { } 26 27 static inline void xe_configfs_check_device(struct pci_dev *pdev) { } 27 28 static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } 28 - static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { } 29 29 static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; } 30 30 static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; } 31 + static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, 32 + const u32 **cs) { return 0; } 33 + static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, 34 + const u32 **cs) { return 0; } 31 35 #endif 32 36 33 37 #endif
+47 -19
drivers/gpu/drm/xe/xe_debugfs.c
··· 24 24 #include "xe_pxp_debugfs.h" 25 25 #include "xe_sriov.h" 26 26 #include "xe_sriov_pf.h" 27 + #include "xe_sriov_vf.h" 27 28 #include "xe_step.h" 29 + #include "xe_tile_debugfs.h" 28 30 #include "xe_wa.h" 29 31 #include "xe_vsec.h" 30 32 ··· 40 38 DECLARE_FAULT_ATTR(inject_csc_hw_error); 41 39 42 40 static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio, 43 - u32 offset, char *name, struct drm_printer *p) 41 + u32 offset, const char *name, struct drm_printer *p) 44 42 { 45 43 u64 residency = 0; 46 44 int ret; ··· 136 134 p = drm_seq_file_printer(m); 137 135 xe_pm_runtime_get(xe); 138 136 mmio = xe_root_tile_mmio(xe); 139 - struct { 137 + static const struct { 140 138 u32 offset; 141 - char *name; 139 + const char *name; 142 140 } residencies[] = { 143 141 {BMG_G2_RESIDENCY_OFFSET, "Package G2"}, 144 142 {BMG_G6_RESIDENCY_OFFSET, "Package G6"}, ··· 165 163 xe_pm_runtime_get(xe); 166 164 mmio = xe_root_tile_mmio(xe); 167 165 168 - struct { 166 + static const struct { 169 167 u32 offset; 170 - char *name; 168 + const char *name; 171 169 } residencies[] = { 172 170 {BMG_PCIE_LINK_L0_RESIDENCY_OFFSET, "PCIE LINK L0 RESIDENCY"}, 173 171 {BMG_PCIE_LINK_L1_RESIDENCY_OFFSET, "PCIE LINK L1 RESIDENCY"}, ··· 331 329 .write = atomic_svm_timeslice_ms_set, 332 330 }; 333 331 334 - static void create_tile_debugfs(struct xe_tile *tile, struct dentry *root) 332 + static ssize_t disable_late_binding_show(struct file *f, char __user *ubuf, 333 + size_t size, loff_t *pos) 335 334 { 336 - char name[8]; 335 + struct xe_device *xe = file_inode(f)->i_private; 336 + struct xe_late_bind *late_bind = &xe->late_bind; 337 + char buf[32]; 338 + int len; 337 339 338 - snprintf(name, sizeof(name), "tile%u", tile->id); 339 - tile->debugfs = debugfs_create_dir(name, root); 340 - if (IS_ERR(tile->debugfs)) 341 - return; 340 + len = scnprintf(buf, sizeof(buf), "%d\n", late_bind->disable); 342 341 343 - /* 344 - * Store the xe_tile pointer as private data of the tile/ 
directory 345 - * node so other tile specific attributes under that directory may 346 - * refer to it by looking at its parent node private data. 347 - */ 348 - tile->debugfs->d_inode->i_private = tile; 342 + return simple_read_from_buffer(ubuf, size, pos, buf, len); 349 343 } 344 + 345 + static ssize_t disable_late_binding_set(struct file *f, const char __user *ubuf, 346 + size_t size, loff_t *pos) 347 + { 348 + struct xe_device *xe = file_inode(f)->i_private; 349 + struct xe_late_bind *late_bind = &xe->late_bind; 350 + u32 uval; 351 + ssize_t ret; 352 + 353 + ret = kstrtouint_from_user(ubuf, size, sizeof(uval), &uval); 354 + if (ret) 355 + return ret; 356 + 357 + if (uval > 1) 358 + return -EINVAL; 359 + 360 + late_bind->disable = !!uval; 361 + return size; 362 + } 363 + 364 + static const struct file_operations disable_late_binding_fops = { 365 + .owner = THIS_MODULE, 366 + .read = disable_late_binding_show, 367 + .write = disable_late_binding_set, 368 + }; 350 369 351 370 void xe_debugfs_register(struct xe_device *xe) 352 371 { ··· 385 362 ARRAY_SIZE(debugfs_list), 386 363 root, minor); 387 364 388 - if (xe->info.platform == XE_BATTLEMAGE) { 365 + if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) { 389 366 drm_debugfs_create_files(debugfs_residencies, 390 367 ARRAY_SIZE(debugfs_residencies), 391 368 root, minor); ··· 401 378 402 379 debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe, 403 380 &atomic_svm_timeslice_ms_fops); 381 + 382 + debugfs_create_file("disable_late_binding", 0600, root, xe, 383 + &disable_late_binding_fops); 404 384 405 385 for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { 406 386 man = ttm_manager_type(bdev, mem_type); ··· 424 398 ttm_resource_manager_create_debugfs(man, root, "stolen_mm"); 425 399 426 400 for_each_tile(tile, xe, tile_id) 427 - create_tile_debugfs(tile, root); 401 + xe_tile_debugfs_register(tile); 428 402 429 403 for_each_gt(gt, xe, id) 430 404 xe_gt_debugfs_register(gt); ··· 437 411 
438 412 if (IS_SRIOV_PF(xe)) 439 413 xe_sriov_pf_debugfs_register(xe, root); 414 + else if (IS_SRIOV_VF(xe)) 415 + xe_sriov_vf_debugfs_register(xe, root); 440 416 }
+9 -2
drivers/gpu/drm/xe/xe_device.c
··· 45 45 #include "xe_hwmon.h" 46 46 #include "xe_i2c.h" 47 47 #include "xe_irq.h" 48 + #include "xe_late_bind_fw.h" 48 49 #include "xe_mmio.h" 49 50 #include "xe_module.h" 50 51 #include "xe_nvm.h" ··· 458 457 if (err) 459 458 goto err; 460 459 460 + xe_validation_device_init(&xe->val); 461 + 461 462 init_waitqueue_head(&xe->ufence_wq); 462 463 463 464 init_rwsem(&xe->usm.lock); ··· 533 530 * re-init and saving/restoring (or re-populating) the wiped memory. Since we 534 531 * perform the FLR as the very last action before releasing access to the HW 535 532 * during the driver release flow, we don't attempt recovery at all, because 536 - * if/when a new instance of i915 is bound to the device it will do a full 533 + * if/when a new instance of Xe is bound to the device it will do a full 537 534 * re-init anyway. 538 535 */ 539 536 static void __xe_driver_flr(struct xe_device *xe) ··· 904 901 if (err) 905 902 return err; 906 903 904 + err = xe_late_bind_init(&xe->late_bind); 905 + if (err) 906 + return err; 907 + 907 908 err = xe_oa_init(xe); 908 909 if (err) 909 910 return err; ··· 957 950 958 951 xe_vsec_init(xe); 959 952 960 - err = xe_sriov_late_init(xe); 953 + err = xe_sriov_init_late(xe); 961 954 if (err) 962 955 goto err_unregister_display; 963 956
+40 -58
drivers/gpu/drm/xe/xe_device_sysfs.c
··· 71 71 72 72 static DEVICE_ATTR_RW(vram_d3cold_threshold); 73 73 74 + static struct attribute *vram_attrs[] = { 75 + &dev_attr_vram_d3cold_threshold.attr, 76 + NULL 77 + }; 78 + 79 + static const struct attribute_group vram_attr_group = { 80 + .attrs = vram_attrs, 81 + }; 82 + 74 83 static ssize_t 75 84 lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf) 76 85 { ··· 158 149 } 159 150 static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); 160 151 161 - static int late_bind_create_files(struct device *dev) 152 + static struct attribute *late_bind_attrs[] = { 153 + &dev_attr_lb_fan_control_version.attr, 154 + &dev_attr_lb_voltage_regulator_version.attr, 155 + NULL 156 + }; 157 + 158 + static umode_t late_bind_attr_is_visible(struct kobject *kobj, 159 + struct attribute *attr, int n) 162 160 { 161 + struct device *dev = kobj_to_dev(kobj); 163 162 struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); 164 163 struct xe_tile *root = xe_device_get_root_tile(xe); 165 164 u32 cap = 0; ··· 177 160 178 161 ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), 179 162 &cap, NULL); 180 - if (ret) { 181 - if (ret == -ENXIO) { 182 - drm_dbg(&xe->drm, "Late binding not supported by firmware\n"); 183 - ret = 0; 184 - } 185 - goto out; 186 - } 187 - 188 - if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { 189 - ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); 190 - if (ret) 191 - goto out; 192 - } 193 - 194 - if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) 195 - ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); 196 - out: 197 163 xe_pm_runtime_put(xe); 198 - 199 - return ret; 200 - } 201 - 202 - static void late_bind_remove_files(struct device *dev) 203 - { 204 - struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); 205 - struct xe_tile *root = xe_device_get_root_tile(xe); 206 - u32 cap = 0; 207 - int ret; 208 - 209 - xe_pm_runtime_get(xe); 210 - 211 - 
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), 212 - &cap, NULL); 213 164 if (ret) 214 - goto out; 165 + return 0; 215 166 216 - if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) 217 - sysfs_remove_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); 167 + if (attr == &dev_attr_lb_fan_control_version.attr && 168 + REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) 169 + return attr->mode; 170 + if (attr == &dev_attr_lb_voltage_regulator_version.attr && 171 + REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) 172 + return attr->mode; 218 173 219 - if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) 220 - sysfs_remove_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); 221 - out: 222 - xe_pm_runtime_put(xe); 174 + return 0; 223 175 } 176 + 177 + static const struct attribute_group late_bind_attr_group = { 178 + .attrs = late_bind_attrs, 179 + .is_visible = late_bind_attr_is_visible, 180 + }; 224 181 225 182 /** 226 183 * DOC: PCIe Gen5 Limitations ··· 269 278 } 270 279 static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status); 271 280 272 - static const struct attribute *auto_link_downgrade_attrs[] = { 281 + static struct attribute *auto_link_downgrade_attrs[] = { 273 282 &dev_attr_auto_link_downgrade_capable.attr, 274 283 &dev_attr_auto_link_downgrade_status.attr, 275 284 NULL 276 285 }; 277 286 278 - static void xe_device_sysfs_fini(void *arg) 279 - { 280 - struct xe_device *xe = arg; 281 - 282 - if (xe->d3cold.capable) 283 - sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); 284 - 285 - if (xe->info.platform == XE_BATTLEMAGE) { 286 - sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); 287 - late_bind_remove_files(xe->drm.dev); 288 - } 289 - } 287 + static const struct attribute_group auto_link_downgrade_attr_group = { 288 + .attrs = auto_link_downgrade_attrs, 289 + }; 290 290 291 291 int xe_device_sysfs_init(struct xe_device *xe) 292 292 { ··· 285 303 int ret; 286 304 287 305 if (xe->d3cold.capable) { 288 - ret 
= sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); 306 + ret = devm_device_add_group(dev, &vram_attr_group); 289 307 if (ret) 290 308 return ret; 291 309 } 292 310 293 - if (xe->info.platform == XE_BATTLEMAGE) { 294 - ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); 311 + if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) { 312 + ret = devm_device_add_group(dev, &auto_link_downgrade_attr_group); 295 313 if (ret) 296 314 return ret; 297 315 298 - ret = late_bind_create_files(dev); 316 + ret = devm_device_add_group(dev, &late_bind_attr_group); 299 317 if (ret) 300 318 return ret; 301 319 } 302 320 303 - return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); 321 + return 0; 304 322 }
+16 -3
drivers/gpu/drm/xe/xe_device_types.h
··· 14 14 15 15 #include "xe_devcoredump_types.h" 16 16 #include "xe_heci_gsc.h" 17 + #include "xe_late_bind_fw_types.h" 17 18 #include "xe_lmtt_types.h" 18 19 #include "xe_memirq_types.h" 19 20 #include "xe_oa_types.h" ··· 27 26 #include "xe_sriov_vf_ccs_types.h" 28 27 #include "xe_step_types.h" 29 28 #include "xe_survivability_mode_types.h" 29 + #include "xe_validation.h" 30 30 31 31 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 32 32 #define TEST_VM_OPS_ERROR ··· 185 183 struct { 186 184 /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ 187 185 struct xe_ggtt_node *ggtt_balloon[2]; 188 - 189 - /** @sriov.vf.ccs: CCS read and write contexts for VF. */ 190 - struct xe_tile_vf_ccs ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; 191 186 } vf; 192 187 } sriov; 193 188 ··· 281 282 u8 has_heci_cscfi:1; 282 283 /** @info.has_heci_gscfi: device has heci gscfi */ 283 284 u8 has_heci_gscfi:1; 285 + /** @info.has_late_bind: Device has firmware late binding support */ 286 + u8 has_late_bind:1; 284 287 /** @info.has_llc: Device has a shared CPU+GPU last level cache */ 285 288 u8 has_llc:1; 286 289 /** @info.has_mbx_power_limits: Device has support to manage power limits using ··· 536 535 /** @nvm: discrete graphics non-volatile memory */ 537 536 struct intel_dg_nvm_dev *nvm; 538 537 538 + /** @late_bind: xe mei late bind interface */ 539 + struct xe_late_bind late_bind; 540 + 539 541 /** @oa: oa observation subsystem */ 540 542 struct xe_oa oa; 541 543 ··· 590 586 */ 591 587 atomic64_t global_total_pages; 592 588 #endif 589 + /** @val: The domain for exhaustive eviction, which is currently per device. 
*/ 590 + struct xe_validation_device val; 593 591 594 592 /** @psmi: GPU debugging via additional validation HW */ 595 593 struct { ··· 600 594 /** @psmi.region_mask: Mask of valid memory regions */ 601 595 u8 region_mask; 602 596 } psmi; 597 + 598 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 599 + /** @g2g_test_array: for testing G2G communications */ 600 + u32 *g2g_test_array; 601 + /** @g2g_test_count: for testing G2G communications */ 602 + atomic_t g2g_test_count; 603 + #endif 603 604 604 605 /* private: */ 605 606
+48 -22
drivers/gpu/drm/xe/xe_dma_buf.c
··· 51 51 struct drm_gem_object *obj = attach->dmabuf->priv; 52 52 struct xe_bo *bo = gem_to_xe_bo(obj); 53 53 struct xe_device *xe = xe_bo_device(bo); 54 + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; 54 55 int ret; 55 56 56 57 /* ··· 64 63 return -EINVAL; 65 64 } 66 65 67 - ret = xe_bo_migrate(bo, XE_PL_TT); 66 + ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); 68 67 if (ret) { 69 68 if (ret != -EINTR && ret != -ERESTARTSYS) 70 69 drm_dbg(&xe->drm, ··· 73 72 return ret; 74 73 } 75 74 76 - ret = xe_bo_pin_external(bo, true); 75 + ret = xe_bo_pin_external(bo, true, exec); 77 76 xe_assert(xe, !ret); 78 77 79 78 return 0; ··· 93 92 struct dma_buf *dma_buf = attach->dmabuf; 94 93 struct drm_gem_object *obj = dma_buf->priv; 95 94 struct xe_bo *bo = gem_to_xe_bo(obj); 95 + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; 96 96 struct sg_table *sgt; 97 97 int r = 0; 98 98 ··· 102 100 103 101 if (!xe_bo_is_pinned(bo)) { 104 102 if (!attach->peer2peer) 105 - r = xe_bo_migrate(bo, XE_PL_TT); 103 + r = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); 106 104 else 107 - r = xe_bo_validate(bo, NULL, false); 105 + r = xe_bo_validate(bo, NULL, false, exec); 108 106 if (r) 109 107 return ERR_PTR(r); 110 108 } ··· 163 161 struct xe_bo *bo = gem_to_xe_bo(obj); 164 162 bool reads = (direction == DMA_BIDIRECTIONAL || 165 163 direction == DMA_FROM_DEVICE); 164 + struct xe_validation_ctx ctx; 165 + struct drm_exec exec; 166 + int ret = 0; 166 167 167 168 if (!reads) 168 169 return 0; 169 170 170 171 /* Can we do interruptible lock here? 
*/ 171 - xe_bo_lock(bo, false); 172 - (void)xe_bo_migrate(bo, XE_PL_TT); 173 - xe_bo_unlock(bo); 172 + xe_validation_guard(&ctx, &xe_bo_device(bo)->val, &exec, (struct xe_val_flags) {}, ret) { 173 + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); 174 + drm_exec_retry_on_contention(&exec); 175 + if (ret) 176 + break; 174 177 178 + ret = xe_bo_migrate(bo, XE_PL_TT, NULL, &exec); 179 + drm_exec_retry_on_contention(&exec); 180 + xe_validation_retry_on_oom(&ctx, &ret); 181 + } 182 + 183 + /* If we failed, cpu-access takes place in current placement. */ 175 184 return 0; 176 185 } 177 186 ··· 233 220 { 234 221 struct dma_resv *resv = dma_buf->resv; 235 222 struct xe_device *xe = to_xe_device(dev); 223 + struct xe_validation_ctx ctx; 224 + struct drm_gem_object *dummy_obj; 225 + struct drm_exec exec; 236 226 struct xe_bo *bo; 237 - int ret; 227 + int ret = 0; 238 228 239 - dma_resv_lock(resv, NULL); 240 - bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, 241 - 0, /* Will require 1way or 2way for vm_bind */ 242 - ttm_bo_type_sg, XE_BO_FLAG_SYSTEM); 243 - if (IS_ERR(bo)) { 244 - ret = PTR_ERR(bo); 245 - goto error; 229 + dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm); 230 + if (!dummy_obj) 231 + return ERR_PTR(-ENOMEM); 232 + 233 + dummy_obj->resv = resv; 234 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) { 235 + ret = drm_exec_lock_obj(&exec, dummy_obj); 236 + drm_exec_retry_on_contention(&exec); 237 + if (ret) 238 + break; 239 + 240 + bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size, 241 + 0, /* Will require 1way or 2way for vm_bind */ 242 + ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec); 243 + drm_exec_retry_on_contention(&exec); 244 + if (IS_ERR(bo)) { 245 + ret = PTR_ERR(bo); 246 + xe_validation_retry_on_oom(&ctx, &ret); 247 + break; 248 + } 246 249 } 247 - dma_resv_unlock(resv); 250 + drm_gem_object_put(dummy_obj); 248 251 249 - return &bo->ttm.base; 250 - 251 - error: 252 - dma_resv_unlock(resv); 
253 - return ERR_PTR(ret); 252 + return ret ? ERR_PTR(ret) : &bo->ttm.base; 254 253 } 255 254 256 255 static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach) 257 256 { 258 257 struct drm_gem_object *obj = attach->importer_priv; 259 258 struct xe_bo *bo = gem_to_xe_bo(obj); 259 + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; 260 260 261 - XE_WARN_ON(xe_bo_evict(bo)); 261 + XE_WARN_ON(xe_bo_evict(bo, exec)); 262 262 } 263 263 264 264 static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
+2 -3
drivers/gpu/drm/xe/xe_eu_stall.c
··· 617 617 618 618 size = stream->per_xecore_buf_size * last_xecore; 619 619 620 - bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL, 621 - size, ~0ull, ttm_bo_type_kernel, 622 - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64); 620 + bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel, 621 + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false); 623 622 if (IS_ERR(bo)) { 624 623 kfree(stream->xecore_buf); 625 624 return PTR_ERR(bo);
+16 -15
drivers/gpu/drm/xe/xe_exec.c
··· 19 19 #include "xe_ring_ops_types.h" 20 20 #include "xe_sched_job.h" 21 21 #include "xe_sync.h" 22 + #include "xe_svm.h" 22 23 #include "xe_vm.h" 23 24 24 25 /** ··· 98 97 static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) 99 98 { 100 99 struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); 100 + int ret; 101 101 102 102 /* The fence slot added here is intended for the exec sched job. */ 103 - return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); 103 + xe_vm_set_validation_exec(vm, &vm_exec->exec); 104 + ret = xe_vm_validate_rebind(vm, &vm_exec->exec, 1); 105 + xe_vm_set_validation_exec(vm, NULL); 106 + return ret; 104 107 } 105 108 106 109 int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ··· 120 115 struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; 121 116 struct drm_exec *exec = &vm_exec.exec; 122 117 u32 i, num_syncs, num_ufence = 0; 118 + struct xe_validation_ctx ctx; 123 119 struct xe_sched_job *job; 124 120 struct xe_vm *vm; 125 121 bool write_locked, skip_retry = false; 126 - ktime_t end = 0; 127 122 int err = 0; 128 123 struct xe_hw_engine_group *group; 129 124 enum xe_hw_engine_group_execution_mode mode, previous_mode; ··· 251 246 if (err) 252 247 goto err_unlock_list; 253 248 254 - vm_exec.vm = &vm->gpuvm; 255 - vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; 256 - if (xe_vm_in_lr_mode(vm)) { 257 - drm_exec_init(exec, vm_exec.flags, 0); 258 - } else { 259 - err = drm_gpuvm_exec_lock(&vm_exec); 260 - if (err) { 261 - if (xe_vm_validate_should_retry(exec, err, &end)) 262 - err = -EAGAIN; 249 + if (!xe_vm_in_lr_mode(vm)) { 250 + vm_exec.vm = &vm->gpuvm; 251 + vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; 252 + err = xe_validation_exec_lock(&ctx, &vm_exec, &xe->val); 253 + if (err) 263 254 goto err_unlock_list; 264 - } 265 255 } 266 256 267 257 if (xe_vm_is_closed_or_banned(q->vm)) { ··· 303 303 if (err) 304 304 goto err_put_job; 305 305 306 - err = down_read_interruptible(&vm->userptr.notifier_lock); 306 + 
err = xe_svm_notifier_lock_interruptible(vm); 307 307 if (err) 308 308 goto err_put_job; 309 309 ··· 345 345 346 346 err_repin: 347 347 if (!xe_vm_in_lr_mode(vm)) 348 - up_read(&vm->userptr.notifier_lock); 348 + xe_svm_notifier_unlock(vm); 349 349 err_put_job: 350 350 if (err) 351 351 xe_sched_job_put(job); 352 352 err_exec: 353 - drm_exec_fini(exec); 353 + if (!xe_vm_in_lr_mode(vm)) 354 + xe_validation_ctx_fini(&ctx); 354 355 err_unlock_list: 355 356 up_read(&vm->lock); 356 357 if (err == -EAGAIN && !skip_retry)
+15 -7
drivers/gpu/drm/xe/xe_exec_queue.c
··· 199 199 return err; 200 200 } 201 201 202 + static void __xe_exec_queue_fini(struct xe_exec_queue *q) 203 + { 204 + int i; 205 + 206 + q->ops->fini(q); 207 + 208 + for (i = 0; i < q->width; ++i) 209 + xe_lrc_put(q->lrc[i]); 210 + } 211 + 202 212 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, 203 213 u32 logical_mask, u16 width, 204 214 struct xe_hw_engine *hwe, u32 flags, ··· 239 229 if (xe_exec_queue_uses_pxp(q)) { 240 230 err = xe_pxp_exec_queue_add(xe->pxp, q); 241 231 if (err) 242 - goto err_post_alloc; 232 + goto err_post_init; 243 233 } 244 234 245 235 return q; 246 236 237 + err_post_init: 238 + __xe_exec_queue_fini(q); 247 239 err_post_alloc: 248 240 __xe_exec_queue_free(q); 249 241 return ERR_PTR(err); ··· 343 331 xe_exec_queue_put(eq); 344 332 } 345 333 346 - q->ops->fini(q); 334 + q->ops->destroy(q); 347 335 } 348 336 349 337 void xe_exec_queue_fini(struct xe_exec_queue *q) 350 338 { 351 - int i; 352 - 353 339 /* 354 340 * Before releasing our ref to lrc and xef, accumulate our run ticks 355 341 * and wakeup any waiters. ··· 356 346 if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) 357 347 wake_up_var(&q->xef->exec_queue.pending_removal); 358 348 359 - for (i = 0; i < q->width; ++i) 360 - xe_lrc_put(q->lrc[i]); 361 - 349 + __xe_exec_queue_fini(q); 362 350 __xe_exec_queue_free(q); 363 351 } 364 352
+7 -1
drivers/gpu/drm/xe/xe_exec_queue_types.h
··· 181 181 int (*init)(struct xe_exec_queue *q); 182 182 /** @kill: Kill inflight submissions for backend */ 183 183 void (*kill)(struct xe_exec_queue *q); 184 - /** @fini: Fini exec queue for submission backend */ 184 + /** @fini: Undoes the init() for submission backend */ 185 185 void (*fini)(struct xe_exec_queue *q); 186 + /** 187 + * @destroy: Destroy exec queue for submission backend. The backend 188 + * function must call xe_exec_queue_fini() (which will in turn call the 189 + * fini() backend function) to ensure the queue is properly cleaned up. 190 + */ 191 + void (*destroy)(struct xe_exec_queue *q); 186 192 /** @set_priority: Set priority for exec queue */ 187 193 int (*set_priority)(struct xe_exec_queue *q, 188 194 enum xe_exec_queue_priority priority);
+16 -9
drivers/gpu/drm/xe/xe_execlist.c
··· 385 385 return err; 386 386 } 387 387 388 - static void execlist_exec_queue_fini_async(struct work_struct *w) 388 + static void execlist_exec_queue_fini(struct xe_exec_queue *q) 389 + { 390 + struct xe_execlist_exec_queue *exl = q->execlist; 391 + 392 + drm_sched_entity_fini(&exl->entity); 393 + drm_sched_fini(&exl->sched); 394 + 395 + kfree(exl); 396 + } 397 + 398 + static void execlist_exec_queue_destroy_async(struct work_struct *w) 389 399 { 390 400 struct xe_execlist_exec_queue *ee = 391 - container_of(w, struct xe_execlist_exec_queue, fini_async); 401 + container_of(w, struct xe_execlist_exec_queue, destroy_async); 392 402 struct xe_exec_queue *q = ee->q; 393 403 struct xe_execlist_exec_queue *exl = q->execlist; 394 404 struct xe_device *xe = gt_to_xe(q->gt); ··· 411 401 list_del(&exl->active_link); 412 402 spin_unlock_irqrestore(&exl->port->lock, flags); 413 403 414 - drm_sched_entity_fini(&exl->entity); 415 - drm_sched_fini(&exl->sched); 416 - kfree(exl); 417 - 418 404 xe_exec_queue_fini(q); 419 405 } 420 406 ··· 419 413 /* NIY */ 420 414 } 421 415 422 - static void execlist_exec_queue_fini(struct xe_exec_queue *q) 416 + static void execlist_exec_queue_destroy(struct xe_exec_queue *q) 423 417 { 424 - INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async); 425 - queue_work(system_unbound_wq, &q->execlist->fini_async); 418 + INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async); 419 + queue_work(system_unbound_wq, &q->execlist->destroy_async); 426 420 } 427 421 428 422 static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, ··· 473 467 .init = execlist_exec_queue_init, 474 468 .kill = execlist_exec_queue_kill, 475 469 .fini = execlist_exec_queue_fini, 470 + .destroy = execlist_exec_queue_destroy, 476 471 .set_priority = execlist_exec_queue_set_priority, 477 472 .set_timeslice = execlist_exec_queue_set_timeslice, 478 473 .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
+1 -1
drivers/gpu/drm/xe/xe_execlist_types.h
··· 42 42 43 43 bool has_run; 44 44 45 - struct work_struct fini_async; 45 + struct work_struct destroy_async; 46 46 47 47 enum xe_exec_queue_priority active_priority; 48 48 struct list_head active_link;
+15 -12
drivers/gpu/drm/xe/xe_ggtt.c
··· 28 28 #include "xe_pm.h" 29 29 #include "xe_res_cursor.h" 30 30 #include "xe_sriov.h" 31 + #include "xe_tile_printk.h" 31 32 #include "xe_tile_sriov_vf.h" 32 33 #include "xe_tlb_inval.h" 33 34 #include "xe_wa.h" ··· 270 269 gsm_size = probe_gsm_size(pdev); 271 270 272 271 if (gsm_size == 0) { 273 - drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); 272 + xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n"); 274 273 return -ENOMEM; 275 274 } 276 275 ··· 467 466 468 467 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { 469 468 string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf)); 470 - xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n", 471 - node->start, node->start + node->size, buf, description); 469 + xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n", 470 + node->start, node->start + node->size, buf, description); 472 471 } 473 472 } 474 473 ··· 500 499 501 500 err = drm_mm_reserve_node(&ggtt->mm, &node->base); 502 501 503 - if (xe_gt_WARN(ggtt->tile->primary_gt, err, 504 - "Failed to balloon GGTT %#llx-%#llx (%pe)\n", 505 - node->base.start, node->base.start + node->base.size, ERR_PTR(err))) 502 + if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", 503 + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) 506 504 return err; 507 505 508 506 xe_ggtt_dump_node(ggtt, &node->base, "balloon"); ··· 731 731 } 732 732 733 733 static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, 734 - u64 start, u64 end) 734 + u64 start, u64 end, struct drm_exec *exec) 735 735 { 736 736 u64 alignment = bo->min_align > 0 ? 
bo->min_align : XE_PAGE_SIZE; 737 737 u8 tile_id = ggtt->tile->id; ··· 746 746 return 0; 747 747 } 748 748 749 - err = xe_bo_validate(bo, NULL, false); 749 + err = xe_bo_validate(bo, NULL, false, exec); 750 750 if (err) 751 751 return err; 752 752 ··· 788 788 * @bo: the &xe_bo to be inserted 789 789 * @start: address where it will be inserted 790 790 * @end: end of the range where it will be inserted 791 + * @exec: The drm_exec transaction to use for exhaustive eviction. 791 792 * 792 793 * Return: 0 on success or a negative error code on failure. 793 794 */ 794 795 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, 795 - u64 start, u64 end) 796 + u64 start, u64 end, struct drm_exec *exec) 796 797 { 797 - return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); 798 + return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec); 798 799 } 799 800 800 801 /** 801 802 * xe_ggtt_insert_bo - Insert BO into GGTT 802 803 * @ggtt: the &xe_ggtt where bo will be inserted 803 804 * @bo: the &xe_bo to be inserted 805 + * @exec: The drm_exec transaction to use for exhaustive eviction. 804 806 * 805 807 * Return: 0 on success or a negative error code on failure. 806 808 */ 807 - int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) 809 + int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, 810 + struct drm_exec *exec) 808 811 { 809 - return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); 812 + return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec); 810 813 } 811 814 812 815 /**
+3 -2
drivers/gpu/drm/xe/xe_ggtt.h
··· 10 10 11 11 struct drm_printer; 12 12 struct xe_tile; 13 + struct drm_exec; 13 14 14 15 struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile); 15 16 int xe_ggtt_init_early(struct xe_ggtt *ggtt); ··· 32 31 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, 33 32 struct xe_bo *bo, u16 pat_index); 34 33 void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); 35 - int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 34 + int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, struct drm_exec *exec); 36 35 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, 37 - u64 start, u64 end); 36 + u64 start, u64 end, struct drm_exec *exec); 38 37 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 39 38 u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); 40 39
+4 -4
drivers/gpu/drm/xe/xe_gsc.c
··· 136 136 u64 ggtt_offset; 137 137 int err; 138 138 139 - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, 140 - ttm_bo_type_kernel, 141 - XE_BO_FLAG_SYSTEM | 142 - XE_BO_FLAG_GGTT); 139 + bo = xe_bo_create_pin_map_novm(xe, tile, GSC_VER_PKT_SZ * 2, 140 + ttm_bo_type_kernel, 141 + XE_BO_FLAG_SYSTEM | 142 + XE_BO_FLAG_GGTT, false); 143 143 if (IS_ERR(bo)) { 144 144 xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); 145 145 return PTR_ERR(bo);
+1 -1
drivers/gpu/drm/xe/xe_gt.c
··· 98 98 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not 99 99 * reload 100 100 */ 101 - gt->uc.guc.submission_state.enabled = false; 101 + xe_guc_submit_disable(&gt->uc.guc); 102 102 } 103 103 104 104 static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
+20 -53
drivers/gpu/drm/xe/xe_gt_debugfs.c
··· 31 31 #include "xe_reg_whitelist.h" 32 32 #include "xe_sa.h" 33 33 #include "xe_sriov.h" 34 + #include "xe_sriov_vf_ccs.h" 34 35 #include "xe_tuning.h" 35 36 #include "xe_uc_debugfs.h" 36 37 #include "xe_wa.h" ··· 122 121 xe_pm_runtime_put(gt_to_xe(gt)); 123 122 124 123 return ret; 125 - } 126 - 127 - static int sa_info(struct xe_gt *gt, struct drm_printer *p) 128 - { 129 - struct xe_tile *tile = gt_to_tile(gt); 130 - 131 - xe_pm_runtime_get(gt_to_xe(gt)); 132 - drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, 133 - xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool)); 134 - xe_pm_runtime_put(gt_to_xe(gt)); 135 - 136 - return 0; 137 - } 138 - 139 - static int sa_info_vf_ccs(struct xe_gt *gt, struct drm_printer *p) 140 - { 141 - struct xe_tile *tile = gt_to_tile(gt); 142 - struct xe_sa_manager *bb_pool; 143 - enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 144 - 145 - if (!IS_VF_CCS_READY(gt_to_xe(gt))) 146 - return 0; 147 - 148 - xe_pm_runtime_get(gt_to_xe(gt)); 149 - 150 - for_each_ccs_rw_ctx(ctx_id) { 151 - bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool; 152 - if (!bb_pool) 153 - break; 154 - 155 - drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? 
"write" : "read"); 156 - drm_printf(p, "-------------------------\n"); 157 - drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); 158 - drm_puts(p, "\n"); 159 - } 160 - 161 - xe_pm_runtime_put(gt_to_xe(gt)); 162 - 163 - return 0; 164 124 } 165 125 166 126 static int topology(struct xe_gt *gt, struct drm_printer *p) ··· 278 316 * - without access to the PF specific data 279 317 */ 280 318 static const struct drm_info_list vf_safe_debugfs_list[] = { 281 - {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, 282 319 {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, 283 320 {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, 284 321 {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, ··· 288 327 {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, 289 328 {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, 290 329 {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, 291 - {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, 292 330 {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, 293 - }; 294 - 295 - /* 296 - * only for GT debugfs files which are valid on VF. Not valid on PF. 
297 - */ 298 - static const struct drm_info_list vf_only_debugfs_list[] = { 299 - {"sa_info_vf_ccs", .show = xe_gt_debugfs_simple_show, .data = sa_info_vf_ccs}, 300 331 }; 301 332 302 333 /* everything else should be added here */ ··· 315 362 call(gt); 316 363 return count; 317 364 } 365 + 366 + static ssize_t stats_write(struct file *file, const char __user *userbuf, 367 + size_t count, loff_t *ppos) 368 + { 369 + struct seq_file *s = file->private_data; 370 + struct xe_gt *gt = s->private; 371 + 372 + return write_to_gt_call(userbuf, count, ppos, xe_gt_stats_clear, gt); 373 + } 374 + 375 + static int stats_show(struct seq_file *s, void *unused) 376 + { 377 + struct drm_printer p = drm_seq_file_printer(s); 378 + struct xe_gt *gt = s->private; 379 + 380 + return xe_gt_stats_print_info(gt, &p); 381 + } 382 + DEFINE_SHOW_STORE_ATTRIBUTE(stats); 318 383 319 384 static void force_reset(struct xe_gt *gt) 320 385 { ··· 419 448 root->d_inode->i_private = gt; 420 449 421 450 /* VF safe */ 451 + debugfs_create_file("stats", 0600, root, gt, &stats_fops); 422 452 debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops); 423 453 debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops); 424 454 ··· 431 459 drm_debugfs_create_files(pf_only_debugfs_list, 432 460 ARRAY_SIZE(pf_only_debugfs_list), 433 461 root, minor); 434 - else 435 - drm_debugfs_create_files(vf_only_debugfs_list, 436 - ARRAY_SIZE(vf_only_debugfs_list), 437 - root, minor); 438 - 439 462 440 463 xe_uc_debugfs_register(&gt->uc, root); 441 464
+28
drivers/gpu/drm/xe/xe_gt_freq.c
··· 227 227 } 228 228 static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq); 229 229 230 + static ssize_t power_profile_show(struct kobject *kobj, 231 + struct kobj_attribute *attr, 232 + char *buff) 233 + { 234 + struct device *dev = kobj_to_dev(kobj); 235 + 236 + xe_guc_pc_get_power_profile(dev_to_pc(dev), buff); 237 + 238 + return strlen(buff); 239 + } 240 + 241 + static ssize_t power_profile_store(struct kobject *kobj, 242 + struct kobj_attribute *attr, 243 + const char *buff, size_t count) 244 + { 245 + struct device *dev = kobj_to_dev(kobj); 246 + struct xe_guc_pc *pc = dev_to_pc(dev); 247 + int err; 248 + 249 + xe_pm_runtime_get(dev_to_xe(dev)); 250 + err = xe_guc_pc_set_power_profile(pc, buff); 251 + xe_pm_runtime_put(dev_to_xe(dev)); 252 + 253 + return err ?: count; 254 + } 255 + static struct kobj_attribute attr_power_profile = __ATTR_RW(power_profile); 256 + 230 257 static const struct attribute *freq_attrs[] = { 231 258 &attr_act_freq.attr, 232 259 &attr_cur_freq.attr, ··· 263 236 &attr_rpn_freq.attr, 264 237 &attr_min_freq.attr, 265 238 &attr_max_freq.attr, 239 + &attr_power_profile.attr, 266 240 NULL 267 241 }; 268 242
+1 -1
drivers/gpu/drm/xe/xe_gt_mcr.c
··· 362 362 * @group: pointer to storage for steering group ID 363 363 * @instance: pointer to storage for steering instance ID 364 364 */ 365 - void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) 365 + void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) 366 366 { 367 367 xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); 368 368
+2 -1
drivers/gpu/drm/xe/xe_gt_mcr.h
··· 31 31 u8 *group, u8 *instance); 32 32 33 33 void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); 34 - void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); 34 + void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, 35 + unsigned int dss, u16 *group, u16 *instance); 35 36 u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance); 36 37 37 38 /*
+13 -15
drivers/gpu/drm/xe/xe_gt_pagefault.c
··· 87 87 if (!bo) 88 88 return 0; 89 89 90 - err = need_vram_move ? xe_bo_migrate(bo, vram->placement) : 91 - xe_bo_validate(bo, vm, true); 92 - 93 - return err; 90 + return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) : 91 + xe_bo_validate(bo, vm, true, exec); 94 92 } 95 93 96 94 static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, ··· 96 98 { 97 99 struct xe_vm *vm = xe_vma_vm(vma); 98 100 struct xe_tile *tile = gt_to_tile(gt); 101 + struct xe_validation_ctx ctx; 99 102 struct drm_exec exec; 100 103 struct dma_fence *fence; 101 - ktime_t end = 0; 102 104 int err, needs_vram; 103 105 104 106 lockdep_assert_held_write(&vm->lock); ··· 127 129 } 128 130 129 131 /* Lock VM and BOs dma-resv */ 130 - drm_exec_init(&exec, 0, 0); 132 + xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); 131 133 drm_exec_until_all_locked(&exec) { 132 134 err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram); 133 135 drm_exec_retry_on_contention(&exec); 134 - if (xe_vm_validate_should_retry(&exec, err, &end)) 135 - err = -EAGAIN; 136 + xe_validation_retry_on_oom(&ctx, &err); 136 137 if (err) 137 138 goto unlock_dma_resv; 138 139 139 140 /* Bind VMA only to the GT that has faulted */ 140 141 trace_xe_vma_pf_bind(vma); 142 + xe_vm_set_validation_exec(vm, &exec); 141 143 fence = xe_vma_rebind(vm, vma, BIT(tile->id)); 144 + xe_vm_set_validation_exec(vm, NULL); 142 145 if (IS_ERR(fence)) { 143 146 err = PTR_ERR(fence); 144 - if (xe_vm_validate_should_retry(&exec, err, &end)) 145 - err = -EAGAIN; 147 + xe_validation_retry_on_oom(&ctx, &err); 146 148 goto unlock_dma_resv; 147 149 } 148 150 } ··· 151 153 dma_fence_put(fence); 152 154 153 155 unlock_dma_resv: 154 - drm_exec_fini(&exec); 156 + xe_validation_ctx_fini(&ctx); 155 157 if (err == -EAGAIN) 156 158 goto retry_userptr; 157 159 ··· 533 535 { 534 536 struct xe_device *xe = gt_to_xe(gt); 535 537 struct xe_tile *tile = gt_to_tile(gt); 538 + struct xe_validation_ctx ctx; 536 
539 struct drm_exec exec; 537 540 struct xe_vm *vm; 538 541 struct xe_vma *vma; ··· 563 564 goto unlock_vm; 564 565 565 566 /* Lock VM and BOs dma-resv */ 566 - drm_exec_init(&exec, 0, 0); 567 + xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); 567 568 drm_exec_until_all_locked(&exec) { 568 569 ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram); 569 570 drm_exec_retry_on_contention(&exec); 570 - if (ret) 571 - break; 571 + xe_validation_retry_on_oom(&ctx, &ret); 572 572 } 573 573 574 - drm_exec_fini(&exec); 574 + xe_validation_ctx_fini(&ctx); 575 575 unlock_vm: 576 576 up_read(&vm->lock); 577 577 xe_vm_put(vm);
+18 -14
drivers/gpu/drm/xe/xe_gt_printk.h
··· 6 6 #ifndef _XE_GT_PRINTK_H_ 7 7 #define _XE_GT_PRINTK_H_ 8 8 9 - #include <drm/drm_print.h> 10 - 11 9 #include "xe_gt_types.h" 10 + #include "xe_tile_printk.h" 11 + 12 + #define __XE_GT_PRINTK_FMT(_gt, _fmt, _args...) "GT%u: " _fmt, (_gt)->info.id, ##_args 12 13 13 14 #define xe_gt_printk(_gt, _level, _fmt, ...) \ 14 - drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) 15 + xe_tile_printk((_gt)->tile, _level, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) 16 + 17 + #define xe_gt_err(_gt, _fmt, ...) \ 18 + xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) 15 19 16 20 #define xe_gt_err_once(_gt, _fmt, ...) \ 17 21 xe_gt_printk((_gt), err_once, _fmt, ##__VA_ARGS__) 18 22 19 - #define xe_gt_err(_gt, _fmt, ...) \ 20 - xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) 23 + #define xe_gt_err_ratelimited(_gt, _fmt, ...) \ 24 + xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) 21 25 22 26 #define xe_gt_warn(_gt, _fmt, ...) \ 23 27 xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__) ··· 35 31 #define xe_gt_dbg(_gt, _fmt, ...) \ 36 32 xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__) 37 33 38 - #define xe_gt_err_ratelimited(_gt, _fmt, ...) \ 39 - xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) 34 + #define xe_gt_WARN_type(_gt, _type, _condition, _fmt, ...) \ 35 + xe_tile_WARN##_type((_gt)->tile, _condition, _fmt, ## __VA_ARGS__) 40 36 41 37 #define xe_gt_WARN(_gt, _condition, _fmt, ...) \ 42 - drm_WARN(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) 38 + xe_gt_WARN_type((_gt),, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) 43 39 44 40 #define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) 
\ 45 - drm_WARN_ONCE(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) 41 + xe_gt_WARN_type((_gt), _ONCE, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) 46 42 47 43 #define xe_gt_WARN_ON(_gt, _condition) \ 48 - xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition)) 44 + xe_gt_WARN((_gt), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) 49 45 50 46 #define xe_gt_WARN_ON_ONCE(_gt, _condition) \ 51 - xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition)) 47 + xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) 52 48 53 49 static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf) 54 50 { ··· 71 67 72 68 /* 73 69 * The original xe_gt_dbg() callsite annotations are useless here, 74 - * redirect to the tweaked drm_dbg_printer() instead. 70 + * redirect to the tweaked xe_tile_dbg_printer() instead. 75 71 */ 76 - dbg = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, NULL); 72 + dbg = xe_tile_dbg_printer((gt)->tile); 77 73 dbg.origin = p->origin; 78 74 79 - drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf); 75 + drm_printf(&dbg, __XE_GT_PRINTK_FMT(gt, "%pV", vaf)); 80 76 } 81 77 82 78 /**
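The xe_gt_printk.h hunk folds the separate drm_WARN/drm_WARN_ONCE wrappers into one token-pasting dispatcher (xe_gt_WARN_type). A minimal userspace sketch of the same preprocessor trick, with hypothetical tile_WARN/tile_WARN_ONCE stand-ins for the xe_tile helpers (GCC/Clang assumed for `##__VA_ARGS__` and empty macro arguments):

```c
#include <stdio.h>

/* Call counters so the macro dispatch can be observed. */
int warn_calls, warn_once_calls;

/* Hypothetical stand-ins for the xe_tile_WARN / xe_tile_WARN_ONCE
 * helpers the real header forwards to. */
#define tile_WARN(cond, fmt, ...) \
	((cond) ? (warn_calls++, printf(fmt "\n", ##__VA_ARGS__), 1) : 0)
#define tile_WARN_ONCE(cond, fmt, ...) \
	((cond) ? (warn_once_calls++, printf(fmt "\n", ##__VA_ARGS__), 1) : 0)

/*
 * One dispatcher, as in the patch: _type is pasted onto the callee
 * name, so an empty _type selects tile_WARN and _ONCE selects
 * tile_WARN_ONCE.
 */
#define gt_WARN_type(_type, _cond, _fmt, ...) \
	tile_WARN##_type(_cond, "GT0: " _fmt, ##__VA_ARGS__)

#define gt_WARN(_cond, _fmt, ...)	gt_WARN_type(, _cond, _fmt, ##__VA_ARGS__)
#define gt_WARN_ONCE(_cond, _fmt, ...)	gt_WARN_type(_ONCE, _cond, _fmt, ##__VA_ARGS__)
```

The payoff is that new variants (ratelimited, once) need only one line each instead of a hand-expanded drm_WARN call.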
+7 -15
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
··· 1478 1478 return 0; 1479 1479 1480 1480 xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); 1481 - bo = xe_bo_create_locked(xe, tile, NULL, 1482 - ALIGN(size, PAGE_SIZE), 1483 - ttm_bo_type_kernel, 1484 - XE_BO_FLAG_VRAM_IF_DGFX(tile) | 1485 - XE_BO_FLAG_NEEDS_2M | 1486 - XE_BO_FLAG_PINNED | 1487 - XE_BO_FLAG_PINNED_LATE_RESTORE); 1481 + bo = xe_bo_create_pin_range_novm(xe, tile, 1482 + ALIGN(size, PAGE_SIZE), 0, ~0ull, 1483 + ttm_bo_type_kernel, 1484 + XE_BO_FLAG_VRAM_IF_DGFX(tile) | 1485 + XE_BO_FLAG_NEEDS_2M | 1486 + XE_BO_FLAG_PINNED | 1487 + XE_BO_FLAG_PINNED_LATE_RESTORE); 1488 1488 if (IS_ERR(bo)) 1489 1489 return PTR_ERR(bo); 1490 - 1491 - err = xe_bo_pin(bo); 1492 - xe_bo_unlock(bo); 1493 - if (unlikely(err)) { 1494 - xe_bo_put(bo); 1495 - return err; 1496 - } 1497 1490 1498 1491 config->lmem_obj = bo; 1499 1492 ··· 1629 1636 u64 fair; 1630 1637 1631 1638 fair = div_u64(available, num_vfs); 1632 - fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ 1633 1639 fair = ALIGN_DOWN(fair, alignment); 1634 1640 #ifdef MAX_FAIR_LMEM 1635 1641 fair = min_t(u64, MAX_FAIR_LMEM, fair);
+12 -12
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
··· 55 55 xe_gt_assert(gt, size % sizeof(u32) == 0); 56 56 xe_gt_assert(gt, size == ndwords * sizeof(u32)); 57 57 58 - bo = xe_bo_create_pin_map(xe, tile, NULL, 59 - ALIGN(size, PAGE_SIZE), 60 - ttm_bo_type_kernel, 61 - XE_BO_FLAG_SYSTEM | 62 - XE_BO_FLAG_GGTT | 63 - XE_BO_FLAG_GGTT_INVALIDATE); 58 + bo = xe_bo_create_pin_map_novm(xe, tile, 59 + ALIGN(size, PAGE_SIZE), 60 + ttm_bo_type_kernel, 61 + XE_BO_FLAG_SYSTEM | 62 + XE_BO_FLAG_GGTT | 63 + XE_BO_FLAG_GGTT_INVALIDATE, false); 64 64 if (IS_ERR(bo)) 65 65 return PTR_ERR(bo); 66 66 ··· 91 91 xe_gt_assert(gt, size % sizeof(u32) == 0); 92 92 xe_gt_assert(gt, size == ndwords * sizeof(u32)); 93 93 94 - bo = xe_bo_create_pin_map(xe, tile, NULL, 95 - ALIGN(size, PAGE_SIZE), 96 - ttm_bo_type_kernel, 97 - XE_BO_FLAG_SYSTEM | 98 - XE_BO_FLAG_GGTT | 99 - XE_BO_FLAG_GGTT_INVALIDATE); 94 + bo = xe_bo_create_pin_map_novm(xe, tile, 95 + ALIGN(size, PAGE_SIZE), 96 + ttm_bo_type_kernel, 97 + XE_BO_FLAG_SYSTEM | 98 + XE_BO_FLAG_GGTT | 99 + XE_BO_FLAG_GGTT_INVALIDATE, false); 100 100 if (IS_ERR(bo)) 101 101 return PTR_ERR(bo); 102 102
+53 -4
drivers/gpu/drm/xe/xe_gt_stats.c
··· 26 26 atomic64_add(incr, &gt->stats.counters[id]); 27 27 } 28 28 29 + #define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name 30 + 29 31 static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { 30 - "svm_pagefault_count", 31 - "tlb_inval_count", 32 - "vma_pagefault_count", 33 - "vma_pagefault_kb", 32 + DEF_STAT_STR(SVM_PAGEFAULT_COUNT, "svm_pagefault_count"), 33 + DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"), 34 + DEF_STAT_STR(SVM_TLB_INVAL_COUNT, "svm_tlb_inval_count"), 35 + DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"), 36 + DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"), 37 + DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"), 38 + DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"), 39 + DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"), 40 + DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"), 41 + DEF_STAT_STR(SVM_4K_VALID_PAGEFAULT_COUNT, "svm_4K_valid_pagefault_count"), 42 + DEF_STAT_STR(SVM_64K_VALID_PAGEFAULT_COUNT, "svm_64K_valid_pagefault_count"), 43 + DEF_STAT_STR(SVM_2M_VALID_PAGEFAULT_COUNT, "svm_2M_valid_pagefault_count"), 44 + DEF_STAT_STR(SVM_4K_PAGEFAULT_US, "svm_4K_pagefault_us"), 45 + DEF_STAT_STR(SVM_64K_PAGEFAULT_US, "svm_64K_pagefault_us"), 46 + DEF_STAT_STR(SVM_2M_PAGEFAULT_US, "svm_2M_pagefault_us"), 47 + DEF_STAT_STR(SVM_4K_MIGRATE_COUNT, "svm_4K_migrate_count"), 48 + DEF_STAT_STR(SVM_64K_MIGRATE_COUNT, "svm_64K_migrate_count"), 49 + DEF_STAT_STR(SVM_2M_MIGRATE_COUNT, "svm_2M_migrate_count"), 50 + DEF_STAT_STR(SVM_4K_MIGRATE_US, "svm_4K_migrate_us"), 51 + DEF_STAT_STR(SVM_64K_MIGRATE_US, "svm_64K_migrate_us"), 52 + DEF_STAT_STR(SVM_2M_MIGRATE_US, "svm_2M_migrate_us"), 53 + DEF_STAT_STR(SVM_DEVICE_COPY_US, "svm_device_copy_us"), 54 + DEF_STAT_STR(SVM_4K_DEVICE_COPY_US, "svm_4K_device_copy_us"), 55 + DEF_STAT_STR(SVM_64K_DEVICE_COPY_US, "svm_64K_device_copy_us"), 56 + DEF_STAT_STR(SVM_2M_DEVICE_COPY_US, "svm_2M_device_copy_us"), 57 + DEF_STAT_STR(SVM_CPU_COPY_US, 
"svm_cpu_copy_us"), 58 + DEF_STAT_STR(SVM_4K_CPU_COPY_US, "svm_4K_cpu_copy_us"), 59 + DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"), 60 + DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"), 61 + DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"), 62 + DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"), 63 + DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"), 64 + DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"), 65 + DEF_STAT_STR(SVM_2M_GET_PAGES_US, "svm_2M_get_pages_us"), 66 + DEF_STAT_STR(SVM_4K_BIND_US, "svm_4K_bind_us"), 67 + DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"), 68 + DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"), 34 69 }; 35 70 36 71 /** ··· 84 49 atomic64_read(&gt->stats.counters[id])); 85 50 86 51 return 0; 52 + } 53 + 54 + /** 55 + * xe_gt_stats_clear - Clear the GT stats 56 + * @gt: GT structure 57 + * 58 + * This clears (zeroes) all the available GT stats. 59 + */ 60 + void xe_gt_stats_clear(struct xe_gt *gt) 61 + { 62 + int id; 63 + 64 + for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id) 65 + atomic64_set(&gt->stats.counters[id], 0); 87 66 }
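The DEF_STAT_STR() change switches the description table from positional to designated initializers, pinning each string to its enum value rather than to its position in the list. A small sketch of why that matters, using made-up stat ids:

```c
#include <string.h>

enum stat_id {
	STAT_PAGEFAULT_COUNT,
	STAT_TLB_INVAL,
	STAT_MIGRATE_US,
	__STAT_NUM_IDS,		/* must be the last entry */
};

/* Designated initializers pin each name to its enum value, so the
 * table stays correct even if entries are listed (or later inserted)
 * out of order -- positional initializers silently misalign instead. */
#define DEF_STAT_STR(ID, name)	[STAT_##ID] = name

static const char *const stat_name[__STAT_NUM_IDS] = {
	DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"),
	DEF_STAT_STR(PAGEFAULT_COUNT, "pagefault_count"),
	DEF_STAT_STR(MIGRATE_US, "migrate_us"),
};

const char *stat_to_name(enum stat_id id)
{
	return stat_name[id];
}
```

Note the entries above are deliberately out of enum order, yet each id still maps to the right string; that robustness is what the kernel table gains as the SVM stat list grows.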
+1
drivers/gpu/drm/xe/xe_gt_stats.h
··· 13 13 14 14 #ifdef CONFIG_DEBUG_FS 15 15 int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); 16 + void xe_gt_stats_clear(struct xe_gt *gt); 16 17 void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); 17 18 #else 18 19 static inline void
+33
drivers/gpu/drm/xe/xe_gt_stats_types.h
··· 9 9 enum xe_gt_stats_id { 10 10 XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 11 11 XE_GT_STATS_ID_TLB_INVAL, 12 + XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 13 + XE_GT_STATS_ID_SVM_TLB_INVAL_US, 12 14 XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 13 15 XE_GT_STATS_ID_VMA_PAGEFAULT_KB, 16 + XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT, 17 + XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT, 18 + XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT, 19 + XE_GT_STATS_ID_SVM_4K_VALID_PAGEFAULT_COUNT, 20 + XE_GT_STATS_ID_SVM_64K_VALID_PAGEFAULT_COUNT, 21 + XE_GT_STATS_ID_SVM_2M_VALID_PAGEFAULT_COUNT, 22 + XE_GT_STATS_ID_SVM_4K_PAGEFAULT_US, 23 + XE_GT_STATS_ID_SVM_64K_PAGEFAULT_US, 24 + XE_GT_STATS_ID_SVM_2M_PAGEFAULT_US, 25 + XE_GT_STATS_ID_SVM_4K_MIGRATE_COUNT, 26 + XE_GT_STATS_ID_SVM_64K_MIGRATE_COUNT, 27 + XE_GT_STATS_ID_SVM_2M_MIGRATE_COUNT, 28 + XE_GT_STATS_ID_SVM_4K_MIGRATE_US, 29 + XE_GT_STATS_ID_SVM_64K_MIGRATE_US, 30 + XE_GT_STATS_ID_SVM_2M_MIGRATE_US, 31 + XE_GT_STATS_ID_SVM_DEVICE_COPY_US, 32 + XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US, 33 + XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US, 34 + XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US, 35 + XE_GT_STATS_ID_SVM_CPU_COPY_US, 36 + XE_GT_STATS_ID_SVM_4K_CPU_COPY_US, 37 + XE_GT_STATS_ID_SVM_64K_CPU_COPY_US, 38 + XE_GT_STATS_ID_SVM_2M_CPU_COPY_US, 39 + XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, 40 + XE_GT_STATS_ID_SVM_CPU_COPY_KB, 41 + XE_GT_STATS_ID_SVM_4K_GET_PAGES_US, 42 + XE_GT_STATS_ID_SVM_64K_GET_PAGES_US, 43 + XE_GT_STATS_ID_SVM_2M_GET_PAGES_US, 44 + XE_GT_STATS_ID_SVM_4K_BIND_US, 45 + XE_GT_STATS_ID_SVM_64K_BIND_US, 46 + XE_GT_STATS_ID_SVM_2M_BIND_US, 14 47 /* must be the last entry */ 15 48 __XE_GT_STATS_NUM_IDS, 16 49 };
+36 -12
drivers/gpu/drm/xe/xe_gt_topology.c
··· 12 12 #include "regs/xe_gt_regs.h" 13 13 #include "xe_assert.h" 14 14 #include "xe_gt.h" 15 + #include "xe_gt_mcr.h" 15 16 #include "xe_gt_printk.h" 16 17 #include "xe_mmio.h" 17 18 #include "xe_wa.h" ··· 123 122 } 124 123 } 125 124 125 + bool xe_gt_topology_report_l3(struct xe_gt *gt) 126 + { 127 + /* 128 + * No known userspace needs/uses the L3 bank mask reported by 129 + * the media GT, and the hardware itself is known to report bogus 130 + * values on several platforms. Only report L3 bank mask as part 131 + * of the media GT's topology on pre-Xe3 platforms since that's 132 + * already part of our ABI. 133 + */ 134 + if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) >= 30) 135 + return false; 136 + 137 + return true; 138 + } 139 + 126 140 static void 127 141 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) 128 142 { ··· 145 129 struct xe_mmio *mmio = &gt->mmio; 146 130 u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3); 147 131 148 - /* 149 - * PTL platforms with media version 30.00 do not provide proper values 150 - * for the media GT's L3 bank registers. Skip the readout since we 151 - * don't have any way to obtain real values. 152 - * 153 - * This may get re-described as an official workaround in the future, 154 - * but there's no tracking number assigned yet so we use a custom 155 - * OOB workaround descriptor. 
156 - */ 157 - if (XE_GT_WA(gt, no_media_l3)) 132 + if (!xe_gt_topology_report_l3(gt)) 158 133 return; 159 134 160 135 if (GRAPHICS_VER(xe) >= 30) { ··· 282 275 drm_printf(p, "EU type: %s\n", 283 276 eu_type_to_str(gt->fuse_topo.eu_type)); 284 277 285 - drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, 286 - gt->fuse_topo.l3_bank_mask); 278 + if (xe_gt_topology_report_l3(gt)) 279 + drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, 280 + gt->fuse_topo.l3_bank_mask); 287 281 } 288 282 289 283 /* ··· 335 327 bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss) 336 328 { 337 329 return test_bit(dss, gt->fuse_topo.c_dss_mask); 330 + } 331 + 332 + bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt) 333 + { 334 + unsigned int xecore; 335 + int last_group = -1; 336 + u16 group, instance; 337 + 338 + for_each_dss_steering(xecore, gt, group, instance) { 339 + if (last_group != group) { 340 + if (group - last_group > 1) 341 + return true; 342 + last_group = group; 343 + } 344 + } 345 + return false; 338 346 }
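The new xe_gt_has_discontiguous_dss_groups() reports whether the enabled DSS groups leave a gap in the steering sequence. A standalone sketch of the same walk, with the for_each_dss_steering() iterator replaced by a plain array of group ids:

```c
#include <stdbool.h>
#include <stddef.h>

/*
 * Sketch of xe_gt_has_discontiguous_dss_groups(): walk group ids in
 * steering order and report a discontiguity the first time two
 * successive groups differ by more than one (including a gap before
 * the first group, since last_group starts at -1, as in the patch).
 */
bool has_discontiguous_groups(const int *groups, size_t n)
{
	int last_group = -1;
	size_t i;

	for (i = 0; i < n; i++) {
		if (groups[i] != last_group) {
			if (groups[i] - last_group > 1)
				return true;
			last_group = groups[i];
		}
	}
	return false;
}
```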
+4
drivers/gpu/drm/xe/xe_gt_topology.h
··· 47 47 bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss); 48 48 bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss); 49 49 50 + bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt); 51 + 52 + bool xe_gt_topology_report_l3(struct xe_gt *gt); 53 + 50 54 #endif /* _XE_GT_TOPOLOGY_H_ */
+25 -27
drivers/gpu/drm/xe/xe_guc.c
··· 74 74 if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) 75 75 flags |= GUC_LOG_DISABLED; 76 76 else 77 - flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << 78 - GUC_LOG_VERBOSITY_SHIFT; 77 + flags |= FIELD_PREP(GUC_LOG_VERBOSITY, GUC_LOG_LEVEL_TO_VERBOSITY(level)); 79 78 80 79 return flags; 81 80 } ··· 121 122 BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); 122 123 BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); 123 124 124 - BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > 125 - (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); 126 - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > 127 - (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); 128 - BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > 129 - (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); 130 - 131 125 flags = GUC_LOG_VALID | 132 126 GUC_LOG_NOTIFY_ON_HALF_FULL | 133 127 CAPTURE_FLAG | 134 128 LOG_FLAG | 135 - ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | 136 - ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | 137 - ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << 138 - GUC_LOG_CAPTURE_SHIFT) | 139 - (offset << GUC_LOG_BUF_ADDR_SHIFT); 129 + FIELD_PREP(GUC_LOG_CRASH, CRASH_BUFFER_SIZE / LOG_UNIT - 1) | 130 + FIELD_PREP(GUC_LOG_DEBUG, DEBUG_BUFFER_SIZE / LOG_UNIT - 1) | 131 + FIELD_PREP(GUC_LOG_CAPTURE, CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) | 132 + FIELD_PREP(GUC_LOG_BUF_ADDR, offset); 140 133 141 134 #undef LOG_UNIT 142 135 #undef LOG_FLAG ··· 141 150 static u32 guc_ctl_ads_flags(struct xe_guc *guc) 142 151 { 143 152 u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT; 144 - u32 flags = ads << GUC_ADS_ADDR_SHIFT; 153 + u32 flags = FIELD_PREP(GUC_ADS_ADDR, ads); 145 154 146 155 return flags; 147 156 } ··· 700 709 if (ret) 701 710 return ret; 702 711 703 - ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo); 704 - if (ret) 705 - return ret; 706 - 707 712 return 0; 708 713 } 709 714 ··· 834 847 if (ret) 835 848 return ret; 836 849 850 + ret = xe_guc_ct_init_post_hwconfig(&guc->ct); 
851 + if (ret) 852 + return ret; 853 + 837 854 guc_init_params_post_hwconfig(guc); 838 855 839 856 ret = xe_guc_submit_init(guc, ~0); ··· 879 888 return ret; 880 889 } 881 890 882 - guc->submission_state.enabled = true; 883 - 884 - return 0; 891 + return xe_guc_submit_enable(guc); 885 892 } 886 893 887 894 int xe_guc_reset(struct xe_guc *guc) ··· 1055 1066 #endif 1056 1067 #define GUC_LOAD_TIME_WARN_MS 200 1057 1068 1058 - static void guc_wait_ucode(struct xe_guc *guc) 1069 + static int guc_wait_ucode(struct xe_guc *guc) 1059 1070 { 1060 1071 struct xe_gt *gt = guc_to_gt(guc); 1061 1072 struct xe_mmio *mmio = &gt->mmio; ··· 1162 1173 break; 1163 1174 } 1164 1175 1165 - xe_device_declare_wedged(gt_to_xe(gt)); 1176 + return -EPROTO; 1166 1177 } else if (delta_ms > GUC_LOAD_TIME_WARN_MS) { 1167 1178 xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n", 1168 1179 delta_ms, status, count); ··· 1174 1185 delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc), 1175 1186 before_freq, status, count); 1176 1187 } 1188 + 1189 + return 0; 1177 1190 } 1191 + ALLOW_ERROR_INJECTION(guc_wait_ucode, ERRNO); 1178 1192 1179 1193 static int __xe_guc_upload(struct xe_guc *guc) 1180 1194 { ··· 1209 1217 goto out; 1210 1218 1211 1219 /* Wait for authentication */ 1212 - guc_wait_ucode(guc); 1220 + ret = guc_wait_ucode(guc); 1221 + if (ret) 1222 + goto out; 1213 1223 1214 1224 xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); 1215 1225 return 0; 1216 1226 1217 1227 out: 1218 1228 xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL); 1219 - return 0 /* FIXME: ret, don't want to stop load currently */; 1229 + return ret; 1220 1230 } 1221 1231 1222 1232 static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) ··· 1596 1602 { 1597 1603 xe_uc_fw_sanitize(&guc->fw); 1598 1604 xe_guc_ct_disable(&guc->ct); 1599 - guc->submission_state.enabled = false; 1605 + xe_guc_submit_disable(guc); 1600 1606 } 1601 1607 1602 1608 int 
xe_guc_reset_prepare(struct xe_guc *guc) ··· 1689 1695 xe_guc_ct_stop(&guc->ct); 1690 1696 xe_guc_submit_wedge(guc); 1691 1697 } 1698 + 1699 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 1700 + #include "tests/xe_guc_g2g_test.c" 1701 + #endif
+4
drivers/gpu/drm/xe/xe_guc.h
··· 53 53 int xe_guc_start(struct xe_guc *guc); 54 54 void xe_guc_declare_wedged(struct xe_guc *guc); 55 55 56 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 57 + int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len); 58 + #endif 59 + 56 60 static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) 57 61 { 58 62 switch (class) {
+2 -2
drivers/gpu/drm/xe/xe_guc_ads.c
··· 339 339 if (XE_GT_WA(gt, 13011645652)) { 340 340 u32 data = 0xC40; 341 341 342 - guc_waklv_enable(ads, &data, sizeof(data) / sizeof(u32), &offset, &remain, 342 + guc_waklv_enable(ads, &data, 1, &offset, &remain, 343 343 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE); 344 344 } 345 345 ··· 355 355 0x0, 356 356 0xF, 357 357 }; 358 - guc_waklv_enable(ads, data, sizeof(data) / sizeof(u32), &offset, &remain, 358 + guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain, 359 359 GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG); 360 360 } 361 361
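The ads.c hunk replaces `sizeof(data) / sizeof(u32)` with ARRAY_SIZE(data) (or the literal 1 for a single dword). A userspace equivalent of the idiom, minus the must-be-an-array type check the kernel macro adds; the KLV payload here is a hypothetical stand-in for the workaround data in the hunk:

```c
#include <stddef.h>
#include <stdint.h>

/* Userspace equivalent of the kernel's ARRAY_SIZE(); the kernel macro
 * additionally rejects pointers at compile time. */
#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))

/* Hypothetical KLV payload, standing in for the WA data in the hunk. */
static const uint32_t klv_data[] = { 0x3, 0x0, 0xF };

size_t klv_dwords(void)
{
	/* reads as "element count", unlike sizeof(data) / sizeof(u32),
	 * and stays correct if the element type ever changes */
	return ARRAY_SIZE(klv_data);
}
```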
+54 -1
drivers/gpu/drm/xe/xe_guc_ct.c
··· 39 39 static void g2h_worker_func(struct work_struct *w); 40 40 static void safe_mode_worker_func(struct work_struct *w); 41 41 static void ct_exit_safe_mode(struct xe_guc_ct *ct); 42 + static void guc_ct_change_state(struct xe_guc_ct *ct, 43 + enum xe_guc_ct_state state); 42 44 43 45 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 44 46 enum { ··· 254 252 } 255 253 ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */ 256 254 255 + static void guc_action_disable_ct(void *arg) 256 + { 257 + struct xe_guc_ct *ct = arg; 258 + 259 + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); 260 + } 261 + 257 262 int xe_guc_ct_init(struct xe_guc_ct *ct) 258 263 { 259 264 struct xe_device *xe = ct_to_xe(ct); ··· 277 268 return PTR_ERR(bo); 278 269 279 270 ct->bo = bo; 280 - return 0; 271 + 272 + return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); 281 273 } 282 274 ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ 275 + 276 + /** 277 + * xe_guc_ct_init_post_hwconfig - Reinitialize the GuC CTB in VRAM 278 + * @ct: the &xe_guc_ct 279 + * 280 + * Allocate a new BO in VRAM and free the previous BO that was allocated 281 + * in system memory (SMEM). Applicable only for DGFX products. 282 + * 283 + * Return: 0 on success, or a negative errno on failure. 
284 + */ 285 + int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct) 286 + { 287 + struct xe_device *xe = ct_to_xe(ct); 288 + struct xe_gt *gt = ct_to_gt(ct); 289 + struct xe_tile *tile = gt_to_tile(gt); 290 + int ret; 291 + 292 + xe_assert(xe, !xe_guc_ct_enabled(ct)); 293 + 294 + if (IS_DGFX(xe)) { 295 + ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo); 296 + if (ret) 297 + return ret; 298 + } 299 + 300 + devm_remove_action(xe->drm.dev, guc_action_disable_ct, ct); 301 + return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); 302 + } 283 303 284 304 #define desc_read(xe_, guc_ctb__, field_) \ 285 305 xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \ ··· 1078 1040 return true; 1079 1041 } 1080 1042 1043 + #define GUC_SEND_RETRY_LIMIT 50 1044 + #define GUC_SEND_RETRY_MSLEEP 5 1045 + 1081 1046 static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, 1082 1047 u32 *response_buffer, bool no_fail) 1083 1048 { 1084 1049 struct xe_gt *gt = ct_to_gt(ct); 1085 1050 struct g2h_fence g2h_fence; 1051 + unsigned int retries = 0; 1086 1052 int ret = 0; 1087 1053 1088 1054 /* ··· 1151 1109 xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", 1152 1110 action[0], g2h_fence.reason); 1153 1111 mutex_unlock(&ct->lock); 1112 + if (++retries > GUC_SEND_RETRY_LIMIT) { 1113 + xe_gt_err(gt, "H2G action %#x reached retry limit=%u, aborting\n", 1114 + action[0], GUC_SEND_RETRY_LIMIT); 1115 + return -ELOOP; 1116 + } 1117 + msleep(GUC_SEND_RETRY_MSLEEP * retries); 1154 1118 goto retry; 1155 1119 } 1156 1120 if (g2h_fence.fail) { ··· 1486 1438 case XE_GUC_ACTION_NOTIFY_EXCEPTION: 1487 1439 ret = guc_crash_process_msg(ct, action); 1488 1440 break; 1441 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 1442 + case XE_GUC_ACTION_TEST_G2G_RECV: 1443 + ret = xe_guc_g2g_test_notification(guc, payload, adj_len); 1444 + break; 1445 + #endif 1489 1446 default: 1490 1447 xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); 1491 1448 }
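The new GUC_SEND_RETRY_LIMIT/GUC_SEND_RETRY_MSLEEP path bounds the NO_RESPONSE_RETRY loop and backs off linearly between attempts. A control-flow sketch with the CT plumbing stubbed out; the firmware reply count and the -1 error are stand-ins (for GuC answers and -ELOOP respectively):

```c
#define GUC_SEND_RETRY_LIMIT	50
#define GUC_SEND_RETRY_MSLEEP	5

/*
 * Sketch of the bounded retry added to guc_ct_send_recv(): keep
 * retrying while the firmware answers "retry", but give up with an
 * error once the limit is crossed instead of looping forever.
 */
int send_with_retry(int fw_retry_replies)
{
	unsigned int retries = 0;

	while (fw_retry_replies-- > 0) {
		if (++retries > GUC_SEND_RETRY_LIMIT)
			return -1;	/* -ELOOP in the patch */
		/* msleep(GUC_SEND_RETRY_MSLEEP * retries) in the patch */
	}
	return 0;
}

/* Total linear backoff accumulated before attempt n succeeds. */
unsigned int total_backoff_ms(unsigned int retries)
{
	unsigned int i, ms = 0;

	for (i = 1; i <= retries; i++)
		ms += GUC_SEND_RETRY_MSLEEP * i;
	return ms;
}
```

With these values a sender sleeps 5, 10, 15, ... ms between attempts, so the worst case is bounded at 50 retries rather than an endless loop.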
+1
drivers/gpu/drm/xe/xe_guc_ct.h
··· 13 13 14 14 int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); 15 15 int xe_guc_ct_init(struct xe_guc_ct *ct); 16 + int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct); 16 17 int xe_guc_ct_enable(struct xe_guc_ct *ct); 17 18 void xe_guc_ct_disable(struct xe_guc_ct *ct); 18 19 void xe_guc_ct_stop(struct xe_guc_ct *ct);
+7 -6
drivers/gpu/drm/xe/xe_guc_engine_activity.c
··· 94 94 struct xe_tile *tile = gt_to_tile(gt); 95 95 struct xe_bo *bo, *metadata_bo; 96 96 97 - metadata_bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(metadata_size), 98 - ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | 99 - XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); 97 + metadata_bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(metadata_size), 98 + ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | 99 + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, 100 + false); 100 101 101 102 if (IS_ERR(metadata_bo)) 102 103 return PTR_ERR(metadata_bo); 103 104 104 - bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(size), 105 - ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | 106 - XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); 105 + bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(size), 106 + ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | 107 + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, false); 107 108 108 109 if (IS_ERR(bo)) { 109 110 xe_bo_unpin_map_no_vm(metadata_bo);
+2 -2
drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
··· 35 35 struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; 36 36 /** @lr_tdr: long running TDR worker */ 37 37 struct work_struct lr_tdr; 38 - /** @fini_async: do final fini async from this worker */ 39 - struct work_struct fini_async; 38 + /** @destroy_async: do final destroy async from this worker */ 39 + struct work_struct destroy_async; 40 40 /** @resume_time: time of last resume */ 41 41 u64 resume_time; 42 42 /** @state: GuC specific state for this xe_exec_queue */
+11 -19
drivers/gpu/drm/xe/xe_guc_fwif.h
··· 15 15 #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 16 16 #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 17 17 #define G2H_LEN_DW_TLB_INVALIDATE 3 18 + #define G2H_LEN_DW_G2G_NOTIFY_MIN 3 18 19 19 20 #define GUC_ID_MAX 65535 20 21 #define GUC_ID_UNKNOWN 0xffffffff ··· 66 65 u32 hwlrca_hi; 67 66 }; 68 67 #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) 68 + #define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1) 69 69 70 70 /* 32-bit KLV structure as used by policy updates and others */ 71 71 struct guc_klv_generic_dw_t { ··· 91 89 #define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) 92 90 #define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) 93 91 #define GUC_LOG_LOG_ALLOC_UNITS BIT(3) 94 - #define GUC_LOG_CRASH_SHIFT 4 95 - #define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) 96 - #define GUC_LOG_DEBUG_SHIFT 6 97 - #define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) 98 - #define GUC_LOG_CAPTURE_SHIFT 10 99 - #define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) 100 - #define GUC_LOG_BUF_ADDR_SHIFT 12 92 + #define GUC_LOG_CRASH REG_GENMASK(5, 4) 93 + #define GUC_LOG_DEBUG REG_GENMASK(9, 6) 94 + #define GUC_LOG_CAPTURE REG_GENMASK(11, 10) 95 + #define GUC_LOG_BUF_ADDR REG_GENMASK(31, 12) 101 96 102 97 #define GUC_CTL_WA 1 103 98 #define GUC_WA_GAM_CREDITS BIT(10) ··· 116 117 #define GUC_CTL_DISABLE_SCHEDULER BIT(14) 117 118 118 119 #define GUC_CTL_DEBUG 3 119 - #define GUC_LOG_VERBOSITY_SHIFT 0 120 - #define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) 121 - #define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT) 122 - #define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT) 123 - #define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT) 124 - #define GUC_LOG_VERBOSITY_MIN 0 120 + #define GUC_LOG_VERBOSITY REG_GENMASK(1, 0) 125 121 #define GUC_LOG_VERBOSITY_MAX 3 126 - #define GUC_LOG_VERBOSITY_MASK 0x0000000f 127 - #define GUC_LOG_DESTINATION_MASK (3 << 4) 128 - #define GUC_LOG_DISABLED (1 << 6) 129 - #define GUC_PROFILE_ENABLED (1 << 7) 122 + #define 
GUC_LOG_DESTINATION REG_GENMASK(5, 4) 123 + #define GUC_LOG_DISABLED BIT(6) 124 + #define GUC_PROFILE_ENABLED BIT(7) 130 125 131 126 #define GUC_CTL_ADS 4 132 - #define GUC_ADS_ADDR_SHIFT 1 133 - #define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) 127 + #define GUC_ADS_ADDR REG_GENMASK(21, 1) 134 128 135 129 #define GUC_CTL_DEVID 5 136 130
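The fwif.h conversion from open-coded shift/mask pairs to REG_GENMASK() fields is bit-identical, which can be checked mechanically. A 32-bit userspace stand-in for REG_GENMASK()/FIELD_PREP() (the real kernel macros add compile-time checks omitted here; `__builtin_ctz` assumes GCC/Clang) against the masks the patch removes:

```c
#include <stdint.h>

/* 32-bit userspace stand-ins for REG_GENMASK()/FIELD_PREP(). */
#define REG_GENMASK(h, l)	((~0u >> (31 - (h))) & (~0u << (l)))
#define FIELD_PREP(mask, val)	(((uint32_t)(val) << __builtin_ctz(mask)) & (mask))

/* New field definitions from the hunk above */
#define GUC_LOG_CRASH		REG_GENMASK(5, 4)
#define GUC_LOG_DEBUG		REG_GENMASK(9, 6)
#define GUC_LOG_CAPTURE		REG_GENMASK(11, 10)
#define GUC_LOG_BUF_ADDR	REG_GENMASK(31, 12)

/* Old open-coded equivalents that the patch removes */
#define OLD_CRASH_MASK		(0x3u << 4)
#define OLD_DEBUG_MASK		(0xFu << 6)
#define OLD_CAPTURE_MASK	(0x3u << 10)
```

FIELD_PREP() then replaces the manual `(val << SHIFT)` in xe_guc.c, with the shift derived from the mask itself, so mask and shift can never drift apart.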
+1 -1
drivers/gpu/drm/xe/xe_guc_log.h
··· 17 17 #define DEBUG_BUFFER_SIZE SZ_8M 18 18 #define CAPTURE_BUFFER_SIZE SZ_2M 19 19 #else 20 - #define CRASH_BUFFER_SIZE SZ_8K 20 + #define CRASH_BUFFER_SIZE SZ_16K 21 21 #define DEBUG_BUFFER_SIZE SZ_64K 22 22 #define CAPTURE_BUFFER_SIZE SZ_1M 23 23 #endif
+67
drivers/gpu/drm/xe/xe_guc_pc.c
··· 79 79 * Xe driver enables SLPC with all of its defaults features and frequency 80 80 * selection, which varies per platform. 81 81 * 82 + * Power profiles add another level of control to SLPC. When power saving 83 + * profile is chosen, SLPC will use conservative thresholds to ramp frequency, 84 + * thus saving power. Base profile is default and ensures balanced performance 85 + * for any workload. 86 + * 82 87 * Render-C States: 83 88 * ================ 84 89 * ··· 1176 1171 return ret; 1177 1172 } 1178 1173 1174 + static const char *power_profile_to_string(struct xe_guc_pc *pc) 1175 + { 1176 + switch (pc->power_profile) { 1177 + case SLPC_POWER_PROFILE_BASE: 1178 + return "base"; 1179 + case SLPC_POWER_PROFILE_POWER_SAVING: 1180 + return "power_saving"; 1181 + default: 1182 + return "invalid"; 1183 + } 1184 + } 1185 + 1186 + void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile) 1187 + { 1188 + switch (pc->power_profile) { 1189 + case SLPC_POWER_PROFILE_BASE: 1190 + sprintf(profile, "[%s] %s\n", "base", "power_saving"); 1191 + break; 1192 + case SLPC_POWER_PROFILE_POWER_SAVING: 1193 + sprintf(profile, "%s [%s]\n", "base", "power_saving"); 1194 + break; 1195 + default: 1196 + sprintf(profile, "invalid"); 1197 + } 1198 + } 1199 + 1200 + int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf) 1201 + { 1202 + int ret = 0; 1203 + u32 val; 1204 + 1205 + if (strncmp("base", buf, strlen("base")) == 0) 1206 + val = SLPC_POWER_PROFILE_BASE; 1207 + else if (strncmp("power_saving", buf, strlen("power_saving")) == 0) 1208 + val = SLPC_POWER_PROFILE_POWER_SAVING; 1209 + else 1210 + return -EINVAL; 1211 + 1212 + guard(mutex)(&pc->freq_lock); 1213 + xe_pm_runtime_get_noresume(pc_to_xe(pc)); 1214 + 1215 + ret = pc_action_set_param(pc, 1216 + SLPC_PARAM_POWER_PROFILE, 1217 + val); 1218 + if (ret) 1219 + xe_gt_err_once(pc_to_gt(pc), "Failed to set power profile to %d: %pe\n", 1220 + val, ERR_PTR(ret)); 1221 + else 1222 + pc->power_profile = val; 
1223 + 1224 + xe_pm_runtime_put(pc_to_xe(pc)); 1225 + 1226 + return ret; 1227 + } 1228 + 1179 1229 /** 1180 1230 * xe_guc_pc_start - Start GuC's Power Conservation component 1181 1231 * @pc: Xe_GuC_PC instance ··· 1309 1249 /* Enable SLPC Optimized Strategy for compute */ 1310 1250 ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); 1311 1251 1252 + /* Set cached value of power_profile */ 1253 + ret = xe_guc_pc_set_power_profile(pc, power_profile_to_string(pc)); 1254 + if (unlikely(ret)) 1255 + xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret)); 1256 + 1312 1257 out: 1313 1258 xe_force_wake_put(gt_to_fw(gt), fw_ref); 1314 1259 return ret; ··· 1391 1326 return PTR_ERR(bo); 1392 1327 1393 1328 pc->bo = bo; 1329 + 1330 + pc->power_profile = SLPC_POWER_PROFILE_BASE; 1394 1331 1395 1332 return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc); 1396 1333 }
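The new power_profile attribute follows the usual sysfs multi-choice convention: the show side brackets the active value, and the store side matches by name prefix, as the patch's strncmp() checks do. An illustrative userspace model of the format/parse pair (the helper functions themselves are hypothetical; only the names "base" and "power_saving" come from the patch):

```c
#include <stdio.h>
#include <string.h>

enum power_profile { PROFILE_BASE, PROFILE_POWER_SAVING };

/* Show side: print all choices, active one in brackets. */
void format_profile(enum power_profile p, char *buf, size_t len)
{
	snprintf(buf, len, "%s", p == PROFILE_BASE ?
		 "[base] power_saving\n" : "base [power_saving]\n");
}

/* Store side: prefix match, as the patch's strncmp() checks do. */
int parse_profile(const char *buf, enum power_profile *out)
{
	if (!strncmp(buf, "base", strlen("base")))
		*out = PROFILE_BASE;
	else if (!strncmp(buf, "power_saving", strlen("power_saving")))
		*out = PROFILE_POWER_SAVING;
	else
		return -1;	/* -EINVAL in the patch */
	return 0;
}
```

Prefix matching conveniently tolerates the trailing newline that `echo base > power_profile` writes, which is why the patch compares only strlen() characters.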
+2
drivers/gpu/drm/xe/xe_guc_pc.h
··· 31 31 int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq); 32 32 int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq); 33 33 int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq); 34 + int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf); 35 + void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile); 34 36 35 37 enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); 36 38 u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
+2
drivers/gpu/drm/xe/xe_guc_pc_types.h
··· 37 37 struct mutex freq_lock; 38 38 /** @freq_ready: Only handle freq changes, if they are really ready */ 39 39 bool freq_ready; 40 + /** @power_profile: Base or power_saving profile */ 41 + u32 power_profile; 40 42 }; 41 43 42 44 #endif /* _XE_GUC_PC_TYPES_H_ */
+109 -33
drivers/gpu/drm/xe/xe_guc_submit.c
···
 #include "xe_guc_ct.h"
 #include "xe_guc_exec_queue_types.h"
 #include "xe_guc_id_mgr.h"
+#include "xe_guc_klv_helpers.h"
 #include "xe_guc_submit_types.h"
 #include "xe_hw_engine.h"
 #include "xe_hw_fence.h"
···
 	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
 }
 
+/*
+ * Given that we want to guarantee enough RCS throughput to avoid missing
+ * frames, we set the yield policy to 20% of each 80ms interval.
+ */
+#define RC_YIELD_DURATION	80	/* in ms */
+#define RC_YIELD_RATIO		20	/* in percent */
+static u32 *emit_render_compute_yield_klv(u32 *emit)
+{
+	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
+	*emit++ = RC_YIELD_DURATION;
+	*emit++ = RC_YIELD_RATIO;
+
+	return emit;
+}
+
+#define SCHEDULING_POLICY_MAX_DWORDS 16
+static int guc_init_global_schedule_policy(struct xe_guc *guc)
+{
+	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
+	u32 *emit = data;
+	u32 count = 0;
+	int ret;
+
+	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
+		return 0;
+
+	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
+
+	if (CCS_MASK(guc_to_gt(guc)))
+		emit = emit_render_compute_yield_klv(emit);
+
+	count = emit - data;
+	if (count > 1) {
+		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);
+
+		ret = xe_guc_ct_send_block(&guc->ct, data, count);
+		if (ret < 0) {
+			xe_gt_err(guc_to_gt(guc),
+				  "failed to enable GuC scheduling policies: %pe\n",
+				  ERR_PTR(ret));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+int xe_guc_submit_enable(struct xe_guc *guc)
+{
+	int ret;
+
+	ret = guc_init_global_schedule_policy(guc);
+	if (ret)
+		return ret;
+
+	guc->submission_state.enabled = true;
+
+	return 0;
+}
+
+void xe_guc_submit_disable(struct xe_guc *guc)
+{
+	guc->submission_state.enabled = false;
+}
+
 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
 {
 	int i;
···
 	info.engine_submit_mask = q->logical_mask;
 	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
 	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
-	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
-
-	if (ctx_type != GUC_CONTEXT_NORMAL)
-		info.flags |= BIT(ctx_type);
+	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
+		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
 
 	if (xe_exec_queue_is_parallel(q)) {
 		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
···
 	return DRM_GPU_SCHED_STAT_NO_HANG;
 }
 
-static void __guc_exec_queue_fini_async(struct work_struct *w)
+static void guc_exec_queue_fini(struct xe_exec_queue *q)
 {
-	struct xe_guc_exec_queue *ge =
-		container_of(w, struct xe_guc_exec_queue, fini_async);
-	struct xe_exec_queue *q = ge->q;
+	struct xe_guc_exec_queue *ge = q->guc;
 	struct xe_guc *guc = exec_queue_to_guc(q);
 
-	xe_pm_runtime_get(guc_to_xe(guc));
-	trace_xe_exec_queue_destroy(q);
-
 	release_guc_id(guc, q);
-	if (xe_exec_queue_is_lr(q))
-		cancel_work_sync(&ge->lr_tdr);
-	/* Confirm no work left behind accessing device structures */
-	cancel_delayed_work_sync(&ge->sched.base.work_tdr);
 	xe_sched_entity_fini(&ge->entity);
 	xe_sched_fini(&ge->sched);
 
···
 	 * (timeline name).
 	 */
 	kfree_rcu(ge, rcu);
+}
+
+static void __guc_exec_queue_destroy_async(struct work_struct *w)
+{
+	struct xe_guc_exec_queue *ge =
+		container_of(w, struct xe_guc_exec_queue, destroy_async);
+	struct xe_exec_queue *q = ge->q;
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	xe_pm_runtime_get(guc_to_xe(guc));
+	trace_xe_exec_queue_destroy(q);
+
+	if (xe_exec_queue_is_lr(q))
+		cancel_work_sync(&ge->lr_tdr);
+	/* Confirm no work left behind accessing device structures */
+	cancel_delayed_work_sync(&ge->sched.base.work_tdr);
+
 	xe_exec_queue_fini(q);
+
 	xe_pm_runtime_put(guc_to_xe(guc));
 }
 
-static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
 
-	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
+	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);
 
 	/* We must block on kernel engines so slabs are empty on driver unload */
 	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
-		__guc_exec_queue_fini_async(&q->guc->fini_async);
+		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
 	else
-		queue_work(xe->destroy_wq, &q->guc->fini_async);
+		queue_work(xe->destroy_wq, &q->guc->destroy_async);
 }
 
-static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
+static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	/*
 	 * Might be done from within the GPU scheduler, need to do async as we
···
 	 * this we and don't really care when everything is fini'd, just that it
 	 * is.
 	 */
-	guc_exec_queue_fini_async(q);
+	guc_exec_queue_destroy_async(q);
 }
 
 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
···
 	if (exec_queue_registered(q))
 		disable_scheduling_deregister(guc, q);
 	else
-		__guc_exec_queue_fini(guc, q);
+		__guc_exec_queue_destroy(guc, q);
 }
 
 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
···
 #define STATIC_MSG_CLEANUP	0
 #define STATIC_MSG_SUSPEND	1
 #define STATIC_MSG_RESUME	2
-static void guc_exec_queue_fini(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy(struct xe_exec_queue *q)
 {
 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
 
 	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
 		guc_exec_queue_add_msg(q, msg, CLEANUP);
 	else
-		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
+		__guc_exec_queue_destroy(exec_queue_to_guc(q), q);
 }
 
 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
···
 	.init = guc_exec_queue_init,
 	.kill = guc_exec_queue_kill,
 	.fini = guc_exec_queue_fini,
+	.destroy = guc_exec_queue_destroy,
 	.set_priority = guc_exec_queue_set_priority,
 	.set_timeslice = guc_exec_queue_set_timeslice,
 	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
···
 	if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
 		xe_exec_queue_put(q);
 	else if (exec_queue_destroyed(q))
-		__guc_exec_queue_fini(guc, q);
+		__guc_exec_queue_destroy(guc, q);
 	}
 	if (q->guc->suspend_pending) {
 		set_exec_queue_suspended(q);
···
 
 	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
 	if (unlikely(!q)) {
-		xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id);
+		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
 		return NULL;
 	}
 
···
 	if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
 		xe_exec_queue_put(q);
 	else
-		__guc_exec_queue_fini(guc, q);
+		__guc_exec_queue_destroy(guc, q);
 }
 
 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
···
 }
 
 /**
- * xe_guc_register_exec_queue - Register exec queue for a given context type.
+ * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
  * @q: Execution queue
  * @ctx_type: Type of the context
  *
···
  *
  * Returns - None.
  */
-void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type)
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
 
-	xe_assert(xe, IS_SRIOV_VF(xe));
-	xe_assert(xe, !IS_DGFX(xe));
-	xe_assert(xe, (ctx_type > GUC_CONTEXT_NORMAL &&
-		       ctx_type < GUC_CONTEXT_COUNT));
+	xe_gt_assert(gt, IS_SRIOV_VF(xe));
+	xe_gt_assert(gt, !IS_DGFX(xe));
+	xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
+		     ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
+	xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
 
 	register_exec_queue(q, ctx_type);
 	enable_scheduling(q);
+3 -1
drivers/gpu/drm/xe/xe_guc_submit.h
···
 struct xe_guc;
 
 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids);
+int xe_guc_submit_enable(struct xe_guc *guc);
+void xe_guc_submit_disable(struct xe_guc *guc);
 
 int xe_guc_submit_reset_prepare(struct xe_guc *guc);
 void xe_guc_submit_reset_wait(struct xe_guc *guc);
···
 void
 xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot);
 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
-void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type);
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type);
 
 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);
 
-325
drivers/gpu/drm/xe/xe_hmm.c
···
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2024 Intel Corporation
- */
-
-#include <linux/scatterlist.h>
-#include <linux/mmu_notifier.h>
-#include <linux/dma-mapping.h>
-#include <linux/memremap.h>
-#include <linux/swap.h>
-#include <linux/hmm.h>
-#include <linux/mm.h>
-#include "xe_hmm.h"
-#include "xe_vm.h"
-#include "xe_bo.h"
-
-static u64 xe_npages_in_range(unsigned long start, unsigned long end)
-{
-	return (end - start) >> PAGE_SHIFT;
-}
-
-static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
-		       struct hmm_range *range, struct rw_semaphore *notifier_sem)
-{
-	unsigned long i, npages, hmm_pfn;
-	unsigned long num_chunks = 0;
-	int ret;
-
-	/* HMM docs says this is needed. */
-	ret = down_read_interruptible(notifier_sem);
-	if (ret)
-		return ret;
-
-	if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
-		up_read(notifier_sem);
-		return -EAGAIN;
-	}
-
-	npages = xe_npages_in_range(range->start, range->end);
-	for (i = 0; i < npages;) {
-		unsigned long len;
-
-		hmm_pfn = range->hmm_pfns[i];
-		xe_assert(xe, hmm_pfn & HMM_PFN_VALID);
-
-		len = 1UL << hmm_pfn_to_map_order(hmm_pfn);
-
-		/* If order > 0 the page may extend beyond range->start */
-		len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1);
-		i += len;
-		num_chunks++;
-	}
-	up_read(notifier_sem);
-
-	return sg_alloc_table(st, num_chunks, GFP_KERNEL);
-}
-
-/**
- * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
- * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
- * and will be used to program GPU page table later.
- * @xe: the xe device who will access the dma-address in sg table
- * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
- * has the pfn numbers of pages that back up this hmm address range.
- * @st: pointer to the sg table.
- * @notifier_sem: The xe notifier lock.
- * @write: whether we write to this range. This decides dma map direction
- * for system pages. If write we map it bi-diretional; otherwise
- * DMA_TO_DEVICE
- *
- * All the contiguous pfns will be collapsed into one entry in
- * the scatter gather table. This is for the purpose of efficiently
- * programming GPU page table.
- *
- * The dma_address in the sg table will later be used by GPU to
- * access memory. So if the memory is system memory, we need to
- * do a dma-mapping so it can be accessed by GPU/DMA.
- *
- * FIXME: This function currently only support pages in system
- * memory. If the memory is GPU local memory (of the GPU who
- * is going to access memory), we need gpu dpa (device physical
- * address), and there is no need of dma-mapping. This is TBD.
- *
- * FIXME: dma-mapping for peer gpu device to access remote gpu's
- * memory. Add this when you support p2p
- *
- * This function allocates the storage of the sg table. It is
- * caller's responsibility to free it calling sg_free_table.
- *
- * Returns 0 if successful; -ENOMEM if fails to allocate memory
- */
-static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
-		       struct sg_table *st,
-		       struct rw_semaphore *notifier_sem,
-		       bool write)
-{
-	unsigned long npages = xe_npages_in_range(range->start, range->end);
-	struct device *dev = xe->drm.dev;
-	struct scatterlist *sgl;
-	struct page *page;
-	unsigned long i, j;
-
-	lockdep_assert_held(notifier_sem);
-
-	i = 0;
-	for_each_sg(st->sgl, sgl, st->nents, j) {
-		unsigned long hmm_pfn, size;
-
-		hmm_pfn = range->hmm_pfns[i];
-		page = hmm_pfn_to_page(hmm_pfn);
-		xe_assert(xe, !is_device_private_page(page));
-
-		size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
-		size -= page_to_pfn(page) & (size - 1);
-		i += size;
-
-		if (unlikely(j == st->nents - 1)) {
-			xe_assert(xe, i >= npages);
-			if (i > npages)
-				size -= (i - npages);
-
-			sg_mark_end(sgl);
-		} else {
-			xe_assert(xe, i < npages);
-		}
-
-		sg_set_page(sgl, page, size << PAGE_SHIFT, 0);
-	}
-
-	return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
-			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
-}
-
-static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma)
-{
-	struct xe_userptr *userptr = &uvma->userptr;
-	struct xe_vm *vm = xe_vma_vm(&uvma->vma);
-
-	lockdep_assert_held_write(&vm->lock);
-	lockdep_assert_held(&vm->userptr.notifier_lock);
-
-	mutex_lock(&userptr->unmap_mutex);
-	xe_assert(vm->xe, !userptr->mapped);
-	userptr->mapped = true;
-	mutex_unlock(&userptr->unmap_mutex);
-}
-
-void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma)
-{
-	struct xe_userptr *userptr = &uvma->userptr;
-	struct xe_vma *vma = &uvma->vma;
-	bool write = !xe_vma_read_only(vma);
-	struct xe_vm *vm = xe_vma_vm(vma);
-	struct xe_device *xe = vm->xe;
-
-	if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) &&
-	    !lockdep_is_held_type(&vm->lock, 0) &&
-	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
-		/* Don't unmap in exec critical section. */
-		xe_vm_assert_held(vm);
-		/* Don't unmap while mapping the sg. */
-		lockdep_assert_held(&vm->lock);
-	}
-
-	mutex_lock(&userptr->unmap_mutex);
-	if (userptr->sg && userptr->mapped)
-		dma_unmap_sgtable(xe->drm.dev, userptr->sg,
-				  write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
-	userptr->mapped = false;
-	mutex_unlock(&userptr->unmap_mutex);
-}
-
-/**
- * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr
- * @uvma: the userptr vma which hold the scatter gather table
- *
- * With function xe_userptr_populate_range, we allocate storage of
- * the userptr sg table. This is a helper function to free this
- * sg table, and dma unmap the address in the table.
- */
-void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
-{
-	struct xe_userptr *userptr = &uvma->userptr;
-
-	xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg);
-	xe_hmm_userptr_unmap(uvma);
-	sg_free_table(userptr->sg);
-	userptr->sg = NULL;
-}
-
-/**
- * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual
- * address range
- *
- * @uvma: userptr vma which has information of the range to populate.
- * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller.
- *
- * This function populate the physical pages of a virtual
- * address range. The populated physical pages is saved in
- * userptr's sg table. It is similar to get_user_pages but call
- * hmm_range_fault.
- *
- * This function also read mmu notifier sequence # (
- * mmu_interval_read_begin), for the purpose of later
- * comparison (through mmu_interval_read_retry).
- *
- * This must be called with mmap read or write lock held.
- *
- * This function allocates the storage of the userptr sg table.
- * It is caller's responsibility to free it calling sg_free_table.
- *
- * returns: 0 for success; negative error no on failure
- */
-int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
-				  bool is_mm_mmap_locked)
-{
-	unsigned long timeout =
-		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
-	unsigned long *pfns;
-	struct xe_userptr *userptr;
-	struct xe_vma *vma = &uvma->vma;
-	u64 userptr_start = xe_vma_userptr(vma);
-	u64 userptr_end = userptr_start + xe_vma_size(vma);
-	struct xe_vm *vm = xe_vma_vm(vma);
-	struct hmm_range hmm_range = {
-		.pfn_flags_mask = 0, /* ignore pfns */
-		.default_flags = HMM_PFN_REQ_FAULT,
-		.start = userptr_start,
-		.end = userptr_end,
-		.notifier = &uvma->userptr.notifier,
-		.dev_private_owner = vm->xe,
-	};
-	bool write = !xe_vma_read_only(vma);
-	unsigned long notifier_seq;
-	u64 npages;
-	int ret;
-
-	userptr = &uvma->userptr;
-
-	if (is_mm_mmap_locked)
-		mmap_assert_locked(userptr->notifier.mm);
-
-	if (vma->gpuva.flags & XE_VMA_DESTROYED)
-		return 0;
-
-	notifier_seq = mmu_interval_read_begin(&userptr->notifier);
-	if (notifier_seq == userptr->notifier_seq)
-		return 0;
-
-	if (userptr->sg)
-		xe_hmm_userptr_free_sg(uvma);
-
-	npages = xe_npages_in_range(userptr_start, userptr_end);
-	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
-	if (unlikely(!pfns))
-		return -ENOMEM;
-
-	if (write)
-		hmm_range.default_flags |= HMM_PFN_REQ_WRITE;
-
-	if (!mmget_not_zero(userptr->notifier.mm)) {
-		ret = -EFAULT;
-		goto free_pfns;
-	}
-
-	hmm_range.hmm_pfns = pfns;
-
-	while (true) {
-		hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);
-
-		if (!is_mm_mmap_locked)
-			mmap_read_lock(userptr->notifier.mm);
-
-		ret = hmm_range_fault(&hmm_range);
-
-		if (!is_mm_mmap_locked)
-			mmap_read_unlock(userptr->notifier.mm);
-
-		if (ret == -EBUSY) {
-			if (time_after(jiffies, timeout))
-				break;
-
-			continue;
-		}
-		break;
-	}
-
-	mmput(userptr->notifier.mm);
-
-	if (ret)
-		goto free_pfns;
-
-	ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock);
-	if (ret)
-		goto free_pfns;
-
-	ret = down_read_interruptible(&vm->userptr.notifier_lock);
-	if (ret)
-		goto free_st;
-
-	if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) {
-		ret = -EAGAIN;
-		goto out_unlock;
-	}
-
-	ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt,
-			  &vm->userptr.notifier_lock, write);
-	if (ret)
-		goto out_unlock;
-
-	userptr->sg = &userptr->sgt;
-	xe_hmm_userptr_set_mapped(uvma);
-	userptr->notifier_seq = hmm_range.notifier_seq;
-	up_read(&vm->userptr.notifier_lock);
-	kvfree(pfns);
-	return 0;
-
-out_unlock:
-	up_read(&vm->userptr.notifier_lock);
-free_st:
-	sg_free_table(&userptr->sgt);
-free_pfns:
-	kvfree(pfns);
-	return ret;
-}
-18
drivers/gpu/drm/xe/xe_hmm.h
···
-/* SPDX-License-Identifier: MIT
- *
- * Copyright © 2024 Intel Corporation
- */
-
-#ifndef _XE_HMM_H_
-#define _XE_HMM_H_
-
-#include <linux/types.h>
-
-struct xe_userptr_vma;
-
-int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked);
-
-void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma);
-
-void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma);
-#endif
+20 -25
drivers/gpu/drm/xe/xe_hwmon.c
···
  */
 static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value)
 {
-	u64 reg_val = 0, min, max;
+	u32 reg_val = 0;
 	struct xe_device *xe = hwmon->xe;
 	struct xe_reg rapl_limit, pkg_power_sku;
 	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
···
 	mutex_lock(&hwmon->hwmon_lock);
 
 	if (hwmon->xe->info.has_mbx_power_limits) {
-		xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)&reg_val);
+		xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val);
 	} else {
 		rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
 		pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
···
 	/* Check if PL limits are disabled. */
 	if (!(reg_val & PWR_LIM_EN)) {
 		*value = PL_DISABLE;
-		drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n",
+		drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n",
 			 PWR_ATTR_TO_STR(attr), channel, reg_val);
 		goto unlock;
 	}
 
 	reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val);
-	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
+	*value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power;
 
 	/* For platforms with mailbox power limit support clamping would be done by pcode. */
 	if (!hwmon->xe->info.has_mbx_power_limits) {
-		reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku);
-		min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
-		max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
+		u64 pkg_pwr, min, max;
+
+		pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku);
+		min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr);
+		max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr);
 		min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
 		max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
 		if (min && max)
···
 {
 	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
 	struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
-	u32 x, y, x_w = 2; /* 2 bits */
-	u64 r, tau4, out;
+	u32 reg_val, x, y, x_w = 2; /* 2 bits */
+	u64 tau4, out;
 	int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD;
 	u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR;
···
 	mutex_lock(&hwmon->hwmon_lock);
 
 	if (hwmon->xe->info.has_mbx_power_limits) {
-		ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r);
+		ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &reg_val);
 		if (ret) {
 			drm_err(&hwmon->xe->drm,
-				"power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n",
-				channel, power_attr, r, ret);
-			r = 0;
+				"power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n",
+				channel, power_attr, reg_val, ret);
+			reg_val = 0;
 		}
 	} else {
-		r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel));
+		reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
+								channel));
 	}
 
 	mutex_unlock(&hwmon->hwmon_lock);
 
 	xe_pm_runtime_put(hwmon->xe);
 
-	x = REG_FIELD_GET(PWR_LIM_TIME_X, r);
-	y = REG_FIELD_GET(PWR_LIM_TIME_Y, r);
+	x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val);
+	y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val);
 
 	/*
 	 * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17)
···
 	xe_hwmon_fan_input_read(hwmon, channel, &fan_speed);
 }
 
-static void xe_hwmon_mutex_destroy(void *arg)
-{
-	struct xe_hwmon *hwmon = arg;
-
-	mutex_destroy(&hwmon->hwmon_lock);
-}
-
 int xe_hwmon_register(struct xe_device *xe)
 {
 	struct device *dev = xe->drm.dev;
···
 	if (!hwmon)
 		return -ENOMEM;
 
-	mutex_init(&hwmon->hwmon_lock);
-	ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon);
+	ret = devm_mutex_init(dev, &hwmon->hwmon_lock);
 	if (ret)
 		return ret;
 
+1 -1
drivers/gpu/drm/xe/xe_i2c.c
···
 		return;
 
 	if (d3cold)
-		xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY);
+		xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
 
 	xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0);
 	drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR));
+464
drivers/gpu/drm/xe/xe_late_bind_fw.c
···
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_managed.h>
+#include <drm/intel/i915_component.h>
+#include <drm/intel/intel_lb_mei_interface.h>
+#include <drm/drm_print.h>
+
+#include "xe_device.h"
+#include "xe_late_bind_fw.h"
+#include "xe_pcode.h"
+#include "xe_pcode_api.h"
+#include "xe_pm.h"
+
+/*
+ * The component should load quite quickly in most cases, but it could take
+ * a bit. Using a very big timeout just to cover the worst case scenario
+ */
+#define LB_INIT_TIMEOUT_MS 20000
+
+/*
+ * Retry interval set to 6 seconds, in steps of 200 ms, to allow time for
+ * other OS components to release the MEI CL handle
+ */
+#define LB_FW_LOAD_RETRY_MAXCOUNT 30
+#define LB_FW_LOAD_RETRY_PAUSE_MS 200
+
+static const u32 fw_id_to_type[] = {
+	[XE_LB_FW_FAN_CONTROL] = INTEL_LB_TYPE_FAN_CONTROL,
+};
+
+static const char * const fw_id_to_name[] = {
+	[XE_LB_FW_FAN_CONTROL] = "fan_control",
+};
+
+static struct xe_device *
+late_bind_to_xe(struct xe_late_bind *late_bind)
+{
+	return container_of(late_bind, struct xe_device, late_bind);
+}
+
+static struct xe_device *
+late_bind_fw_to_xe(struct xe_late_bind_fw *lb_fw)
+{
+	return container_of(lb_fw, struct xe_device, late_bind.late_bind_fw[lb_fw->id]);
+}
+
+/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */
+static int parse_cpd_header(struct xe_late_bind_fw *lb_fw,
+			    const void *data, size_t size, const char *manifest_entry)
+{
+	struct xe_device *xe = late_bind_fw_to_xe(lb_fw);
+	const struct gsc_cpd_header_v2 *header = data;
+	const struct gsc_manifest_header *manifest;
+	const struct gsc_cpd_entry *entry;
+	size_t min_size = sizeof(*header);
+	u32 offset;
+	int i;
+
+	/* manifest_entry is mandatory */
+	xe_assert(xe, manifest_entry);
+
+	if (size < min_size || header->header_marker != GSC_CPD_HEADER_MARKER)
+		return -ENOENT;
+
+	if (header->header_length < sizeof(struct gsc_cpd_header_v2)) {
+		drm_err(&xe->drm, "%s late binding fw: Invalid CPD header length %u!\n",
+			fw_id_to_name[lb_fw->id], header->header_length);
+		return -EINVAL;
+	}
+
+	min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries;
+	if (size < min_size) {
+		drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+			fw_id_to_name[lb_fw->id], size, min_size);
+		return -ENODATA;
+	}
+
+	/* Look for the manifest first */
+	entry = (void *)header + header->header_length;
+	for (i = 0; i < header->num_of_entries; i++, entry++)
+		if (strcmp(entry->name, manifest_entry) == 0)
+			offset = entry->offset & GSC_CPD_ENTRY_OFFSET_MASK;
+
+	if (!offset) {
+		drm_err(&xe->drm, "%s late binding fw: Failed to find manifest_entry\n",
+			fw_id_to_name[lb_fw->id]);
+		return -ENODATA;
+	}
+
+	min_size = offset + sizeof(struct gsc_manifest_header);
+	if (size < min_size) {
+		drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+			fw_id_to_name[lb_fw->id], size, min_size);
+		return -ENODATA;
+	}
+
+	manifest = data + offset;
+
+	lb_fw->version = manifest->fw_version;
+
+	return 0;
+}
+
+/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */
+static int parse_lb_layout(struct xe_late_bind_fw *lb_fw,
+			   const void *data, size_t size, const char *fpt_entry)
+{
+	struct xe_device *xe = late_bind_fw_to_xe(lb_fw);
+	const struct csc_fpt_header *header = data;
+	const struct csc_fpt_entry *entry;
+	size_t min_size = sizeof(*header);
+	u32 offset;
+	int i;
+
+	/* fpt_entry is mandatory */
+	xe_assert(xe, fpt_entry);
+
+	if (size < min_size || header->header_marker != CSC_FPT_HEADER_MARKER)
+		return -ENOENT;
+
+	if (header->header_length < sizeof(struct csc_fpt_header)) {
+		drm_err(&xe->drm, "%s late binding fw: Invalid FPT header length %u!\n",
+			fw_id_to_name[lb_fw->id], header->header_length);
+		return -EINVAL;
+	}
+
+	min_size = header->header_length + sizeof(struct csc_fpt_entry) * header->num_of_entries;
+	if (size < min_size) {
+		drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+			fw_id_to_name[lb_fw->id], size, min_size);
+		return -ENODATA;
+	}
+
+	/* Look for the cpd header first */
+	entry = (void *)header + header->header_length;
+	for (i = 0; i < header->num_of_entries; i++, entry++)
+		if (strcmp(entry->name, fpt_entry) == 0)
+			offset = entry->offset;
+
+	if (!offset) {
+		drm_err(&xe->drm, "%s late binding fw: Failed to find fpt_entry\n",
+			fw_id_to_name[lb_fw->id]);
+		return -ENODATA;
+	}
+
+	min_size = offset + sizeof(struct gsc_cpd_header_v2);
+	if (size < min_size) {
+		drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+			fw_id_to_name[lb_fw->id], size, min_size);
+		return -ENODATA;
+	}
+
+	return parse_cpd_header(lb_fw, data + offset, size - offset, "LTES.man");
+}
+
+static const char *xe_late_bind_parse_status(uint32_t status)
+{
+	switch (status) {
+	case INTEL_LB_STATUS_SUCCESS:
+		return "success";
+	case INTEL_LB_STATUS_4ID_MISMATCH:
+		return "4Id Mismatch";
+	case INTEL_LB_STATUS_ARB_FAILURE:
+		return "ARB Failure";
+	case INTEL_LB_STATUS_GENERAL_ERROR:
+		return "General Error";
+	case INTEL_LB_STATUS_INVALID_PARAMS:
+		return "Invalid Params";
+	case INTEL_LB_STATUS_INVALID_SIGNATURE:
+		return "Invalid Signature";
+	case INTEL_LB_STATUS_INVALID_PAYLOAD:
+		return "Invalid Payload";
+	case INTEL_LB_STATUS_TIMEOUT:
+		return "Timeout";
+	default:
+		return "Unknown error";
+	}
+}
+
+static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind)
+{
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+	u32 uval;
+
+	if (!xe_pcode_read(root_tile,
+			   PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), &uval, NULL))
+		return uval;
+	else
+		return 0;
+}
+
+void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind)
+{
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	struct xe_late_bind_fw *lbfw;
+	int fw_id;
+
+	for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+		lbfw = &late_bind->late_bind_fw[fw_id];
+		if (lbfw->payload && late_bind->wq) {
+			drm_dbg(&xe->drm, "Flush work: load %s firmware\n",
+				fw_id_to_name[lbfw->id]);
+			flush_work(&lbfw->work);
+		}
+	}
+}
+
+static void xe_late_bind_work(struct work_struct *work)
+{
+	struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work);
+	struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind,
+						      late_bind_fw[lbfw->id]);
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
+	int ret;
+	int slept;
+
+	xe_device_assert_mem_access(xe);
+
+	/* we can queue this before the component is bound */
+	for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
+		if (late_bind->component.ops)
+			break;
+		msleep(100);
+	}
+
+	if (!late_bind->component.ops) {
+		drm_err(&xe->drm, "Late bind component not bound\n");
+		/* Do not re-attempt fw load */
+		drmm_kfree(&xe->drm, (void *)lbfw->payload);
+		lbfw->payload = NULL;
+		goto out;
+	}
+
+	drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]);
+
+	do {
+		ret = late_bind->component.ops->push_payload(late_bind->component.mei_dev,
+							     lbfw->type,
+							     lbfw->flags,
+							     lbfw->payload,
+							     lbfw->payload_size);
+		if (!ret)
+			break;
+		msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
+	} while (--retry && ret == -EBUSY);
+
+	if (!ret) {
+		drm_dbg(&xe->drm, "Load %s firmware successful\n",
+			fw_id_to_name[lbfw->id]);
+		goto out;
+	}
+
+	if (ret > 0)
+		drm_err(&xe->drm, "Load %s firmware failed with err %d, %s\n",
+			fw_id_to_name[lbfw->id], ret, xe_late_bind_parse_status(ret));
+	else
+		drm_err(&xe->drm, "Load %s firmware failed with err %d",
+			fw_id_to_name[lbfw->id], ret);
+	/* Do not re-attempt fw load */
+	drmm_kfree(&xe->drm, (void *)lbfw->payload);
+	lbfw->payload = NULL;
+
+out:
+	xe_pm_runtime_put(xe);
+}
+
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
+{
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	struct xe_late_bind_fw *lbfw;
+	int fw_id;
+
+	if (!late_bind->component_added)
+		return -ENODEV;
+
+	if (late_bind->disable)
+		return 0;
+
+	for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+		lbfw = &late_bind->late_bind_fw[fw_id];
+		if (lbfw->payload) {
+			xe_pm_runtime_get_noresume(xe);
+			queue_work(late_bind->wq, &lbfw->work);
+		}
+	}
+	return 0;
+}
+
+static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
+{
+	struct xe_device *xe = late_bind_to_xe(late_bind);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct xe_late_bind_fw *lb_fw;
+	const struct firmware *fw;
+	u32 num_fans;
+	int ret;
+
+	if (fw_id >= XE_LB_FW_MAX_ID)
+		return -EINVAL;
+
+	lb_fw = &late_bind->late_bind_fw[fw_id];
+
+	lb_fw->id = fw_id;
+	lb_fw->type = fw_id_to_type[lb_fw->id];
+	lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT;
+
+	if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) {
+		num_fans = xe_late_bind_fw_num_fans(late_bind);
+		drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans);
+		if (!num_fans)
+			return 0;
+	}
+
+	snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), "xe/%s_8086_%04x_%04x_%04x.bin",
+		 fw_id_to_name[lb_fw->id], pdev->device,
+		 pdev->subsystem_vendor, pdev->subsystem_device);
+
+	drm_dbg(&xe->drm, "Request late binding firmware %s\n", lb_fw->blob_path);
+	ret = firmware_request_nowarn(&fw, lb_fw->blob_path, xe->drm.dev);
+	if (ret) {
+		drm_dbg(&xe->drm, "%s late binding fw not available for current device",
+			fw_id_to_name[lb_fw->id]);
+		return 0;
+	}
+
+	if (fw->size > XE_LB_MAX_PAYLOAD_SIZE) {
+		drm_err(&xe->drm, "Firmware %s size %zu is larger than max pay load size %u\n",
+			lb_fw->blob_path, fw->size, XE_LB_MAX_PAYLOAD_SIZE);
+		release_firmware(fw);
+		return -ENODATA;
+	}
+
+	ret = parse_lb_layout(lb_fw, fw->data, fw->size, "LTES");
+	if (ret)
+		return ret;
+
+	lb_fw->payload_size = fw->size;
+	lb_fw->payload = drmm_kzalloc(&xe->drm, lb_fw->payload_size, GFP_KERNEL);
+	if (!lb_fw->payload) {
+		release_firmware(fw);
+		return -ENOMEM;
+	}
+
+	drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u.%u\n",
+		 fw_id_to_name[lb_fw->id], lb_fw->blob_path,
+		 lb_fw->version.major, lb_fw->version.minor,
+		 lb_fw->version.hotfix, lb_fw->version.build);
+
+	memcpy((void *)lb_fw->payload, fw->data, lb_fw->payload_size);
+	release_firmware(fw);
+	INIT_WORK(&lb_fw->work, xe_late_bind_work);
+
+	return 0;
+}
+
+static int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
+{
+	int ret;
+	int fw_id;
+
+	late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0);
+	if (!late_bind->wq)
+		return -ENOMEM;
+
+	for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+		ret = __xe_late_bind_fw_init(late_bind, fw_id);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int xe_late_bind_component_bind(struct device *xe_kdev,
+				       struct device *mei_kdev, void *data)
+{
+	struct xe_device *xe = kdev_to_xe_device(xe_kdev);
+	struct xe_late_bind *late_bind = &xe->late_bind;
+
+	late_bind->component.ops = data;
+	late_bind->component.mei_dev = mei_kdev;
+
+	return 0;
+}
+
+static void xe_late_bind_component_unbind(struct device *xe_kdev,
+					  struct device *mei_kdev, void *data)
+{
+	struct xe_device *xe = kdev_to_xe_device(xe_kdev);
+	struct xe_late_bind *late_bind = &xe->late_bind;
+
+	xe_late_bind_wait_for_worker_completion(late_bind);
+
+	late_bind->component.ops = NULL;
+}
+
+static const struct component_ops xe_late_bind_component_ops = {
+	.bind = xe_late_bind_component_bind,
+	.unbind = xe_late_bind_component_unbind,
+};
+
+static void xe_late_bind_remove(void *arg)
+{
+	struct xe_late_bind *late_bind = arg;
+	struct xe_device *xe
= late_bind_to_xe(late_bind); 415 + 416 + xe_late_bind_wait_for_worker_completion(late_bind); 417 + 418 + late_bind->component_added = false; 419 + 420 + component_del(xe->drm.dev, &xe_late_bind_component_ops); 421 + if (late_bind->wq) { 422 + destroy_workqueue(late_bind->wq); 423 + late_bind->wq = NULL; 424 + } 425 + } 426 + 427 + /** 428 + * xe_late_bind_init() - add xe mei late binding component 429 + * @late_bind: pointer to late bind structure. 430 + * 431 + * Return: 0 if the initialization was successful, a negative errno otherwise. 432 + */ 433 + int xe_late_bind_init(struct xe_late_bind *late_bind) 434 + { 435 + struct xe_device *xe = late_bind_to_xe(late_bind); 436 + int err; 437 + 438 + if (!xe->info.has_late_bind) 439 + return 0; 440 + 441 + if (!IS_ENABLED(CONFIG_INTEL_MEI_LB) || !IS_ENABLED(CONFIG_INTEL_MEI_GSC)) { 442 + drm_info(&xe->drm, "Can't init xe mei late bind missing mei component\n"); 443 + return 0; 444 + } 445 + 446 + err = component_add_typed(xe->drm.dev, &xe_late_bind_component_ops, 447 + INTEL_COMPONENT_LB); 448 + if (err < 0) { 449 + drm_err(&xe->drm, "Failed to add mei late bind component (%pe)\n", ERR_PTR(err)); 450 + return err; 451 + } 452 + 453 + late_bind->component_added = true; 454 + 455 + err = devm_add_action_or_reset(xe->drm.dev, xe_late_bind_remove, late_bind); 456 + if (err) 457 + return err; 458 + 459 + err = xe_late_bind_fw_init(late_bind); 460 + if (err) 461 + return err; 462 + 463 + return xe_late_bind_fw_load(late_bind); 464 + }
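The upload path in xe_late_bind_work() above retries push_payload() only while the component reports -EBUSY, pausing between attempts and giving up after LB_FW_LOAD_RETRY_MAXCOUNT tries. A minimal userspace sketch of that bounded-retry shape (the fake_component and its counters are invented for illustration; only the loop structure mirrors the driver):

```c
#include <errno.h>

/* Invented stand-in for the MEI component: fails with -EBUSY a fixed
 * number of times before accepting the payload. */
struct fake_component {
	int busy_left;	/* attempts that still return -EBUSY */
	int calls;	/* total push attempts observed */
};

static int push_payload(struct fake_component *c)
{
	c->calls++;
	if (c->busy_left > 0) {
		c->busy_left--;
		return -EBUSY;
	}
	return 0;
}

#define LB_FW_LOAD_RETRY_MAXCOUNT 3

/* Same shape as the loop in xe_late_bind_work(): retry only while the
 * transport is busy, and give up after a fixed number of attempts. */
static int load_with_retry(struct fake_component *c)
{
	int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
	int ret;

	do {
		ret = push_payload(c);
		if (!ret)
			break;
		/* msleep(LB_FW_LOAD_RETRY_PAUSE_MS) would go here */
	} while (--retry && ret == -EBUSY);

	return ret;
}

/* Helpers so the behavior can be checked without extra setup. */
static int retry_result(int busy_times)
{
	struct fake_component c = { .busy_left = busy_times };

	return load_with_retry(&c);
}

static int retry_calls(int busy_times)
{
	struct fake_component c = { .busy_left = busy_times };

	load_with_retry(&c);
	return c.calls;
}
```

Note that a non-EBUSY error exits immediately: the retry budget is spent only on transient busyness, never on hard failures.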
drivers/gpu/drm/xe/xe_late_bind_fw.h (+17)
···
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_FW_H_
+#define _XE_LATE_BIND_FW_H_
+
+#include <linux/types.h>
+
+struct xe_late_bind;
+
+int xe_late_bind_init(struct xe_late_bind *late_bind);
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
+void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind);
+
+#endif
drivers/gpu/drm/xe/xe_late_bind_fw_types.h (+75)
···
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_TYPES_H_
+#define _XE_LATE_BIND_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include "xe_uc_fw_abi.h"
+
+#define XE_LB_MAX_PAYLOAD_SIZE	SZ_4K
+
+/**
+ * xe_late_bind_fw_id - enum to determine late binding fw index
+ */
+enum xe_late_bind_fw_id {
+	XE_LB_FW_FAN_CONTROL = 0,
+	XE_LB_FW_MAX_ID
+};
+
+/**
+ * struct xe_late_bind_fw
+ */
+struct xe_late_bind_fw {
+	/** @id: firmware index */
+	u32 id;
+	/** @blob_path: firmware binary path */
+	char blob_path[PATH_MAX];
+	/** @type: firmware type */
+	u32 type;
+	/** @flags: firmware flags */
+	u32 flags;
+	/** @payload: to store the late binding blob */
+	const u8 *payload;
+	/** @payload_size: late binding blob payload_size */
+	size_t payload_size;
+	/** @work: worker to upload latebind blob */
+	struct work_struct work;
+	/** @version: late binding blob manifest version */
+	struct gsc_version version;
+};
+
+/**
+ * struct xe_late_bind_component - Late Binding services component
+ * @mei_dev: device that provides Late Binding service.
+ * @ops: Ops implemented by Late Binding driver, used by Xe driver.
+ *
+ * Communication between Xe and MEI drivers for Late Binding services
+ */
+struct xe_late_bind_component {
+	struct device *mei_dev;
+	const struct intel_lb_component_ops *ops;
+};
+
+/**
+ * struct xe_late_bind
+ */
+struct xe_late_bind {
+	/** @component: struct for communication with mei component */
+	struct xe_late_bind_component component;
+	/** @late_bind_fw: late binding firmware array */
+	struct xe_late_bind_fw late_bind_fw[XE_LB_FW_MAX_ID];
+	/** @wq: workqueue to submit request to download late bind blob */
+	struct workqueue_struct *wq;
+	/** @component_added: whether the component has been added */
+	bool component_added;
+	/** @disable: to block late binding reload during pm resume flow */
+	bool disable;
+};
+
+#endif
drivers/gpu/drm/xe/xe_lmtt.c (+6, -6)
···
 		goto out;
 	}
 
-	bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL,
-				  PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
-					     lmtt->ops->lmtt_pte_num(level)),
-				  ttm_bo_type_kernel,
-				  XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
-				  XE_BO_FLAG_NEEDS_64K);
+	bo = xe_bo_create_pin_map_novm(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt),
+				       PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+						  lmtt->ops->lmtt_pte_num(level)),
+				       ttm_bo_type_kernel,
+				       XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+				       XE_BO_FLAG_NEEDS_64K, false);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		goto out_free_pt;
drivers/gpu/drm/xe/xe_lrc.c (+83, -7)
···
 #include <generated/xe_wa_oob.h>
 
 #include <linux/ascii85.h>
+#include <linux/panic.h>
 
 #include "instructions/xe_mi_commands.h"
 #include "instructions/xe_gfxpipe_commands.h"
···
 #include "regs/xe_lrc_layout.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
+#include "xe_configfs.h"
 #include "xe_device.h"
 #include "xe_drm_client.h"
 #include "xe_exec_queue_types.h"
···
 static bool
 gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
 {
+	struct xe_device *xe = gt_to_xe(gt);
+
 	if (XE_GT_WA(gt, 16010904313) &&
 	    (class == XE_ENGINE_CLASS_RENDER ||
 	     class == XE_ENGINE_CLASS_COMPUTE))
+		return true;
+
+	if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+					       class, NULL))
 		return true;
 
 	return false;
···
 	return cmd - batch;
 }
 
+static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc,
+						  struct xe_hw_engine *hwe,
+						  u32 *batch, size_t max_len)
+{
+	struct xe_device *xe = gt_to_xe(lrc->gt);
+	const u32 *user_batch;
+	u32 *cmd = batch;
+	u32 count;
+
+	count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev),
+						    hwe->class, &user_batch);
+	if (!count)
+		return 0;
+
+	if (count > max_len)
+		return -ENOSPC;
+
+	/*
+	 * This should be used only for tests and validation. Taint the kernel
+	 * as anything could be submitted directly in context switches
+	 */
+	add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+	memcpy(cmd, user_batch, count * sizeof(u32));
+	cmd += count;
+
+	return cmd - batch;
+}
+
+static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc,
+						 struct xe_hw_engine *hwe,
+						 u32 *batch, size_t max_len)
+{
+	struct xe_device *xe = gt_to_xe(lrc->gt);
+	const u32 *user_batch;
+	u32 *cmd = batch;
+	u32 count;
+
+	count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+						   hwe->class, &user_batch);
+	if (!count)
+		return 0;
+
+	if (count > max_len)
+		return -ENOSPC;
+
+	/*
+	 * This should be used only for tests and validation. Taint the kernel
+	 * as anything could be submitted directly in context switches
+	 */
+	add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+	memcpy(cmd, user_batch, count * sizeof(u32));
+	cmd += count;
+
+	return cmd - batch;
+}
+
 static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
 					       struct xe_hw_engine *hwe,
 					       u32 *batch, size_t max_len)
···
 		{ .setup = setup_timestamp_wa },
 		{ .setup = setup_invalidate_state_cache_wa },
 		{ .setup = setup_utilization_wa },
+		{ .setup = setup_configfs_post_ctx_restore_bb },
 	};
 	struct bo_setup_state state = {
 		.lrc = lrc,
···
 static int
 setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
 {
-	static struct bo_setup rcs_funcs[] = {
+	static const struct bo_setup rcs_funcs[] = {
 		{ .setup = setup_timestamp_wa },
+		{ .setup = setup_configfs_mid_ctx_restore_bb },
+	};
+	static const struct bo_setup xcs_funcs[] = {
+		{ .setup = setup_configfs_mid_ctx_restore_bb },
 	};
 	struct bo_setup_state state = {
 		.lrc = lrc,
···
 	    hwe->class == XE_ENGINE_CLASS_COMPUTE) {
 		state.funcs = rcs_funcs;
 		state.num_funcs = ARRAY_SIZE(rcs_funcs);
+	} else {
+		state.funcs = xcs_funcs;
+		state.num_funcs = ARRAY_SIZE(xcs_funcs);
 	}
 
 	if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
···
 	finish_bo(&state);
 	kfree(state.buffer);
 
+	/*
+	 * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it
+	 * varies per engine class, but the default is good enough
+	 */
 	xe_lrc_write_ctx_reg(lrc,
 			     CTX_CS_INDIRECT_CTX,
 			     (xe_bo_ggtt_addr(lrc->bo) + state.offset) |
 			     /* Size in CLs. */
 			     (state.written * sizeof(u32) / 64));
-	xe_lrc_write_ctx_reg(lrc,
-			     CTX_CS_INDIRECT_CTX_OFFSET,
-			     CTX_INDIRECT_CTX_OFFSET_DEFAULT);
 
 	return 0;
 }
···
 	if (vm && vm->xef) /* userspace */
 		bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
 
-	lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
-				       ttm_bo_type_kernel,
-				       bo_flags);
+	lrc->bo = xe_bo_create_pin_map_novm(xe, tile,
+					    bo_size,
+					    ttm_bo_type_kernel,
+					    bo_flags, false);
 	if (IS_ERR(lrc->bo))
 		return PTR_ERR(lrc->bo);
 
drivers/gpu/drm/xe/xe_migrate.c (+34, -13)
···
 #include "xe_sched_job.h"
 #include "xe_sync.h"
 #include "xe_trace_bo.h"
+#include "xe_validation.h"
 #include "xe_vm.h"
 #include "xe_vram.h"
···
 }
 
 static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
-				 struct xe_vm *vm)
+				 struct xe_vm *vm, struct drm_exec *exec)
 {
 	struct xe_device *xe = tile_to_xe(tile);
 	u16 pat_index = xe->pat.idx[XE_CACHE_WB];
···
 				  num_entries * XE_PAGE_SIZE,
 				  ttm_bo_type_kernel,
 				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
-				  XE_BO_FLAG_PAGETABLE);
+				  XE_BO_FLAG_PAGETABLE, exec);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
···
 	return m;
 }
 
+static int xe_migrate_lock_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_validation_ctx ctx;
+	struct drm_exec exec;
+	int err = 0;
+
+	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+		err = xe_vm_drm_exec_lock(vm, &exec);
+		drm_exec_retry_on_contention(&exec);
+		err = xe_migrate_prepare_vm(tile, m, vm, &exec);
+		drm_exec_retry_on_contention(&exec);
+		xe_validation_retry_on_oom(&ctx, &err);
+	}
+
+	return err;
+}
+
 /**
  * xe_migrate_init() - Initialize a migrate context
  * @m: The migration context
···
 	if (IS_ERR(vm))
 		return PTR_ERR(vm);
 
-	xe_vm_lock(vm, false);
-	err = xe_migrate_prepare_vm(tile, m, vm);
-	xe_vm_unlock(vm);
+	err = xe_migrate_lock_prepare_vm(tile, m, vm);
 	if (err)
-		goto err_out;
+		return err;
 
 	if (xe->info.has_usm) {
 		struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
···
 	batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0,
 				      &src_L0_ofs, &src_L0_pt, 0, 0,
 				      avail_pts);
-
-	pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
-	batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0,
-				      &dst_L0_ofs, &dst_L0_pt, 0,
-				      avail_pts, avail_pts);
+	if (copy_only_ccs) {
+		dst_L0_ofs = src_L0_ofs;
+	} else {
+		pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
+		batch_size += pte_update_size(m, pte_flags, dst,
+					      &dst_it, &src_L0,
+					      &dst_L0_ofs, &dst_L0_pt,
+					      0, avail_pts, avail_pts);
+	}
 
 	if (copy_system_ccs) {
 		xe_assert(xe, type_device);
···
 
 	if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
 		xe_res_next(&dst_it, src_L0);
-	else
+	else if (!copy_only_ccs)
 		emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs,
 			 &dst_it, src_L0, dst);
···
 	if (!fence) {
 		err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv,
 					    DMA_RESV_USAGE_BOOKKEEP);
-		if (!err && src_bo != dst_bo)
+		if (!err && src_bo->ttm.base.resv != dst_bo->ttm.base.resv)
 			err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv,
 						    DMA_RESV_USAGE_BOOKKEEP);
 		if (err)
drivers/gpu/drm/xe/xe_nvm.c (+4, -1)
···
 
 static void xe_nvm_release_dev(struct device *dev)
 {
+	struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev);
+	struct intel_dg_nvm_dev *nvm = container_of(aux, struct intel_dg_nvm_dev, aux_dev);
+
+	kfree(nvm);
 }
 
 static bool xe_nvm_non_posted_erase(struct xe_device *xe)
···
 
 	auxiliary_device_delete(&nvm->aux_dev);
 	auxiliary_device_uninit(&nvm->aux_dev);
-	kfree(nvm);
 	xe->nvm = NULL;
 }
drivers/gpu/drm/xe/xe_oa.c (+3, -3)
···
 {
 	struct xe_bo *bo;
 
-	bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
-				  size, ttm_bo_type_kernel,
-				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
+	bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile,
+				       size, ttm_bo_type_kernel,
+				       XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
drivers/gpu/drm/xe/xe_pci.c (+24, -16)
···
 	.has_mbx_power_limits = true,
 	.has_gsc_nvm = 1,
 	.has_heci_cscfi = 1,
+	.has_late_bind = true,
 	.has_sriov = true,
 	.max_gt_per_tile = 2,
 	.needs_scratch = true,
···
 	*revid = REG_FIELD_GET(GMD_ID_REVID, val);
 }
 
+static const struct xe_ip *find_graphics_ip(unsigned int verx100)
+{
+	KUNIT_STATIC_STUB_REDIRECT(find_graphics_ip, verx100);
+
+	for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++)
+		if (graphics_ips[i].verx100 == verx100)
+			return &graphics_ips[i];
+	return NULL;
+}
+
+static const struct xe_ip *find_media_ip(unsigned int verx100)
+{
+	KUNIT_STATIC_STUB_REDIRECT(find_media_ip, verx100);
+
+	for (int i = 0; i < ARRAY_SIZE(media_ips); i++)
+		if (media_ips[i].verx100 == verx100)
+			return &media_ips[i];
+	return NULL;
+}
+
 /*
  * Read IP version from hardware and select graphics/media IP descriptors
  * based on the result.
···
 	read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid);
 
-	for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) {
-		if (ver == graphics_ips[i].verx100) {
-			*graphics_ip = &graphics_ips[i];
-
-			break;
-		}
-	}
-
+	*graphics_ip = find_graphics_ip(ver);
 	if (!*graphics_ip) {
 		drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n",
 			ver / 100, ver % 100);
···
 	if (ver == 0)
 		return;
 
-	for (int i = 0; i < ARRAY_SIZE(media_ips); i++) {
-		if (ver == media_ips[i].verx100) {
-			*media_ip = &media_ips[i];
-
-			break;
-		}
-	}
-
+	*media_ip = find_media_ip(ver);
 	if (!*media_ip) {
 		drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n",
 			ver / 100, ver % 100);
···
 	xe->info.has_gsc_nvm = desc->has_gsc_nvm;
 	xe->info.has_heci_gscfi = desc->has_heci_gscfi;
 	xe->info.has_heci_cscfi = desc->has_heci_cscfi;
+	xe->info.has_late_bind = desc->has_late_bind;
 	xe->info.has_llc = desc->has_llc;
 	xe->info.has_pxp = desc->has_pxp;
 	xe->info.has_sriov = desc->has_sriov;
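The xe_pci.c change above pulls the GMD_ID descriptor search out of open-coded loops into find_graphics_ip()/find_media_ip() helpers that KUnit can redirect. A userspace sketch of the same lookup shape, with an invented table (the struct name, verx100 values, and IP names here are illustrative, not the driver's actual descriptor tables):

```c
#include <stddef.h>

/* Illustrative stand-in for the xe_ip descriptor tables: entries are keyed
 * by a verx100 value, i.e. major * 100 + minor (2004 == version 20.04). */
struct ip_desc {
	unsigned int verx100;
	const char *name;
};

static const struct ip_desc ip_table[] = {
	{ 1270, "example-ip-a" },
	{ 2004, "example-ip-b" },
	{ 3000, "example-ip-c" },
};

/* Same shape as find_graphics_ip()/find_media_ip(): linear scan of a small
 * static table, returning NULL for an unknown version so the caller can
 * report "Hardware reports unknown ... version" and bail out. */
static const struct ip_desc *find_ip(unsigned int verx100)
{
	for (size_t i = 0; i < sizeof(ip_table) / sizeof(ip_table[0]); i++)
		if (ip_table[i].verx100 == verx100)
			return &ip_table[i];
	return NULL;
}
```

Extracting the scan into a function is what makes the KUNIT_STATIC_STUB_REDIRECT() hook possible: a test can substitute the whole lookup rather than faking register reads.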
drivers/gpu/drm/xe/xe_pci_types.h (+1)
···
 	u8 has_gsc_nvm:1;
 	u8 has_heci_gscfi:1;
 	u8 has_heci_cscfi:1;
+	u8 has_late_bind:1;
 	u8 has_llc:1;
 	u8 has_mbx_power_limits:1;
 	u8 has_pxp:1;
drivers/gpu/drm/xe/xe_pm.c (+10, -2)
···
 #include "xe_gt_idle.h"
 #include "xe_i2c.h"
 #include "xe_irq.h"
+#include "xe_late_bind_fw.h"
 #include "xe_pcode.h"
 #include "xe_pxp.h"
 #include "xe_sriov_vf_ccs.h"
···
 	if (err)
 		goto err;
 
+	xe_late_bind_wait_for_worker_completion(&xe->late_bind);
+
 	for_each_gt(gt, xe, id)
 		xe_gt_suspend_prepare(gt);
 
···
 
 	xe_pxp_pm_resume(xe->pxp);
 
-	if (IS_SRIOV_VF(xe))
+	if (IS_VF_CCS_READY(xe))
 		xe_sriov_vf_ccs_register_context(xe);
+
+	xe_late_bind_fw_load(&xe->late_bind);
 
 	drm_dbg(&xe->drm, "Device resumed\n");
 	return 0;
···
 
 	xe_pxp_pm_resume(xe->pxp);
 
-	if (IS_SRIOV_VF(xe))
+	if (IS_VF_CCS_READY(xe))
 		xe_sriov_vf_ccs_register_context(xe);
+
+	if (xe->d3cold.allowed)
+		xe_late_bind_fw_load(&xe->late_bind);
 
 out:
 	xe_rpm_lockmap_release(xe);
drivers/gpu/drm/xe/xe_printk.h (+129)
···
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PRINTK_H_
+#define _XE_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+
+#define __XE_PRINTK_FMT(_xe, _fmt, _args...)	_fmt, ##_args
+
+#define xe_printk(_xe, _level, _fmt, ...) \
+	drm_##_level(&(_xe)->drm, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_err(_xe, _fmt, ...) \
+	xe_printk((_xe), err, _fmt, ##__VA_ARGS__)
+
+#define xe_err_once(_xe, _fmt, ...) \
+	xe_printk((_xe), err_once, _fmt, ##__VA_ARGS__)
+
+#define xe_err_ratelimited(_xe, _fmt, ...) \
+	xe_printk((_xe), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_warn(_xe, _fmt, ...) \
+	xe_printk((_xe), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_notice(_xe, _fmt, ...) \
+	xe_printk((_xe), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_info(_xe, _fmt, ...) \
+	xe_printk((_xe), info, _fmt, ##__VA_ARGS__)
+
+#define xe_dbg(_xe, _fmt, ...) \
+	xe_printk((_xe), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_WARN_type(_xe, _type, _condition, _fmt, ...) \
+	drm_WARN##_type(&(_xe)->drm, _condition, _fmt, ## __VA_ARGS__)
+
+#define xe_WARN(_xe, _condition, _fmt, ...) \
+	xe_WARN_type((_xe),, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_WARN_ONCE(_xe, _condition, _fmt, ...) \
+	xe_WARN_type((_xe), _ONCE, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_WARN_ON(_xe, _condition) \
+	xe_WARN((_xe), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
+
+#define xe_WARN_ON_ONCE(_xe, _condition) \
+	xe_WARN_ONCE((_xe), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
+
+static inline void __xe_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+	struct xe_device *xe = p->arg;
+
+	xe_err(xe, "%pV", vaf);
+}
+
+static inline void __xe_printfn_info(struct drm_printer *p, struct va_format *vaf)
+{
+	struct xe_device *xe = p->arg;
+
+	xe_info(xe, "%pV", vaf);
+}
+
+static inline void __xe_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+	struct xe_device *xe = p->arg;
+	struct drm_printer ddp;
+
+	/*
+	 * The original xe_dbg() callsite annotations are useless here,
+	 * redirect to the tweaked drm_dbg_printer() instead.
+	 */
+	ddp = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
+	ddp.origin = p->origin;
+
+	drm_printf(&ddp, __XE_PRINTK_FMT(xe, "%pV", vaf));
+}
+
+/**
+ * xe_err_printer - Construct a &drm_printer that outputs to xe_err()
+ * @xe: the &xe_device pointer to use in xe_err()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_err_printer(struct xe_device *xe)
+{
+	struct drm_printer p = {
+		.printfn = __xe_printfn_err,
+		.arg = xe,
+	};
+	return p;
+}
+
+/**
+ * xe_info_printer - Construct a &drm_printer that outputs to xe_info()
+ * @xe: the &xe_device pointer to use in xe_info()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_info_printer(struct xe_device *xe)
+{
+	struct drm_printer p = {
+		.printfn = __xe_printfn_info,
+		.arg = xe,
+	};
+	return p;
+}
+
+/**
+ * xe_dbg_printer - Construct a &drm_printer that outputs like xe_dbg()
+ * @xe: the &xe_device pointer to use in xe_dbg()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_dbg_printer(struct xe_device *xe)
+{
+	struct drm_printer p = {
+		.printfn = __xe_printfn_dbg,
+		.arg = xe,
+		.origin = (const void *)_THIS_IP_,
+	};
+	return p;
+}
+
+#endif
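The xe_err_printer()/xe_info_printer()/xe_dbg_printer() constructors above all rely on drm_printer's callback indirection: the caller formats once, and the printfn/arg pair decides where the text goes. A standalone userspace sketch of that pattern (struct printer, capture_printfn, and printer_printf are invented for illustration; the real drm_printer passes a struct va_format rather than a preformatted string):

```c
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

/* Minimal printer object: a sink callback plus an opaque argument. */
struct printer {
	void (*printfn)(struct printer *p, const char *msg);
	void *arg;
};

/* One possible sink: append into a caller-provided buffer, so output can
 * be inspected instead of going to a console. */
static void capture_printfn(struct printer *p, const char *msg)
{
	strcat((char *)p->arg, msg);
}

/* Format once, then hand the result to whichever sink the printer wraps. */
static void printer_printf(struct printer *p, const char *fmt, ...)
{
	char buf[128];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	p->printfn(p, buf);
}

/* Demonstrate the indirection end to end; returns 1 when the captured
 * output matches what was formatted. */
static int demo_capture(void)
{
	char out[128] = "";
	struct printer p = { .printfn = capture_printfn, .arg = out };

	printer_printf(&p, "GT%d: %s", 0, "reset");
	return strcmp(out, "GT0: reset") == 0;
}
```

The payoff is the same as in the header: code that takes a printer can be reused unchanged for error logs, info logs, debugfs dumps, or test capture, simply by handing it a differently constructed object.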
drivers/gpu/drm/xe/xe_psmi.c (+6, -18)
···
 static struct xe_bo *psmi_alloc_object(struct xe_device *xe,
 				       unsigned int id, size_t bo_size)
 {
-	struct xe_bo *bo = NULL;
 	struct xe_tile *tile;
-	int err;
 
 	if (!id || !bo_size)
 		return NULL;
···
 	tile = &xe->tiles[id - 1];
 
 	/* VRAM: Allocate GEM object for the capture buffer */
-	bo = xe_bo_create_locked(xe, tile, NULL, bo_size,
-				 ttm_bo_type_kernel,
-				 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
-				 XE_BO_FLAG_PINNED |
-				 XE_BO_FLAG_PINNED_LATE_RESTORE |
-				 XE_BO_FLAG_NEEDS_CPU_ACCESS);
-
-	if (!IS_ERR(bo)) {
-		/* Buffer written by HW, ensure stays resident */
-		err = xe_bo_pin(bo);
-		if (err)
-			bo = ERR_PTR(err);
-		xe_bo_unlock(bo);
-	}
-
-	return bo;
+	return xe_bo_create_pin_range_novm(xe, tile, bo_size, 0, ~0ull,
+					   ttm_bo_type_kernel,
+					   XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+					   XE_BO_FLAG_PINNED |
+					   XE_BO_FLAG_PINNED_LATE_RESTORE |
+					   XE_BO_FLAG_NEEDS_CPU_ACCESS);
 }
 
 /*
drivers/gpu/drm/xe/xe_pt.c (+66, -91)
···
 #include "xe_drm_client.h"
 #include "xe_exec_queue.h"
 #include "xe_gt.h"
-#include "xe_tlb_inval_job.h"
 #include "xe_migrate.h"
 #include "xe_pt_types.h"
 #include "xe_pt_walk.h"
 #include "xe_res_cursor.h"
 #include "xe_sched_job.h"
-#include "xe_sync.h"
 #include "xe_svm.h"
+#include "xe_sync.h"
 #include "xe_tlb_inval_job.h"
 #include "xe_trace.h"
 #include "xe_ttm_stolen_mgr.h"
+#include "xe_userptr.h"
 #include "xe_vm.h"
 
 struct xe_pt_dir {
···
  * @vm: The vm to create for.
  * @tile: The tile to create for.
  * @level: The page-table level.
+ * @exec: The drm_exec object used to lock the vm.
  *
  * Allocate and initialize a single struct xe_pt metadata structure. Also
  * create the corresponding page-table bo, but don't initialize it. If the
···
  * error.
  */
 struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
-			   unsigned int level)
+			   unsigned int level, struct drm_exec *exec)
 {
 	struct xe_pt *pt;
 	struct xe_bo *bo;
···
 		bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
 
 	pt->level = level;
+
+	drm_WARN_ON(&vm->xe->drm, IS_ERR_OR_NULL(exec));
 	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
 				  ttm_bo_type_kernel,
-				  bo_flags);
+				  bo_flags, exec);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		goto err_kfree;
···
 	if (covers || !*child) {
 		u64 flags = 0;
 
-		xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1);
+		xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1,
+					xe_vm_validation_exec(vm));
 		if (IS_ERR(xe_child))
 			return PTR_ERR(xe_child);
···
 		return -EAGAIN;
 	}
 	if (xe_svm_range_has_dma_mapping(range)) {
-		xe_res_first_dma(range->base.dma_addr, 0,
+		xe_res_first_dma(range->base.pages.dma_addr, 0,
 				 range->base.itree.last + 1 - range->base.itree.start,
 				 &curs);
 		xe_svm_range_debug(range, "BIND PREPARE - MIXED");
···
 
 	if (!xe_vma_is_null(vma) && !range) {
 		if (xe_vma_is_userptr(vma))
-			xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
-					xe_vma_size(vma), &curs);
+			xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0,
+					 xe_vma_size(vma), &curs);
 		else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
 			xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
 				     xe_vma_size(vma), &curs);
···
 	if (xe_vma_bo(vma))
 		xe_bo_assert_held(xe_vma_bo(vma));
 	else if (xe_vma_is_userptr(vma))
-		lockdep_assert_held(&xe_vma_vm(vma)->userptr.notifier_lock);
+		lockdep_assert_held(&xe_vma_vm(vma)->svm.gpusvm.notifier_lock);
 
 	if (!(pt_mask & BIT(tile->id)))
 		return false;
···
 	xe_pt_commit_prepare_locks_assert(vma);
 
 	if (xe_vma_is_userptr(vma))
-		lockdep_assert_held_read(&vm->userptr.notifier_lock);
+		xe_svm_assert_held_read(vm);
 }
 
 static void xe_pt_commit(struct xe_vma *vma,
···
 				pt_update_ops, rftree);
 }
 
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
 #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
 
 static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
···
 	struct xe_userptr_vma *uvma;
 	unsigned long notifier_seq;
 
-	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+	xe_svm_assert_held_read(vm);
 
 	if (!xe_vma_is_userptr(vma))
 		return 0;
···
 	if (xe_pt_userptr_inject_eagain(uvma))
 		xe_vma_userptr_force_invalidate(uvma);
 
-	notifier_seq = uvma->userptr.notifier_seq;
+	notifier_seq = uvma->userptr.pages.notifier_seq;
 
 	if (!mmu_interval_read_retry(&uvma->userptr.notifier,
 				     notifier_seq))
···
 	return 0;
 }
 
-static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
-			    struct xe_vm_pgtable_update_ops *pt_update)
+static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
+				struct xe_vm_pgtable_update_ops *pt_update)
 {
 	int err = 0;
 
-	lockdep_assert_held_read(&vm->userptr.notifier_lock);
+	xe_svm_assert_held_read(vm);
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
···
 	case DRM_GPUVA_OP_UNMAP:
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
-		err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va),
-					pt_update);
+		if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))) {
+			struct xe_svm_range *range = op->map_range.range;
+			unsigned long i;
+
+			xe_assert(vm->xe,
+				  xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
+			xa_for_each(&op->prefetch_range.range, i, range) {
+				xe_svm_range_debug(range, "PRE-COMMIT");
+
+				if (!xe_svm_range_pages_valid(range)) {
+					xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+					return -ENODATA;
+				}
+			}
+		} else {
+			err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), pt_update);
+		}
 		break;
+#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
+	case DRM_GPUVA_OP_DRIVER:
+		if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
+			struct xe_svm_range *range = op->map_range.range;
+
+			xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
+
+			xe_svm_range_debug(range, "PRE-COMMIT");
+
+			if (!xe_svm_range_pages_valid(range)) {
+				xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+				return -EAGAIN;
+			}
+		}
+		break;
+#endif
 	default:
 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
···
 	return err;
 }
 
-static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
 {
 	struct xe_vm *vm = pt_update->vops->vm;
 	struct xe_vma_ops *vops = pt_update->vops;
···
 	if (err)
 		return err;
 
-	down_read(&vm->userptr.notifier_lock);
+	xe_svm_notifier_lock(vm);
 
 	list_for_each_entry(op, &vops->list, link) {
-		err = op_check_userptr(vm, op, pt_update_ops);
+		err = op_check_svm_userptr(vm, op, pt_update_ops);
 		if (err) {
-			up_read(&vm->userptr.notifier_lock);
+			xe_svm_notifier_unlock(vm);
 			break;
 		}
 	}
 
 	return err;
-}
-
-#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
-static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
-{
-	struct xe_vm *vm = pt_update->vops->vm;
-	struct xe_vma_ops *vops = pt_update->vops;
-	struct xe_vma_op *op;
-	unsigned long i;
-	int err;
-
-	err = xe_pt_pre_commit(pt_update);
-	if (err)
-		return err;
-
-	xe_svm_notifier_lock(vm);
-
-	list_for_each_entry(op, &vops->list, link) {
-		struct xe_svm_range *range = NULL;
-
-		if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
-			continue;
-
-		if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
-			xe_assert(vm->xe,
-				  xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
-			xa_for_each(&op->prefetch_range.range, i, range) {
-				xe_svm_range_debug(range, "PRE-COMMIT");
-
-				if (!xe_svm_range_pages_valid(range)) {
-					xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
-					xe_svm_notifier_unlock(vm);
-					return -ENODATA;
-				}
-			}
-		} else {
-			xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
-			xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
-			range = op->map_range.range;
-
-			xe_svm_range_debug(range, "PRE-COMMIT");
-
-			if (!xe_svm_range_pages_valid(range)) {
-				xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
-				xe_svm_notifier_unlock(vm);
-				return -EAGAIN;
-			}
-		}
-	}
-
-	return 0;
 }
 #endif
···
 					   xe_vma_start(vma),
 					   xe_vma_end(vma));
 	++pt_update_ops->current_op;
-	pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+	pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma);
 
 	/*
 	 * If rebind, we have to invalidate TLB on !LR vms to invalidate
···
 	xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma),
 					 xe_vma_end(vma));
 	++pt_update_ops->current_op;
-	pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+	pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma);
 	pt_update_ops->needs_invalidation = true;
 
 	xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries);
···
 			       vma->tile_invalidated & ~BIT(tile->id));
 	vma->tile_staged &= ~BIT(tile->id);
 	if (xe_vma_is_userptr(vma)) {
-		lockdep_assert_held_read(&vm->userptr.notifier_lock);
+		xe_svm_assert_held_read(vm);
 		to_userptr_vma(vma)->userptr.initial_bind = true;
 	}
···
 	if (!vma->tile_present) {
 		list_del_init(&vma->combined_links.rebind);
 		if (xe_vma_is_userptr(vma)) {
-			lockdep_assert_held_read(&vm->userptr.notifier_lock);
+			xe_svm_assert_held_read(vm);
 
 			spin_lock(&vm->userptr.invalidated_lock);
 			list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
···
 	.pre_commit = xe_pt_pre_commit,
 };
 
-static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+static const struct xe_migrate_pt_update_ops
svm_userptr_migrate_ops = { 2327 2343 .populate = xe_vm_populate_pgtable, 2328 2344 .clear = xe_migrate_clear_pgtable_callback, 2329 - .pre_commit = xe_pt_userptr_pre_commit, 2330 - }; 2331 - 2332 - #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) 2333 - static const struct xe_migrate_pt_update_ops svm_migrate_ops = { 2334 - .populate = xe_vm_populate_pgtable, 2335 - .clear = xe_migrate_clear_pgtable_callback, 2336 - .pre_commit = xe_pt_svm_pre_commit, 2345 + .pre_commit = xe_pt_svm_userptr_pre_commit, 2337 2346 }; 2338 2347 #else 2339 - static const struct xe_migrate_pt_update_ops svm_migrate_ops; 2348 + static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops; 2340 2349 #endif 2341 2350 2342 2351 static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q, ··· 2368 2389 int err = 0, i; 2369 2390 struct xe_migrate_pt_update update = { 2370 2391 .ops = pt_update_ops->needs_svm_lock ? 2371 - &svm_migrate_ops : 2372 - pt_update_ops->needs_userptr_lock ? 2373 - &userptr_migrate_ops : 2392 + &svm_userptr_migrate_ops : 2374 2393 &migrate_ops, 2375 2394 .vops = vops, 2376 2395 .tile_id = tile->id, ··· 2510 2533 2511 2534 if (pt_update_ops->needs_svm_lock) 2512 2535 xe_svm_notifier_unlock(vm); 2513 - if (pt_update_ops->needs_userptr_lock) 2514 - up_read(&vm->userptr.notifier_lock); 2515 2536 2516 2537 xe_tlb_inval_job_put(mjob); 2517 2538 xe_tlb_inval_job_put(ijob);
+2 -1
drivers/gpu/drm/xe/xe_pt.h
··· 10 10 #include "xe_pt_types.h" 11 11 12 12 struct dma_fence; 13 + struct drm_exec; 13 14 struct xe_bo; 14 15 struct xe_device; 15 16 struct xe_exec_queue; ··· 30 29 unsigned int xe_pt_shift(unsigned int level); 31 30 32 31 struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, 33 - unsigned int level); 32 + unsigned int level, struct drm_exec *exec); 34 33 35 34 void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, 36 35 struct xe_pt *pt);
-2
drivers/gpu/drm/xe/xe_pt_types.h
··· 105 105 u32 current_op; 106 106 /** @needs_svm_lock: Needs SVM lock */ 107 107 bool needs_svm_lock; 108 - /** @needs_userptr_lock: Needs userptr lock */ 109 - bool needs_userptr_lock; 110 108 /** @needs_invalidation: Needs invalidation */ 111 109 bool needs_invalidation; 112 110 /**
+1
drivers/gpu/drm/xe/xe_pxp.c
··· 688 688 689 689 return ret; 690 690 } 691 + ALLOW_ERROR_INJECTION(xe_pxp_exec_queue_add, ERRNO); 691 692 692 693 static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock) 693 694 {
+24 -10
drivers/gpu/drm/xe/xe_pxp_submit.c
··· 54 54 * Each termination is 16 DWORDS, so 4K is enough to contain a 55 55 * termination for each sessions. 56 56 */ 57 - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, 58 - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); 57 + bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel, 58 + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT, 59 + false); 59 60 if (IS_ERR(bo)) { 60 61 err = PTR_ERR(bo); 61 62 goto out_queue; ··· 88 87 { 89 88 struct xe_tile *tile = gt_to_tile(gt); 90 89 struct xe_device *xe = tile_to_xe(tile); 90 + struct xe_validation_ctx ctx; 91 91 struct xe_hw_engine *hwe; 92 + struct drm_exec exec; 92 93 struct xe_vm *vm; 93 94 struct xe_bo *bo; 94 95 struct xe_exec_queue *q; ··· 109 106 return PTR_ERR(vm); 110 107 111 108 /* We allocate a single object for the batch and the in/out memory */ 112 - xe_vm_lock(vm, false); 113 - bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2, 114 - ttm_bo_type_kernel, 115 - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC); 116 - xe_vm_unlock(vm); 117 - if (IS_ERR(bo)) { 118 - err = PTR_ERR(bo); 119 - goto vm_out; 109 + 110 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags){}, err) { 111 + err = xe_vm_drm_exec_lock(vm, &exec); 112 + drm_exec_retry_on_contention(&exec); 113 + if (err) 114 + break; 115 + 116 + bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2, 117 + ttm_bo_type_kernel, 118 + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | 119 + XE_BO_FLAG_NEEDS_UC, &exec); 120 + drm_exec_retry_on_contention(&exec); 121 + if (IS_ERR(bo)) { 122 + err = PTR_ERR(bo); 123 + xe_validation_retry_on_oom(&ctx, &err); 124 + break; 125 + } 120 126 } 127 + if (err) 128 + goto vm_out; 121 129 122 130 fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB); 123 131 if (IS_ERR(fence)) {
+3 -2
drivers/gpu/drm/xe/xe_query.c
··· 21 21 #include "xe_force_wake.h" 22 22 #include "xe_ggtt.h" 23 23 #include "xe_gt.h" 24 + #include "xe_gt_topology.h" 24 25 #include "xe_guc_hwconfig.h" 25 26 #include "xe_macros.h" 26 27 #include "xe_mmio.h" ··· 478 477 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss); 479 478 480 479 /* L3bank mask may not be available for some GTs */ 481 - if (!XE_GT_WA(gt, no_media_l3)) 480 + if (xe_gt_topology_report_l3(gt)) 482 481 query_size += sizeof(struct drm_xe_query_topology_mask) + 483 482 sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask); 484 483 } ··· 541 540 * mask, then it's better to omit L3 from the query rather than 542 541 * reporting bogus or zeroed information to userspace. 543 542 */ 544 - if (!XE_GT_WA(gt, no_media_l3)) { 543 + if (xe_gt_topology_report_l3(gt)) { 545 544 topo.type = DRM_XE_TOPO_L3_BANK; 546 545 err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, 547 546 sizeof(gt->fuse_topo.l3_bank_mask));
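With the xe_query.c change above, a GT may legitimately omit the L3 bank mask from the topology query. Userspace walking the DRM_XE_DEVICE_QUERY_GT_TOPOLOGY records must therefore treat absence of a DRM_XE_TOPO_L3_BANK entry as "not reported" rather than an error. A minimal sketch of such a defensive record walk — the struct layout and the `XE_TOPO_L3_BANK` value here are assumptions mirroring `uapi/drm/xe_drm.h`, not taken from this diff:

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define XE_TOPO_L3_BANK 3	/* assumed value of DRM_XE_TOPO_L3_BANK */

/* Assumed mirror of struct drm_xe_query_topology_mask. */
struct topo_mask {
	uint16_t gt_id;
	uint16_t type;
	uint32_t num_bytes;	/* length of mask[] that follows */
	uint8_t mask[];
};

/*
 * Walk the concatenated variable-length records returned by the
 * topology query and report whether an L3 bank mask was emitted for
 * @gt_id.  After this series, media GTs on Xe3+ have no such record.
 */
static bool gt_has_l3_mask(const uint8_t *buf, size_t len, uint16_t gt_id)
{
	size_t off = 0;

	while (off + sizeof(struct topo_mask) <= len) {
		struct topo_mask hdr;

		memcpy(&hdr, buf + off, sizeof(hdr));
		if (hdr.gt_id == gt_id && hdr.type == XE_TOPO_L3_BANK)
			return true;
		off += sizeof(hdr) + hdr.num_bytes;
	}
	return false;
}
```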
+6
drivers/gpu/drm/xe/xe_rtp.c
··· 370 370 { 371 371 return xe_configfs_get_psmi_enabled(to_pci_dev(gt_to_xe(gt)->drm.dev)); 372 372 } 373 + 374 + bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt, 375 + const struct xe_hw_engine *hwe) 376 + { 377 + return xe_gt_has_discontiguous_dss_groups(gt); 378 + }
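The new xe_rtp_match_gt_has_discontiguous_dss_groups() matcher fires when the enabled DSS units do not form one contiguous run in the fuse mask. A speculative, self-contained sketch of that predicate over a plain 32-bit mask — the driver operates on its real per-GT topology bitmaps, so this only illustrates the assumed semantics:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Count runs of consecutive set bits; more than one run means the
 * enabled units are split into discontiguous groups. */
static bool has_discontiguous_groups(uint32_t mask)
{
	int groups = 0;
	bool in_group = false;

	for (int bit = 0; bit < 32; bit++) {
		bool set = mask & (1u << bit);

		if (set && !in_group)
			groups++;
		in_group = set;
	}
	return groups > 1;
}
```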
+3
drivers/gpu/drm/xe/xe_rtp.h
··· 480 480 bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt, 481 481 const struct xe_hw_engine *hwe); 482 482 483 + bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt, 484 + const struct xe_hw_engine *hwe); 485 + 483 486 #endif
+5 -9
drivers/gpu/drm/xe/xe_sriov.c
··· 160 160 } 161 161 162 162 /** 163 - * xe_sriov_late_init() - SR-IOV late initialization functions. 163 + * xe_sriov_init_late() - SR-IOV late initialization functions. 164 164 * @xe: the &xe_device to initialize 165 - * 166 - * On VF this function will initialize code for CCS migration. 167 165 * 168 166 * Return: 0 on success or a negative error code on failure. 169 167 */ 170 - int xe_sriov_late_init(struct xe_device *xe) 168 + int xe_sriov_init_late(struct xe_device *xe) 171 169 { 172 - int err = 0; 170 + if (IS_SRIOV_VF(xe)) 171 + return xe_sriov_vf_init_late(xe); 173 172 174 - if (IS_VF_CCS_INIT_NEEDED(xe)) 175 - err = xe_sriov_vf_ccs_init(xe); 176 - 177 - return err; 173 + return 0; 178 174 }
+1 -1
drivers/gpu/drm/xe/xe_sriov.h
··· 18 18 void xe_sriov_probe_early(struct xe_device *xe); 19 19 void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); 20 20 int xe_sriov_init(struct xe_device *xe); 21 - int xe_sriov_late_init(struct xe_device *xe); 21 + int xe_sriov_init_late(struct xe_device *xe); 22 22 23 23 static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe) 24 24 {
+105 -10
drivers/gpu/drm/xe/xe_sriov_vf.c
··· 3 3 * Copyright © 2023-2024 Intel Corporation 4 4 */ 5 5 6 + #include <drm/drm_debugfs.h> 6 7 #include <drm/drm_managed.h> 7 8 8 9 #include "xe_assert.h" ··· 11 10 #include "xe_gt.h" 12 11 #include "xe_gt_sriov_printk.h" 13 12 #include "xe_gt_sriov_vf.h" 13 + #include "xe_guc.h" 14 14 #include "xe_guc_ct.h" 15 15 #include "xe_guc_submit.h" 16 16 #include "xe_irq.h" ··· 20 18 #include "xe_sriov.h" 21 19 #include "xe_sriov_printk.h" 22 20 #include "xe_sriov_vf.h" 21 + #include "xe_sriov_vf_ccs.h" 23 22 #include "xe_tile_sriov_vf.h" 24 23 25 24 /** ··· 130 127 * | | | 131 128 */ 132 129 133 - static bool vf_migration_supported(struct xe_device *xe) 130 + /** 131 + * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is 132 + * supported or not. 133 + * @xe: the &xe_device to check 134 + * 135 + * Returns: true if VF migration is supported, false otherwise. 136 + */ 137 + bool xe_sriov_vf_migration_supported(struct xe_device *xe) 138 + { 139 + xe_assert(xe, IS_SRIOV_VF(xe)); 140 + return xe->sriov.vf.migration.enabled; 141 + } 142 + 143 + static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...) 144 + { 145 + struct va_format vaf; 146 + va_list va_args; 147 + 148 + xe_assert(xe, IS_SRIOV_VF(xe)); 149 + 150 + va_start(va_args, fmt); 151 + vaf.fmt = fmt; 152 + vaf.va = &va_args; 153 + xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf); 154 + va_end(va_args); 155 + 156 + xe->sriov.vf.migration.enabled = false; 157 + } 158 + 159 + static void migration_worker_func(struct work_struct *w); 160 + 161 + static void vf_migration_init_early(struct xe_device *xe) 134 162 { 135 163 /* 136 164 * TODO: Add conditions to allow specific platforms, when they're 137 165 * supported at production quality. 
138 166 */ 139 - return IS_ENABLED(CONFIG_DRM_XE_DEBUG); 140 - } 167 + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) 168 + return vf_disable_migration(xe, 169 + "experimental feature not available on production builds"); 141 170 142 - static void migration_worker_func(struct work_struct *w); 171 + if (GRAPHICS_VER(xe) < 20) 172 + return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found", 173 + GRAPHICS_VER(xe)); 174 + 175 + if (!IS_DGFX(xe)) { 176 + struct xe_uc_fw_version guc_version; 177 + 178 + xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version); 179 + if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) 180 + return vf_disable_migration(xe, 181 + "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", 182 + guc_version.major, guc_version.minor); 183 + } 184 + 185 + INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); 186 + 187 + xe->sriov.vf.migration.enabled = true; 188 + xe_sriov_dbg(xe, "migration support enabled\n"); 189 + } 143 190 144 191 /** 145 192 * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. ··· 197 144 */ 198 145 void xe_sriov_vf_init_early(struct xe_device *xe) 199 146 { 200 - INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); 201 - 202 - if (!vf_migration_supported(xe)) 203 - xe_sriov_info(xe, "migration not supported by this module version\n"); 147 + vf_migration_init_early(xe); 204 148 } 205 149 206 150 /** ··· 352 302 xe_pm_runtime_get(xe); 353 303 vf_post_migration_shutdown(xe); 354 304 355 - if (!vf_migration_supported(xe)) { 356 - xe_sriov_err(xe, "migration not supported by this module version\n"); 305 + if (!xe_sriov_vf_migration_supported(xe)) { 306 + xe_sriov_err(xe, "migration is not supported\n"); 357 307 err = -ENOTRECOVERABLE; 358 308 goto fail; 359 309 } ··· 427 377 started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); 428 378 drm_info(&xe->drm, "VF migration recovery %s\n", started ? 
429 379 "scheduled" : "already in progress"); 380 + } 381 + 382 + /** 383 + * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions. 384 + * @xe: the &xe_device to initialize 385 + * 386 + * This function initializes code for CCS migration. 387 + * 388 + * Return: 0 on success or a negative error code on failure. 389 + */ 390 + int xe_sriov_vf_init_late(struct xe_device *xe) 391 + { 392 + int err = 0; 393 + 394 + if (xe_sriov_vf_migration_supported(xe)) 395 + err = xe_sriov_vf_ccs_init(xe); 396 + 397 + return err; 398 + } 399 + 400 + static int sa_info_vf_ccs(struct seq_file *m, void *data) 401 + { 402 + struct drm_info_node *node = m->private; 403 + struct xe_device *xe = to_xe_device(node->minor->dev); 404 + struct drm_printer p = drm_seq_file_printer(m); 405 + 406 + xe_sriov_vf_ccs_print(xe, &p); 407 + return 0; 408 + } 409 + 410 + static const struct drm_info_list debugfs_list[] = { 411 + { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs }, 412 + }; 413 + 414 + /** 415 + * xe_sriov_vf_debugfs_register - Register VF debugfs attributes. 416 + * @xe: the &xe_device 417 + * @root: the root &dentry 418 + * 419 + * Prepare debugfs attributes exposed by the VF. 420 + */ 421 + void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root) 422 + { 423 + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), 424 + root, xe->drm.primary); 430 425 }
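vf_disable_migration() above forwards the caller's printf-style arguments through struct va_format and the kernel's %pV printk extension, so each call site states only the reason while the "migration disabled:" prefix lives in one place. The same shape in portable C is a vsnprintf wrapper — a sketch, not the kernel helper:

```c
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

static char last_msg[128];	/* stand-in for the log sink */

/* Format once, prefix once: callers pass only the reason string. */
static void disable_migration(const char *fmt, ...)
{
	int n = snprintf(last_msg, sizeof(last_msg), "migration disabled: ");
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(last_msg + n, sizeof(last_msg) - n, fmt, ap);
	va_end(ap);
}
```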
+6
drivers/gpu/drm/xe/xe_sriov_vf.h
··· 6 6 #ifndef _XE_SRIOV_VF_H_ 7 7 #define _XE_SRIOV_VF_H_ 8 8 9 + #include <linux/types.h> 10 + 11 + struct dentry; 9 12 struct xe_device; 10 13 11 14 void xe_sriov_vf_init_early(struct xe_device *xe); 15 + int xe_sriov_vf_init_late(struct xe_device *xe); 12 16 void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); 17 + bool xe_sriov_vf_migration_supported(struct xe_device *xe); 18 + void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root); 13 19 14 20 #endif
+54 -21
drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
··· 13 13 #include "xe_guc_submit.h" 14 14 #include "xe_lrc.h" 15 15 #include "xe_migrate.h" 16 + #include "xe_pm.h" 16 17 #include "xe_sa.h" 17 18 #include "xe_sriov_printk.h" 19 + #include "xe_sriov_vf.h" 18 20 #include "xe_sriov_vf_ccs.h" 19 21 #include "xe_sriov_vf_ccs_types.h" 20 22 ··· 137 135 return round_up(bb_pool_size * 2, SZ_1M); 138 136 } 139 137 140 - static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx) 138 + static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) 141 139 { 142 140 struct xe_device *xe = tile_to_xe(tile); 143 141 struct xe_sa_manager *sa_manager; ··· 169 167 return 0; 170 168 } 171 169 172 - static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx) 170 + static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) 173 171 { 174 172 u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); 175 173 struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); ··· 186 184 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 187 185 } 188 186 189 - static int register_save_restore_context(struct xe_tile_vf_ccs *ctx) 187 + static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx) 190 188 { 191 - int err = -EINVAL; 192 189 int ctx_type; 193 190 194 191 switch (ctx->ctx_id) { ··· 198 197 ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE; 199 198 break; 200 199 default: 201 - return err; 200 + return -EINVAL; 202 201 } 203 202 204 - xe_guc_register_exec_queue(ctx->mig_q, ctx_type); 203 + xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type); 205 204 return 0; 206 205 } 207 206 ··· 216 215 */ 217 216 int xe_sriov_vf_ccs_register_context(struct xe_device *xe) 218 217 { 219 - struct xe_tile *tile = xe_device_get_root_tile(xe); 220 218 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 221 - struct xe_tile_vf_ccs *ctx; 219 + struct xe_sriov_vf_ccs_ctx *ctx; 222 220 int err; 223 221 224 - if (!IS_VF_CCS_READY(xe)) 225 - return 0; 222 + xe_assert(xe, IS_VF_CCS_READY(xe)); 226 223 227 224 for_each_ccs_rw_ctx(ctx_id) { 228 - ctx 
= &tile->sriov.vf.ccs[ctx_id]; 225 + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; 229 226 err = register_save_restore_context(ctx); 230 227 if (err) 231 228 return err; ··· 234 235 235 236 static void xe_sriov_vf_ccs_fini(void *arg) 236 237 { 237 - struct xe_tile_vf_ccs *ctx = arg; 238 + struct xe_sriov_vf_ccs_ctx *ctx = arg; 238 239 struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); 239 240 240 241 /* ··· 258 259 { 259 260 struct xe_tile *tile = xe_device_get_root_tile(xe); 260 261 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 261 - struct xe_tile_vf_ccs *ctx; 262 + struct xe_sriov_vf_ccs_ctx *ctx; 262 263 struct xe_exec_queue *q; 263 264 u32 flags; 264 265 int err; 265 266 266 267 xe_assert(xe, IS_SRIOV_VF(xe)); 267 - xe_assert(xe, !IS_DGFX(xe)); 268 - xe_assert(xe, xe_device_has_flat_ccs(xe)); 268 + xe_assert(xe, xe_sriov_vf_migration_supported(xe)); 269 + 270 + if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe)) 271 + return 0; 269 272 270 273 for_each_ccs_rw_ctx(ctx_id) { 271 - ctx = &tile->sriov.vf.ccs[ctx_id]; 274 + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; 272 275 ctx->ctx_id = ctx_id; 273 276 274 277 flags = EXEC_QUEUE_FLAG_KERNEL | ··· 325 324 { 326 325 struct xe_device *xe = xe_bo_device(bo); 327 326 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 328 - struct xe_tile_vf_ccs *ctx; 327 + struct xe_sriov_vf_ccs_ctx *ctx; 329 328 struct xe_tile *tile; 330 329 struct xe_bb *bb; 331 330 int err = 0; 332 331 333 - if (!IS_VF_CCS_READY(xe)) 334 - return 0; 332 + xe_assert(xe, IS_VF_CCS_READY(xe)); 335 333 336 334 tile = xe_device_get_root_tile(xe); 337 335 ··· 339 339 /* bb should be NULL here. 
Assert if not NULL */ 340 340 xe_assert(xe, !bb); 341 341 342 - ctx = &tile->sriov.vf.ccs[ctx_id]; 342 + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; 343 343 err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id); 344 344 } 345 345 return err; ··· 361 361 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 362 362 struct xe_bb *bb; 363 363 364 - if (!IS_VF_CCS_READY(xe)) 364 + xe_assert(xe, IS_VF_CCS_READY(xe)); 365 + 366 + if (!xe_bo_has_valid_ccs_bb(bo)) 365 367 return 0; 366 368 367 369 for_each_ccs_rw_ctx(ctx_id) { ··· 376 374 bo->bb_ccs[ctx_id] = NULL; 377 375 } 378 376 return 0; 377 + } 378 + 379 + /** 380 + * xe_sriov_vf_ccs_print - Print VF CCS details. 381 + * @xe: the &xe_device 382 + * @p: the &drm_printer 383 + * 384 + * This function is for VF use only. 385 + */ 386 + void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) 387 + { 388 + struct xe_sa_manager *bb_pool; 389 + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 390 + 391 + if (!IS_VF_CCS_READY(xe)) 392 + return; 393 + 394 + xe_pm_runtime_get(xe); 395 + 396 + for_each_ccs_rw_ctx(ctx_id) { 397 + bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; 398 + if (!bb_pool) 399 + break; 400 + 401 + drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read"); 402 + drm_printf(p, "-------------------------\n"); 403 + drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); 404 + drm_puts(p, "\n"); 405 + } 406 + 407 + xe_pm_runtime_put(xe); 379 408 }
+17
drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
··· 6 6 #ifndef _XE_SRIOV_VF_CCS_H_ 7 7 #define _XE_SRIOV_VF_CCS_H_ 8 8 9 + #include "xe_device_types.h" 10 + #include "xe_sriov.h" 11 + #include "xe_sriov_vf_ccs_types.h" 12 + 13 + struct drm_printer; 9 14 struct xe_device; 10 15 struct xe_bo; 11 16 ··· 18 13 int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo); 19 14 int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo); 20 15 int xe_sriov_vf_ccs_register_context(struct xe_device *xe); 16 + void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p); 17 + 18 + static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe) 19 + { 20 + xe_assert(xe, IS_SRIOV_VF(xe)); 21 + return xe->sriov.vf.ccs.initialized; 22 + } 23 + 24 + #define IS_VF_CCS_READY(xe) ({ \ 25 + struct xe_device *xe__ = (xe); \ 26 + IS_SRIOV_VF(xe__) && xe_sriov_vf_ccs_ready(xe__); \ 27 + }) 21 28 22 29 #endif
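The reworked IS_VF_CCS_READY() above binds its argument to a local (`xe__`) inside a GNU statement expression so the argument is evaluated exactly once even though the body uses it twice. A minimal illustration of that macro shape with a hypothetical device type (relies on the GCC/Clang statement-expression extension, as kernel code does):

```c
#include <assert.h>
#include <stdbool.h>

struct dev {
	bool is_vf;
	bool ccs_ready;
};

/* Same shape as IS_VF_CCS_READY(): evaluate (d) once, use it twice. */
#define DEV_CCS_READY(d) ({ \
	struct dev *d__ = (d); \
	d__->is_vf && d__->ccs_ready; \
})

static int evaluations;

/* Side-effecting accessor to observe how often the argument runs. */
static struct dev *get_dev(struct dev *d)
{
	evaluations++;
	return d;
}
```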
+21 -23
drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
··· 6 6 #ifndef _XE_SRIOV_VF_CCS_TYPES_H_ 7 7 #define _XE_SRIOV_VF_CCS_TYPES_H_ 8 8 9 + #include <linux/types.h> 10 + 9 11 #define for_each_ccs_rw_ctx(id__) \ 10 12 for ((id__) = 0; (id__) < XE_SRIOV_VF_CCS_CTX_COUNT; (id__)++) 11 - 12 - #define IS_VF_CCS_READY(xe) ({ \ 13 - struct xe_device *___xe = (xe); \ 14 - xe_assert(___xe, IS_SRIOV_VF(___xe)); \ 15 - ___xe->sriov.vf.ccs.initialized; \ 16 - }) 17 - 18 - #define IS_VF_CCS_INIT_NEEDED(xe) ({\ 19 - struct xe_device *___xe = (xe); \ 20 - IS_SRIOV_VF(___xe) && !IS_DGFX(___xe) && \ 21 - xe_device_has_flat_ccs(___xe) && GRAPHICS_VER(___xe) >= 20; \ 22 - }) 23 13 24 14 enum xe_sriov_vf_ccs_rw_ctxs { 25 15 XE_SRIOV_VF_CCS_READ_CTX, ··· 17 27 XE_SRIOV_VF_CCS_CTX_COUNT 18 28 }; 19 29 20 - #define IS_VF_CCS_BB_VALID(xe, bo) ({ \ 21 - struct xe_device *___xe = (xe); \ 22 - struct xe_bo *___bo = (bo); \ 23 - IS_SRIOV_VF(___xe) && \ 24 - ___bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && \ 25 - ___bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; \ 26 - }) 27 - 28 30 struct xe_migrate; 29 31 struct xe_sa_manager; 30 32 31 - struct xe_tile_vf_ccs { 32 - /** @id: Id to which context it belongs to */ 33 + /** 34 + * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data. 35 + */ 36 + struct xe_sriov_vf_ccs_ctx { 37 + /** @ctx_id: Id to which context it belongs to */ 33 38 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 39 + 34 40 /** @mig_q: exec queues used for migration */ 35 41 struct xe_exec_queue *mig_q; 36 42 43 + /** @mem: memory data */ 37 44 struct { 38 - /** @ccs_bb_pool: Pool from which batch buffers are allocated. */ 45 + /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */ 39 46 struct xe_sa_manager *ccs_bb_pool; 40 47 } mem; 48 + }; 49 + 50 + /** 51 + * struct xe_sriov_vf_ccs - The VF CCS migration support data. 52 + */ 53 + struct xe_sriov_vf_ccs { 54 + /** @contexts: CCS read and write contexts for VF. 
*/ 55 + struct xe_sriov_vf_ccs_ctx contexts[XE_SRIOV_VF_CCS_CTX_COUNT]; 56 + 57 + /** @initialized: Initialization of VF CCS is completed or not. */ 58 + bool initialized; 41 59 }; 42 60 43 61 #endif
+8 -4
drivers/gpu/drm/xe/xe_sriov_vf_types.h
··· 9 9 #include <linux/types.h> 10 10 #include <linux/workqueue_types.h> 11 11 12 + #include "xe_sriov_vf_ccs_types.h" 13 + 12 14 /** 13 15 * struct xe_sriov_vf_relay_version - PF ABI version details. 14 16 */ ··· 37 35 struct work_struct worker; 38 36 /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ 39 37 unsigned long gt_flags; 38 + /** 39 + * @migration.enabled: flag indicating if migration support 40 + * was enabled or not due to missing prerequisites 41 + */ 42 + bool enabled; 40 43 } migration; 41 44 42 45 /** @ccs: VF CCS state data */ 43 - struct { 44 - /** @ccs.initialized: Initilalization of VF CCS is completed or not */ 45 - bool initialized; 46 - } ccs; 46 + struct xe_sriov_vf_ccs ccs; 47 47 }; 48 48 49 49 #endif
+1 -10
drivers/gpu/drm/xe/xe_survivability_mode.c
··· 289 289 u32 data; 290 290 bool survivability_mode; 291 291 292 - if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) 292 + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) 293 293 return false; 294 294 295 295 survivability_mode = xe_configfs_get_survivability_mode(pdev); 296 - 297 - if (xe->info.platform < XE_BATTLEMAGE) { 298 - if (survivability_mode) { 299 - dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n"); 300 - xe_configfs_clear_survivability_mode(pdev); 301 - } 302 - return false; 303 - } 304 - 305 296 /* Enable survivability mode if set via configfs */ 306 297 if (survivability_mode) 307 298 return true;
+278 -92
drivers/gpu/drm/xe/xe_svm.c
··· 6 6 #include <drm/drm_drv.h> 7 7 8 8 #include "xe_bo.h" 9 + #include "xe_exec_queue_types.h" 9 10 #include "xe_gt_stats.h" 10 11 #include "xe_migrate.h" 11 12 #include "xe_module.h" ··· 26 25 * memory. 27 26 */ 28 27 29 - struct drm_gpusvm_range_flags flags = { 28 + struct drm_gpusvm_pages_flags flags = { 30 29 /* Pairs with WRITE_ONCE in drm_gpusvm.c */ 31 - .__flags = READ_ONCE(range->base.flags.__flags), 30 + .__flags = READ_ONCE(range->base.pages.flags.__flags), 32 31 }; 33 32 34 33 return flags.has_devmem_pages; ··· 50 49 return gpusvm_to_vm(r->gpusvm); 51 50 } 52 51 53 - #define range_debug(r__, operaton__) \ 52 + #define range_debug(r__, operation__) \ 54 53 vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \ 55 54 "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \ 56 55 "start=0x%014lx, end=0x%014lx, size=%lu", \ 57 - (operaton__), range_to_vm(&(r__)->base)->usm.asid, \ 56 + (operation__), range_to_vm(&(r__)->base)->usm.asid, \ 58 57 (r__)->base.gpusvm, \ 59 58 xe_svm_range_in_vram((r__)) ? 1 : 0, \ 60 59 xe_svm_range_has_vram_binding((r__)) ? 
1 : 0, \ 61 - (r__)->base.notifier_seq, \ 60 + (r__)->base.pages.notifier_seq, \ 62 61 xe_svm_range_start((r__)), xe_svm_range_end((r__)), \ 63 62 xe_svm_range_size((r__))) 64 63 ··· 113 112 &vm->svm.garbage_collector.work); 114 113 } 115 114 115 + static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt) 116 + { 117 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1); 118 + } 119 + 116 120 static u8 117 121 xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, 118 122 const struct mmu_notifier_range *mmu_range, ··· 134 128 range_debug(range, "NOTIFIER"); 135 129 136 130 /* Skip if already unmapped or if no binding exist */ 137 - if (range->base.flags.unmapped || !range->tile_present) 131 + if (range->base.pages.flags.unmapped || !range->tile_present) 138 132 return 0; 139 133 140 134 range_debug(range, "NOTIFIER - EXECUTE"); ··· 150 144 */ 151 145 for_each_tile(tile, xe, id) 152 146 if (xe_pt_zap_ptes_range(tile, vm, range)) { 153 - tile_mask |= BIT(id); 154 147 /* 155 148 * WRITE_ONCE pairs with READ_ONCE in 156 149 * xe_vm_has_valid_gpu_mapping() 157 150 */ 158 151 WRITE_ONCE(range->tile_invalidated, 159 152 range->tile_invalidated | BIT(id)); 153 + 154 + if (!(tile_mask & BIT(id))) { 155 + xe_svm_tlb_inval_count_stats_incr(tile->primary_gt); 156 + if (tile->media_gt) 157 + xe_svm_tlb_inval_count_stats_incr(tile->media_gt); 158 + tile_mask |= BIT(id); 159 + } 160 160 } 161 161 162 162 return tile_mask; ··· 182 170 mmu_range); 183 171 } 184 172 173 + static s64 xe_svm_stats_ktime_us_delta(ktime_t start) 174 + { 175 + return IS_ENABLED(CONFIG_DEBUG_FS) ? 
176 + ktime_us_delta(ktime_get(), start) : 0; 177 + } 178 + 179 + static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start) 180 + { 181 + s64 us_delta = xe_svm_stats_ktime_us_delta(start); 182 + 183 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta); 184 + } 185 + 186 + static ktime_t xe_svm_stats_ktime_get(void) 187 + { 188 + return IS_ENABLED(CONFIG_DEBUG_FS) ? ktime_get() : 0; 189 + } 190 + 185 191 static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, 186 192 struct drm_gpusvm_notifier *notifier, 187 193 const struct mmu_notifier_range *mmu_range) ··· 207 177 struct xe_vm *vm = gpusvm_to_vm(gpusvm); 208 178 struct xe_device *xe = vm->xe; 209 179 struct drm_gpusvm_range *r, *first; 180 + struct xe_tile *tile; 181 + ktime_t start = xe_svm_stats_ktime_get(); 210 182 u64 adj_start = mmu_range->start, adj_end = mmu_range->end; 211 - u8 tile_mask = 0; 183 + u8 tile_mask = 0, id; 212 184 long err; 213 185 214 186 xe_svm_assert_in_notifier(vm); ··· 263 231 r = first; 264 232 drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) 265 233 xe_svm_range_notifier_event_end(vm, r, mmu_range); 234 + for_each_tile(tile, xe, id) { 235 + if (tile_mask & BIT(id)) { 236 + xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start); 237 + if (tile->media_gt) 238 + xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start); 239 + } 240 + } 266 241 } 267 242 268 243 static int __xe_svm_garbage_collector(struct xe_vm *vm, ··· 347 308 if (xe_vm_is_closed_or_banned(vm)) 348 309 return -ENOENT; 349 310 350 - spin_lock(&vm->svm.garbage_collector.lock); 351 311 for (;;) { 312 + spin_lock(&vm->svm.garbage_collector.lock); 352 313 range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list, 353 314 typeof(*range), 354 315 garbage_collector_link); ··· 377 338 else 378 339 return err; 379 340 } 380 - 381 - spin_lock(&vm->svm.garbage_collector.lock); 382 341 } 383 342 spin_unlock(&vm->svm.garbage_collector.lock); 384 343 ··· 421 384 XE_SVM_COPY_TO_SRAM, 
422 385 }; 423 386 387 + static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt, 388 + const enum xe_svm_copy_dir dir, 389 + int kb) 390 + { 391 + if (dir == XE_SVM_COPY_TO_VRAM) 392 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb); 393 + else 394 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb); 395 + } 396 + 397 + static void xe_svm_copy_us_stats_incr(struct xe_gt *gt, 398 + const enum xe_svm_copy_dir dir, 399 + unsigned long npages, 400 + ktime_t start) 401 + { 402 + s64 us_delta = xe_svm_stats_ktime_us_delta(start); 403 + 404 + if (dir == XE_SVM_COPY_TO_VRAM) { 405 + switch (npages) { 406 + case 1: 407 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US, 408 + us_delta); 409 + break; 410 + case 16: 411 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US, 412 + us_delta); 413 + break; 414 + case 512: 415 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US, 416 + us_delta); 417 + break; 418 + } 419 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US, 420 + us_delta); 421 + } else { 422 + switch (npages) { 423 + case 1: 424 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US, 425 + us_delta); 426 + break; 427 + case 16: 428 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US, 429 + us_delta); 430 + break; 431 + case 512: 432 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US, 433 + us_delta); 434 + break; 435 + } 436 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US, 437 + us_delta); 438 + } 439 + } 440 + 424 441 static int xe_svm_copy(struct page **pages, 425 442 struct drm_pagemap_addr *pagemap_addr, 426 443 unsigned long npages, const enum xe_svm_copy_dir dir) 427 444 { 428 445 struct xe_vram_region *vr = NULL; 446 + struct xe_gt *gt = NULL; 429 447 struct xe_device *xe; 430 448 struct dma_fence *fence = NULL; 431 449 unsigned long i; ··· 488 396 u64 vram_addr = XE_VRAM_ADDR_INVALID; 489 397 int err = 0, pos = 0; 490 398 bool sram = dir == XE_SVM_COPY_TO_SRAM; 399 + ktime_t start = 
xe_svm_stats_ktime_get(); 491 400 492 401 /* 493 402 * This flow is complex: it locates physically contiguous device pages, ··· 515 422 516 423 if (!vr && spage) { 517 424 vr = page_to_vr(spage); 425 + gt = xe_migrate_exec_queue(vr->migrate)->gt; 518 426 xe = vr->xe; 519 427 } 520 428 XE_WARN_ON(spage && page_to_vr(spage) != vr); ··· 555 461 int incr = (match && last) ? 1 : 0; 556 462 557 463 if (vram_addr != XE_VRAM_ADDR_INVALID) { 464 + xe_svm_copy_kb_stats_incr(gt, dir, 465 + (i - pos + incr) * 466 + (PAGE_SIZE / SZ_1K)); 558 467 if (sram) { 559 468 vm_dbg(&xe->drm, 560 469 "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", ··· 596 499 597 500 /* Extra mismatched device page, copy it */ 598 501 if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) { 502 + xe_svm_copy_kb_stats_incr(gt, dir, 503 + (PAGE_SIZE / SZ_1K)); 599 504 if (sram) { 600 505 vm_dbg(&xe->drm, 601 506 "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", ··· 630 531 dma_fence_wait(fence, false); 631 532 dma_fence_put(fence); 632 533 } 534 + 535 + /* 536 + * XXX: We can't derive the GT here (or anywhere in this function), but 537 + * compute always uses the primary GT, so accumulate stats on the likely 538 + * GT of the fault. 
539 + */ 540 + if (gt) 541 + xe_svm_copy_us_stats_incr(gt, dir, npages, start); 633 542 634 543 return err; 635 544 #undef XE_MIGRATE_CHUNK_SIZE ··· 737 630 { 738 631 int err; 739 632 740 - spin_lock_init(&vm->svm.garbage_collector.lock); 741 - INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list); 742 - INIT_WORK(&vm->svm.garbage_collector.work, 743 - xe_svm_garbage_collector_work_func); 633 + if (vm->flags & XE_VM_FLAG_FAULT_MODE) { 634 + spin_lock_init(&vm->svm.garbage_collector.lock); 635 + INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list); 636 + INIT_WORK(&vm->svm.garbage_collector.work, 637 + xe_svm_garbage_collector_work_func); 744 638 745 - err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, 746 - current->mm, xe_svm_devm_owner(vm->xe), 0, 747 - vm->size, xe_modparam.svm_notifier_size * SZ_1M, 748 - &gpusvm_ops, fault_chunk_sizes, 749 - ARRAY_SIZE(fault_chunk_sizes)); 750 - if (err) 751 - return err; 639 + err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, 640 + current->mm, xe_svm_devm_owner(vm->xe), 0, 641 + vm->size, 642 + xe_modparam.svm_notifier_size * SZ_1M, 643 + &gpusvm_ops, fault_chunk_sizes, 644 + ARRAY_SIZE(fault_chunk_sizes)); 645 + drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); 646 + } else { 647 + err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", 648 + &vm->xe->drm, NULL, NULL, 0, 0, 0, NULL, 649 + NULL, 0); 650 + } 752 651 753 - drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); 754 - 755 - return 0; 652 + return err; 756 653 } 757 654 758 655 /** ··· 827 716 xe_svm_notifier_lock(vm); 828 717 829 718 ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask && 830 - (devmem_preferred == range->base.flags.has_devmem_pages); 719 + (devmem_preferred == range->base.pages.flags.has_devmem_pages); 831 720 832 721 xe_svm_notifier_unlock(vm); 833 722 ··· 866 755 struct xe_device *xe = vr->xe; 867 756 struct device *dev = xe->drm.dev; 868 757 struct drm_buddy_block *block; 758 + 
struct xe_validation_ctx vctx; 869 759 struct list_head *blocks; 760 + struct drm_exec exec; 870 761 struct xe_bo *bo; 871 - ktime_t time_end = 0; 872 - int err, idx; 762 + int err = 0, idx; 873 763 874 764 if (!drm_dev_enter(&xe->drm, &idx)) 875 765 return -ENODEV; 876 766 877 767 xe_pm_runtime_get(xe); 878 768 879 - retry: 880 - bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start, 881 - ttm_bo_type_device, 882 - (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | 883 - XE_BO_FLAG_CPU_ADDR_MIRROR); 884 - if (IS_ERR(bo)) { 885 - err = PTR_ERR(bo); 886 - if (xe_vm_validate_should_retry(NULL, err, &time_end)) 887 - goto retry; 888 - goto out_pm_put; 769 + xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { 770 + bo = xe_bo_create_locked(xe, NULL, NULL, end - start, 771 + ttm_bo_type_device, 772 + (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | 773 + XE_BO_FLAG_CPU_ADDR_MIRROR, &exec); 774 + drm_exec_retry_on_contention(&exec); 775 + if (IS_ERR(bo)) { 776 + err = PTR_ERR(bo); 777 + xe_validation_retry_on_oom(&vctx, &err); 778 + break; 779 + } 780 + 781 + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, 782 + &dpagemap_devmem_ops, dpagemap, end - start); 783 + 784 + blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; 785 + list_for_each_entry(block, blocks, link) 786 + block->private = vr; 787 + 788 + xe_bo_get(bo); 789 + 790 + /* Ensure the device has a pm ref while there are device pages active. 
*/ 791 + xe_pm_runtime_get_noresume(xe); 792 + err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, 793 + start, end, timeslice_ms, 794 + xe_svm_devm_owner(xe)); 795 + if (err) 796 + xe_svm_devmem_release(&bo->devmem_allocation); 797 + xe_bo_unlock(bo); 798 + xe_bo_put(bo); 889 799 } 890 - 891 - drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, 892 - &dpagemap_devmem_ops, dpagemap, end - start); 893 - 894 - blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; 895 - list_for_each_entry(block, blocks, link) 896 - block->private = vr; 897 - 898 - xe_bo_get(bo); 899 - 900 - /* Ensure the device has a pm ref while there are device pages active. */ 901 - xe_pm_runtime_get_noresume(xe); 902 - err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, 903 - start, end, timeslice_ms, 904 - xe_svm_devm_owner(xe)); 905 - if (err) 906 - xe_svm_devmem_release(&bo->devmem_allocation); 907 - 908 - xe_bo_unlock(bo); 909 - xe_bo_put(bo); 910 - 911 - out_pm_put: 912 800 xe_pm_runtime_put(xe); 913 801 drm_dev_exit(idx); 914 802 ··· 937 827 struct xe_vm *vm = range_to_vm(&range->base); 938 828 u64 range_size = xe_svm_range_size(range); 939 829 940 - if (!range->base.flags.migrate_devmem || !preferred_region_is_vram) 830 + if (!range->base.pages.flags.migrate_devmem || !preferred_region_is_vram) 941 831 return false; 942 832 943 833 xe_assert(vm->xe, IS_DGFX(vm->xe)); 944 834 945 - if (preferred_region_is_vram && xe_svm_range_in_vram(range)) { 835 + if (xe_svm_range_in_vram(range)) { 946 836 drm_info(&vm->xe->drm, "Range is already in VRAM\n"); 947 837 return false; 948 838 } 949 839 950 - if (preferred_region_is_vram && range_size < SZ_64K && !supports_4K_migration(vm->xe)) { 840 + if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) { 951 841 drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); 952 842 return false; 953 843 } ··· 955 845 return true; 956 846 } 957 847 848 + #define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \ 
849 + static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \ 850 + struct xe_svm_range *range) \ 851 + { \ 852 + switch (xe_svm_range_size(range)) { \ 853 + case SZ_4K: \ 854 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \ 855 + break; \ 856 + case SZ_64K: \ 857 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \ 858 + break; \ 859 + case SZ_2M: \ 860 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \ 861 + break; \ 862 + } \ 863 + } \ 864 + 865 + DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT) 866 + DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT) 867 + DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE) 868 + 869 + #define DECL_SVM_RANGE_US_STATS(elem, stat) \ 870 + static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \ 871 + struct xe_svm_range *range, \ 872 + ktime_t start) \ 873 + { \ 874 + s64 us_delta = xe_svm_stats_ktime_us_delta(start); \ 875 + \ 876 + switch (xe_svm_range_size(range)) { \ 877 + case SZ_4K: \ 878 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \ 879 + us_delta); \ 880 + break; \ 881 + case SZ_64K: \ 882 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \ 883 + us_delta); \ 884 + break; \ 885 + case SZ_2M: \ 886 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \ 887 + us_delta); \ 888 + break; \ 889 + } \ 890 + } \ 891 + 892 + DECL_SVM_RANGE_US_STATS(migrate, MIGRATE) 893 + DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES) 894 + DECL_SVM_RANGE_US_STATS(bind, BIND) 895 + DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT) 896 + 958 897 static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, 959 898 struct xe_gt *gt, u64 fault_addr, 960 899 bool need_vram) 961 900 { 901 + int devmem_possible = IS_DGFX(vm->xe) && 902 + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 962 903 struct drm_gpusvm_ctx ctx = { 963 904 .read_only = xe_vma_read_only(vma), 964 - .devmem_possible = IS_DGFX(vm->xe) && 965 - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), 966 - 
.check_pages_threshold = IS_DGFX(vm->xe) && 967 - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0, 968 - .devmem_only = need_vram && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), 969 - .timeslice_ms = need_vram && IS_DGFX(vm->xe) && 970 - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? 905 + .devmem_possible = devmem_possible, 906 + .check_pages_threshold = devmem_possible ? SZ_64K : 0, 907 + .devmem_only = need_vram && devmem_possible, 908 + .timeslice_ms = need_vram && devmem_possible ? 971 909 vm->xe->atomic_svm_timeslice_ms : 0, 972 910 }; 911 + struct xe_validation_ctx vctx; 912 + struct drm_exec exec; 973 913 struct xe_svm_range *range; 974 914 struct dma_fence *fence; 975 915 struct drm_pagemap *dpagemap; 976 916 struct xe_tile *tile = gt_to_tile(gt); 977 917 int migrate_try_count = ctx.devmem_only ? 3 : 1; 978 - ktime_t end = 0; 918 + ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start; 979 919 int err; 980 920 981 921 lockdep_assert_held_write(&vm->lock); ··· 1044 884 if (IS_ERR(range)) 1045 885 return PTR_ERR(range); 1046 886 1047 - if (ctx.devmem_only && !range->base.flags.migrate_devmem) 1048 - return -EACCES; 887 + xe_svm_range_fault_count_stats_incr(gt, range); 1049 888 1050 - if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) 1051 - return 0; 889 + if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) { 890 + err = -EACCES; 891 + goto out; 892 + } 893 + 894 + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) { 895 + xe_svm_range_valid_fault_count_stats_incr(gt, range); 896 + range_debug(range, "PAGE FAULT - VALID"); 897 + goto out; 898 + } 1052 899 1053 900 range_debug(range, "PAGE FAULT"); 1054 901 1055 902 dpagemap = xe_vma_resolve_pagemap(vma, tile); 1056 903 if (--migrate_try_count >= 0 && 1057 904 xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { 905 + ktime_t migrate_start = xe_svm_stats_ktime_get(); 906 + 1058 907 /* TODO : For multi-device dpagemap will be used to find the 1059 908 * remote tile 
and remote device. Will need to modify 1060 909 * xe_svm_alloc_vram to use dpagemap for future multi-device 1061 910 * support. 1062 911 */ 912 + xe_svm_range_migrate_count_stats_incr(gt, range); 1063 913 err = xe_svm_alloc_vram(tile, range, &ctx); 914 + xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start); 1064 915 ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 1065 916 if (err) { 1066 917 if (migrate_try_count || !ctx.devmem_only) { ··· 1087 916 } 1088 917 } 1089 918 } 919 + 920 + get_pages_start = xe_svm_stats_ktime_get(); 1090 921 1091 922 range_debug(range, "GET PAGES"); 1092 923 err = xe_svm_range_get_pages(vm, range, &ctx); ··· 1109 936 } 1110 937 if (err) { 1111 938 range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); 1112 - goto err_out; 939 + goto out; 1113 940 } 1114 941 942 + xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start); 1115 943 range_debug(range, "PAGE FAULT - BIND"); 1116 944 1117 - retry_bind: 1118 - xe_vm_lock(vm, false); 1119 - fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); 1120 - if (IS_ERR(fence)) { 1121 - xe_vm_unlock(vm); 1122 - err = PTR_ERR(fence); 1123 - if (err == -EAGAIN) { 1124 - ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 1125 - range_debug(range, "PAGE FAULT - RETRY BIND"); 1126 - goto retry; 945 + bind_start = xe_svm_stats_ktime_get(); 946 + xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) { 947 + err = xe_vm_drm_exec_lock(vm, &exec); 948 + drm_exec_retry_on_contention(&exec); 949 + 950 + xe_vm_set_validation_exec(vm, &exec); 951 + fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); 952 + xe_vm_set_validation_exec(vm, NULL); 953 + if (IS_ERR(fence)) { 954 + drm_exec_retry_on_contention(&exec); 955 + err = PTR_ERR(fence); 956 + xe_validation_retry_on_oom(&vctx, &err); 957 + xe_svm_range_bind_us_stats_incr(gt, range, bind_start); 958 + break; 1127 959 } 1128 - if (xe_vm_validate_should_retry(NULL, err, &end)) 1129 - 
goto retry_bind; 1130 - goto err_out; 1131 960 } 1132 - xe_vm_unlock(vm); 961 + if (err) 962 + goto err_out; 1133 963 1134 964 dma_fence_wait(fence, false); 1135 965 dma_fence_put(fence); 966 + xe_svm_range_bind_us_stats_incr(gt, range, bind_start); 967 + 968 + out: 969 + xe_svm_range_fault_us_stats_incr(gt, range, start); 970 + return 0; 1136 971 1137 972 err_out: 973 + if (err == -EAGAIN) { 974 + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 975 + range_debug(range, "PAGE FAULT - RETRY BIND"); 976 + goto retry; 977 + } 1138 978 1139 979 return err; 1140 980 } ··· 1275 1089 r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)), 1276 1090 xe_vma_start(vma), xe_vma_end(vma), ctx); 1277 1091 if (IS_ERR(r)) 1278 - return ERR_PTR(PTR_ERR(r)); 1092 + return ERR_CAST(r); 1279 1093 1280 1094 return to_xe_range(r); 1281 1095 } ··· 1407 1221 { 1408 1222 struct drm_pagemap *dpagemap; 1409 1223 1410 - xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem); 1224 + xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem); 1411 1225 range_debug(range, "ALLOCATE VRAM"); 1412 1226 1413 1227 dpagemap = tile_local_pagemap(tile);
+47 -16
drivers/gpu/drm/xe/xe_svm.h
··· 105 105 static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) 106 106 { 107 107 lockdep_assert_held(&range->base.gpusvm->notifier_lock); 108 - return range->base.flags.has_dma_mapping; 108 + return range->base.pages.flags.has_dma_mapping; 109 109 } 110 110 111 111 /** ··· 155 155 return drm_gpusvm_range_size(&range->base); 156 156 } 157 157 158 - #define xe_svm_assert_in_notifier(vm__) \ 159 - lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) 160 - 161 - #define xe_svm_notifier_lock(vm__) \ 162 - drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) 163 - 164 - #define xe_svm_notifier_unlock(vm__) \ 165 - drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) 166 - 167 158 void xe_svm_flush(struct xe_vm *vm); 168 159 169 160 #else 170 161 #include <linux/interval_tree.h> 162 + #include "xe_vm.h" 171 163 172 164 struct drm_pagemap_addr; 173 165 struct drm_gpusvm_ctx; ··· 176 184 struct xe_svm_range { 177 185 struct { 178 186 struct interval_tree_node itree; 179 - const struct drm_pagemap_addr *dma_addr; 187 + struct { 188 + const struct drm_pagemap_addr *dma_addr; 189 + } pages; 180 190 } base; 181 191 u32 tile_present; 182 192 u32 tile_invalidated; ··· 198 204 static inline 199 205 int xe_svm_init(struct xe_vm *vm) 200 206 { 207 + #if IS_ENABLED(CONFIG_DRM_GPUSVM) 208 + return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm, 209 + NULL, NULL, 0, 0, 0, NULL, NULL, 0); 210 + #else 201 211 return 0; 212 + #endif 202 213 } 203 214 204 215 static inline 205 216 void xe_svm_fini(struct xe_vm *vm) 206 217 { 218 + #if IS_ENABLED(CONFIG_DRM_GPUSVM) 219 + xe_assert(vm->xe, xe_vm_is_closed(vm)); 220 + drm_gpusvm_fini(&vm->svm.gpusvm); 221 + #endif 207 222 } 208 223 209 224 static inline ··· 329 326 return NULL; 330 327 } 331 328 332 - #define xe_svm_assert_in_notifier(...) do {} while (0) 329 + static inline void xe_svm_flush(struct xe_vm *vm) 330 + { 331 + } 333 332 #define xe_svm_range_has_dma_mapping(...) 
false 333 + #endif /* CONFIG_DRM_XE_GPUSVM */ 334 + 335 + #if IS_ENABLED(CONFIG_DRM_GPUSVM) /* Need to support userptr without XE_GPUSVM */ 336 + #define xe_svm_assert_in_notifier(vm__) \ 337 + lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) 338 + 339 + #define xe_svm_assert_held_read(vm__) \ 340 + lockdep_assert_held_read(&(vm__)->svm.gpusvm.notifier_lock) 341 + 342 + #define xe_svm_notifier_lock(vm__) \ 343 + drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) 344 + 345 + #define xe_svm_notifier_lock_interruptible(vm__) \ 346 + down_read_interruptible(&(vm__)->svm.gpusvm.notifier_lock) 347 + 348 + #define xe_svm_notifier_unlock(vm__) \ 349 + drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) 350 + 351 + #else 352 + #define xe_svm_assert_in_notifier(...) do {} while (0) 353 + 354 + static inline void xe_svm_assert_held_read(struct xe_vm *vm) 355 + { 356 + } 334 357 335 358 static inline void xe_svm_notifier_lock(struct xe_vm *vm) 336 359 { 337 360 } 338 361 362 + static inline int xe_svm_notifier_lock_interruptible(struct xe_vm *vm) 363 + { 364 + return 0; 365 + } 366 + 339 367 static inline void xe_svm_notifier_unlock(struct xe_vm *vm) 340 368 { 341 369 } 370 + #endif /* CONFIG_DRM_GPUSVM */ 342 371 343 - static inline void xe_svm_flush(struct xe_vm *vm) 344 - { 345 - } 346 - #endif 347 372 #endif
+135
drivers/gpu/drm/xe/xe_tile_debugfs.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include <linux/debugfs.h> 7 + #include <drm/drm_debugfs.h> 8 + 9 + #include "xe_pm.h" 10 + #include "xe_sa.h" 11 + #include "xe_tile_debugfs.h" 12 + 13 + static struct xe_tile *node_to_tile(struct drm_info_node *node) 14 + { 15 + return node->dent->d_parent->d_inode->i_private; 16 + } 17 + 18 + /** 19 + * tile_debugfs_simple_show - A show callback for struct drm_info_list 20 + * @m: the &seq_file 21 + * @data: data used by the drm debugfs helpers 22 + * 23 + * This callback can be used in struct drm_info_list to describe debugfs 24 + * files that are &xe_tile specific. 25 + * 26 + * It is assumed that those debugfs files will be created on a directory entry 27 + * whose struct dentry d_inode->i_private points to the &xe_tile. 28 + * 29 + * /sys/kernel/debug/dri/0/ 30 + * ├── tile0/ # tile = dentry->d_inode->i_private 31 + * │ │ ├── id # tile = dentry->d_parent->d_inode->i_private 32 + * 33 + * This function assumes that &m->private will be set to the &struct 34 + * drm_info_node corresponding to the instance of the info on a given &struct 35 + * drm_minor (see struct drm_info_list.show for details). 36 + * 37 + * This function also assumes that struct drm_info_list.data will point to the 38 + * function code that will actually print a file content:: 39 + * 40 + * int (*print)(struct xe_tile *, struct drm_printer *) 41 + * 42 + * Example:: 43 + * 44 + * int tile_id(struct xe_tile *tile, struct drm_printer *p) 45 + * { 46 + * drm_printf(p, "%u\n", tile->id); 47 + * return 0; 48 + * } 49 + * 50 + * static const struct drm_info_list info[] = { 51 + * { .name = "id", .show = tile_debugfs_simple_show, .data = tile_id }, 52 + * }; 53 + * 54 + * dir = debugfs_create_dir("tile0", parent); 55 + * dir->d_inode->i_private = tile; 56 + * drm_debugfs_create_files(info, ARRAY_SIZE(info), dir, minor); 57 + * 58 + * Return: 0 on success or a negative error code on failure. 
59 + */ 60 + static int tile_debugfs_simple_show(struct seq_file *m, void *data) 61 + { 62 + struct drm_printer p = drm_seq_file_printer(m); 63 + struct drm_info_node *node = m->private; 64 + struct xe_tile *tile = node_to_tile(node); 65 + int (*print)(struct xe_tile *, struct drm_printer *) = node->info_ent->data; 66 + 67 + return print(tile, &p); 68 + } 69 + 70 + /** 71 + * tile_debugfs_show_with_rpm - A show callback for struct drm_info_list 72 + * @m: the &seq_file 73 + * @data: data used by the drm debugfs helpers 74 + * 75 + * Similar to tile_debugfs_simple_show() but implicitly takes an RPM ref. 76 + * 77 + * Return: 0 on success or a negative error code on failure. 78 + */ 79 + static int tile_debugfs_show_with_rpm(struct seq_file *m, void *data) 80 + { 81 + struct drm_info_node *node = m->private; 82 + struct xe_tile *tile = node_to_tile(node); 83 + struct xe_device *xe = tile_to_xe(tile); 84 + int ret; 85 + 86 + xe_pm_runtime_get(xe); 87 + ret = tile_debugfs_simple_show(m, data); 88 + xe_pm_runtime_put(xe); 89 + 90 + return ret; 91 + } 92 + 93 + static int sa_info(struct xe_tile *tile, struct drm_printer *p) 94 + { 95 + drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, 96 + xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool)); 97 + 98 + return 0; 99 + } 100 + 101 + /* only for debugfs files which can be safely used on the VF */ 102 + static const struct drm_info_list vf_safe_debugfs_list[] = { 103 + { "sa_info", .show = tile_debugfs_show_with_rpm, .data = sa_info }, 104 + }; 105 + 106 + /** 107 + * xe_tile_debugfs_register - Register tile's debugfs attributes 108 + * @tile: the &xe_tile to register 109 + * 110 + * Create a debugfs sub-directory with a name that includes the tile ID and 111 + * then create a set of debugfs files (attributes) specific to this tile. 
112 + */ 113 + void xe_tile_debugfs_register(struct xe_tile *tile) 114 + { 115 + struct xe_device *xe = tile_to_xe(tile); 116 + struct drm_minor *minor = xe->drm.primary; 117 + struct dentry *root = minor->debugfs_root; 118 + char name[8]; 119 + 120 + snprintf(name, sizeof(name), "tile%u", tile->id); 121 + tile->debugfs = debugfs_create_dir(name, root); 122 + if (IS_ERR(tile->debugfs)) 123 + return; 124 + 125 + /* 126 + * Store the xe_tile pointer as private data of the tile/ directory 127 + * node so other tile specific attributes under that directory may 128 + * refer to it by looking at its parent node private data. 129 + */ 130 + tile->debugfs->d_inode->i_private = tile; 131 + 132 + drm_debugfs_create_files(vf_safe_debugfs_list, 133 + ARRAY_SIZE(vf_safe_debugfs_list), 134 + tile->debugfs, minor); 135 + }
+13
drivers/gpu/drm/xe/xe_tile_debugfs.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_TILE_DEBUGFS_H_ 7 + #define _XE_TILE_DEBUGFS_H_ 8 + 9 + struct xe_tile; 10 + 11 + void xe_tile_debugfs_register(struct xe_tile *tile); 12 + 13 + #endif
+127
drivers/gpu/drm/xe/xe_tile_printk.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _xe_tile_printk_H_ 7 + #define _xe_tile_printk_H_ 8 + 9 + #include "xe_printk.h" 10 + 11 + #define __XE_TILE_PRINTK_FMT(_tile, _fmt, _args...) "Tile%u: " _fmt, (_tile)->id, ##_args 12 + 13 + #define xe_tile_printk(_tile, _level, _fmt, ...) \ 14 + xe_printk((_tile)->xe, _level, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) 15 + 16 + #define xe_tile_err(_tile, _fmt, ...) \ 17 + xe_tile_printk((_tile), err, _fmt, ##__VA_ARGS__) 18 + 19 + #define xe_tile_err_once(_tile, _fmt, ...) \ 20 + xe_tile_printk((_tile), err_once, _fmt, ##__VA_ARGS__) 21 + 22 + #define xe_tile_err_ratelimited(_tile, _fmt, ...) \ 23 + xe_tile_printk((_tile), err_ratelimited, _fmt, ##__VA_ARGS__) 24 + 25 + #define xe_tile_warn(_tile, _fmt, ...) \ 26 + xe_tile_printk((_tile), warn, _fmt, ##__VA_ARGS__) 27 + 28 + #define xe_tile_notice(_tile, _fmt, ...) \ 29 + xe_tile_printk((_tile), notice, _fmt, ##__VA_ARGS__) 30 + 31 + #define xe_tile_info(_tile, _fmt, ...) \ 32 + xe_tile_printk((_tile), info, _fmt, ##__VA_ARGS__) 33 + 34 + #define xe_tile_dbg(_tile, _fmt, ...) \ 35 + xe_tile_printk((_tile), dbg, _fmt, ##__VA_ARGS__) 36 + 37 + #define xe_tile_WARN_type(_tile, _type, _condition, _fmt, ...) \ 38 + xe_WARN##_type((_tile)->xe, _condition, _fmt, ## __VA_ARGS__) 39 + 40 + #define xe_tile_WARN(_tile, _condition, _fmt, ...) \ 41 + xe_tile_WARN_type((_tile),, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) 42 + 43 + #define xe_tile_WARN_ONCE(_tile, _condition, _fmt, ...) 
\ 44 + xe_tile_WARN_type((_tile), _ONCE, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) 45 + 46 + #define xe_tile_WARN_ON(_tile, _condition) \ 47 + xe_tile_WARN((_tile), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) 48 + 49 + #define xe_tile_WARN_ON_ONCE(_tile, _condition) \ 50 + xe_tile_WARN_ONCE((_tile), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) 51 + 52 + static inline void __xe_tile_printfn_err(struct drm_printer *p, struct va_format *vaf) 53 + { 54 + struct xe_tile *tile = p->arg; 55 + 56 + xe_tile_err(tile, "%pV", vaf); 57 + } 58 + 59 + static inline void __xe_tile_printfn_info(struct drm_printer *p, struct va_format *vaf) 60 + { 61 + struct xe_tile *tile = p->arg; 62 + 63 + xe_tile_info(tile, "%pV", vaf); 64 + } 65 + 66 + static inline void __xe_tile_printfn_dbg(struct drm_printer *p, struct va_format *vaf) 67 + { 68 + struct xe_tile *tile = p->arg; 69 + struct drm_printer dbg; 70 + 71 + /* 72 + * The original xe_tile_dbg() callsite annotations are useless here, 73 + * redirect to the tweaked xe_dbg_printer() instead. 74 + */ 75 + dbg = xe_dbg_printer(tile->xe); 76 + dbg.origin = p->origin; 77 + 78 + drm_printf(&dbg, __XE_TILE_PRINTK_FMT(tile, "%pV", vaf)); 79 + } 80 + 81 + /** 82 + * xe_tile_err_printer - Construct a &drm_printer that outputs to xe_tile_err() 83 + * @tile: the &xe_tile pointer to use in xe_tile_err() 84 + * 85 + * Return: The &drm_printer object. 86 + */ 87 + static inline struct drm_printer xe_tile_err_printer(struct xe_tile *tile) 88 + { 89 + struct drm_printer p = { 90 + .printfn = __xe_tile_printfn_err, 91 + .arg = tile, 92 + }; 93 + return p; 94 + } 95 + 96 + /** 97 + * xe_tile_info_printer - Construct a &drm_printer that outputs to xe_tile_info() 98 + * @tile: the &xe_tile pointer to use in xe_tile_info() 99 + * 100 + * Return: The &drm_printer object. 
101 + */ 102 + static inline struct drm_printer xe_tile_info_printer(struct xe_tile *tile) 103 + { 104 + struct drm_printer p = { 105 + .printfn = __xe_tile_printfn_info, 106 + .arg = tile, 107 + }; 108 + return p; 109 + } 110 + 111 + /** 112 + * xe_tile_dbg_printer - Construct a &drm_printer that outputs like xe_tile_dbg() 113 + * @tile: the &xe_tile pointer to use in xe_tile_dbg() 114 + * 115 + * Return: The &drm_printer object. 116 + */ 117 + static inline struct drm_printer xe_tile_dbg_printer(struct xe_tile *tile) 118 + { 119 + struct drm_printer p = { 120 + .printfn = __xe_tile_printfn_dbg, 121 + .arg = tile, 122 + .origin = (const void *)_THIS_IP_, 123 + }; 124 + return p; 125 + } 126 + 127 + #endif
+7 -5
drivers/gpu/drm/xe/xe_tile_sysfs.c
··· 44 44 kt->tile = tile; 45 45 46 46 err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); 47 - if (err) { 48 - kobject_put(&kt->base); 49 - return err; 50 - } 47 + if (err) 48 + goto err_object; 51 49 52 50 tile->sysfs = &kt->base; 53 51 54 52 err = xe_vram_freq_sysfs_init(tile); 55 53 if (err) 56 - return err; 54 + goto err_object; 57 55 58 56 return devm_add_action_or_reset(xe->drm.dev, tile_sysfs_fini, tile); 57 + 58 + err_object: 59 + kobject_put(&kt->base); 60 + return err; 59 61 }
+1 -2
drivers/gpu/drm/xe/xe_tlb_inval.c
··· 10 10 #include "xe_force_wake.h" 11 11 #include "xe_gt.h" 12 12 #include "xe_gt_printk.h" 13 + #include "xe_gt_stats.h" 13 14 #include "xe_guc.h" 14 15 #include "xe_guc_ct.h" 15 16 #include "xe_guc_tlb_inval.h" 16 - #include "xe_gt_stats.h" 17 - #include "xe_tlb_inval.h" 18 17 #include "xe_mmio.h" 19 18 #include "xe_pm.h" 20 19 #include "xe_tlb_inval.h"
+15 -14
drivers/gpu/drm/xe/xe_uc_fw.c
··· 115 115 #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED 116 116 117 117 #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ 118 - fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 47, 0)) \ 119 - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ 118 + fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 49, 4)) \ 119 + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 49, 4)) \ 120 120 fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ 121 121 fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ 122 122 fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ ··· 328 328 xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); 329 329 } 330 330 331 - static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) 331 + static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_guc_info *guc_info) 332 332 { 333 333 struct xe_gt *gt = uc_fw_to_gt(uc_fw); 334 334 struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; ··· 343 343 return -EINVAL; 344 344 } 345 345 346 - compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version); 347 - compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version); 348 - compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version); 346 + compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, guc_info->submission_version); 347 + compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, guc_info->submission_version); 348 + compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, guc_info->submission_version); 349 349 350 - uc_fw->private_data_size = css->private_data_size; 350 + uc_fw->build_type = FIELD_GET(CSS_UKERNEL_INFO_BUILDTYPE, guc_info->ukernel_info); 351 + uc_fw->private_data_size = guc_info->private_data_size; 351 352 352 353 return 0; 353 354 } ··· 417 416 css = (struct uc_css_header *)fw_data; 418 417 419 
418 /* Check integrity of size values inside CSS header */ 420 - size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - 421 - css->exponent_size_dw) * sizeof(u32); 419 + size = (css->header_size_dw - css->rsa_info.key_size_dw - css->rsa_info.modulus_size_dw - 420 + css->rsa_info.exponent_size_dw) * sizeof(u32); 422 421 if (unlikely(size != sizeof(struct uc_css_header))) { 423 422 drm_warn(&xe->drm, 424 423 "%s firmware %s: unexpected header size: %zu != %zu\n", ··· 431 430 uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); 432 431 433 432 /* now RSA */ 434 - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); 433 + uc_fw->rsa_size = css->rsa_info.key_size_dw * sizeof(u32); 435 434 436 435 /* At least, it should have header, uCode and RSA. Size of all three. */ 437 436 size = sizeof(struct uc_css_header) + uc_fw->ucode_size + ··· 444 443 } 445 444 446 445 /* Get version numbers from the CSS header */ 447 - release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version); 448 - release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); 449 - release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); 446 + release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->guc_info.sw_version); 447 + release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->guc_info.sw_version); 448 + release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->guc_info.sw_version); 450 449 451 450 if (uc_fw->type == XE_UC_FW_TYPE_GUC) 452 - return guc_read_css_info(uc_fw, css); 451 + return guc_read_css_info(uc_fw, &css->guc_info); 453 452 454 453 return 0; 455 454 }
+107 -23
drivers/gpu/drm/xe/xe_uc_fw_abi.h
··· 44 44 * in fw. So driver will load a truncated firmware in this case. 45 45 */ 46 46 47 + struct uc_css_rsa_info { 48 + u32 key_size_dw; 49 + u32 modulus_size_dw; 50 + u32 exponent_size_dw; 51 + } __packed; 52 + 53 + struct uc_css_guc_info { 54 + u32 time; 55 + #define CSS_TIME_HOUR (0xFF << 0) 56 + #define CSS_TIME_MIN (0xFF << 8) 57 + #define CSS_TIME_SEC (0xFFFF << 16) 58 + u32 reserved0[5]; 59 + u32 sw_version; 60 + #define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) 61 + #define CSS_SW_VERSION_UC_MINOR (0xFF << 8) 62 + #define CSS_SW_VERSION_UC_PATCH (0xFF << 0) 63 + u32 submission_version; 64 + u32 reserved1[11]; 65 + u32 header_info; 66 + #define CSS_HEADER_INFO_SVN (0xFF) 67 + #define CSS_HEADER_INFO_COPY_VALID (0x1 << 31) 68 + u32 private_data_size; 69 + u32 ukernel_info; 70 + #define CSS_UKERNEL_INFO_DEVICEID (0xFFFF << 16) 71 + #define CSS_UKERNEL_INFO_PRODKEY (0xFF << 8) 72 + #define CSS_UKERNEL_INFO_BUILDTYPE (0x3 << 2) 73 + #define CSS_UKERNEL_INFO_BUILDTYPE_PROD 0 74 + #define CSS_UKERNEL_INFO_BUILDTYPE_PREPROD 1 75 + #define CSS_UKERNEL_INFO_BUILDTYPE_DEBUG 2 76 + #define CSS_UKERNEL_INFO_ENCSTATUS (0x1 << 1) 77 + #define CSS_UKERNEL_INFO_COPY_VALID (0x1 << 0) 78 + } __packed; 79 + 47 80 struct uc_css_header { 48 81 u32 module_type; 49 82 /* ··· 85 52 */ 86 53 u32 header_size_dw; 87 54 u32 header_version; 88 - u32 module_id; 55 + u32 reserved0; 89 56 u32 module_vendor; 90 57 u32 date; 91 - #define CSS_DATE_DAY (0xFF << 0) 92 - #define CSS_DATE_MONTH (0xFF << 8) 93 - #define CSS_DATE_YEAR (0xFFFF << 16) 58 + #define CSS_DATE_DAY (0xFF << 0) 59 + #define CSS_DATE_MONTH (0xFF << 8) 60 + #define CSS_DATE_YEAR (0xFFFF << 16) 94 61 u32 size_dw; /* uCode plus header_size_dw */ 95 - u32 key_size_dw; 96 - u32 modulus_size_dw; 97 - u32 exponent_size_dw; 98 - u32 time; 99 - #define CSS_TIME_HOUR (0xFF << 0) 100 - #define CSS_DATE_MIN (0xFF << 8) 101 - #define CSS_DATE_SEC (0xFFFF << 16) 102 - char username[8]; 103 - char buildnumber[12]; 104 - u32 sw_version; 105 
- #define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) 106 - #define CSS_SW_VERSION_UC_MINOR (0xFF << 8) 107 - #define CSS_SW_VERSION_UC_PATCH (0xFF << 0) 108 62 union { 109 - u32 submission_version; /* only applies to GuC */ 110 - u32 reserved2; 63 + u32 reserved1[3]; 64 + struct uc_css_rsa_info rsa_info; 111 65 }; 112 - u32 reserved0[12]; 113 66 union { 114 - u32 private_data_size; /* only applies to GuC */ 115 - u32 reserved1; 67 + u32 reserved2[22]; 68 + struct uc_css_guc_info guc_info; 116 69 }; 117 - u32 header_info; 118 70 } __packed; 119 71 static_assert(sizeof(struct uc_css_header) == 128); 120 72 ··· 334 316 u8 reserved3[56]; 335 317 u32 modulus_size; /* in dwords */ 336 318 u32 exponent_size; /* in dwords */ 319 + } __packed; 320 + 321 + /** 322 + * DOC: Late binding Firmware Layout 323 + * 324 + * The Late binding binary starts with an FPT header, which contains the 325 + * locations of the various partitions of the binary. Here we're interested in 326 + * finding the manifest version. To find it, we first locate the CPD header; 327 + * one of the entries in the CPD header points to the manifest header, which 328 + * contains the version. 329 + * 330 + * +================================================+ 331 + * | FPT Header | 332 + * +================================================+ 333 + * | FPT entries[] | 334 + * | entry1 | 335 + * | ... | 336 + * | entryX | 337 + * | "LTES" | 338 + * | ... | 339 + * | offset >-----------------------------|------o 340 + * +================================================+ | 341 + * | 342 + * +================================================+ | 343 + * | CPD Header |<-----o 344 + * +================================================+ 345 + * | CPD entries[] | 346 + * | entry1 | 347 + * | ... | 348 + * | entryX | 349 + * | "LTES.man" | 350 + * | ...
| 351 + * | offset >----------------------------|------o 352 + * +================================================+ | 353 + * | 354 + * +================================================+ | 355 + * | Manifest Header |<-----o 356 + * | ... | 357 + * | FW version | 358 + * | ... | 359 + * +================================================+ 360 + */ 361 + 362 + /* FPT Headers */ 363 + struct csc_fpt_header { 364 + u32 header_marker; 365 + #define CSC_FPT_HEADER_MARKER 0x54504624 366 + u32 num_of_entries; 367 + u8 header_version; 368 + u8 entry_version; 369 + u8 header_length; /* in bytes */ 370 + u8 flags; 371 + u16 ticks_to_add; 372 + u16 tokens_to_add; 373 + u32 uma_size; 374 + u32 crc32; 375 + struct gsc_version fitc_version; 376 + } __packed; 377 + 378 + struct csc_fpt_entry { 379 + u8 name[4]; /* partition name */ 380 + u32 reserved1; 381 + u32 offset; /* offset from beginning of CSE region */ 382 + u32 length; /* partition length in bytes */ 383 + u32 reserved2[3]; 384 + u32 partition_flags; 337 385 } __packed; 338 386 339 387 #endif
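The DOC comment above describes the first lookup step: walk the FPT entry table and follow the offset of a named partition. A minimal userspace sketch of that walk, assuming a simplified header that keeps only the fields the walk needs (the `fpt_find_offset` helper and the trimmed struct layout are illustrative, not the driver's API; the `0x54504624` marker is "$FPT" in little-endian, per `CSC_FPT_HEADER_MARKER`):

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Simplified mirror of csc_fpt_header: only marker, entry count and
 * header length are needed to reach the entry table. */
struct fpt_header {
	uint32_t marker;	/* CSC_FPT_HEADER_MARKER, "$FPT" */
	uint32_t num_entries;
	uint8_t  header_version;
	uint8_t  entry_version;
	uint8_t  header_length;	/* in bytes; entries follow the header */
	uint8_t  flags;
} __attribute__((packed));

/* Field order follows csc_fpt_entry in the diff. */
struct fpt_entry {
	char     name[4];	/* partition name, e.g. "LTES" */
	uint32_t reserved1;
	uint32_t offset;	/* from beginning of the region */
	uint32_t length;	/* partition length in bytes */
	uint32_t reserved2[3];
	uint32_t partition_flags;
} __attribute__((packed));

/* Return the offset of the named partition, or 0 if the marker is
 * wrong or the name is absent. */
static uint32_t fpt_find_offset(const uint8_t *buf, const char name[4])
{
	const struct fpt_header *hdr = (const void *)buf;
	const struct fpt_entry *e;
	uint32_t i;

	if (hdr->marker != 0x54504624)	/* "$FPT" */
		return 0;

	e = (const void *)(buf + hdr->header_length);
	for (i = 0; i < hdr->num_entries; i++)
		if (!memcmp(e[i].name, name, 4))
			return e[i].offset;
	return 0;
}
```

The same pattern repeats once more for the CPD entry table ("LTES.man") to reach the manifest header carrying the firmware version.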
+3
drivers/gpu/drm/xe/xe_uc_fw_types.h
··· 147 147 148 148 /** @private_data_size: size of private data found in uC css header */ 149 149 u32 private_data_size; 150 + 151 + /** @build_type: Firmware build type (see CSS_UKERNEL_INFO_BUILDTYPE for definitions) */ 152 + u32 build_type; 150 153 }; 151 154 152 155 #endif
+319
drivers/gpu/drm/xe/xe_userptr.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include "xe_userptr.h" 7 + 8 + #include <linux/mm.h> 9 + 10 + #include "xe_trace_bo.h" 11 + 12 + /** 13 + * xe_vma_userptr_check_repin() - Advisory check for repin needed 14 + * @uvma: The userptr vma 15 + * 16 + * Check if the userptr vma has been invalidated since last successful 17 + * repin. The check is advisory only and the function can be called 18 + * without the vm->svm.gpusvm.notifier_lock held. There is no guarantee that the 19 + * vma userptr will remain valid after a lockless check, so typically 20 + * the call needs to be followed by a proper check under the notifier_lock. 21 + * 22 + * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. 23 + */ 24 + int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) 25 + { 26 + return mmu_interval_check_retry(&uvma->userptr.notifier, 27 + uvma->userptr.pages.notifier_seq) ? 28 + -EAGAIN : 0; 29 + } 30 + 31 + /** 32 + * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs 33 + * that need repinning. 34 + * @vm: The VM. 35 + * 36 + * This function checks whether the VM has userptrs that need repinning, 37 + * and provides a release-type barrier on the svm.gpusvm.notifier_lock after 38 + * checking. 39 + * 40 + * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. 41 + */ 42 + int __xe_vm_userptr_needs_repin(struct xe_vm *vm) 43 + { 44 + lockdep_assert_held_read(&vm->svm.gpusvm.notifier_lock); 45 + 46 + return (list_empty(&vm->userptr.repin_list) && 47 + list_empty(&vm->userptr.invalidated)) ?
0 : -EAGAIN; 48 + } 49 + 50 + int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 51 + { 52 + struct xe_vma *vma = &uvma->vma; 53 + struct xe_vm *vm = xe_vma_vm(vma); 54 + struct xe_device *xe = vm->xe; 55 + struct drm_gpusvm_ctx ctx = { 56 + .read_only = xe_vma_read_only(vma), 57 + }; 58 + 59 + lockdep_assert_held(&vm->lock); 60 + xe_assert(xe, xe_vma_is_userptr(vma)); 61 + 62 + if (vma->gpuva.flags & XE_VMA_DESTROYED) 63 + return 0; 64 + 65 + return drm_gpusvm_get_pages(&vm->svm.gpusvm, &uvma->userptr.pages, 66 + uvma->userptr.notifier.mm, 67 + &uvma->userptr.notifier, 68 + xe_vma_userptr(vma), 69 + xe_vma_userptr(vma) + xe_vma_size(vma), 70 + &ctx); 71 + } 72 + 73 + static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) 74 + { 75 + struct xe_userptr *userptr = &uvma->userptr; 76 + struct xe_vma *vma = &uvma->vma; 77 + struct dma_resv_iter cursor; 78 + struct dma_fence *fence; 79 + struct drm_gpusvm_ctx ctx = { 80 + .in_notifier = true, 81 + .read_only = xe_vma_read_only(vma), 82 + }; 83 + long err; 84 + 85 + /* 86 + * Tell exec and rebind worker they need to repin and rebind this 87 + * userptr. 88 + */ 89 + if (!xe_vm_in_fault_mode(vm) && 90 + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { 91 + spin_lock(&vm->userptr.invalidated_lock); 92 + list_move_tail(&userptr->invalidate_link, 93 + &vm->userptr.invalidated); 94 + spin_unlock(&vm->userptr.invalidated_lock); 95 + } 96 + 97 + /* 98 + * Preempt fences turn into schedule disables, pipeline these. 99 + * Note that even in fault mode, we need to wait for binds and 100 + * unbinds to complete, and those are attached as BOOKKEEP fences 101 + * to the vm.
102 + */ 103 + dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 104 + DMA_RESV_USAGE_BOOKKEEP); 105 + dma_resv_for_each_fence_unlocked(&cursor, fence) 106 + dma_fence_enable_sw_signaling(fence); 107 + dma_resv_iter_end(&cursor); 108 + 109 + err = dma_resv_wait_timeout(xe_vm_resv(vm), 110 + DMA_RESV_USAGE_BOOKKEEP, 111 + false, MAX_SCHEDULE_TIMEOUT); 112 + XE_WARN_ON(err <= 0); 113 + 114 + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { 115 + err = xe_vm_invalidate_vma(vma); 116 + XE_WARN_ON(err); 117 + } 118 + 119 + drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages, 120 + xe_vma_size(vma) >> PAGE_SHIFT, &ctx); 121 + } 122 + 123 + static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, 124 + const struct mmu_notifier_range *range, 125 + unsigned long cur_seq) 126 + { 127 + struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); 128 + struct xe_vma *vma = &uvma->vma; 129 + struct xe_vm *vm = xe_vma_vm(vma); 130 + 131 + xe_assert(vm->xe, xe_vma_is_userptr(vma)); 132 + trace_xe_vma_userptr_invalidate(vma); 133 + 134 + if (!mmu_notifier_range_blockable(range)) 135 + return false; 136 + 137 + vm_dbg(&xe_vma_vm(vma)->xe->drm, 138 + "NOTIFIER: addr=0x%016llx, range=0x%016llx", 139 + xe_vma_start(vma), xe_vma_size(vma)); 140 + 141 + down_write(&vm->svm.gpusvm.notifier_lock); 142 + mmu_interval_set_seq(mni, cur_seq); 143 + 144 + __vma_userptr_invalidate(vm, uvma); 145 + up_write(&vm->svm.gpusvm.notifier_lock); 146 + trace_xe_vma_userptr_invalidate_complete(vma); 147 + 148 + return true; 149 + } 150 + 151 + static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 152 + .invalidate = vma_userptr_invalidate, 153 + }; 154 + 155 + #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 156 + /** 157 + * xe_vma_userptr_force_invalidate() - force invalidate a userptr 158 + * @uvma: The userptr vma to invalidate 159 + * 160 + * Perform a forced userptr invalidation for testing purposes. 
161 + */ 162 + void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) 163 + { 164 + struct xe_vm *vm = xe_vma_vm(&uvma->vma); 165 + 166 + /* Protect against concurrent userptr pinning */ 167 + lockdep_assert_held(&vm->lock); 168 + /* Protect against concurrent notifiers */ 169 + lockdep_assert_held(&vm->svm.gpusvm.notifier_lock); 170 + /* 171 + * Protect against concurrent instances of this function and 172 + * the critical exec sections 173 + */ 174 + xe_vm_assert_held(vm); 175 + 176 + if (!mmu_interval_read_retry(&uvma->userptr.notifier, 177 + uvma->userptr.pages.notifier_seq)) 178 + uvma->userptr.pages.notifier_seq -= 2; 179 + __vma_userptr_invalidate(vm, uvma); 180 + } 181 + #endif 182 + 183 + int xe_vm_userptr_pin(struct xe_vm *vm) 184 + { 185 + struct xe_userptr_vma *uvma, *next; 186 + int err = 0; 187 + 188 + xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 189 + lockdep_assert_held_write(&vm->lock); 190 + 191 + /* Collect invalidated userptrs */ 192 + spin_lock(&vm->userptr.invalidated_lock); 193 + xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); 194 + list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 195 + userptr.invalidate_link) { 196 + list_del_init(&uvma->userptr.invalidate_link); 197 + list_add_tail(&uvma->userptr.repin_link, 198 + &vm->userptr.repin_list); 199 + } 200 + spin_unlock(&vm->userptr.invalidated_lock); 201 + 202 + /* Pin and move to bind list */ 203 + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 204 + userptr.repin_link) { 205 + err = xe_vma_userptr_pin_pages(uvma); 206 + if (err == -EFAULT) { 207 + list_del_init(&uvma->userptr.repin_link); 208 + /* 209 + * We might have already done the pin once, but 210 + * then had to retry before the re-bind happened, due 211 + * to some other condition in the caller, but in the 212 + * meantime the userptr got dinged by the notifier such 213 + * that we need to revalidate here, but this time we hit 214 + * the EFAULT.
In such a case make sure we remove 215 + * ourselves from the rebind list to avoid going down in 216 + * flames. 217 + */ 218 + if (!list_empty(&uvma->vma.combined_links.rebind)) 219 + list_del_init(&uvma->vma.combined_links.rebind); 220 + 221 + /* Wait for pending binds */ 222 + xe_vm_lock(vm, false); 223 + dma_resv_wait_timeout(xe_vm_resv(vm), 224 + DMA_RESV_USAGE_BOOKKEEP, 225 + false, MAX_SCHEDULE_TIMEOUT); 226 + 227 + down_read(&vm->svm.gpusvm.notifier_lock); 228 + err = xe_vm_invalidate_vma(&uvma->vma); 229 + up_read(&vm->svm.gpusvm.notifier_lock); 230 + xe_vm_unlock(vm); 231 + if (err) 232 + break; 233 + } else { 234 + if (err) 235 + break; 236 + 237 + list_del_init(&uvma->userptr.repin_link); 238 + list_move_tail(&uvma->vma.combined_links.rebind, 239 + &vm->rebind_list); 240 + } 241 + } 242 + 243 + if (err) { 244 + down_write(&vm->svm.gpusvm.notifier_lock); 245 + spin_lock(&vm->userptr.invalidated_lock); 246 + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 247 + userptr.repin_link) { 248 + list_del_init(&uvma->userptr.repin_link); 249 + list_move_tail(&uvma->userptr.invalidate_link, 250 + &vm->userptr.invalidated); 251 + } 252 + spin_unlock(&vm->userptr.invalidated_lock); 253 + up_write(&vm->svm.gpusvm.notifier_lock); 254 + } 255 + return err; 256 + } 257 + 258 + /** 259 + * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 260 + * that need repinning. 261 + * @vm: The VM. 262 + * 263 + * This function does an advisory check for whether the VM has userptrs that 264 + * need repinning. 265 + * 266 + * Return: 0 if there are no indications of userptrs needing repinning, 267 + * -EAGAIN if there are. 268 + */ 269 + int xe_vm_userptr_check_repin(struct xe_vm *vm) 270 + { 271 + return (list_empty_careful(&vm->userptr.repin_list) && 272 + list_empty_careful(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; 273 + } 274 + 275 + int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start, 276 + unsigned long range) 277 + { 278 + struct xe_userptr *userptr = &uvma->userptr; 279 + int err; 280 + 281 + INIT_LIST_HEAD(&userptr->invalidate_link); 282 + INIT_LIST_HEAD(&userptr->repin_link); 283 + 284 + err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, 285 + start, range, 286 + &vma_userptr_notifier_ops); 287 + if (err) 288 + return err; 289 + 290 + userptr->pages.notifier_seq = LONG_MAX; 291 + 292 + return 0; 293 + } 294 + 295 + void xe_userptr_remove(struct xe_userptr_vma *uvma) 296 + { 297 + struct xe_vm *vm = xe_vma_vm(&uvma->vma); 298 + struct xe_userptr *userptr = &uvma->userptr; 299 + 300 + drm_gpusvm_free_pages(&vm->svm.gpusvm, &uvma->userptr.pages, 301 + xe_vma_size(&uvma->vma) >> PAGE_SHIFT); 302 + 303 + /* 304 + * Since userptr pages are not pinned, we can't remove 305 + * the notifier until we're sure the GPU is not accessing 306 + * them anymore 307 + */ 308 + mmu_interval_notifier_remove(&userptr->notifier); 309 + } 310 + 311 + void xe_userptr_destroy(struct xe_userptr_vma *uvma) 312 + { 313 + struct xe_vm *vm = xe_vma_vm(&uvma->vma); 314 + 315 + spin_lock(&vm->userptr.invalidated_lock); 316 + xe_assert(vm->xe, list_empty(&uvma->userptr.repin_link)); 317 + list_del(&uvma->userptr.invalidate_link); 318 + spin_unlock(&vm->userptr.invalidated_lock); 319 + }
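The advisory checks above boil down to a sequence-count comparison: a snapshot taken at pin time against a counter the MMU notifier bumps on invalidation, which is what `mmu_interval_check_retry()` does against `pages.notifier_seq`. A toy single-file model of that pattern (all names here are invented for illustration; in the driver, a 0 result is advisory only and must be confirmed under the gpusvm notifier lock):

```c
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Toy userptr: the "notifier" bumps notifier_seq on invalidation,
 * the pin path snapshots it into pinned_seq. */
struct toy_userptr {
	atomic_ulong notifier_seq;	/* bumped by the notifier */
	unsigned long pinned_seq;	/* snapshot taken at pin time */
};

static void toy_pin(struct toy_userptr *u)
{
	u->pinned_seq = atomic_load(&u->notifier_seq);
}

static void toy_invalidate(struct toy_userptr *u)
{
	atomic_fetch_add(&u->notifier_seq, 1);
}

/* Advisory: may race with toy_invalidate(), so a false result must be
 * re-checked under the notifier lock before relying on it. */
static bool toy_check_repin(struct toy_userptr *u)
{
	return atomic_load(&u->notifier_seq) != u->pinned_seq;
}
```

This also shows why `xe_vma_userptr_force_invalidate()` above subtracts 2 from `notifier_seq`: desynchronizing the snapshot from the live counter is enough to make the next check report "repin needed".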
+107
drivers/gpu/drm/xe/xe_userptr.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_USERPTR_H_ 7 + #define _XE_USERPTR_H_ 8 + 9 + #include <linux/list.h> 10 + #include <linux/mutex.h> 11 + #include <linux/notifier.h> 12 + #include <linux/scatterlist.h> 13 + #include <linux/spinlock.h> 14 + 15 + #include <drm/drm_gpusvm.h> 16 + 17 + struct xe_vm; 18 + struct xe_vma; 19 + struct xe_userptr_vma; 20 + 21 + /** struct xe_userptr_vm - User pointer VM level state */ 22 + struct xe_userptr_vm { 23 + /** 24 + * @userptr.repin_list: list of VMAs which are user pointers, 25 + * and need repinning. Protected by @lock. 26 + */ 27 + struct list_head repin_list; 28 + /** 29 + * @userptr.invalidated_lock: Protects the 30 + * @userptr.invalidated list. 31 + */ 32 + spinlock_t invalidated_lock; 33 + /** 34 + * @userptr.invalidated: List of invalidated userptrs, not yet 35 + * picked 36 + * up for revalidation. Protected by the 37 + * @invalidated_lock. Removing items from the list 38 + * additionally requires @lock in write mode, and adding 39 + * items to the list requires either the @svm.gpusvm.notifier_lock in 40 + * write mode, OR @lock in write mode. 41 + */ 42 + struct list_head invalidated; 43 + }; 44 + 45 + /** struct xe_userptr - User pointer */ 46 + struct xe_userptr { 47 + /** @invalidate_link: Link for the vm::userptr.invalidated list */ 48 + struct list_head invalidate_link; 49 + /** @repin_link: link into VM repin list if userptr. */ 50 + struct list_head repin_link; 51 + /** 52 + * @pages: gpusvm pages for this user pointer. 53 + */ 54 + struct drm_gpusvm_pages pages; 55 + /** 56 + * @notifier: MMU notifier for user pointer (invalidation call back) 57 + */ 58 + struct mmu_interval_notifier notifier; 59 + 60 + /** 61 + * @initial_bind: user pointer has been bound at least once. 62 + * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held. 63 + * read: vm->svm.gpusvm.notifier_lock in write mode or vm->resv held.
64 + */ 65 + bool initial_bind; 66 + #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 67 + u32 divisor; 68 + #endif 69 + }; 70 + 71 + #if IS_ENABLED(CONFIG_DRM_GPUSVM) 72 + void xe_userptr_remove(struct xe_userptr_vma *uvma); 73 + int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start, 74 + unsigned long range); 75 + void xe_userptr_destroy(struct xe_userptr_vma *uvma); 76 + 77 + int xe_vm_userptr_pin(struct xe_vm *vm); 78 + int __xe_vm_userptr_needs_repin(struct xe_vm *vm); 79 + int xe_vm_userptr_check_repin(struct xe_vm *vm); 80 + int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); 81 + int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); 82 + #else 83 + static inline void xe_userptr_remove(struct xe_userptr_vma *uvma) {} 84 + 85 + static inline int xe_userptr_setup(struct xe_userptr_vma *uvma, 86 + unsigned long start, unsigned long range) 87 + { 88 + return -ENODEV; 89 + } 90 + 91 + static inline void xe_userptr_destroy(struct xe_userptr_vma *uvma) {} 92 + 93 + static inline int xe_vm_userptr_pin(struct xe_vm *vm) { return 0; } 94 + static inline int __xe_vm_userptr_needs_repin(struct xe_vm *vm) { return 0; } 95 + static inline int xe_vm_userptr_check_repin(struct xe_vm *vm) { return 0; } 96 + static inline int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { return -ENODEV; } 97 + static inline int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { return -ENODEV; }; 98 + #endif 99 + 100 + #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 101 + void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); 102 + #else 103 + static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) 104 + { 105 + } 106 + #endif 107 + #endif
+278
drivers/gpu/drm/xe/xe_validation.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + #include "xe_bo.h" 6 + #include <drm/drm_exec.h> 7 + #include <drm/drm_gem.h> 8 + #include <drm/drm_gpuvm.h> 9 + 10 + #include "xe_assert.h" 11 + #include "xe_validation.h" 12 + 13 + #ifdef CONFIG_DRM_XE_DEBUG 14 + /** 15 + * xe_validation_assert_exec() - Assert that the drm_exec pointer is suitable 16 + * for validation. 17 + * @xe: Pointer to the xe device. 18 + * @exec: The drm_exec pointer to check. 19 + * @obj: Pointer to the object subject to validation. 20 + * 21 + * NULL exec pointers are not allowed. 22 + * For XE_VALIDATION_UNIMPLEMENTED, no checking. 23 + * For XE_VALIDATION_OPT_OUT, check that the caller is a kunit test. 24 + * For XE_VALIDATION_UNSUPPORTED, check that the object subject to 25 + * validation is a dma-buf, for which support for ww locking is 26 + * not in place in the dma-buf layer. 27 + */ 28 + void xe_validation_assert_exec(const struct xe_device *xe, 29 + const struct drm_exec *exec, 30 + const struct drm_gem_object *obj) 31 + { 32 + xe_assert(xe, exec); 33 + if (IS_ERR(exec)) { 34 + switch (PTR_ERR(exec)) { 35 + case __XE_VAL_UNIMPLEMENTED: 36 + break; 37 + case __XE_VAL_UNSUPPORTED: 38 + xe_assert(xe, !!obj->dma_buf); 39 + break; 40 + #if IS_ENABLED(CONFIG_KUNIT) 41 + case __XE_VAL_OPT_OUT: 42 + xe_assert(xe, current->kunit_test); 43 + break; 44 + #endif 45 + default: 46 + xe_assert(xe, false); 47 + } 48 + } 49 + } 50 + #endif 51 + 52 + static int xe_validation_lock(struct xe_validation_ctx *ctx) 53 + { 54 + struct xe_validation_device *val = ctx->val; 55 + int ret = 0; 56 + 57 + if (ctx->val_flags.interruptible) { 58 + if (ctx->request_exclusive) 59 + ret = down_write_killable(&val->lock); 60 + else 61 + ret = down_read_interruptible(&val->lock); 62 + } else { 63 + if (ctx->request_exclusive) 64 + down_write(&val->lock); 65 + else 66 + down_read(&val->lock); 67 + } 68 + 69 + if (!ret) { 70 + ctx->lock_held = true; 71 +
ctx->lock_held_exclusive = ctx->request_exclusive; 72 + } 73 + 74 + return ret; 75 + } 76 + 77 + static int xe_validation_trylock(struct xe_validation_ctx *ctx) 78 + { 79 + struct xe_validation_device *val = ctx->val; 80 + bool locked; 81 + 82 + if (ctx->request_exclusive) 83 + locked = down_write_trylock(&val->lock); 84 + else 85 + locked = down_read_trylock(&val->lock); 86 + 87 + if (locked) { 88 + ctx->lock_held = true; 89 + ctx->lock_held_exclusive = ctx->request_exclusive; 90 + } 91 + 92 + return locked ? 0 : -EWOULDBLOCK; 93 + } 94 + 95 + static void xe_validation_unlock(struct xe_validation_ctx *ctx) 96 + { 97 + if (!ctx->lock_held) 98 + return; 99 + 100 + if (ctx->lock_held_exclusive) 101 + up_write(&ctx->val->lock); 102 + else 103 + up_read(&ctx->val->lock); 104 + 105 + ctx->lock_held = false; 106 + } 107 + 108 + /** 109 + * xe_validation_ctx_init() - Initialize an xe_validation_ctx 110 + * @ctx: The xe_validation_ctx to initialize. 111 + * @val: The xe_validation_device representing the validation domain. 112 + * @exec: The struct drm_exec to use for the transaction. May be NULL. 113 + * @flags: The flags to use for initialization. 114 + * 115 + * Initialize and lock an xe_validation transaction using the validation domain 116 + * represented by @val. Also initialize the drm_exec object, forwarding parts of 117 + * @flags to the drm_exec initialization. The @flags.exclusive flag should 118 + * typically be set to false to avoid locking out other validators from the 119 + * domain until an OOM is hit. For testing- or final attempt purposes it can, 120 + * however, be set to true. 121 + * 122 + * Return: %0 on success, %-EINTR if interruptible initial locking failed with a 123 + * signal pending. If @flags.no_block is set to true, a failed trylock 124 + * returns %-EWOULDBLOCK.
125 + */ 126 + int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val, 127 + struct drm_exec *exec, const struct xe_val_flags flags) 128 + { 129 + int ret; 130 + 131 + ctx->exec = exec; 132 + ctx->val = val; 133 + ctx->lock_held = false; 134 + ctx->lock_held_exclusive = false; 135 + ctx->request_exclusive = flags.exclusive; 136 + ctx->val_flags = flags; 137 + ctx->exec_flags = 0; 138 + ctx->nr = 0; 139 + 140 + if (flags.no_block) 141 + ret = xe_validation_trylock(ctx); 142 + else 143 + ret = xe_validation_lock(ctx); 144 + if (ret) 145 + return ret; 146 + 147 + if (exec) { 148 + if (flags.interruptible) 149 + ctx->exec_flags |= DRM_EXEC_INTERRUPTIBLE_WAIT; 150 + if (flags.exec_ignore_duplicates) 151 + ctx->exec_flags |= DRM_EXEC_IGNORE_DUPLICATES; 152 + drm_exec_init(exec, ctx->exec_flags, ctx->nr); 153 + } 154 + 155 + return 0; 156 + } 157 + 158 + #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH 159 + /* 160 + * This abuses both drm_exec and ww_mutex internals and should be 161 + * replaced by checking for -EDEADLK when we can make TTM 162 + * stop converting -EDEADLK to -ENOMEM. 163 + * An alternative is to not have exhaustive eviction with 164 + * CONFIG_DEBUG_WW_MUTEX_SLOWPATH until that happens. 
165 + */ 166 + static bool xe_validation_contention_injected(struct drm_exec *exec) 167 + { 168 + return !!exec->ticket.contending_lock; 169 + } 170 + 171 + #else 172 + 173 + static bool xe_validation_contention_injected(struct drm_exec *exec) 174 + { 175 + return false; 176 + } 177 + 178 + #endif 179 + 180 + static bool __xe_validation_should_retry(struct xe_validation_ctx *ctx, int ret) 181 + { 182 + if (ret == -ENOMEM && 183 + ((ctx->request_exclusive && 184 + xe_validation_contention_injected(ctx->exec)) || 185 + !ctx->request_exclusive)) { 186 + ctx->request_exclusive = true; 187 + return true; 188 + } 189 + 190 + return false; 191 + } 192 + 193 + /** 194 + * xe_validation_exec_lock() - Perform drm_gpuvm_exec_lock within a validation 195 + * transaction. 196 + * @ctx: An uninitialized xe_validation_ctx. 197 + * @vm_exec: An initialized struct drm_gpuvm_exec. 198 + * @val: The validation domain. 199 + * 200 + * The drm_gpuvm_exec_lock() function internally initializes its drm_exec 201 + * transaction and therefore doesn't lend itself very well to using 202 + * xe_validation_ctx_init(). Provide a helper that takes an uninitialized 203 + * xe_validation_ctx and calls drm_gpuvm_exec_lock() with OOM retry. 204 + * 205 + * Return: %0 on success, negative error code on failure.
206 + */ 207 + int xe_validation_exec_lock(struct xe_validation_ctx *ctx, 208 + struct drm_gpuvm_exec *vm_exec, 209 + struct xe_validation_device *val) 210 + { 211 + int ret; 212 + 213 + memset(ctx, 0, sizeof(*ctx)); 214 + ctx->exec = &vm_exec->exec; 215 + ctx->exec_flags = vm_exec->flags; 216 + ctx->val = val; 217 + if (ctx->exec_flags & DRM_EXEC_INTERRUPTIBLE_WAIT) 218 + ctx->val_flags.interruptible = 1; 219 + if (ctx->exec_flags & DRM_EXEC_IGNORE_DUPLICATES) 220 + ctx->val_flags.exec_ignore_duplicates = 1; 221 + retry: 222 + ret = xe_validation_lock(ctx); 223 + if (ret) 224 + return ret; 225 + 226 + ret = drm_gpuvm_exec_lock(vm_exec); 227 + if (ret) { 228 + xe_validation_unlock(ctx); 229 + if (__xe_validation_should_retry(ctx, ret)) 230 + goto retry; 231 + } 232 + 233 + return ret; 234 + } 235 + 236 + /** 237 + * xe_validation_ctx_fini() - Finalize a validation transaction 238 + * @ctx: The validation transaction to finalize. 239 + * 240 + * Finalize a validation transaction and its related drm_exec transaction. 241 + */ 242 + void xe_validation_ctx_fini(struct xe_validation_ctx *ctx) 243 + { 244 + if (ctx->exec) 245 + drm_exec_fini(ctx->exec); 246 + xe_validation_unlock(ctx); 247 + } 248 + 249 + /** 250 + * xe_validation_should_retry() - Determine if a validation transaction should retry 251 + * @ctx: The validation transaction. 252 + * @ret: Pointer to a return value variable. 253 + * 254 + * Determines whether a validation transaction should retry based on the 255 + * internal transaction state and the return value pointed to by @ret. 256 + * If a validation should be retried, the transaction is prepared for that, 257 + * and the validation lock might be re-locked in exclusive mode, and *@ret 258 + * is set to %0. If the re-locking fails, typically due to interruptible 259 + * locking with a signal pending, *@ret is instead set to -EINTR and the 260 + * function returns %false. 261 + * 262 + * Return: %true if validation should be retried, %false otherwise.
263 + */ 264 + bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret) 265 + { 266 + if (__xe_validation_should_retry(ctx, *ret)) { 267 + drm_exec_fini(ctx->exec); 268 + *ret = 0; 269 + if (ctx->request_exclusive != ctx->lock_held_exclusive) { 270 + xe_validation_unlock(ctx); 271 + *ret = xe_validation_lock(ctx); 272 + } 273 + drm_exec_init(ctx->exec, ctx->exec_flags, ctx->nr); 274 + return !*ret; 275 + } 276 + 277 + return false; 278 + }
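The escalation logic in `__xe_validation_should_retry()` above can be modeled without the actual locking: a first -ENOMEM seen while holding the domain lock shared promotes the next attempt to exclusive mode (so no other validator can race the eviction), and a second failure is final. A single-threaded sketch of that flow, where `alloc_fn` and the two sample callbacks are stand-ins invented for this example, not driver API:

```c
#include <assert.h>
#include <errno.h>
#include <stdbool.h>

/* Retry loop mirroring the request_exclusive escalation: shared mode
 * first, one exclusive retry on -ENOMEM, then give up. */
static int validate_with_retry(int (*alloc_fn)(bool exclusive))
{
	bool exclusive = false;
	int ret;

	for (;;) {
		/* take domain lock: read mode normally, write mode on retry */
		ret = alloc_fn(exclusive);
		/* drop domain lock */
		if (ret != -ENOMEM || exclusive)
			return ret;
		exclusive = true;	/* mirrors ctx->request_exclusive = true */
	}
}

/* Succeeds only once other validators are locked out, i.e. when
 * exhaustive eviction can run. */
static int alloc_needs_eviction(bool exclusive)
{
	return exclusive ? 0 : -ENOMEM;
}

/* Genuinely out of memory: fails even in exclusive mode. */
static int alloc_always_fails(bool exclusive)
{
	(void)exclusive;
	return -ENOMEM;
}
```

This is why `xe_validation_should_retry()` re-locks when `request_exclusive` no longer matches `lock_held_exclusive`: the retry must happen under the write-mode domain lock.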
+192
drivers/gpu/drm/xe/xe_validation.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + #ifndef _XE_VALIDATION_H_ 6 + #define _XE_VALIDATION_H_ 7 + 8 + #include <linux/dma-resv.h> 9 + #include <linux/types.h> 10 + #include <linux/rwsem.h> 11 + 12 + struct drm_exec; 13 + struct drm_gem_object; 14 + struct drm_gpuvm_exec; 15 + struct xe_device; 16 + 17 + #ifdef CONFIG_PROVE_LOCKING 18 + /** 19 + * xe_validation_lockdep() - Assert that a drm_exec locking transaction can 20 + * be initialized at this point. 21 + */ 22 + static inline void xe_validation_lockdep(void) 23 + { 24 + struct ww_acquire_ctx ticket; 25 + 26 + ww_acquire_init(&ticket, &reservation_ww_class); 27 + ww_acquire_fini(&ticket); 28 + } 29 + #else 30 + static inline void xe_validation_lockdep(void) 31 + { 32 + } 33 + #endif 34 + 35 + /* 36 + * Various values of the drm_exec pointer where we've not (yet) 37 + * implemented full ww locking. 38 + * 39 + * XE_VALIDATION_UNIMPLEMENTED means implementation is pending. 40 + * A lockdep check is made to assure that a drm_exec locking 41 + * transaction can actually take place where the macro is 42 + * used. If this asserts, the exec pointer needs to be assigned 43 + * higher up in the callchain and passed down. 44 + * 45 + * XE_VALIDATION_UNSUPPORTED is for dma-buf code only where 46 + * the dma-buf layer doesn't support WW locking. 47 + * 48 + * XE_VALIDATION_OPT_OUT is for simplification of kunit tests where 49 + * exhaustive eviction isn't necessary. 
50 + */ 51 + #define __XE_VAL_UNIMPLEMENTED -EINVAL 52 + #define XE_VALIDATION_UNIMPLEMENTED (xe_validation_lockdep(), \ 53 + (struct drm_exec *)ERR_PTR(__XE_VAL_UNIMPLEMENTED)) 54 + 55 + #define __XE_VAL_UNSUPPORTED -EOPNOTSUPP 56 + #define XE_VALIDATION_UNSUPPORTED ((struct drm_exec *)ERR_PTR(__XE_VAL_UNSUPPORTED)) 57 + 58 + #define __XE_VAL_OPT_OUT -ENOMEM 59 + #define XE_VALIDATION_OPT_OUT (xe_validation_lockdep(), \ 60 + (struct drm_exec *)ERR_PTR(__XE_VAL_OPT_OUT)) 61 + #ifdef CONFIG_DRM_XE_DEBUG 62 + void xe_validation_assert_exec(const struct xe_device *xe, const struct drm_exec *exec, 63 + const struct drm_gem_object *obj); 64 + #else 65 + #define xe_validation_assert_exec(_xe, _exec, _obj) \ 66 + do { \ 67 + (void)_xe; (void)_exec; (void)_obj; \ 68 + } while (0) 69 + #endif 70 + 71 + /** 72 + * struct xe_validation_device - The domain for exhaustive eviction 73 + * @lock: The lock used to exclude other processes from allocating graphics memory 74 + * 75 + * The struct xe_validation_device represents the domain for which we want to use 76 + * exhaustive eviction. The @lock is typically grabbed in read mode for allocations 77 + * but when graphics memory allocation fails, it is retried with the write mode held. 78 + */ 79 + struct xe_validation_device { 80 + struct rw_semaphore lock; 81 + }; 82 + 83 + /** 84 + * struct xe_val_flags - Flags for xe_validation_ctx_init(). 85 + * @exclusive: Start the validation transaction by locking out all other validators. 86 + * @no_block: Don't block on initialization. 87 + * @interruptible: Block interruptible if blocking. Implies initializing the drm_exec 88 + * context with the DRM_EXEC_INTERRUPTIBLE_WAIT flag. 89 + * @exec_ignore_duplicates: Initialize the drm_exec context with the 90 + * DRM_EXEC_IGNORE_DUPLICATES flag. 
91 + */ 92 + struct xe_val_flags { 93 + u32 exclusive :1; 94 + u32 no_block :1; 95 + u32 interruptible :1; 96 + u32 exec_ignore_duplicates :1; 97 + }; 98 + 99 + /** 100 + * struct xe_validation_ctx - A struct drm_exec subclass with support for 101 + * exhaustive eviction 102 + * @exec: The drm_exec object base class. Note that we use a pointer instead of 103 + * embedding to avoid diamond inheritance. 104 + * @val: The exhaustive eviction domain. 105 + * @val_flags: Copy of the struct xe_val_flags passed to xe_validation_ctx_init. 106 + * @lock_held: Whether the domain lock is currently held. 107 + * @lock_held_exclusive: Whether the domain lock is held in exclusive mode. 108 + * @request_exclusive: Whether to lock exclusively (write mode) the next time 109 + * the domain lock is locked. 110 + * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization. 111 + * @nr: The drm_exec nr parameter used for drm_exec (re-)initialization. 112 + */ 113 + struct xe_validation_ctx { 114 + struct drm_exec *exec; 115 + struct xe_validation_device *val; 116 + struct xe_val_flags val_flags; 117 + bool lock_held; 118 + bool lock_held_exclusive; 119 + bool request_exclusive; 120 + u32 exec_flags; 121 + unsigned int nr; 122 + }; 123 + 124 + int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val, 125 + struct drm_exec *exec, const struct xe_val_flags flags); 126 + 127 + int xe_validation_exec_lock(struct xe_validation_ctx *ctx, struct drm_gpuvm_exec *vm_exec, 128 + struct xe_validation_device *val); 129 + 130 + void xe_validation_ctx_fini(struct xe_validation_ctx *ctx); 131 + 132 + bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret); 133 + 134 + /** 135 + * xe_validation_retry_on_oom() - Retry on oom in an xe_validation transaction 136 + * @_ctx: Pointer to the xe_validation_ctx 137 + * @_ret: The current error value possibly holding -ENOMEM 138 + * 139 + * Use this in a way similar to drm_exec_retry_on_contention().
140 + * If @_ret contains -ENOMEM the transaction is restarted once in a way that 141 + * blocks other transactions and allows exhaustive eviction. If the transaction 142 + * was already restarted once, just return the -ENOMEM. May also set 143 + * _ret to -EINTR if not retrying and waits are interruptible. 144 + * May only be used within a drm_exec_until_all_locked() loop. 145 + */ 146 + #define xe_validation_retry_on_oom(_ctx, _ret) \ 147 + do { \ 148 + if (xe_validation_should_retry(_ctx, _ret)) \ 149 + goto *__drm_exec_retry_ptr; \ 150 + } while (0) 151 + 152 + /** 153 + * xe_validation_device_init - Initialize a struct xe_validation_device 154 + * @val: The xe_validation_device to init. 155 + */ 156 + static inline void 157 + xe_validation_device_init(struct xe_validation_device *val) 158 + { 159 + init_rwsem(&val->lock); 160 + } 161 + 162 + /* 163 + * Make guard() and scoped_guard() work with xe_validation_ctx 164 + * so that we can exit transactions without caring about the 165 + * cleanup. 166 + */ 167 + DEFINE_CLASS(xe_validation, struct xe_validation_ctx *, 168 + if (_T) xe_validation_ctx_fini(_T);, 169 + ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); 170 + _ret ? NULL : _ctx; }), 171 + struct xe_validation_ctx *_ctx, struct xe_validation_device *_val, 172 + struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret); 173 + static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) 174 + {return *_T; } 175 + #define class_xe_validation_is_conditional true 176 + 177 + /** 178 + * xe_validation_guard() - An auto-cleanup xe_validation_ctx transaction 179 + * @_ctx: The xe_validation_ctx. 180 + * @_val: The xe_validation_device. 181 + * @_exec: The struct drm_exec object 182 + * @_flags: Flags for the xe_validation_ctx initialization. 183 + * @_ret: Return in / out parameter. May be set by this macro. Typically 0 when called.
184 + * 185 + * This macro will initiate a drm_exec transaction with additional support for 186 + * exhaustive eviction. 187 + */ 188 + #define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \ 189 + scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \ 190 + drm_exec_until_all_locked(_exec) 191 + 192 + #endif
+184 -425
drivers/gpu/drm/xe/xe_vm.c
··· 41 41 #include "xe_tlb_inval.h" 42 42 #include "xe_trace_bo.h" 43 43 #include "xe_wa.h" 44 - #include "xe_hmm.h" 45 44 46 45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) 47 46 { ··· 48 49 } 49 50 50 51 /** 51 - * xe_vma_userptr_check_repin() - Advisory check for repin needed 52 - * @uvma: The userptr vma 52 + * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction 53 + * @vm: The vm whose resv is to be locked. 54 + * @exec: The drm_exec transaction. 53 55 * 54 - * Check if the userptr vma has been invalidated since last successful 55 - * repin. The check is advisory only and can the function can be called 56 - * without the vm->userptr.notifier_lock held. There is no guarantee that the 57 - * vma userptr will remain valid after a lockless check, so typically 58 - * the call needs to be followed by a proper check under the notifier_lock. 56 + * Helper to lock the vm's resv as part of a drm_exec transaction. 59 57 * 60 - * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. 58 + * Return: %0 on success. See drm_exec_lock_obj() for error codes. 61 59 */ 62 - int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) 60 + int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) 63 61 { 64 - return mmu_interval_check_retry(&uvma->userptr.notifier, 65 - uvma->userptr.notifier_seq) ? 
66 - -EAGAIN : 0; 67 - } 68 - 69 - int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) 70 - { 71 - struct xe_vma *vma = &uvma->vma; 72 - struct xe_vm *vm = xe_vma_vm(vma); 73 - struct xe_device *xe = vm->xe; 74 - 75 - lockdep_assert_held(&vm->lock); 76 - xe_assert(xe, xe_vma_is_userptr(vma)); 77 - 78 - return xe_hmm_userptr_populate_range(uvma, false); 62 + return drm_exec_lock_obj(exec, xe_vm_obj(vm)); 79 63 } 80 64 81 65 static bool preempt_fences_waiting(struct xe_vm *vm) ··· 210 228 .num_fences = 1, 211 229 }; 212 230 struct drm_exec *exec = &vm_exec.exec; 231 + struct xe_validation_ctx ctx; 213 232 struct dma_fence *pfence; 214 233 int err; 215 234 bool wait; ··· 218 235 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); 219 236 220 237 down_write(&vm->lock); 221 - err = drm_gpuvm_exec_lock(&vm_exec); 238 + err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); 222 239 if (err) 223 240 goto out_up_write; 224 241 225 242 pfence = xe_preempt_fence_create(q, q->lr.context, 226 243 ++q->lr.seqno); 227 - if (!pfence) { 228 - err = -ENOMEM; 244 + if (IS_ERR(pfence)) { 245 + err = PTR_ERR(pfence); 229 246 goto out_fini; 230 247 } 231 248 ··· 233 250 ++vm->preempt.num_exec_queues; 234 251 q->lr.pfence = pfence; 235 252 236 - down_read(&vm->userptr.notifier_lock); 253 + xe_svm_notifier_lock(vm); 237 254 238 255 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, 239 256 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); ··· 247 264 if (wait) 248 265 dma_fence_enable_sw_signaling(pfence); 249 266 250 - up_read(&vm->userptr.notifier_lock); 267 + xe_svm_notifier_unlock(vm); 251 268 252 269 out_fini: 253 - drm_exec_fini(exec); 270 + xe_validation_ctx_fini(&ctx); 254 271 out_up_write: 255 272 up_write(&vm->lock); 256 273 ··· 281 298 q->lr.pfence = NULL; 282 299 } 283 300 up_write(&vm->lock); 284 - } 285 - 286 - /** 287 - * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs 288 - * that need repinning. 289 - * @vm: The VM. 
290 - * 291 - * This function checks for whether the VM has userptrs that need repinning, 292 - * and provides a release-type barrier on the userptr.notifier_lock after 293 - * checking. 294 - * 295 - * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. 296 - */ 297 - int __xe_vm_userptr_needs_repin(struct xe_vm *vm) 298 - { 299 - lockdep_assert_held_read(&vm->userptr.notifier_lock); 300 - 301 - return (list_empty(&vm->userptr.repin_list) && 302 - list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 303 301 } 304 302 305 303 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 ··· 314 350 /* TODO: Inform user the VM is banned */ 315 351 } 316 352 317 - /** 318 - * xe_vm_validate_should_retry() - Whether to retry after a validate error. 319 - * @exec: The drm_exec object used for locking before validation. 320 - * @err: The error returned from ttm_bo_validate(). 321 - * @end: A ktime_t cookie that should be set to 0 before first use and 322 - * that should be reused on subsequent calls. 323 - * 324 - * With multiple active VMs, under memory pressure, it is possible that 325 - * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. 326 - * Until ttm properly handles locking in such scenarios, best thing the 327 - * driver can do is retry with a timeout. Check if that is necessary, and 328 - * if so unlock the drm_exec's objects while keeping the ticket to prepare 329 - * for a rerun. 330 - * 331 - * Return: true if a retry after drm_exec_init() is recommended; 332 - * false otherwise. 333 - */ 334 - bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end) 335 - { 336 - ktime_t cur; 337 - 338 - if (err != -ENOMEM) 339 - return false; 340 - 341 - cur = ktime_get(); 342 - *end = *end ? 
: ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); 343 - if (!ktime_before(cur, *end)) 344 - return false; 345 - 346 - msleep(20); 347 - return true; 348 - } 349 - 350 353 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) 351 354 { 352 355 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); ··· 328 397 if (!try_wait_for_completion(&vm->xe->pm_block)) 329 398 return -EAGAIN; 330 399 331 - ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); 400 + ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec); 332 401 if (ret) 333 402 return ret; 334 403 ··· 444 513 static void preempt_rebind_work_func(struct work_struct *w) 445 514 { 446 515 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); 516 + struct xe_validation_ctx ctx; 447 517 struct drm_exec exec; 448 518 unsigned int fence_count = 0; 449 519 LIST_HEAD(preempt_fences); 450 - ktime_t end = 0; 451 520 int err = 0; 452 521 long wait; 453 522 int __maybe_unused tries = 0; ··· 475 544 goto out_unlock_outer; 476 545 } 477 546 478 - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 547 + err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, 548 + (struct xe_val_flags) {.interruptible = true}); 549 + if (err) 550 + goto out_unlock_outer; 479 551 480 552 drm_exec_until_all_locked(&exec) { 481 553 bool done = false; 482 554 483 555 err = xe_preempt_work_begin(&exec, vm, &done); 484 556 drm_exec_retry_on_contention(&exec); 557 + xe_validation_retry_on_oom(&ctx, &err); 485 558 if (err || done) { 486 - drm_exec_fini(&exec); 487 - if (err && xe_vm_validate_should_retry(&exec, err, &end)) 488 - err = -EAGAIN; 489 - 559 + xe_validation_ctx_fini(&ctx); 490 560 goto out_unlock_outer; 491 561 } 492 562 } ··· 496 564 if (err) 497 565 goto out_unlock; 498 566 567 + xe_vm_set_validation_exec(vm, &exec); 499 568 err = xe_vm_rebind(vm, true); 569 + xe_vm_set_validation_exec(vm, NULL); 500 570 if (err) 501 571 goto out_unlock; 502 572 ··· 516 582 (!(__tries)++ || 
__xe_vm_userptr_needs_repin(__vm)) : \ 517 583 __xe_vm_userptr_needs_repin(__vm)) 518 584 519 - down_read(&vm->userptr.notifier_lock); 585 + xe_svm_notifier_lock(vm); 520 586 if (retry_required(tries, vm)) { 521 - up_read(&vm->userptr.notifier_lock); 587 + xe_svm_notifier_unlock(vm); 522 588 err = -EAGAIN; 523 589 goto out_unlock; 524 590 } ··· 532 598 /* Point of no return. */ 533 599 arm_preempt_fences(vm, &preempt_fences); 534 600 resume_and_reinstall_preempt_fences(vm, &exec); 535 - up_read(&vm->userptr.notifier_lock); 601 + xe_svm_notifier_unlock(vm); 536 602 537 603 out_unlock: 538 - drm_exec_fini(&exec); 604 + xe_validation_ctx_fini(&ctx); 539 605 out_unlock_outer: 540 606 if (err == -EAGAIN) { 541 607 trace_xe_vm_rebind_worker_retry(vm); ··· 551 617 free_preempt_fences(&preempt_fences); 552 618 553 619 trace_xe_vm_rebind_worker_exit(vm); 554 - } 555 - 556 - static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) 557 - { 558 - struct xe_userptr *userptr = &uvma->userptr; 559 - struct xe_vma *vma = &uvma->vma; 560 - struct dma_resv_iter cursor; 561 - struct dma_fence *fence; 562 - long err; 563 - 564 - /* 565 - * Tell exec and rebind worker they need to repin and rebind this 566 - * userptr. 567 - */ 568 - if (!xe_vm_in_fault_mode(vm) && 569 - !(vma->gpuva.flags & XE_VMA_DESTROYED)) { 570 - spin_lock(&vm->userptr.invalidated_lock); 571 - list_move_tail(&userptr->invalidate_link, 572 - &vm->userptr.invalidated); 573 - spin_unlock(&vm->userptr.invalidated_lock); 574 - } 575 - 576 - /* 577 - * Preempt fences turn into schedule disables, pipeline these. 578 - * Note that even in fault mode, we need to wait for binds and 579 - * unbinds to complete, and those are attached as BOOKMARK fences 580 - * to the vm. 
581 - */ 582 - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), 583 - DMA_RESV_USAGE_BOOKKEEP); 584 - dma_resv_for_each_fence_unlocked(&cursor, fence) 585 - dma_fence_enable_sw_signaling(fence); 586 - dma_resv_iter_end(&cursor); 587 - 588 - err = dma_resv_wait_timeout(xe_vm_resv(vm), 589 - DMA_RESV_USAGE_BOOKKEEP, 590 - false, MAX_SCHEDULE_TIMEOUT); 591 - XE_WARN_ON(err <= 0); 592 - 593 - if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { 594 - err = xe_vm_invalidate_vma(vma); 595 - XE_WARN_ON(err); 596 - } 597 - 598 - xe_hmm_userptr_unmap(uvma); 599 - } 600 - 601 - static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, 602 - const struct mmu_notifier_range *range, 603 - unsigned long cur_seq) 604 - { 605 - struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); 606 - struct xe_vma *vma = &uvma->vma; 607 - struct xe_vm *vm = xe_vma_vm(vma); 608 - 609 - xe_assert(vm->xe, xe_vma_is_userptr(vma)); 610 - trace_xe_vma_userptr_invalidate(vma); 611 - 612 - if (!mmu_notifier_range_blockable(range)) 613 - return false; 614 - 615 - vm_dbg(&xe_vma_vm(vma)->xe->drm, 616 - "NOTIFIER: addr=0x%016llx, range=0x%016llx", 617 - xe_vma_start(vma), xe_vma_size(vma)); 618 - 619 - down_write(&vm->userptr.notifier_lock); 620 - mmu_interval_set_seq(mni, cur_seq); 621 - 622 - __vma_userptr_invalidate(vm, uvma); 623 - up_write(&vm->userptr.notifier_lock); 624 - trace_xe_vma_userptr_invalidate_complete(vma); 625 - 626 - return true; 627 - } 628 - 629 - static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { 630 - .invalidate = vma_userptr_invalidate, 631 - }; 632 - 633 - #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 634 - /** 635 - * xe_vma_userptr_force_invalidate() - force invalidate a userptr 636 - * @uvma: The userptr vma to invalidate 637 - * 638 - * Perform a forced userptr invalidation for testing purposes. 
639 - */ 640 - void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) 641 - { 642 - struct xe_vm *vm = xe_vma_vm(&uvma->vma); 643 - 644 - /* Protect against concurrent userptr pinning */ 645 - lockdep_assert_held(&vm->lock); 646 - /* Protect against concurrent notifiers */ 647 - lockdep_assert_held(&vm->userptr.notifier_lock); 648 - /* 649 - * Protect against concurrent instances of this function and 650 - * the critical exec sections 651 - */ 652 - xe_vm_assert_held(vm); 653 - 654 - if (!mmu_interval_read_retry(&uvma->userptr.notifier, 655 - uvma->userptr.notifier_seq)) 656 - uvma->userptr.notifier_seq -= 2; 657 - __vma_userptr_invalidate(vm, uvma); 658 - } 659 - #endif 660 - 661 - int xe_vm_userptr_pin(struct xe_vm *vm) 662 - { 663 - struct xe_userptr_vma *uvma, *next; 664 - int err = 0; 665 - 666 - xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); 667 - lockdep_assert_held_write(&vm->lock); 668 - 669 - /* Collect invalidated userptrs */ 670 - spin_lock(&vm->userptr.invalidated_lock); 671 - xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); 672 - list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, 673 - userptr.invalidate_link) { 674 - list_del_init(&uvma->userptr.invalidate_link); 675 - list_add_tail(&uvma->userptr.repin_link, 676 - &vm->userptr.repin_list); 677 - } 678 - spin_unlock(&vm->userptr.invalidated_lock); 679 - 680 - /* Pin and move to bind list */ 681 - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 682 - userptr.repin_link) { 683 - err = xe_vma_userptr_pin_pages(uvma); 684 - if (err == -EFAULT) { 685 - list_del_init(&uvma->userptr.repin_link); 686 - /* 687 - * We might have already done the pin once already, but 688 - * then had to retry before the re-bind happened, due 689 - * some other condition in the caller, but in the 690 - * meantime the userptr got dinged by the notifier such 691 - * that we need to revalidate here, but this time we hit 692 - * the EFAULT. 
In such a case make sure we remove 693 - * ourselves from the rebind list to avoid going down in 694 - * flames. 695 - */ 696 - if (!list_empty(&uvma->vma.combined_links.rebind)) 697 - list_del_init(&uvma->vma.combined_links.rebind); 698 - 699 - /* Wait for pending binds */ 700 - xe_vm_lock(vm, false); 701 - dma_resv_wait_timeout(xe_vm_resv(vm), 702 - DMA_RESV_USAGE_BOOKKEEP, 703 - false, MAX_SCHEDULE_TIMEOUT); 704 - 705 - down_read(&vm->userptr.notifier_lock); 706 - err = xe_vm_invalidate_vma(&uvma->vma); 707 - up_read(&vm->userptr.notifier_lock); 708 - xe_vm_unlock(vm); 709 - if (err) 710 - break; 711 - } else { 712 - if (err) 713 - break; 714 - 715 - list_del_init(&uvma->userptr.repin_link); 716 - list_move_tail(&uvma->vma.combined_links.rebind, 717 - &vm->rebind_list); 718 - } 719 - } 720 - 721 - if (err) { 722 - down_write(&vm->userptr.notifier_lock); 723 - spin_lock(&vm->userptr.invalidated_lock); 724 - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, 725 - userptr.repin_link) { 726 - list_del_init(&uvma->userptr.repin_link); 727 - list_move_tail(&uvma->userptr.invalidate_link, 728 - &vm->userptr.invalidated); 729 - } 730 - spin_unlock(&vm->userptr.invalidated_lock); 731 - up_write(&vm->userptr.notifier_lock); 732 - } 733 - return err; 734 - } 735 - 736 - /** 737 - * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs 738 - * that need repinning. 739 - * @vm: The VM. 740 - * 741 - * This function does an advisory check for whether the VM has userptrs that 742 - * need repinning. 743 - * 744 - * Return: 0 if there are no indications of userptrs needing repinning, 745 - * -EAGAIN if there are. 746 - */ 747 - int xe_vm_userptr_check_repin(struct xe_vm *vm) 748 - { 749 - return (list_empty_careful(&vm->userptr.repin_list) && 750 - list_empty_careful(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; 751 620 } 752 621 753 622 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) ··· 1017 1280 drm_gpuvm_bo_put(vm_bo); 1018 1281 } else /* userptr or null */ { 1019 1282 if (!is_null && !is_cpu_addr_mirror) { 1020 - struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; 1283 + struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1021 1284 u64 size = end - start + 1; 1022 1285 int err; 1023 1286 1024 - INIT_LIST_HEAD(&userptr->invalidate_link); 1025 - INIT_LIST_HEAD(&userptr->repin_link); 1026 1287 vma->gpuva.gem.offset = bo_offset_or_userptr; 1027 - mutex_init(&userptr->unmap_mutex); 1028 1288 1029 - err = mmu_interval_notifier_insert(&userptr->notifier, 1030 - current->mm, 1031 - xe_vma_userptr(vma), size, 1032 - &vma_userptr_notifier_ops); 1289 + err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); 1033 1290 if (err) { 1034 1291 xe_vma_free(vma); 1035 1292 return ERR_PTR(err); 1036 1293 } 1037 - 1038 - userptr->notifier_seq = LONG_MAX; 1039 1294 } 1040 1295 1041 1296 xe_vm_get(vm); ··· 1047 1318 1048 1319 if (xe_vma_is_userptr(vma)) { 1049 1320 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 1050 - struct xe_userptr *userptr = &uvma->userptr; 1051 1321 1052 - if (userptr->sg) 1053 - xe_hmm_userptr_free_sg(uvma); 1054 - 1055 - /* 1056 - * Since userptr pages are not pinned, we can't remove 1057 - * the notifier until we're sure the GPU is not accessing 1058 - * them anymore 1059 - */ 1060 - mmu_interval_notifier_remove(&userptr->notifier); 1061 - mutex_destroy(&userptr->unmap_mutex); 1322 + xe_userptr_remove(uvma); 1062 1323 xe_vm_put(vm); 1063 1324 } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { 1064 1325 xe_vm_put(vm); ··· 1085 1366 1086 1367 if (xe_vma_is_userptr(vma)) { 1087 1368 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); 1088 - 1089 - spin_lock(&vm->userptr.invalidated_lock); 1090 - xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link)); 1091 - 
list_del(&to_userptr_vma(vma)->userptr.invalidate_link); 1092 - spin_unlock(&vm->userptr.invalidated_lock); 1369 + xe_userptr_destroy(to_userptr_vma(vma)); 1093 1370 } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { 1094 1371 xe_bo_assert_held(xe_vma_bo(vma)); 1095 1372 ··· 1133 1418 1134 1419 static void xe_vma_destroy_unlocked(struct xe_vma *vma) 1135 1420 { 1421 + struct xe_device *xe = xe_vma_vm(vma)->xe; 1422 + struct xe_validation_ctx ctx; 1136 1423 struct drm_exec exec; 1137 - int err; 1424 + int err = 0; 1138 1425 1139 - drm_exec_init(&exec, 0, 0); 1140 - drm_exec_until_all_locked(&exec) { 1426 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { 1141 1427 err = xe_vm_lock_vma(&exec, vma); 1142 1428 drm_exec_retry_on_contention(&exec); 1143 1429 if (XE_WARN_ON(err)) 1144 1430 break; 1431 + xe_vma_destroy(vma, NULL); 1145 1432 } 1146 - 1147 - xe_vma_destroy(vma, NULL); 1148 - 1149 - drm_exec_fini(&exec); 1433 + xe_assert(xe, !err); 1150 1434 } 1151 1435 1152 1436 struct xe_vma * ··· 1370 1656 * @xe: xe device. 1371 1657 * @tile: tile to set up for. 1372 1658 * @vm: vm to set up for. 1659 + * @exec: The struct drm_exec object used to lock the vm resv. 1373 1660 * 1374 1661 * Sets up a pagetable tree with one page-table per level and a single 1375 1662 * leaf PTE. All pagetable entries point to the single page-table or, ··· 1380 1665 * Return: 0 on success, negative error code on error. 
1381 1666 */ 1382 1667 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, 1383 - struct xe_vm *vm) 1668 + struct xe_vm *vm, struct drm_exec *exec) 1384 1669 { 1385 1670 u8 id = tile->id; 1386 1671 int i; 1387 1672 1388 1673 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { 1389 - vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); 1674 + vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); 1390 1675 if (IS_ERR(vm->scratch_pt[id][i])) { 1391 1676 int err = PTR_ERR(vm->scratch_pt[id][i]); 1392 1677 1393 1678 vm->scratch_pt[id][i] = NULL; 1394 1679 return err; 1395 1680 } 1396 - 1397 1681 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); 1398 1682 } 1399 1683 ··· 1420 1706 } 1421 1707 } 1422 1708 1709 + static void xe_vm_pt_destroy(struct xe_vm *vm) 1710 + { 1711 + struct xe_tile *tile; 1712 + u8 id; 1713 + 1714 + xe_vm_assert_held(vm); 1715 + 1716 + for_each_tile(tile, vm->xe, id) { 1717 + if (vm->pt_root[id]) { 1718 + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1719 + vm->pt_root[id] = NULL; 1720 + } 1721 + } 1722 + } 1723 + 1423 1724 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) 1424 1725 { 1425 1726 struct drm_gem_object *vm_resv_obj; 1727 + struct xe_validation_ctx ctx; 1728 + struct drm_exec exec; 1426 1729 struct xe_vm *vm; 1427 1730 int err, number_tiles = 0; 1428 1731 struct xe_tile *tile; ··· 1483 1752 1484 1753 INIT_LIST_HEAD(&vm->userptr.repin_list); 1485 1754 INIT_LIST_HEAD(&vm->userptr.invalidated); 1486 - init_rwsem(&vm->userptr.notifier_lock); 1487 1755 spin_lock_init(&vm->userptr.invalidated_lock); 1488 1756 1489 1757 ttm_lru_bulk_move_init(&vm->lru_bulk_move); ··· 1509 1779 INIT_LIST_HEAD(&vm->preempt.pm_activate_link); 1510 1780 } 1511 1781 1512 - if (flags & XE_VM_FLAG_FAULT_MODE) { 1513 - err = xe_svm_init(vm); 1514 - if (err) 1515 - goto err_no_resv; 1516 - } 1782 + err = xe_svm_init(vm); 1783 + if (err) 1784 + goto err_no_resv; 1517 1785 1518 1786 vm_resv_obj = 
drm_gpuvm_resv_object_alloc(&xe->drm); 1519 1787 if (!vm_resv_obj) { ··· 1524 1796 1525 1797 drm_gem_object_put(vm_resv_obj); 1526 1798 1527 - err = xe_vm_lock(vm, true); 1528 - if (err) 1529 - goto err_close; 1799 + err = 0; 1800 + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, 1801 + err) { 1802 + err = xe_vm_drm_exec_lock(vm, &exec); 1803 + drm_exec_retry_on_contention(&exec); 1530 1804 1531 - if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1532 - vm->flags |= XE_VM_FLAG_64K; 1805 + if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1806 + vm->flags |= XE_VM_FLAG_64K; 1533 1807 1534 - for_each_tile(tile, xe, id) { 1535 - if (flags & XE_VM_FLAG_MIGRATION && 1536 - tile->id != XE_VM_FLAG_TILE_ID(flags)) 1537 - continue; 1808 + for_each_tile(tile, xe, id) { 1809 + if (flags & XE_VM_FLAG_MIGRATION && 1810 + tile->id != XE_VM_FLAG_TILE_ID(flags)) 1811 + continue; 1538 1812 1539 - vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); 1540 - if (IS_ERR(vm->pt_root[id])) { 1541 - err = PTR_ERR(vm->pt_root[id]); 1542 - vm->pt_root[id] = NULL; 1543 - goto err_unlock_close; 1813 + vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, 1814 + &exec); 1815 + if (IS_ERR(vm->pt_root[id])) { 1816 + err = PTR_ERR(vm->pt_root[id]); 1817 + vm->pt_root[id] = NULL; 1818 + xe_vm_pt_destroy(vm); 1819 + drm_exec_retry_on_contention(&exec); 1820 + xe_validation_retry_on_oom(&ctx, &err); 1821 + break; 1822 + } 1544 1823 } 1545 - } 1824 + if (err) 1825 + break; 1546 1826 1547 - if (xe_vm_has_scratch(vm)) { 1827 + if (xe_vm_has_scratch(vm)) { 1828 + for_each_tile(tile, xe, id) { 1829 + if (!vm->pt_root[id]) 1830 + continue; 1831 + 1832 + err = xe_vm_create_scratch(xe, tile, vm, &exec); 1833 + if (err) { 1834 + xe_vm_free_scratch(vm); 1835 + xe_vm_pt_destroy(vm); 1836 + drm_exec_retry_on_contention(&exec); 1837 + xe_validation_retry_on_oom(&ctx, &err); 1838 + break; 1839 + } 1840 + } 1841 + if (err) 
1842 + break; 1843 + vm->batch_invalidate_tlb = true; 1844 + } 1845 + 1846 + if (vm->flags & XE_VM_FLAG_LR_MODE) { 1847 + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); 1848 + vm->batch_invalidate_tlb = false; 1849 + } 1850 + 1851 + /* Fill pt_root after allocating scratch tables */ 1548 1852 for_each_tile(tile, xe, id) { 1549 1853 if (!vm->pt_root[id]) 1550 1854 continue; 1551 1855 1552 - err = xe_vm_create_scratch(xe, tile, vm); 1553 - if (err) 1554 - goto err_unlock_close; 1856 + xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1555 1857 } 1556 - vm->batch_invalidate_tlb = true; 1557 1858 } 1558 - 1559 - if (vm->flags & XE_VM_FLAG_LR_MODE) 1560 - vm->batch_invalidate_tlb = false; 1561 - 1562 - /* Fill pt_root after allocating scratch tables */ 1563 - for_each_tile(tile, xe, id) { 1564 - if (!vm->pt_root[id]) 1565 - continue; 1566 - 1567 - xe_pt_populate_empty(tile, vm, vm->pt_root[id]); 1568 - } 1569 - xe_vm_unlock(vm); 1859 + if (err) 1860 + goto err_close; 1570 1861 1571 1862 /* Kernel migration VM shouldn't have a circular loop.. */ 1572 1863 if (!(flags & XE_VM_FLAG_MIGRATION)) { ··· 1618 1871 &xe->usm.next_asid, GFP_KERNEL); 1619 1872 up_write(&xe->usm.lock); 1620 1873 if (err < 0) 1621 - goto err_unlock_close; 1874 + goto err_close; 1622 1875 1623 1876 vm->usm.asid = asid; 1624 1877 } ··· 1627 1880 1628 1881 return vm; 1629 1882 1630 - err_unlock_close: 1631 - xe_vm_unlock(vm); 1632 1883 err_close: 1633 1884 xe_vm_close_and_put(vm); 1634 1885 return ERR_PTR(err); ··· 1733 1988 vma = gpuva_to_vma(gpuva); 1734 1989 1735 1990 if (xe_vma_has_no_bo(vma)) { 1736 - down_read(&vm->userptr.notifier_lock); 1991 + xe_svm_notifier_lock(vm); 1737 1992 vma->gpuva.flags |= XE_VMA_DESTROYED; 1738 - up_read(&vm->userptr.notifier_lock); 1993 + xe_svm_notifier_unlock(vm); 1739 1994 } 1740 1995 1741 1996 xe_vm_remove_vma(vm, vma); ··· 1759 2014 * destroy the pagetables immediately. 
1760 2015 */ 1761 2016 xe_vm_free_scratch(vm); 1762 - 1763 - for_each_tile(tile, xe, id) { 1764 - if (vm->pt_root[id]) { 1765 - xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); 1766 - vm->pt_root[id] = NULL; 1767 - } 1768 - } 2017 + xe_vm_pt_destroy(vm); 1769 2018 xe_vm_unlock(vm); 1770 2019 1771 2020 /* ··· 1773 2034 xe_vma_destroy_unlocked(vma); 1774 2035 } 1775 2036 1776 - if (xe_vm_in_fault_mode(vm)) 1777 - xe_svm_fini(vm); 2037 + xe_svm_fini(vm); 1778 2038 1779 2039 up_write(&vm->lock); 1780 2040 ··· 2066 2328 2067 2329 err = copy_to_user(attrs_user, mem_attrs, 2068 2330 args->sizeof_mem_range_attr * args->num_mem_ranges); 2331 + if (err) 2332 + err = -EFAULT; 2069 2333 2070 2334 free_mem_attrs: 2071 2335 kvfree(mem_attrs); ··· 2116 2376 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, 2117 2377 bool post_commit) 2118 2378 { 2119 - down_read(&vm->userptr.notifier_lock); 2379 + xe_svm_notifier_lock(vm); 2120 2380 vma->gpuva.flags |= XE_VMA_DESTROYED; 2121 - up_read(&vm->userptr.notifier_lock); 2381 + xe_svm_notifier_unlock(vm); 2122 2382 if (post_commit) 2123 2383 xe_vm_remove_vma(vm, vma); 2124 2384 } ··· 2379 2639 struct xe_vma_mem_attr *attr, unsigned int flags) 2380 2640 { 2381 2641 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2642 + struct xe_validation_ctx ctx; 2382 2643 struct drm_exec exec; 2383 2644 struct xe_vma *vma; 2384 2645 int err = 0; ··· 2387 2646 lockdep_assert_held_write(&vm->lock); 2388 2647 2389 2648 if (bo) { 2390 - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 2391 - drm_exec_until_all_locked(&exec) { 2392 - err = 0; 2649 + err = 0; 2650 + xe_validation_guard(&ctx, &vm->xe->val, &exec, 2651 + (struct xe_val_flags) {.interruptible = true}, err) { 2393 2652 if (!bo->vm) { 2394 2653 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); 2395 2654 drm_exec_retry_on_contention(&exec); ··· 2398 2657 err = drm_exec_lock_obj(&exec, &bo->ttm.base); 2399 2658 drm_exec_retry_on_contention(&exec); 2400 2659 } 2401 - if (err) { 2402 - drm_exec_fini(&exec); 2660 + if (err) 2403 2661 return ERR_PTR(err); 2662 + 2663 + vma = xe_vma_create(vm, bo, op->gem.offset, 2664 + op->va.addr, op->va.addr + 2665 + op->va.range - 1, attr, flags); 2666 + if (IS_ERR(vma)) 2667 + return vma; 2668 + 2669 + if (!bo->vm) { 2670 + err = add_preempt_fences(vm, bo); 2671 + if (err) { 2672 + prep_vma_destroy(vm, vma, false); 2673 + xe_vma_destroy(vma, NULL); 2674 + } 2404 2675 } 2405 2676 } 2677 + if (err) 2678 + return ERR_PTR(err); 2679 + } else { 2680 + vma = xe_vma_create(vm, NULL, op->gem.offset, 2681 + op->va.addr, op->va.addr + 2682 + op->va.range - 1, attr, flags); 2683 + if (IS_ERR(vma)) 2684 + return vma; 2685 + 2686 + if (xe_vma_is_userptr(vma)) 2687 + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2406 2688 } 2407 - vma = xe_vma_create(vm, bo, op->gem.offset, 2408 - op->va.addr, op->va.addr + 2409 - op->va.range - 1, attr, flags); 2410 - if (IS_ERR(vma)) 2411 - goto err_unlock; 2412 - 2413 - if (xe_vma_is_userptr(vma)) 2414 - err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2415 - else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2416 - err = add_preempt_fences(vm, bo); 2417 - 2418 - err_unlock: 2419 - if (bo) 2420 - drm_exec_fini(&exec); 2421 - 2422 2689 if 
(err) { 2423 2690 prep_vma_destroy(vm, vma, false); 2424 2691 xe_vma_destroy_unlocked(vma); ··· 2770 3021 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); 2771 3022 2772 3023 if (vma) { 2773 - down_read(&vm->userptr.notifier_lock); 3024 + xe_svm_notifier_lock(vm); 2774 3025 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2775 - up_read(&vm->userptr.notifier_lock); 3026 + xe_svm_notifier_unlock(vm); 2776 3027 if (post_commit) 2777 3028 xe_vm_insert_vma(vm, vma); 2778 3029 } ··· 2791 3042 xe_vma_destroy_unlocked(op->remap.next); 2792 3043 } 2793 3044 if (vma) { 2794 - down_read(&vm->userptr.notifier_lock); 3045 + xe_svm_notifier_lock(vm); 2795 3046 vma->gpuva.flags &= ~XE_VMA_DESTROYED; 2796 - up_read(&vm->userptr.notifier_lock); 3047 + xe_svm_notifier_unlock(vm); 2797 3048 if (post_commit) 2798 3049 xe_vm_insert_vma(vm, vma); 2799 3050 } ··· 2843 3094 err = drm_exec_lock_obj(exec, &bo->ttm.base); 2844 3095 if (!err && validate) 2845 3096 err = xe_bo_validate(bo, vm, 2846 - !xe_vm_in_preempt_fence_mode(vm)); 3097 + !xe_vm_in_preempt_fence_mode(vm), exec); 2847 3098 } 2848 3099 2849 3100 return err; ··· 2961 3212 false); 2962 3213 if (!err && !xe_vma_has_no_bo(vma)) 2963 3214 err = xe_bo_migrate(xe_vma_bo(vma), 2964 - region_to_mem_type[region]); 3215 + region_to_mem_type[region], 3216 + NULL, 3217 + exec); 2965 3218 break; 2966 3219 } 2967 3220 default: ··· 3226 3475 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, 3227 3476 struct xe_vma_ops *vops) 3228 3477 { 3478 + struct xe_validation_ctx ctx; 3229 3479 struct drm_exec exec; 3230 3480 struct dma_fence *fence; 3231 - int err; 3481 + int err = 0; 3232 3482 3233 3483 lockdep_assert_held_write(&vm->lock); 3234 3484 3235 - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | 3236 - DRM_EXEC_IGNORE_DUPLICATES, 0); 3237 - drm_exec_until_all_locked(&exec) { 3485 + xe_validation_guard(&ctx, &vm->xe->val, &exec, 3486 + ((struct xe_val_flags) { 3487 + .interruptible = true, 3488 + .exec_ignore_duplicates = 
true, 3489 + }), err) { 3238 3490 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); 3239 3491 drm_exec_retry_on_contention(&exec); 3240 - if (err) { 3241 - fence = ERR_PTR(err); 3242 - goto unlock; 3243 - } 3492 + xe_validation_retry_on_oom(&ctx, &err); 3493 + if (err) 3494 + return ERR_PTR(err); 3244 3495 3496 + xe_vm_set_validation_exec(vm, &exec); 3245 3497 fence = ops_execute(vm, vops); 3498 + xe_vm_set_validation_exec(vm, NULL); 3246 3499 if (IS_ERR(fence)) { 3247 3500 if (PTR_ERR(fence) == -ENODATA) 3248 3501 vm_bind_ioctl_ops_fini(vm, vops, NULL); 3249 - goto unlock; 3502 + return fence; 3250 3503 } 3251 3504 3252 3505 vm_bind_ioctl_ops_fini(vm, vops, fence); 3253 3506 } 3254 3507 3255 - unlock: 3256 - drm_exec_fini(&exec); 3257 - return fence; 3508 + return err ? ERR_PTR(err) : fence; 3258 3509 } 3259 3510 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); 3260 3511 ··· 3372 3619 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3373 3620 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && 3374 3621 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || 3622 + XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && 3623 + !IS_ENABLED(CONFIG_DRM_GPUSVM)) || 3375 3624 XE_IOCTL_DBG(xe, obj && 3376 3625 op == DRM_XE_VM_BIND_OP_PREFETCH) || 3377 3626 XE_IOCTL_DBG(xe, prefetch_region && ··· 3809 4054 */ 3810 4055 int xe_vm_lock(struct xe_vm *vm, bool intr) 3811 4056 { 3812 - if (intr) 3813 - return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 4057 + int ret; 3814 4058 3815 - return dma_resv_lock(xe_vm_resv(vm), NULL); 4059 + if (intr) 4060 + ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); 4061 + else 4062 + ret = dma_resv_lock(xe_vm_resv(vm), NULL); 4063 + 4064 + return ret; 3816 4065 } 3817 4066 3818 4067 /** ··· 3923 4164 */ 3924 4165 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3925 4166 if (xe_vma_is_userptr(vma)) { 3926 - lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) || 3927 - (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) && 4167 + 
lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || 4168 + (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 3928 4169 lockdep_is_held(&xe_vm_resv(vm)->lock.base))); 3929 4170 3930 4171 WARN_ON_ONCE(!mmu_interval_check_retry 3931 4172 (&to_userptr_vma(vma)->userptr.notifier, 3932 - to_userptr_vma(vma)->userptr.notifier_seq)); 4173 + to_userptr_vma(vma)->userptr.pages.notifier_seq)); 3933 4174 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), 3934 4175 DMA_RESV_USAGE_BOOKKEEP)); 3935 4176
+33 -22
drivers/gpu/drm/xe/xe_vm.h
···
 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
 
-int xe_vm_userptr_pin(struct xe_vm *vm);
-
-int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
-
-int xe_vm_userptr_check_repin(struct xe_vm *vm);
-
 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
 				u8 tile_mask);
···
 }
 
-int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
-
-int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
-
-bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
-
 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
 
 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
···
  */
 #define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
 
+int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec);
+
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
 #define vm_dbg drm_dbg
 #else
···
 	if (vm && !allow_res_evict) {
 		xe_vm_assert_held(vm);
 		/* Pairs with READ_ONCE in xe_vm_is_validating() */
-		WRITE_ONCE(vm->validating, current);
+		WRITE_ONCE(vm->validation.validating, current);
 	}
 }
···
 {
 	if (vm && !allow_res_evict) {
 		/* Pairs with READ_ONCE in xe_vm_is_validating() */
-		WRITE_ONCE(vm->validating, NULL);
+		WRITE_ONCE(vm->validation.validating, NULL);
 	}
 }
···
 static inline bool xe_vm_is_validating(struct xe_vm *vm)
 {
 	/* Pairs with WRITE_ONCE in xe_vm_is_validating() */
-	if (READ_ONCE(vm->validating) == current) {
+	if (READ_ONCE(vm->validation.validating) == current) {
 		xe_vm_assert_held(vm);
 		return true;
 	}
 	return false;
+}
+
+/**
+ * xe_vm_set_validation_exec() - Accessor to set the drm_exec object
+ * @vm: The vm we want to register a drm_exec object with.
+ * @exec: The exec object we want to register.
+ *
+ * Set the drm_exec object used to lock the vm's resv.
+ */
+static inline void xe_vm_set_validation_exec(struct xe_vm *vm, struct drm_exec *exec)
+{
+	xe_vm_assert_held(vm);
+	xe_assert(vm->xe, !!exec ^ !!vm->validation._exec);
+	vm->validation._exec = exec;
+}
+
+/**
+ * xe_vm_validation_exec() - Accessor to read the drm_exec object
+ * @vm: The vm we want to read the drm_exec object from.
+ *
+ * Return: The drm_exec object used to lock the vm's resv. The value
+ * is a valid pointer, %NULL, or one of the special values defined in
+ * xe_validation.h.
+ */
+static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm)
+{
+	xe_vm_assert_held(vm);
+	return vm->validation._exec;
 }
 
 /**
···
 #define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated)	\
 	((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id))
 
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
-void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
-#else
-static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
-{
-}
-#endif
 #endif
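The `xe_vm_has_valid_gpu_mapping()` macro in this header packs the whole validity rule into one bitwise expression. A minimal userspace model of that check (the `BIT()` helper is restated for the sketch; the kernel macro additionally wraps both reads in `READ_ONCE()` for lockless readers):

```c
#include <assert.h>
#include <stdint.h>

#define BIT(n) (1u << (n))

/* Model of xe_vm_has_valid_gpu_mapping(): a tile's GPU mapping counts as
 * valid only when its bit is set in tile_present AND clear in
 * tile_invalidated. Both masks are u8 tile bitmasks as in xe_vm_types.h. */
static inline int has_valid_gpu_mapping(uint8_t tile_present,
					uint8_t tile_invalidated,
					unsigned int tile_id)
{
	return (tile_present & ~tile_invalidated) & BIT(tile_id);
}
```

A tile that was bound and later invalidated (bit set in both masks) therefore reads as unmapped until revalidation clears its `tile_invalidated` bit.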
+13 -27
drivers/gpu/drm/xe/xe_vm_madvise.c
···
 	u64 range;
 	struct xe_vma **vmas;
 	int num_vmas;
-	bool has_svm_vmas;
 	bool has_bo_vmas;
-	bool has_userptr_vmas;
+	bool has_svm_userptr_vmas;
 };
 
 static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
···
 
 		if (xe_vma_bo(vma))
 			madvise_range->has_bo_vmas = true;
-		else if (xe_vma_is_cpu_addr_mirror(vma))
-			madvise_range->has_svm_vmas = true;
-		else if (xe_vma_is_userptr(vma))
-			madvise_range->has_userptr_vmas = true;
+		else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
+			madvise_range->has_svm_userptr_vmas = true;
 
 		if (madvise_range->num_vmas == max_vmas) {
 			max_vmas <<= 1;
···
 			vmas[i]->attr.atomic_access = op->atomic.val;
 		}
 
-		vmas[i]->attr.atomic_access = op->atomic.val;
-
 		bo = xe_vma_bo(vmas[i]);
 		if (!bo || bo->attr.atomic_access == op->atomic.val)
 			continue;
···
 		if (xe_pt_zap_ptes(tile, vma)) {
 			tile_mask |= BIT(id);
 
-		/*
-		 * WRITE_ONCE pairs with READ_ONCE
-		 * in xe_vm_has_valid_gpu_mapping()
-		 */
-		WRITE_ONCE(vma->tile_invalidated,
-			   vma->tile_invalidated | BIT(id));
+			/*
+			 * WRITE_ONCE pairs with READ_ONCE
+			 * in xe_vm_has_valid_gpu_mapping()
+			 */
+			WRITE_ONCE(vma->tile_invalidated,
+				   vma->tile_invalidated | BIT(id));
 		}
 	}
 }
···
 		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
 			return false;
 
-		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
+		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
 			return false;
 		break;
 	}
···
 		}
 	}
 
-	if (madvise_range.has_userptr_vmas) {
-		err = down_read_interruptible(&vm->userptr.notifier_lock);
+	if (madvise_range.has_svm_userptr_vmas) {
+		err = xe_svm_notifier_lock_interruptible(vm);
 		if (err)
 			goto err_fini;
-	}
-
-	if (madvise_range.has_svm_vmas) {
-		err = down_read_interruptible(&vm->svm.gpusvm.notifier_lock);
-		if (err)
-			goto unlock_userptr;
 	}
 
 	attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
···
 
 	err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
 
-	if (madvise_range.has_svm_vmas)
+	if (madvise_range.has_svm_userptr_vmas)
 		xe_svm_notifier_unlock(vm);
 
-unlock_userptr:
-	if (madvise_range.has_userptr_vmas)
-		up_read(&vm->userptr.notifier_lock);
 err_fini:
 	if (madvise_range.has_bo_vmas)
 		drm_exec_fini(&exec);
+30 -70
drivers/gpu/drm/xe/xe_vm_types.h
···
 #include "xe_device_types.h"
 #include "xe_pt_types.h"
 #include "xe_range_fence.h"
+#include "xe_userptr.h"
 
 struct xe_bo;
 struct xe_svm_range;
···
 #define XE_VMA_PTE_COMPACT	(DRM_GPUVA_USERBITS << 7)
 #define XE_VMA_DUMPABLE		(DRM_GPUVA_USERBITS << 8)
 #define XE_VMA_SYSTEM_ALLOCATOR	(DRM_GPUVA_USERBITS << 9)
-
-/** struct xe_userptr - User pointer */
-struct xe_userptr {
-	/** @invalidate_link: Link for the vm::userptr.invalidated list */
-	struct list_head invalidate_link;
-	/** @userptr: link into VM repin list if userptr. */
-	struct list_head repin_link;
-	/**
-	 * @notifier: MMU notifier for user pointer (invalidation call back)
-	 */
-	struct mmu_interval_notifier notifier;
-	/** @sgt: storage for a scatter gather table */
-	struct sg_table sgt;
-	/** @sg: allocated scatter gather table */
-	struct sg_table *sg;
-	/** @notifier_seq: notifier sequence number */
-	unsigned long notifier_seq;
-	/** @unmap_mutex: Mutex protecting dma-unmapping */
-	struct mutex unmap_mutex;
-	/**
-	 * @initial_bind: user pointer has been bound at least once.
-	 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
-	 * read: vm->userptr.notifier_lock in write mode or vm->resv held.
-	 */
-	bool initial_bind;
-	/** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */
-	bool mapped;
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
-	u32 divisor;
-#endif
-};
 
 /**
  * struct xe_vma_mem_attr - memory attributes associated with vma
···
 
 	/**
 	 * @tile_invalidated: Tile mask of binding are invalidated for this VMA.
-	 * protected by BO's resv and for userptrs, vm->userptr.notifier_lock in
-	 * write mode for writing or vm->userptr.notifier_lock in read mode and
+	 * protected by BO's resv and for userptrs, vm->svm.gpusvm.notifier_lock in
+	 * write mode for writing or vm->svm.gpusvm.notifier_lock in read mode and
 	 * the vm->resv. For stable reading, BO's resv or userptr
-	 * vm->userptr.notifier_lock in read mode is required. Can be
+	 * vm->svm.gpusvm.notifier_lock in read mode is required. Can be
 	 * opportunistically read with READ_ONCE outside of locks.
 	 */
 	u8 tile_invalidated;
···
 	/**
 	 * @tile_present: Tile mask of binding are present for this VMA.
 	 * protected by vm->lock, vm->resv and for userptrs,
-	 * vm->userptr.notifier_lock for writing. Needs either for reading,
+	 * vm->svm.gpusvm.notifier_lock for writing. Needs either for reading,
 	 * but if reading is done under the vm->lock only, it needs to be held
 	 * in write mode.
 	 */
···
 	const struct xe_pt_ops *pt_ops;
 
 	/** @userptr: user pointer state */
-	struct {
-		/**
-		 * @userptr.repin_list: list of VMAs which are user pointers,
-		 * and needs repinning. Protected by @lock.
-		 */
-		struct list_head repin_list;
-		/**
-		 * @notifier_lock: protects notifier in write mode and
-		 * submission in read mode.
-		 */
-		struct rw_semaphore notifier_lock;
-		/**
-		 * @userptr.invalidated_lock: Protects the
-		 * @userptr.invalidated list.
-		 */
-		spinlock_t invalidated_lock;
-		/**
-		 * @userptr.invalidated: List of invalidated userptrs, not yet
-		 * picked up for revalidation. Protected from access with the
-		 * @invalidated_lock. Removing items from the list
-		 * additionally requires @lock in write mode, and adding
-		 * items to the list requires either the @userptr.notifier_lock in
-		 * write mode, OR @lock in write mode.
-		 */
-		struct list_head invalidated;
-	} userptr;
+	struct xe_userptr_vm userptr;
 
 	/** @preempt: preempt state */
 	struct {
···
 	} error_capture;
 
 	/**
+	 * @validation: Validation data only valid with the vm resv held.
+	 * Note: This is really task state of the task holding the vm resv,
+	 * and moving forward we should come up with a better way of passing
+	 * this down the call-chain.
+	 */
+	struct {
+		/**
+		 * @validation.validating: The task that is currently making bos
+		 * resident for this vm.
+		 * Protected by the VM's resv for writing. Opportunistic reading can be done
+		 * using READ_ONCE. Note: This is a workaround for the
+		 * TTM eviction_valuable() callback not being passed a struct
+		 * ttm_operation_context(). Future work might want to address this.
+		 */
+		struct task_struct *validating;
+		/**
+		 * @validation._exec: The drm_exec context used when locking the vm resv.
+		 * Protected by the vm's resv.
+		 */
+		struct drm_exec *_exec;
+	} validation;
+
+	/**
 	 * @tlb_flush_seqno: Required TLB flush seqno for the next exec.
 	 * protected by the vm resv.
 	 */
 	u64 tlb_flush_seqno;
-	/**
-	 * @validating: The task that is currently making bos resident for this vm.
-	 * Protected by the VM's resv for writing. Opportunistic reading can be done
-	 * using READ_ONCE. Note: This is a workaround for the
-	 * TTM eviction_valuable() callback not being passed a struct
-	 * ttm_operation_context(). Future work might want to address this.
-	 */
-	struct task_struct *validating;
 	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
 	bool batch_invalidate_tlb;
 	/** @xef: XE file handle for tracking this VM's drm client */
+50 -13
drivers/gpu/drm/xe/xe_wa.c
···
 * Register Immediate commands) once when initializing the device and saved in
 * the default context. That default context is then used on every context
 * creation to have a "primed golden context", i.e. a context image that
- * already contains the changes needed to all the registers.
+ * already contains the changes needed to all the registers. See
+ * drivers/gpu/drm/xe/xe_lrc.c for default context handling.
 *
 * - Engine workarounds: the list of these WAs is applied whenever the specific
 *   engine is reset. It's also possible that a set of engine classes share a
···
 *   them need to keep the workaround programming: the approach taken in the
 *   driver is to tie those workarounds to the first compute/render engine that
 *   is registered. When executing with GuC submission, engine resets are
- *   outside of kernel driver control, hence the list of registers involved in
+ *   outside of kernel driver control, hence the list of registers involved is
 *   written once, on engine initialization, and then passed to GuC, that
 *   saves/restores their values before/after the reset takes place. See
- *   ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ *   drivers/gpu/drm/xe/xe_guc_ads.c for reference.
 *
 * - GT workarounds: the list of these WAs is applied whenever these registers
 *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
···
 *   hardware on every HW context restore. These buffers are created and
 *   programmed in the default context so the hardware always go through those
 *   programming sequences when switching contexts. The support for workaround
- *   batchbuffers is enabled these hardware mechanisms:
+ *   batchbuffers is enabled via these hardware mechanisms:
 *
- *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
- *      context, pointing the hardware to jump to that location when that offset
- *      is reached in the context restore. Workaround batchbuffer in the driver
- *      currently uses this mechanism for all platforms.
+ *   #. INDIRECT_CTX (also known as **mid context restore bb**): A batchbuffer
+ *      and an offset are provided in the default context, pointing the hardware
+ *      to jump to that location when that offset is reached in the context
+ *      restore. When a context is being restored, this is executed after the
+ *      ring context, in the middle (or beginning) of the engine context image.
 *
- *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
- *      pointing the hardware to a buffer to continue executing after the
- *      engine registers are restored in a context restore sequence. This is
- *      currently not used in the driver.
+ *   #. BB_PER_CTX_PTR (also known as **post context restore bb**): A
+ *      batchbuffer is provided in the default context, pointing the hardware to
+ *      a buffer to continue executing after the engine registers are restored
+ *      in a context restore sequence.
+ *
+ *   Below is the timeline for a context restore sequence:
+ *
+ *   .. code::
+ *
+ *                   INDIRECT_CTX_OFFSET
+ *              |----------->|
+ *      .------------.------------.-------------.------------.--------------.-----------.
+ *      |Ring        | Engine     | Mid-context | Engine     | Post-context | Ring      |
+ *      |Restore     | Restore (1)| BB Restore  | Restore (2)| BB Restore   | Execution |
+ *      `------------'------------'-------------'------------'--------------'-----------'
 *
 * - Other/OOB: There are WAs that, due to their nature, cannot be applied from
 *   a central place. Those are peppered around the rest of the code, as needed.
- *   Workarounds related to the display IP are the main example.
+ *   There's a central place to control which workarounds are enabled:
+ *   drivers/gpu/drm/xe/xe_wa_oob.rules for GT workarounds and
+ *   drivers/gpu/drm/xe/xe_device_wa_oob.rules for device/SoC workarounds.
+ *   These files only record which workarounds are enabled: during early device
+ *   initialization those rules are evaluated and recorded by the driver. Then
+ *   later the driver checks with ``XE_GT_WA()`` and ``XE_DEVICE_WA()`` to
+ *   implement them.
 *
 * .. [1] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
···
 	  FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
 	},
+	{ XE_RTP_NAME("18041344222"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+		       FUNC(xe_rtp_match_first_render_or_compute),
+		       FUNC(xe_rtp_match_not_sriov_vf),
+		       FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)),
+	  XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE))
+	},
 
 	/* Xe2_LPM */
 
···
 	{ XE_RTP_NAME("14021402888"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
+	},
+	{ XE_RTP_NAME("18041344222"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+		       FUNC(xe_rtp_match_first_render_or_compute),
+		       FUNC(xe_rtp_match_not_sriov_vf),
+		       FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)),
+	  XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE))
 	},
 };
 
···
 					 SET(VFLSKPD,
 					     DIS_PARTIAL_AUTOSTRIP |
 					     DIS_AUTOSTRIP))
+	},
+	{ XE_RTP_NAME("22021007897"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3003), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
 	},
 };
 
-1
drivers/gpu/drm/xe/xe_wa_oob.rules
···
 16023588340	GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
 14019789679	GRAPHICS_VERSION(1255)
 		GRAPHICS_VERSION_RANGE(1270, 2004)
-no_media_l3	MEDIA_VERSION_RANGE(3000, 3002)
 14022866841	GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
 		MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
 16021333562	GRAPHICS_VERSION_RANGE(1200, 1274)
+13
drivers/misc/mei/Kconfig
···
 	  This driver can also be built as a module. If so, the module
 	  will be called mei-vsc.
 
+config INTEL_MEI_LB
+	tristate "Intel Late Binding (LB) support on ME Interface"
+	depends on INTEL_MEI_ME
+	depends on DRM_XE
+	help
+	  Enable support for Intel Late Binding (LB) via the MEI interface.
+
+	  Late Binding is a method for applying firmware updates at runtime,
+	  allowing the Intel Xe driver to load firmware payloads such as
+	  fan controller or voltage regulator. These firmware updates are
+	  authenticated and versioned, and do not require firmware flashing
+	  or system reboot.
+
 source "drivers/misc/mei/hdcp/Kconfig"
 source "drivers/misc/mei/pxp/Kconfig"
 source "drivers/misc/mei/gsc_proxy/Kconfig"
+1
drivers/misc/mei/Makefile
···
 obj-$(CONFIG_INTEL_MEI_HDCP) += hdcp/
 obj-$(CONFIG_INTEL_MEI_PXP) += pxp/
 obj-$(CONFIG_INTEL_MEI_GSC_PROXY) += gsc_proxy/
+obj-$(CONFIG_INTEL_MEI_LB) += mei_lb.o
 
 obj-$(CONFIG_INTEL_MEI_VSC_HW) += mei-vsc-hw.o
 mei-vsc-hw-y := vsc-tp.o
+13
drivers/misc/mei/bus.c
···
 EXPORT_SYMBOL_GPL(mei_cldev_ver);
 
 /**
+ * mei_cldev_mtu - max message that client can send and receive
+ *
+ * @cldev: mei client device
+ *
+ * Return: mtu or 0 if client is not connected
+ */
+size_t mei_cldev_mtu(const struct mei_cl_device *cldev)
+{
+	return mei_cl_mtu(cldev->cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_mtu);
+
+/**
  * mei_cldev_enabled - check whether the device is enabled
  *
  * @cldev: mei client device
+312
drivers/misc/mei/mei_lb.c
···
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Intel Corporation
+ */
+
+#include <linux/component.h>
+#include <linux/mei_cl_bus.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+
+#include <drm/intel/i915_component.h>
+#include <drm/intel/intel_lb_mei_interface.h>
+
+#include "mkhi.h"
+
+/**
+ * DOC: Late Binding Firmware Update/Upload
+ *
+ * Late Binding is a firmware update/upload mechanism that allows configuration
+ * payloads to be securely delivered and applied at runtime, rather than
+ * being embedded in the system firmware image (e.g., IFWI or SPI flash).
+ *
+ * This mechanism is used to update device-level configuration such as:
+ * - Fan controller
+ * - Voltage regulator (VR)
+ *
+ * Key Characteristics:
+ * --------------------
+ * - Runtime Delivery:
+ *   Firmware blobs are loaded by the host driver (e.g., Xe KMD)
+ *   after the GPU or SoC has booted.
+ *
+ * - Secure and Authenticated:
+ *   All payloads are signed and verified by the authentication firmware.
+ *
+ * - No Firmware Flashing Required:
+ *   Updates are applied in volatile memory and do not require SPI flash
+ *   modification or system reboot.
+ *
+ * - Re-entrant:
+ *   Multiple updates of the same or different types can be applied
+ *   sequentially within a single boot session.
+ *
+ * - Version Controlled:
+ *   Each payload includes version and security version number (SVN)
+ *   metadata to support anti-rollback enforcement.
+ *
+ * Upload Flow:
+ * ------------
+ * 1. Host driver (KMD or user-space tool) loads the late binding firmware.
+ * 2. Firmware is passed to the MEI interface and forwarded to
+ *    authentication firmware.
+ * 3. Authentication firmware authenticates the payload and extracts
+ *    command and data arrays.
+ * 4. Authentication firmware delivers the configuration to PUnit/PCODE.
+ * 5. Status is returned back to the host via MEI.
+ */
+
+#define INTEL_LB_CMD 0x12
+#define INTEL_LB_RSP (INTEL_LB_CMD | 0x80)
+
+#define INTEL_LB_SEND_TIMEOUT_MSEC 3000
+#define INTEL_LB_RECV_TIMEOUT_MSEC 3000
+
+/**
+ * struct mei_lb_req - Late Binding request structure
+ * @header: MKHI message header (see struct mkhi_msg_hdr)
+ * @type: Type of the Late Binding payload
+ * @flags: Flags to be passed to the authentication firmware (e.g. %INTEL_LB_FLAGS_IS_PERSISTENT)
+ * @reserved: Reserved for future use by authentication firmware, must be set to 0
+ * @payload_size: Size of the payload data in bytes
+ * @payload: Payload data to be sent to the authentication firmware
+ */
+struct mei_lb_req {
+	struct mkhi_msg_hdr header;
+	__le32 type;
+	__le32 flags;
+	__le32 reserved[2];
+	__le32 payload_size;
+	u8 payload[] __counted_by(payload_size);
+} __packed;
+
+/**
+ * struct mei_lb_rsp - Late Binding response structure
+ * @header: MKHI message header (see struct mkhi_msg_hdr)
+ * @type: Type of the Late Binding payload
+ * @reserved: Reserved for future use by authentication firmware, must be set to 0
+ * @status: Status returned by authentication firmware (see &enum intel_lb_status)
+ */
+struct mei_lb_rsp {
+	struct mkhi_msg_hdr header;
+	__le32 type;
+	__le32 reserved[2];
+	__le32 status;
+} __packed;
+
+static bool mei_lb_check_response(const struct device *dev, ssize_t bytes,
+				  struct mei_lb_rsp *rsp)
+{
+	/*
+	 * Received message size may be smaller than the full message size when
+	 * the reply contains only the MKHI header with the result field set to
+	 * the error code. Check the header size and content first to output the
+	 * exact error, if needed, and then proceed to the whole message.
+	 */
+	if (bytes < sizeof(rsp->header)) {
+		dev_err(dev, "Received less than header size from the firmware: %zd < %zu\n",
+			bytes, sizeof(rsp->header));
+		return false;
+	}
+	if (rsp->header.group_id != MKHI_GROUP_ID_GFX) {
+		dev_err(dev, "Mismatch group id: 0x%x instead of 0x%x\n",
+			rsp->header.group_id, MKHI_GROUP_ID_GFX);
+		return false;
+	}
+	if (rsp->header.command != INTEL_LB_RSP) {
+		dev_err(dev, "Mismatch command: 0x%x instead of 0x%x\n",
+			rsp->header.command, INTEL_LB_RSP);
+		return false;
+	}
+	if (rsp->header.result) {
+		dev_err(dev, "Error in result: 0x%x\n", rsp->header.result);
+		return false;
+	}
+	if (bytes < sizeof(*rsp)) {
+		dev_err(dev, "Received less than message size from the firmware: %zd < %zu\n",
+			bytes, sizeof(*rsp));
+		return false;
+	}
+
+	return true;
+}
+
+static int mei_lb_push_payload(struct device *dev,
+			       enum intel_lb_type type, u32 flags,
+			       const void *payload, size_t payload_size)
+{
+	struct mei_cl_device *cldev;
+	struct mei_lb_req *req = NULL;
+	struct mei_lb_rsp rsp;
+	size_t req_size;
+	ssize_t bytes;
+	int ret;
+
+	cldev = to_mei_cl_device(dev);
+
+	ret = mei_cldev_enable(cldev);
+	if (ret) {
+		dev_dbg(dev, "Failed to enable firmware client. %d\n", ret);
+		return ret;
+	}
+
+	req_size = struct_size(req, payload, payload_size);
+	if (req_size > mei_cldev_mtu(cldev)) {
+		dev_err(dev, "Payload is too big: %zu\n", payload_size);
+		ret = -EMSGSIZE;
+		goto end;
+	}
+
+	req = kmalloc(req_size, GFP_KERNEL);
+	if (!req) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	req->header.group_id = MKHI_GROUP_ID_GFX;
+	req->header.command = INTEL_LB_CMD;
+	req->type = cpu_to_le32(type);
+	req->flags = cpu_to_le32(flags);
+	req->reserved[0] = 0;
+	req->reserved[1] = 0;
+	req->payload_size = cpu_to_le32(payload_size);
+	memcpy(req->payload, payload, payload_size);
+
+	bytes = mei_cldev_send_timeout(cldev, (u8 *)req, req_size,
+				       INTEL_LB_SEND_TIMEOUT_MSEC);
+	if (bytes < 0) {
+		dev_err(dev, "Failed to send late binding request to firmware. %zd\n", bytes);
+		ret = bytes;
+		goto end;
+	}
+
+	bytes = mei_cldev_recv_timeout(cldev, (u8 *)&rsp, sizeof(rsp),
+				       INTEL_LB_RECV_TIMEOUT_MSEC);
+	if (bytes < 0) {
+		dev_err(dev, "Failed to receive late binding reply from MEI firmware. %zd\n",
+			bytes);
+		ret = bytes;
+		goto end;
+	}
+	if (!mei_lb_check_response(dev, bytes, &rsp)) {
+		dev_err(dev, "Bad response from the firmware. header: %02x %02x %02x %02x\n",
+			rsp.header.group_id, rsp.header.command,
+			rsp.header.reserved, rsp.header.result);
+		ret = -EPROTO;
+		goto end;
+	}
+
+	dev_dbg(dev, "status = %u\n", le32_to_cpu(rsp.status));
+	ret = (int)le32_to_cpu(rsp.status);
+end:
+	mei_cldev_disable(cldev);
+	kfree(req);
+	return ret;
+}
+
+static const struct intel_lb_component_ops mei_lb_ops = {
+	.push_payload = mei_lb_push_payload,
+};
+
+static int mei_lb_component_master_bind(struct device *dev)
+{
+	return component_bind_all(dev, (void *)&mei_lb_ops);
+}
+
+static void mei_lb_component_master_unbind(struct device *dev)
+{
+	component_unbind_all(dev, (void *)&mei_lb_ops);
+}
+
+static const struct component_master_ops mei_lb_component_master_ops = {
+	.bind = mei_lb_component_master_bind,
+	.unbind = mei_lb_component_master_unbind,
+};
+
+static int mei_lb_component_match(struct device *dev, int subcomponent,
+				  void *data)
+{
+	/*
+	 * This function checks if the requester is an Intel
+	 * %PCI_CLASS_DISPLAY_VGA or %PCI_CLASS_DISPLAY_OTHER device, and
+	 * checks if the requester is the grandparent of mei_if, i.e. the
+	 * late bind MEI device.
+	 */
+	struct device *base = data;
+	struct pci_dev *pdev;
+
+	if (!dev)
+		return 0;
+
+	if (!dev_is_pci(dev))
+		return 0;
+
+	pdev = to_pci_dev(dev);
+
+	if (pdev->vendor != PCI_VENDOR_ID_INTEL)
+		return 0;
+
+	if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8) &&
+	    pdev->class != (PCI_CLASS_DISPLAY_OTHER << 8))
+		return 0;
+
+	if (subcomponent != INTEL_COMPONENT_LB)
+		return 0;
+
+	base = base->parent;
+	if (!base) /* mei device */
+		return 0;
+
+	base = base->parent; /* pci device */
+
+	return !!base && dev == base;
+}
+
+static int mei_lb_probe(struct mei_cl_device *cldev,
+			const struct mei_cl_device_id *id)
+{
+	struct component_match *master_match = NULL;
+	int ret;
+
+	component_match_add_typed(&cldev->dev, &master_match,
+				  mei_lb_component_match, &cldev->dev);
+	if (IS_ERR_OR_NULL(master_match))
+		return -ENOMEM;
+
+	ret = component_master_add_with_match(&cldev->dev,
+					      &mei_lb_component_master_ops,
+					      master_match);
+	if (ret < 0)
+		dev_err(&cldev->dev, "Failed to add late binding master component. %d\n", ret);
+
+	return ret;
+}
+
+static void mei_lb_remove(struct mei_cl_device *cldev)
+{
+	component_master_del(&cldev->dev, &mei_lb_component_master_ops);
+}
+
+#define MEI_GUID_MKHI UUID_LE(0xe2c2afa2, 0x3817, 0x4d19, \
+			      0x9d, 0x95, 0x6, 0xb1, 0x6b, 0x58, 0x8a, 0x5d)
+
+static const struct mei_cl_device_id mei_lb_tbl[] = {
+	{ .uuid = MEI_GUID_MKHI, .version = MEI_CL_VERSION_ANY },
+	{ }
+};
+MODULE_DEVICE_TABLE(mei, mei_lb_tbl);
+
+static struct mei_cl_driver mei_lb_driver = {
+	.id_table = mei_lb_tbl,
+	.name = "mei_lb",
+	.probe = mei_lb_probe,
+	.remove = mei_lb_remove,
+};
+
+module_mei_cl_driver(mei_lb_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MEI Late Binding Firmware Update/Upload");
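The header-before-length ordering in mei_lb_check_response() is the subtle part of the file above: the firmware may legitimately reply with only a 4-byte MKHI header whose result field carries an error code, so the header is validated before the full message length is required. A standalone userspace model of that ordering (the struct layout and the 0x30 value for MKHI_GROUP_ID_GFX are assumptions of this sketch, not taken from the driver headers):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define MKHI_GROUP_ID_GFX 0x30		/* assumed value for this sketch */
#define INTEL_LB_CMD	  0x12
#define INTEL_LB_RSP	  (INTEL_LB_CMD | 0x80)

struct mkhi_msg_hdr {
	uint8_t group_id;
	uint8_t command;
	uint8_t reserved;
	uint8_t result;
};

struct lb_rsp {
	struct mkhi_msg_hdr header;
	uint32_t type;
	uint32_t reserved[2];
	uint32_t status;
};

/* Mirrors the order of checks in mei_lb_check_response(): header size,
 * header contents, and only then the full message length. */
static int lb_response_ok(const struct lb_rsp *rsp, size_t bytes)
{
	if (bytes < sizeof(rsp->header))
		return 0;		/* shorter than even the header */
	if (rsp->header.group_id != MKHI_GROUP_ID_GFX)
		return 0;		/* wrong firmware client */
	if (rsp->header.command != INTEL_LB_RSP)
		return 0;		/* not a late-binding reply */
	if (rsp->header.result)
		return 0;		/* firmware reported an error */
	return bytes >= sizeof(*rsp);	/* finally require the full message */
}

/* Test helper: build a reply with the given header fields and validate it. */
static int check(uint8_t group_id, uint8_t command, uint8_t result, size_t bytes)
{
	struct lb_rsp r = {0};

	r.header.group_id = group_id;
	r.header.command = command;
	r.header.result = result;
	return lb_response_ok(&r, bytes);
}
```

With this ordering, a header-only error reply is reported as a firmware error rather than misdiagnosed as a short read.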
+47 -17
include/drm/drm_gpusvm.h
···
 };
 
 /**
- * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags
+ * struct drm_gpusvm_pages_flags - Structure representing a GPU SVM pages flags
  *
- * @migrate_devmem: Flag indicating whether the range can be migrated to device memory
- * @unmapped: Flag indicating if the range has been unmapped
- * @partial_unmap: Flag indicating if the range has been partially unmapped
- * @has_devmem_pages: Flag indicating if the range has devmem pages
- * @has_dma_mapping: Flag indicating if the range has a DMA mapping
- * @__flags: Flags for range in u16 form (used for READ_ONCE)
+ * @migrate_devmem: Flag indicating whether the pages can be migrated to device memory
+ * @unmapped: Flag indicating if the pages has been unmapped
+ * @partial_unmap: Flag indicating if the pages has been partially unmapped
+ * @has_devmem_pages: Flag indicating if the pages has devmem pages
+ * @has_dma_mapping: Flag indicating if the pages has a DMA mapping
+ * @__flags: Flags for pages in u16 form (used for READ_ONCE)
  */
-struct drm_gpusvm_range_flags {
+struct drm_gpusvm_pages_flags {
 	union {
 		struct {
 			/* All flags below must be set upon creation */
···
 	};
 };
 
 /**
+ * struct drm_gpusvm_pages - Structure representing a GPU SVM mapped pages
+ *
+ * @dma_addr: Device address array
+ * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping.
+ * Note this is assuming only one drm_pagemap per range is allowed.
+ * @notifier_seq: Notifier sequence number of the range's pages
+ * @flags: Flags for range
+ * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory
+ * @flags.unmapped: Flag indicating if the range has been unmapped
+ * @flags.partial_unmap: Flag indicating if the range has been partially unmapped
+ * @flags.has_devmem_pages: Flag indicating if the range has devmem pages
+ * @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping
+ */
+struct drm_gpusvm_pages {
+	struct drm_pagemap_addr *dma_addr;
+	struct drm_pagemap *dpagemap;
+	unsigned long notifier_seq;
+	struct drm_gpusvm_pages_flags flags;
+};
+
+/**
  * struct drm_gpusvm_range - Structure representing a GPU SVM range
  *
  * @gpusvm: Pointer to the GPU SVM structure
···
  * @refcount: Reference count for the range
  * @itree: Interval tree node for the range (inserted in GPU SVM notifier)
  * @entry: List entry to fast interval tree traversal
- * @notifier_seq: Notifier sequence number of the range's pages
- * @dma_addr: Device address array
- * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping.
- * Note this is assuming only one drm_pagemap per range is allowed.
- * @flags: Flags for range
+ * @pages: The pages for this range.
  *
  * This structure represents a GPU SVM range used for tracking memory ranges
  * mapped in a DRM device.
···
 	struct kref refcount;
 	struct interval_tree_node itree;
 	struct list_head entry;
-	unsigned long notifier_seq;
-	struct drm_pagemap_addr *dma_addr;
-	struct drm_pagemap *dpagemap;
-	struct drm_gpusvm_range_flags flags;
+	struct drm_gpusvm_pages pages;
 };
 
 /**
···
 
 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
 				   const struct mmu_notifier_range *mmu_range);
+
+int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
+			 struct drm_gpusvm_pages *svm_pages,
+			 struct mm_struct *mm,
+			 struct mmu_interval_notifier *notifier,
+			 unsigned long pages_start, unsigned long pages_end,
+			 const struct drm_gpusvm_ctx *ctx);
+
+void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
+			    struct drm_gpusvm_pages *svm_pages,
+			    unsigned long npages,
+			    const struct drm_gpusvm_ctx *ctx);
+
+void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
+			   struct drm_gpusvm_pages *svm_pages,
+			   unsigned long npages);
 
 #ifdef CONFIG_LOCKDEP
 /**
+1
include/drm/intel/i915_component.h
··· 31 31 I915_COMPONENT_HDCP, 32 32 I915_COMPONENT_PXP, 33 33 I915_COMPONENT_GSC_PROXY, 34 + INTEL_COMPONENT_LB, 34 35 }; 35 36 36 37 /* MAX_PORT is the number of port
+70
include/drm/intel/intel_lb_mei_interface.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright (c) 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _INTEL_LB_MEI_INTERFACE_H_ 7 + #define _INTEL_LB_MEI_INTERFACE_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct device; 12 + 13 + /** 14 + * define INTEL_LB_FLAG_IS_PERSISTENT - Mark the payload as persistent 15 + * 16 + * This flag indicates that the late binding payload should be stored 17 + * persistently in flash across warm resets. 18 + */ 19 + #define INTEL_LB_FLAG_IS_PERSISTENT BIT(0) 20 + 21 + /** 22 + * enum intel_lb_type - enum to determine late binding payload type 23 + * @INTEL_LB_TYPE_FAN_CONTROL: Fan controller configuration 24 + */ 25 + enum intel_lb_type { 26 + INTEL_LB_TYPE_FAN_CONTROL = 1, 27 + }; 28 + 29 + /** 30 + * enum intel_lb_status - Status codes returned on late binding transmissions 31 + * @INTEL_LB_STATUS_SUCCESS: Operation completed successfully 32 + * @INTEL_LB_STATUS_4ID_MISMATCH: Mismatch in the expected 4ID (firmware identity/token) 33 + * @INTEL_LB_STATUS_ARB_FAILURE: Arbitration failure (e.g. 
conflicting access or state) 34 + * @INTEL_LB_STATUS_GENERAL_ERROR: General firmware error not covered by other codes 35 + * @INTEL_LB_STATUS_INVALID_PARAMS: One or more input parameters are invalid 36 + * @INTEL_LB_STATUS_INVALID_SIGNATURE: Payload has an invalid or untrusted signature 37 + * @INTEL_LB_STATUS_INVALID_PAYLOAD: Payload contents are not accepted by firmware 38 + * @INTEL_LB_STATUS_TIMEOUT: Operation timed out before completion 39 + */ 40 + enum intel_lb_status { 41 + INTEL_LB_STATUS_SUCCESS = 0, 42 + INTEL_LB_STATUS_4ID_MISMATCH = 1, 43 + INTEL_LB_STATUS_ARB_FAILURE = 2, 44 + INTEL_LB_STATUS_GENERAL_ERROR = 3, 45 + INTEL_LB_STATUS_INVALID_PARAMS = 4, 46 + INTEL_LB_STATUS_INVALID_SIGNATURE = 5, 47 + INTEL_LB_STATUS_INVALID_PAYLOAD = 6, 48 + INTEL_LB_STATUS_TIMEOUT = 7, 49 + }; 50 + 51 + /** 52 + * struct intel_lb_component_ops - Ops for late binding services 53 + */ 54 + struct intel_lb_component_ops { 55 + /** 56 + * push_payload - Sends a payload to the authentication firmware 57 + * @dev: Device struct corresponding to the mei device 58 + * @type: Payload type (see &enum intel_lb_type) 59 + * @flags: Payload flags bitmap (e.g. %INTEL_LB_FLAG_IS_PERSISTENT) 60 + * @payload: Pointer to payload buffer 61 + * @payload_size: Payload buffer size in bytes 62 + * 63 + * Return: 0 on success, negative errno value on transport failure, 64 + * positive status returned by firmware 65 + */ 66 + int (*push_payload)(struct device *dev, u32 type, u32 flags, 67 + const void *payload, size_t payload_size); 68 + 69 + 70 + #endif /* _INTEL_LB_MEI_INTERFACE_H_ */
+1
include/linux/mei_cl_bus.h
··· 113 113 mei_cldev_cb_t notif_cb); 114 114 115 115 u8 mei_cldev_ver(const struct mei_cl_device *cldev); 116 + size_t mei_cldev_mtu(const struct mei_cl_device *cldev); 116 117 117 118 void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev); 118 119 void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data);