Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: add timeline support in amdgpu CS v3

syncobj wait/signal operations are appended during command submission.
v2: separate into two kinds of in/out_deps functions
v3: fix checking for timeline syncobj

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Cc: Tobias Hector <Tobias.Hector@amd.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Chunming Zhou and committed by Alex Deucher
2624dd15 ecc4946f

+145 -27
+8 -2
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 436 436 void *kdata; 437 437 }; 438 438 439 + struct amdgpu_cs_post_dep { 440 + struct drm_syncobj *syncobj; 441 + struct dma_fence_chain *chain; 442 + u64 point; 443 + }; 444 + 439 445 struct amdgpu_cs_parser { 440 446 struct amdgpu_device *adev; 441 447 struct drm_file *filp; ··· 471 465 /* user fence */ 472 466 struct amdgpu_bo_list_entry uf_entry; 473 467 474 - unsigned num_post_dep_syncobjs; 475 - struct drm_syncobj **post_dep_syncobjs; 468 + unsigned num_post_deps; 469 + struct amdgpu_cs_post_dep *post_deps; 476 470 }; 477 471 478 472 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
+129 -25
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 215 215 case AMDGPU_CHUNK_ID_SYNCOBJ_IN: 216 216 case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: 217 217 case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: 218 + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: 219 + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: 218 220 break; 219 221 220 222 default: ··· 806 804 ttm_eu_backoff_reservation(&parser->ticket, 807 805 &parser->validated); 808 806 809 - for (i = 0; i < parser->num_post_dep_syncobjs; i++) 810 - drm_syncobj_put(parser->post_dep_syncobjs[i]); 811 - kfree(parser->post_dep_syncobjs); 807 + for (i = 0; i < parser->num_post_deps; i++) { 808 + drm_syncobj_put(parser->post_deps[i].syncobj); 809 + kfree(parser->post_deps[i].chain); 810 + } 811 + kfree(parser->post_deps); 812 812 813 813 dma_fence_put(parser->fence); 814 814 ··· 1121 1117 } 1122 1118 1123 1119 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, 1124 - uint32_t handle) 1120 + uint32_t handle, u64 point, 1121 + u64 flags) 1125 1122 { 1126 - int r; 1127 1123 struct dma_fence *fence; 1128 - r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence); 1129 - if (r) 1124 + int r; 1125 + 1126 + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); 1127 + if (r) { 1128 + DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", 1129 + handle, point, r); 1130 1130 return r; 1131 + } 1131 1132 1132 1133 r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); 1133 1134 dma_fence_put(fence); ··· 1143 1134 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, 1144 1135 struct amdgpu_cs_chunk *chunk) 1145 1136 { 1137 + struct drm_amdgpu_cs_chunk_sem *deps; 1146 1138 unsigned num_deps; 1147 1139 int i, r; 1148 - struct drm_amdgpu_cs_chunk_sem *deps; 1149 1140 1150 1141 deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; 1151 1142 num_deps = chunk->length_dw * 4 / 1152 1143 sizeof(struct drm_amdgpu_cs_chunk_sem); 1153 - 1154 1144 for (i = 0; i < num_deps; ++i) { 1155 - r = amdgpu_syncobj_lookup_and_add_to_sync(p, 
deps[i].handle); 1145 + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, 1146 + 0, 0); 1156 1147 if (r) 1157 1148 return r; 1158 1149 } 1150 + 1151 + return 0; 1152 + } 1153 + 1154 + 1155 + static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, 1156 + struct amdgpu_cs_chunk *chunk) 1157 + { 1158 + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; 1159 + unsigned num_deps; 1160 + int i, r; 1161 + 1162 + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; 1163 + num_deps = chunk->length_dw * 4 / 1164 + sizeof(struct drm_amdgpu_cs_chunk_syncobj); 1165 + for (i = 0; i < num_deps; ++i) { 1166 + r = amdgpu_syncobj_lookup_and_add_to_sync(p, 1167 + syncobj_deps[i].handle, 1168 + syncobj_deps[i].point, 1169 + syncobj_deps[i].flags); 1170 + if (r) 1171 + return r; 1172 + } 1173 + 1159 1174 return 0; 1160 1175 } 1161 1176 1162 1177 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, 1163 1178 struct amdgpu_cs_chunk *chunk) 1164 1179 { 1180 + struct drm_amdgpu_cs_chunk_sem *deps; 1165 1181 unsigned num_deps; 1166 1182 int i; 1167 - struct drm_amdgpu_cs_chunk_sem *deps; 1183 + 1168 1184 deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; 1169 1185 num_deps = chunk->length_dw * 4 / 1170 1186 sizeof(struct drm_amdgpu_cs_chunk_sem); 1171 1187 1172 - p->post_dep_syncobjs = kmalloc_array(num_deps, 1173 - sizeof(struct drm_syncobj *), 1174 - GFP_KERNEL); 1175 - p->num_post_dep_syncobjs = 0; 1188 + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), 1189 + GFP_KERNEL); 1190 + p->num_post_deps = 0; 1176 1191 1177 - if (!p->post_dep_syncobjs) 1192 + if (!p->post_deps) 1193 + return -ENOMEM; 1194 + 1195 + 1196 + for (i = 0; i < num_deps; ++i) { 1197 + p->post_deps[i].syncobj = 1198 + drm_syncobj_find(p->filp, deps[i].handle); 1199 + if (!p->post_deps[i].syncobj) 1200 + return -EINVAL; 1201 + p->post_deps[i].chain = NULL; 1202 + p->post_deps[i].point = 0; 1203 + p->num_post_deps++; 1204 + } 1205 + 1206 
+ return 0; 1207 + } 1208 + 1209 + 1210 + static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, 1211 + struct amdgpu_cs_chunk 1212 + *chunk) 1213 + { 1214 + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; 1215 + unsigned num_deps; 1216 + int i; 1217 + 1218 + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; 1219 + num_deps = chunk->length_dw * 4 / 1220 + sizeof(struct drm_amdgpu_cs_chunk_syncobj); 1221 + 1222 + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), 1223 + GFP_KERNEL); 1224 + p->num_post_deps = 0; 1225 + 1226 + if (!p->post_deps) 1178 1227 return -ENOMEM; 1179 1228 1180 1229 for (i = 0; i < num_deps; ++i) { 1181 - p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); 1182 - if (!p->post_dep_syncobjs[i]) 1230 + struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; 1231 + 1232 + dep->chain = NULL; 1233 + if (syncobj_deps[i].point) { 1234 + dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL); 1235 + if (!dep->chain) 1236 + return -ENOMEM; 1237 + } 1238 + 1239 + dep->syncobj = drm_syncobj_find(p->filp, 1240 + syncobj_deps[i].handle); 1241 + if (!dep->syncobj) { 1242 + kfree(dep->chain); 1183 1243 return -EINVAL; 1184 - p->num_post_dep_syncobjs++; 1244 + } 1245 + dep->point = syncobj_deps[i].point; 1246 + p->num_post_deps++; 1185 1247 } 1248 + 1186 1249 return 0; 1187 1250 } 1188 1251 ··· 1268 1187 1269 1188 chunk = &p->chunks[i]; 1270 1189 1271 - if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES || 1272 - chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { 1190 + switch (chunk->chunk_id) { 1191 + case AMDGPU_CHUNK_ID_DEPENDENCIES: 1192 + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: 1273 1193 r = amdgpu_cs_process_fence_dep(p, chunk); 1274 1194 if (r) 1275 1195 return r; 1276 - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { 1196 + break; 1197 + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: 1277 1198 r = amdgpu_cs_process_syncobj_in_dep(p, chunk); 1278 1199 if (r) 1279 1200 
return r; 1280 - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { 1201 + break; 1202 + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: 1281 1203 r = amdgpu_cs_process_syncobj_out_dep(p, chunk); 1282 1204 if (r) 1283 1205 return r; 1206 + break; 1207 + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: 1208 + r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); 1209 + if (r) 1210 + return r; 1211 + break; 1212 + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: 1213 + r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); 1214 + if (r) 1215 + return r; 1216 + break; 1284 1217 } 1285 1218 } 1286 1219 ··· 1305 1210 { 1306 1211 int i; 1307 1212 1308 - for (i = 0; i < p->num_post_dep_syncobjs; ++i) 1309 - drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence); 1213 + for (i = 0; i < p->num_post_deps; ++i) { 1214 + if (p->post_deps[i].chain && p->post_deps[i].point) { 1215 + drm_syncobj_add_point(p->post_deps[i].syncobj, 1216 + p->post_deps[i].chain, 1217 + p->fence, p->post_deps[i].point); 1218 + p->post_deps[i].chain = NULL; 1219 + } else { 1220 + drm_syncobj_replace_fence(p->post_deps[i].syncobj, 1221 + p->fence); 1222 + } 1223 + } 1310 1224 } 1311 1225 1312 1226 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
+8
include/uapi/drm/amdgpu_drm.h
··· 528 528 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 529 529 #define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 530 530 #define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 531 + #define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 532 + #define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 531 533 532 534 struct drm_amdgpu_cs_chunk { 533 535 __u32 chunk_id; ··· 608 606 609 607 struct drm_amdgpu_cs_chunk_sem { 610 608 __u32 handle; 609 + }; 610 + 611 + struct drm_amdgpu_cs_chunk_syncobj { 612 + __u32 handle; 613 + __u32 flags; 614 + __u64 point; 611 615 }; 612 616 613 617 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0