Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-intel-next-2017-10-12' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

Last batch of drm/i915 features for v4.15:

- transparent huge pages support (Matthew)
- uapi: I915_PARAM_HAS_SCHEDULER into a capability bitmask (Chris)
- execlists: preemption (Chris)
- scheduler: user defined priorities (Chris)
- execlists optimization (Michał)
- plenty of display fixes (Imre)
- has_ipc fix (Rodrigo)
- platform features definition refactoring (Rodrigo)
- legacy cursor update fix (Maarten)
- fix vblank waits for cursor updates (Maarten)
- reprogram dmc firmware on resume, dmc state fix (Imre)
- remove use_mmio_flip module parameter (Maarten)
- wa fixes (Oscar)
- huc/guc firmware refactoring (Sagar, Michal)
- push encoder specific code to encoder hooks (Jani)
- DP MST fixes (Dhinakaran)
- eDP power sequencing fixes (Manasi)
- selftest updates (Chris, Matthew)
- mmu notifier cpu hotplug deadlock fix (Daniel)
- more VBT parser refactoring (Jani)
- max pipe refactoring (Mika Kahola)
- rc6/rps refactoring and separation (Sagar)
- userptr lockdep fix (Chris)
- tracepoint fixes and defunct tracepoint removal (Chris)
- use rcu instead of abusing stop_machine (Daniel)
- plenty of other fixes all around (Everyone)

* tag 'drm-intel-next-2017-10-12' of git://anongit.freedesktop.org/drm/drm-intel: (145 commits)
drm/i915: Update DRIVER_DATE to 20171012
drm/i915: Simplify intel_sanitize_enable_ppgtt
drm/i915/userptr: Drop struct_mutex before cleanup
drm/i915/dp: limit sink rates based on rate
drm/i915/dp: centralize max source rate conditions more
drm/i915: Allow PCH platforms fall back to BIOS LVDS mode
drm/i915: Reuse normal state readout for LVDS/DVO fixed mode
drm/i915: Use rcu instead of stop_machine in set_wedged
drm/i915: Introduce separate status variable for RC6 and LLC ring frequency setup
drm/i915: Create generic functions to control RC6, RPS
drm/i915: Create generic function to setup LLC ring frequency table
drm/i915: Rename intel_enable_rc6 to intel_rc6_enabled
drm/i915: Name structure in dev_priv that contains RPS/RC6 state as "gt_pm"
drm/i915: Move rps.hw_lock to dev_priv and s/hw_lock/pcu_lock
drm/i915: Name i915_runtime_pm structure in dev_priv as "runtime_pm"
drm/i915: Separate RPS and RC6 handling for CHV
drm/i915: Separate RPS and RC6 handling for VLV
drm/i915: Separate RPS and RC6 handling for BDW
drm/i915: Remove superfluous IS_BDW checks and non-BDW changes from gen8_enable_rps
drm/i915: Separate RPS and RC6 handling for gen6+
...

+5577 -2239
+3
drivers/gpu/drm/i915/Makefile
··· 47 47 i915_gem_tiling.o \ 48 48 i915_gem_timeline.o \ 49 49 i915_gem_userptr.o \ 50 + i915_gemfs.o \ 50 51 i915_trace_points.o \ 51 52 i915_vma.o \ 52 53 intel_breadcrumbs.o \ ··· 60 59 61 60 # general-purpose microcontroller (GuC) support 62 61 i915-y += intel_uc.o \ 62 + intel_uc_fw.o \ 63 + intel_guc.o \ 63 64 intel_guc_ct.o \ 64 65 intel_guc_log.o \ 65 66 intel_guc_loader.o \
+1
drivers/gpu/drm/i915/gvt/scheduler.c
··· 174 174 atomic_set(&workload->shadow_ctx_active, 1); 175 175 break; 176 176 case INTEL_CONTEXT_SCHEDULE_OUT: 177 + case INTEL_CONTEXT_SCHEDULE_PREEMPTED: 177 178 atomic_set(&workload->shadow_ctx_active, 0); 178 179 break; 179 180 default:
+146 -233
drivers/gpu/drm/i915/i915_debugfs.c
··· 30 30 #include <linux/sort.h> 31 31 #include <linux/sched/mm.h> 32 32 #include "intel_drv.h" 33 + #include "i915_guc_submission.h" 33 34 34 35 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) 35 36 { ··· 98 97 99 98 static char get_global_flag(struct drm_i915_gem_object *obj) 100 99 { 101 - return !list_empty(&obj->userfault_link) ? 'g' : ' '; 100 + return obj->userfault_count ? 'g' : ' '; 102 101 } 103 102 104 103 static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) ··· 117 116 } 118 117 119 118 return size; 119 + } 120 + 121 + static const char * 122 + stringify_page_sizes(unsigned int page_sizes, char *buf, size_t len) 123 + { 124 + size_t x = 0; 125 + 126 + switch (page_sizes) { 127 + case 0: 128 + return ""; 129 + case I915_GTT_PAGE_SIZE_4K: 130 + return "4K"; 131 + case I915_GTT_PAGE_SIZE_64K: 132 + return "64K"; 133 + case I915_GTT_PAGE_SIZE_2M: 134 + return "2M"; 135 + default: 136 + if (!buf) 137 + return "M"; 138 + 139 + if (page_sizes & I915_GTT_PAGE_SIZE_2M) 140 + x += snprintf(buf + x, len - x, "2M, "); 141 + if (page_sizes & I915_GTT_PAGE_SIZE_64K) 142 + x += snprintf(buf + x, len - x, "64K, "); 143 + if (page_sizes & I915_GTT_PAGE_SIZE_4K) 144 + x += snprintf(buf + x, len - x, "4K, "); 145 + buf[x-2] = '\0'; 146 + 147 + return buf; 148 + } 120 149 } 121 150 122 151 static void ··· 186 155 if (!drm_mm_node_allocated(&vma->node)) 187 156 continue; 188 157 189 - seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", 158 + seq_printf(m, " (%sgtt offset: %08llx, size: %08llx, pages: %s", 190 159 i915_vma_is_ggtt(vma) ? 
"g" : "pp", 191 - vma->node.start, vma->node.size); 160 + vma->node.start, vma->node.size, 161 + stringify_page_sizes(vma->page_sizes.gtt, NULL, 0)); 192 162 if (i915_vma_is_ggtt(vma)) { 193 163 switch (vma->ggtt_view.type) { 194 164 case I915_GGTT_VIEW_NORMAL: ··· 434 402 struct drm_i915_private *dev_priv = node_to_i915(m->private); 435 403 struct drm_device *dev = &dev_priv->drm; 436 404 struct i915_ggtt *ggtt = &dev_priv->ggtt; 437 - u32 count, mapped_count, purgeable_count, dpy_count; 438 - u64 size, mapped_size, purgeable_size, dpy_size; 405 + u32 count, mapped_count, purgeable_count, dpy_count, huge_count; 406 + u64 size, mapped_size, purgeable_size, dpy_size, huge_size; 439 407 struct drm_i915_gem_object *obj; 408 + unsigned int page_sizes = 0; 440 409 struct drm_file *file; 410 + char buf[80]; 441 411 int ret; 442 412 443 413 ret = mutex_lock_interruptible(&dev->struct_mutex); ··· 453 419 size = count = 0; 454 420 mapped_size = mapped_count = 0; 455 421 purgeable_size = purgeable_count = 0; 422 + huge_size = huge_count = 0; 456 423 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { 457 424 size += obj->base.size; 458 425 ++count; ··· 466 431 if (obj->mm.mapping) { 467 432 mapped_count++; 468 433 mapped_size += obj->base.size; 434 + } 435 + 436 + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { 437 + huge_count++; 438 + huge_size += obj->base.size; 439 + page_sizes |= obj->mm.page_sizes.sg; 469 440 } 470 441 } 471 442 seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); ··· 495 454 mapped_count++; 496 455 mapped_size += obj->base.size; 497 456 } 457 + 458 + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { 459 + huge_count++; 460 + huge_size += obj->base.size; 461 + page_sizes |= obj->mm.page_sizes.sg; 462 + } 498 463 } 499 464 seq_printf(m, "%u bound objects, %llu bytes\n", 500 465 count, size); ··· 508 461 purgeable_count, purgeable_size); 509 462 seq_printf(m, "%u mapped objects, %llu bytes\n", 510 463 mapped_count, 
mapped_size); 464 + seq_printf(m, "%u huge-paged objects (%s) %llu bytes\n", 465 + huge_count, 466 + stringify_page_sizes(page_sizes, buf, sizeof(buf)), 467 + huge_size); 511 468 seq_printf(m, "%u display objects (pinned), %llu bytes\n", 512 469 dpy_count, dpy_size); 513 470 514 471 seq_printf(m, "%llu [%llu] gtt total\n", 515 472 ggtt->base.total, ggtt->mappable_end); 473 + seq_printf(m, "Supported page sizes: %s\n", 474 + stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes, 475 + buf, sizeof(buf))); 516 476 517 477 seq_putc(m, '\n'); 518 478 print_batch_pool_stats(m, dev_priv); ··· 1080 1026 static int i915_frequency_info(struct seq_file *m, void *unused) 1081 1027 { 1082 1028 struct drm_i915_private *dev_priv = node_to_i915(m->private); 1029 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 1083 1030 int ret = 0; 1084 1031 1085 1032 intel_runtime_pm_get(dev_priv); ··· 1096 1041 seq_printf(m, "Current P-state: %d\n", 1097 1042 (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); 1098 1043 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 1099 - u32 freq_sts; 1044 + u32 rpmodectl, freq_sts; 1100 1045 1101 - mutex_lock(&dev_priv->rps.hw_lock); 1046 + mutex_lock(&dev_priv->pcu_lock); 1047 + 1048 + rpmodectl = I915_READ(GEN6_RP_CONTROL); 1049 + seq_printf(m, "Video Turbo Mode: %s\n", 1050 + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); 1051 + seq_printf(m, "HW control enabled: %s\n", 1052 + yesno(rpmodectl & GEN6_RP_ENABLE)); 1053 + seq_printf(m, "SW control enabled: %s\n", 1054 + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == 1055 + GEN6_RP_MEDIA_SW_MODE)); 1056 + 1102 1057 freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 1103 1058 seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); 1104 1059 seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); ··· 1117 1052 intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff)); 1118 1053 1119 1054 seq_printf(m, "current GPU freq: %d MHz\n", 1120 - intel_gpu_freq(dev_priv, 
dev_priv->rps.cur_freq)); 1055 + intel_gpu_freq(dev_priv, rps->cur_freq)); 1121 1056 1122 1057 seq_printf(m, "max GPU freq: %d MHz\n", 1123 - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); 1058 + intel_gpu_freq(dev_priv, rps->max_freq)); 1124 1059 1125 1060 seq_printf(m, "min GPU freq: %d MHz\n", 1126 - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); 1061 + intel_gpu_freq(dev_priv, rps->min_freq)); 1127 1062 1128 1063 seq_printf(m, "idle GPU freq: %d MHz\n", 1129 - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); 1064 + intel_gpu_freq(dev_priv, rps->idle_freq)); 1130 1065 1131 1066 seq_printf(m, 1132 1067 "efficient (RPe) frequency: %d MHz\n", 1133 - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); 1134 - mutex_unlock(&dev_priv->rps.hw_lock); 1068 + intel_gpu_freq(dev_priv, rps->efficient_freq)); 1069 + mutex_unlock(&dev_priv->pcu_lock); 1135 1070 } else if (INTEL_GEN(dev_priv) >= 6) { 1136 1071 u32 rp_state_limits; 1137 1072 u32 gt_perf_status; ··· 1201 1136 pm_iir = I915_READ(GEN8_GT_IIR(2)); 1202 1137 pm_mask = I915_READ(GEN6_PMINTRMSK); 1203 1138 } 1139 + seq_printf(m, "Video Turbo Mode: %s\n", 1140 + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); 1141 + seq_printf(m, "HW control enabled: %s\n", 1142 + yesno(rpmodectl & GEN6_RP_ENABLE)); 1143 + seq_printf(m, "SW control enabled: %s\n", 1144 + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == 1145 + GEN6_RP_MEDIA_SW_MODE)); 1204 1146 seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n", 1205 1147 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask); 1206 1148 seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", 1207 - dev_priv->rps.pm_intrmsk_mbz); 1149 + rps->pm_intrmsk_mbz); 1208 1150 seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); 1209 1151 seq_printf(m, "Render p-state ratio: %d\n", 1210 1152 (gt_perf_status & (INTEL_GEN(dev_priv) >= 9 ? 
0x1ff00 : 0xff00)) >> 8); ··· 1231 1159 rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup)); 1232 1160 seq_printf(m, "RP PREV UP: %d (%dus)\n", 1233 1161 rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup)); 1234 - seq_printf(m, "Up threshold: %d%%\n", 1235 - dev_priv->rps.up_threshold); 1162 + seq_printf(m, "Up threshold: %d%%\n", rps->up_threshold); 1236 1163 1237 1164 seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", 1238 1165 rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei)); ··· 1239 1168 rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown)); 1240 1169 seq_printf(m, "RP PREV DOWN: %d (%dus)\n", 1241 1170 rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown)); 1242 - seq_printf(m, "Down threshold: %d%%\n", 1243 - dev_priv->rps.down_threshold); 1171 + seq_printf(m, "Down threshold: %d%%\n", rps->down_threshold); 1244 1172 1245 1173 max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : 1246 1174 rp_state_cap >> 16) & 0xff; ··· 1261 1191 seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", 1262 1192 intel_gpu_freq(dev_priv, max_freq)); 1263 1193 seq_printf(m, "Max overclocked frequency: %dMHz\n", 1264 - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); 1194 + intel_gpu_freq(dev_priv, rps->max_freq)); 1265 1195 1266 1196 seq_printf(m, "Current freq: %d MHz\n", 1267 - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); 1197 + intel_gpu_freq(dev_priv, rps->cur_freq)); 1268 1198 seq_printf(m, "Actual freq: %d MHz\n", cagf); 1269 1199 seq_printf(m, "Idle freq: %d MHz\n", 1270 - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); 1200 + intel_gpu_freq(dev_priv, rps->idle_freq)); 1271 1201 seq_printf(m, "Min freq: %d MHz\n", 1272 - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); 1202 + intel_gpu_freq(dev_priv, rps->min_freq)); 1273 1203 seq_printf(m, "Boost freq: %d MHz\n", 1274 - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); 1204 + intel_gpu_freq(dev_priv, rps->boost_freq)); 1275 1205 seq_printf(m, "Max freq: %d MHz\n", 1276 - intel_gpu_freq(dev_priv, 
dev_priv->rps.max_freq)); 1206 + intel_gpu_freq(dev_priv, rps->max_freq)); 1277 1207 seq_printf(m, 1278 1208 "efficient (RPe) frequency: %d MHz\n", 1279 - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); 1209 + intel_gpu_freq(dev_priv, rps->efficient_freq)); 1280 1210 } else { 1281 1211 seq_puts(m, "no P-state info available\n"); 1282 1212 } ··· 1517 1447 static int vlv_drpc_info(struct seq_file *m) 1518 1448 { 1519 1449 struct drm_i915_private *dev_priv = node_to_i915(m->private); 1520 - u32 rpmodectl1, rcctl1, pw_status; 1450 + u32 rcctl1, pw_status; 1521 1451 1522 1452 pw_status = I915_READ(VLV_GTLC_PW_STATUS); 1523 - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); 1524 1453 rcctl1 = I915_READ(GEN6_RC_CONTROL); 1525 1454 1526 - seq_printf(m, "Video Turbo Mode: %s\n", 1527 - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); 1528 - seq_printf(m, "Turbo enabled: %s\n", 1529 - yesno(rpmodectl1 & GEN6_RP_ENABLE)); 1530 - seq_printf(m, "HW control enabled: %s\n", 1531 - yesno(rpmodectl1 & GEN6_RP_ENABLE)); 1532 - seq_printf(m, "SW control enabled: %s\n", 1533 - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == 1534 - GEN6_RP_MEDIA_SW_MODE)); 1535 1455 seq_printf(m, "RC6 Enabled: %s\n", 1536 1456 yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | 1537 1457 GEN6_RC_CTL_EI_MODE(1)))); ··· 1539 1479 static int gen6_drpc_info(struct seq_file *m) 1540 1480 { 1541 1481 struct drm_i915_private *dev_priv = node_to_i915(m->private); 1542 - u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; 1482 + u32 gt_core_status, rcctl1, rc6vids = 0; 1543 1483 u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; 1544 1484 unsigned forcewake_count; 1545 1485 int count = 0; ··· 1558 1498 gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS); 1559 1499 trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true); 1560 1500 1561 - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); 1562 1501 rcctl1 = I915_READ(GEN6_RC_CONTROL); 1563 1502 if (INTEL_GEN(dev_priv) >= 9) { 1564 1503 gen9_powergate_enable = 
I915_READ(GEN9_PG_ENABLE); 1565 1504 gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); 1566 1505 } 1567 1506 1568 - mutex_lock(&dev_priv->rps.hw_lock); 1507 + mutex_lock(&dev_priv->pcu_lock); 1569 1508 sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 1570 - mutex_unlock(&dev_priv->rps.hw_lock); 1509 + mutex_unlock(&dev_priv->pcu_lock); 1571 1510 1572 - seq_printf(m, "Video Turbo Mode: %s\n", 1573 - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); 1574 - seq_printf(m, "HW control enabled: %s\n", 1575 - yesno(rpmodectl1 & GEN6_RP_ENABLE)); 1576 - seq_printf(m, "SW control enabled: %s\n", 1577 - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == 1578 - GEN6_RP_MEDIA_SW_MODE)); 1579 1511 seq_printf(m, "RC1e Enabled: %s\n", 1580 1512 yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); 1581 1513 seq_printf(m, "RC6 Enabled: %s\n", ··· 1830 1778 static int i915_ring_freq_table(struct seq_file *m, void *unused) 1831 1779 { 1832 1780 struct drm_i915_private *dev_priv = node_to_i915(m->private); 1781 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 1833 1782 int ret = 0; 1834 1783 int gpu_freq, ia_freq; 1835 1784 unsigned int max_gpu_freq, min_gpu_freq; ··· 1842 1789 1843 1790 intel_runtime_pm_get(dev_priv); 1844 1791 1845 - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); 1792 + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); 1846 1793 if (ret) 1847 1794 goto out; 1848 1795 1849 1796 if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { 1850 1797 /* Convert GT frequency to 50 HZ units */ 1851 - min_gpu_freq = 1852 - dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; 1853 - max_gpu_freq = 1854 - dev_priv->rps.max_freq_softlimit / GEN9_FREQ_SCALER; 1798 + min_gpu_freq = rps->min_freq_softlimit / GEN9_FREQ_SCALER; 1799 + max_gpu_freq = rps->max_freq_softlimit / GEN9_FREQ_SCALER; 1855 1800 } else { 1856 - min_gpu_freq = dev_priv->rps.min_freq_softlimit; 1857 - max_gpu_freq = dev_priv->rps.max_freq_softlimit; 1801 + min_gpu_freq = rps->min_freq_softlimit; 
1802 + max_gpu_freq = rps->max_freq_softlimit; 1858 1803 } 1859 1804 1860 1805 seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); ··· 1871 1820 ((ia_freq >> 8) & 0xff) * 100); 1872 1821 } 1873 1822 1874 - mutex_unlock(&dev_priv->rps.hw_lock); 1823 + mutex_unlock(&dev_priv->pcu_lock); 1875 1824 1876 1825 out: 1877 1826 intel_runtime_pm_put(dev_priv); ··· 2305 2254 { 2306 2255 struct drm_i915_private *dev_priv = node_to_i915(m->private); 2307 2256 struct drm_device *dev = &dev_priv->drm; 2257 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 2308 2258 struct drm_file *file; 2309 2259 2310 - seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled); 2260 + seq_printf(m, "RPS enabled? %d\n", rps->enabled); 2311 2261 seq_printf(m, "GPU busy? %s [%d requests]\n", 2312 2262 yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); 2313 2263 seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); 2314 2264 seq_printf(m, "Boosts outstanding? %d\n", 2315 - atomic_read(&dev_priv->rps.num_waiters)); 2265 + atomic_read(&rps->num_waiters)); 2316 2266 seq_printf(m, "Frequency requested %d\n", 2317 - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); 2267 + intel_gpu_freq(dev_priv, rps->cur_freq)); 2318 2268 seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", 2319 - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 2320 - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit), 2321 - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit), 2322 - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); 2269 + intel_gpu_freq(dev_priv, rps->min_freq), 2270 + intel_gpu_freq(dev_priv, rps->min_freq_softlimit), 2271 + intel_gpu_freq(dev_priv, rps->max_freq_softlimit), 2272 + intel_gpu_freq(dev_priv, rps->max_freq)); 2323 2273 seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", 2324 - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), 2325 - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 2326 - intel_gpu_freq(dev_priv, 
dev_priv->rps.boost_freq)); 2274 + intel_gpu_freq(dev_priv, rps->idle_freq), 2275 + intel_gpu_freq(dev_priv, rps->efficient_freq), 2276 + intel_gpu_freq(dev_priv, rps->boost_freq)); 2327 2277 2328 2278 mutex_lock(&dev->filelist_mutex); 2329 2279 list_for_each_entry_reverse(file, &dev->filelist, lhead) { ··· 2336 2284 seq_printf(m, "%s [%d]: %d boosts\n", 2337 2285 task ? task->comm : "<unknown>", 2338 2286 task ? task->pid : -1, 2339 - atomic_read(&file_priv->rps.boosts)); 2287 + atomic_read(&file_priv->rps_client.boosts)); 2340 2288 rcu_read_unlock(); 2341 2289 } 2342 2290 seq_printf(m, "Kernel (anonymous) boosts: %d\n", 2343 - atomic_read(&dev_priv->rps.boosts)); 2291 + atomic_read(&rps->boosts)); 2344 2292 mutex_unlock(&dev->filelist_mutex); 2345 2293 2346 2294 if (INTEL_GEN(dev_priv) >= 6 && 2347 - dev_priv->rps.enabled && 2295 + rps->enabled && 2348 2296 dev_priv->gt.active_requests) { 2349 2297 u32 rpup, rpupei; 2350 2298 u32 rpdown, rpdownei; ··· 2357 2305 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 2358 2306 2359 2307 seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", 2360 - rps_power_to_str(dev_priv->rps.power)); 2308 + rps_power_to_str(rps->power)); 2361 2309 seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", 2362 2310 rpup && rpupei ? 100 * rpup / rpupei : 0, 2363 - dev_priv->rps.up_threshold); 2311 + rps->up_threshold); 2364 2312 seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", 2365 2313 rpdown && rpdownei ? 
100 * rpdown / rpdownei : 0, 2366 - dev_priv->rps.down_threshold); 2314 + rps->down_threshold); 2367 2315 } else { 2368 2316 seq_puts(m, "\nRPS Autotuning inactive\n"); 2369 2317 } ··· 3290 3238 static int i915_engine_info(struct seq_file *m, void *unused) 3291 3239 { 3292 3240 struct drm_i915_private *dev_priv = node_to_i915(m->private); 3293 - struct i915_gpu_error *error = &dev_priv->gpu_error; 3294 3241 struct intel_engine_cs *engine; 3295 3242 enum intel_engine_id id; 3243 + struct drm_printer p; 3296 3244 3297 3245 intel_runtime_pm_get(dev_priv); 3298 3246 ··· 3301 3249 seq_printf(m, "Global active requests: %d\n", 3302 3250 dev_priv->gt.active_requests); 3303 3251 3304 - for_each_engine(engine, dev_priv, id) { 3305 - struct intel_breadcrumbs *b = &engine->breadcrumbs; 3306 - struct drm_i915_gem_request *rq; 3307 - struct rb_node *rb; 3308 - u64 addr; 3309 - 3310 - seq_printf(m, "%s\n", engine->name); 3311 - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", 3312 - intel_engine_get_seqno(engine), 3313 - intel_engine_last_submit(engine), 3314 - engine->hangcheck.seqno, 3315 - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), 3316 - engine->timeline->inflight_seqnos); 3317 - seq_printf(m, "\tReset count: %d\n", 3318 - i915_reset_engine_count(error, engine)); 3319 - 3320 - rcu_read_lock(); 3321 - 3322 - seq_printf(m, "\tRequests:\n"); 3323 - 3324 - rq = list_first_entry(&engine->timeline->requests, 3325 - struct drm_i915_gem_request, link); 3326 - if (&rq->link != &engine->timeline->requests) 3327 - print_request(m, rq, "\t\tfirst "); 3328 - 3329 - rq = list_last_entry(&engine->timeline->requests, 3330 - struct drm_i915_gem_request, link); 3331 - if (&rq->link != &engine->timeline->requests) 3332 - print_request(m, rq, "\t\tlast "); 3333 - 3334 - rq = i915_gem_find_active_request(engine); 3335 - if (rq) { 3336 - print_request(m, rq, "\t\tactive "); 3337 - seq_printf(m, 3338 - "\t\t[head %04x, postfix %04x, tail %04x, 
batch 0x%08x_%08x]\n", 3339 - rq->head, rq->postfix, rq->tail, 3340 - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, 3341 - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); 3342 - } 3343 - 3344 - seq_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", 3345 - I915_READ(RING_START(engine->mmio_base)), 3346 - rq ? i915_ggtt_offset(rq->ring->vma) : 0); 3347 - seq_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", 3348 - I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, 3349 - rq ? rq->ring->head : 0); 3350 - seq_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", 3351 - I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, 3352 - rq ? rq->ring->tail : 0); 3353 - seq_printf(m, "\tRING_CTL: 0x%08x [%s]\n", 3354 - I915_READ(RING_CTL(engine->mmio_base)), 3355 - I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : ""); 3356 - 3357 - rcu_read_unlock(); 3358 - 3359 - addr = intel_engine_get_active_head(engine); 3360 - seq_printf(m, "\tACTHD: 0x%08x_%08x\n", 3361 - upper_32_bits(addr), lower_32_bits(addr)); 3362 - addr = intel_engine_get_last_batch_head(engine); 3363 - seq_printf(m, "\tBBADDR: 0x%08x_%08x\n", 3364 - upper_32_bits(addr), lower_32_bits(addr)); 3365 - 3366 - if (i915_modparams.enable_execlists) { 3367 - const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; 3368 - struct intel_engine_execlists * const execlists = &engine->execlists; 3369 - u32 ptr, read, write; 3370 - unsigned int idx; 3371 - 3372 - seq_printf(m, "\tExeclist status: 0x%08x %08x\n", 3373 - I915_READ(RING_EXECLIST_STATUS_LO(engine)), 3374 - I915_READ(RING_EXECLIST_STATUS_HI(engine))); 3375 - 3376 - ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); 3377 - read = GEN8_CSB_READ_PTR(ptr); 3378 - write = GEN8_CSB_WRITE_PTR(ptr); 3379 - seq_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? 
%s\n", 3380 - read, execlists->csb_head, 3381 - write, 3382 - intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), 3383 - yesno(test_bit(ENGINE_IRQ_EXECLIST, 3384 - &engine->irq_posted))); 3385 - if (read >= GEN8_CSB_ENTRIES) 3386 - read = 0; 3387 - if (write >= GEN8_CSB_ENTRIES) 3388 - write = 0; 3389 - if (read > write) 3390 - write += GEN8_CSB_ENTRIES; 3391 - while (read < write) { 3392 - idx = ++read % GEN8_CSB_ENTRIES; 3393 - seq_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", 3394 - idx, 3395 - I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), 3396 - hws[idx * 2], 3397 - I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), 3398 - hws[idx * 2 + 1]); 3399 - } 3400 - 3401 - rcu_read_lock(); 3402 - for (idx = 0; idx < execlists_num_ports(execlists); idx++) { 3403 - unsigned int count; 3404 - 3405 - rq = port_unpack(&execlists->port[idx], &count); 3406 - if (rq) { 3407 - seq_printf(m, "\t\tELSP[%d] count=%d, ", 3408 - idx, count); 3409 - print_request(m, rq, "rq: "); 3410 - } else { 3411 - seq_printf(m, "\t\tELSP[%d] idle\n", 3412 - idx); 3413 - } 3414 - } 3415 - rcu_read_unlock(); 3416 - 3417 - spin_lock_irq(&engine->timeline->lock); 3418 - for (rb = execlists->first; rb; rb = rb_next(rb)) { 3419 - struct i915_priolist *p = 3420 - rb_entry(rb, typeof(*p), node); 3421 - 3422 - list_for_each_entry(rq, &p->requests, 3423 - priotree.link) 3424 - print_request(m, rq, "\t\tQ "); 3425 - } 3426 - spin_unlock_irq(&engine->timeline->lock); 3427 - } else if (INTEL_GEN(dev_priv) > 6) { 3428 - seq_printf(m, "\tPP_DIR_BASE: 0x%08x\n", 3429 - I915_READ(RING_PP_DIR_BASE(engine))); 3430 - seq_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", 3431 - I915_READ(RING_PP_DIR_BASE_READ(engine))); 3432 - seq_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", 3433 - I915_READ(RING_PP_DIR_DCLV(engine))); 3434 - } 3435 - 3436 - spin_lock_irq(&b->rb_lock); 3437 - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { 3438 - struct intel_wait *w = 
rb_entry(rb, typeof(*w), node); 3439 - 3440 - seq_printf(m, "\t%s [%d] waiting for %x\n", 3441 - w->tsk->comm, w->tsk->pid, w->seqno); 3442 - } 3443 - spin_unlock_irq(&b->rb_lock); 3444 - 3445 - seq_puts(m, "\n"); 3446 - } 3252 + p = drm_seq_file_printer(m); 3253 + for_each_engine(engine, dev_priv, id) 3254 + intel_engine_dump(engine, &p); 3447 3255 3448 3256 intel_runtime_pm_put(dev_priv); 3449 3257 ··· 4170 4258 mutex_unlock(&i915->drm.struct_mutex); 4171 4259 4172 4260 /* Flush idle worker to disarm irq */ 4173 - while (flush_delayed_work(&i915->gt.idle_work)) 4174 - ; 4261 + drain_delayed_work(&i915->gt.idle_work); 4175 4262 4176 4263 return 0; 4177 4264 ··· 4303 4392 if (INTEL_GEN(dev_priv) < 6) 4304 4393 return -ENODEV; 4305 4394 4306 - *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); 4395 + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); 4307 4396 return 0; 4308 4397 } 4309 4398 ··· 4311 4400 i915_max_freq_set(void *data, u64 val) 4312 4401 { 4313 4402 struct drm_i915_private *dev_priv = data; 4403 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 4314 4404 u32 hw_max, hw_min; 4315 4405 int ret; 4316 4406 ··· 4320 4408 4321 4409 DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); 4322 4410 4323 - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); 4411 + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); 4324 4412 if (ret) 4325 4413 return ret; 4326 4414 ··· 4329 4417 */ 4330 4418 val = intel_freq_opcode(dev_priv, val); 4331 4419 4332 - hw_max = dev_priv->rps.max_freq; 4333 - hw_min = dev_priv->rps.min_freq; 4420 + hw_max = rps->max_freq; 4421 + hw_min = rps->min_freq; 4334 4422 4335 - if (val < hw_min || val > hw_max || val < dev_priv->rps.min_freq_softlimit) { 4336 - mutex_unlock(&dev_priv->rps.hw_lock); 4423 + if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { 4424 + mutex_unlock(&dev_priv->pcu_lock); 4337 4425 return -EINVAL; 4338 4426 } 4339 4427 4340 - 
dev_priv->rps.max_freq_softlimit = val; 4428 + rps->max_freq_softlimit = val; 4341 4429 4342 4430 if (intel_set_rps(dev_priv, val)) 4343 4431 DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); 4344 4432 4345 - mutex_unlock(&dev_priv->rps.hw_lock); 4433 + mutex_unlock(&dev_priv->pcu_lock); 4346 4434 4347 4435 return 0; 4348 4436 } ··· 4359 4447 if (INTEL_GEN(dev_priv) < 6) 4360 4448 return -ENODEV; 4361 4449 4362 - *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); 4450 + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); 4363 4451 return 0; 4364 4452 } 4365 4453 ··· 4367 4455 i915_min_freq_set(void *data, u64 val) 4368 4456 { 4369 4457 struct drm_i915_private *dev_priv = data; 4458 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 4370 4459 u32 hw_max, hw_min; 4371 4460 int ret; 4372 4461 ··· 4376 4463 4377 4464 DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); 4378 4465 4379 - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); 4466 + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); 4380 4467 if (ret) 4381 4468 return ret; 4382 4469 ··· 4385 4472 */ 4386 4473 val = intel_freq_opcode(dev_priv, val); 4387 4474 4388 - hw_max = dev_priv->rps.max_freq; 4389 - hw_min = dev_priv->rps.min_freq; 4475 + hw_max = rps->max_freq; 4476 + hw_min = rps->min_freq; 4390 4477 4391 4478 if (val < hw_min || 4392 - val > hw_max || val > dev_priv->rps.max_freq_softlimit) { 4393 - mutex_unlock(&dev_priv->rps.hw_lock); 4479 + val > hw_max || val > rps->max_freq_softlimit) { 4480 + mutex_unlock(&dev_priv->pcu_lock); 4394 4481 return -EINVAL; 4395 4482 } 4396 4483 4397 - dev_priv->rps.min_freq_softlimit = val; 4484 + rps->min_freq_softlimit = val; 4398 4485 4399 4486 if (intel_set_rps(dev_priv, val)) 4400 4487 DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); 4401 4488 4402 - mutex_unlock(&dev_priv->rps.hw_lock); 4489 + mutex_unlock(&dev_priv->pcu_lock); 4403 4490 4404 4491 return 0; 4405 4492 }
+33 -15
drivers/gpu/drm/i915/i915_drv.c
··· 367 367 value = i915_gem_mmap_gtt_version(); 368 368 break; 369 369 case I915_PARAM_HAS_SCHEDULER: 370 - value = dev_priv->engine[RCS] && 371 - dev_priv->engine[RCS]->schedule; 370 + value = 0; 371 + if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) { 372 + value |= I915_SCHEDULER_CAP_ENABLED; 373 + value |= I915_SCHEDULER_CAP_PRIORITY; 374 + 375 + if (INTEL_INFO(dev_priv)->has_logical_ring_preemption && 376 + i915_modparams.enable_execlists && 377 + !i915_modparams.enable_guc_submission) 378 + value |= I915_SCHEDULER_CAP_PREEMPTION; 379 + } 372 380 break; 381 + 373 382 case I915_PARAM_MMAP_VERSION: 374 383 /* Remember to bump this if the version changes! */ 375 384 case I915_PARAM_HAS_GEM: ··· 615 606 intel_uc_fini_hw(dev_priv); 616 607 i915_gem_cleanup_engines(dev_priv); 617 608 i915_gem_contexts_fini(dev_priv); 618 - i915_gem_cleanup_userptr(dev_priv); 619 609 mutex_unlock(&dev_priv->drm.struct_mutex); 610 + 611 + i915_gem_cleanup_userptr(dev_priv); 620 612 621 613 i915_gem_drain_freed_objects(dev_priv); 622 614 ··· 1016 1006 goto err_bridge; 1017 1007 1018 1008 intel_uncore_init(dev_priv); 1009 + 1010 + intel_uc_init_mmio(dev_priv); 1019 1011 1020 1012 ret = intel_engines_init_mmio(dev_priv); 1021 1013 if (ret) ··· 1592 1580 1593 1581 intel_display_set_init_power(dev_priv, false); 1594 1582 1595 - fw_csr = !IS_GEN9_LP(dev_priv) && 1583 + fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation && 1596 1584 suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; 1597 1585 /* 1598 1586 * In case of firmware assisted context save/restore don't manually ··· 2082 2070 /* freeze: before creating the hibernation_image */ 2083 2071 static int i915_pm_freeze(struct device *kdev) 2084 2072 { 2073 + struct drm_device *dev = &kdev_to_i915(kdev)->drm; 2085 2074 int ret; 2086 2075 2087 - ret = i915_pm_suspend(kdev); 2088 - if (ret) 2089 - return ret; 2076 + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { 2077 + ret = i915_drm_suspend(dev); 2078 + if (ret) 2079 + 
return ret; 2080 + } 2090 2081 2091 2082 ret = i915_gem_freeze(kdev_to_i915(kdev)); 2092 2083 if (ret) ··· 2100 2085 2101 2086 static int i915_pm_freeze_late(struct device *kdev) 2102 2087 { 2088 + struct drm_device *dev = &kdev_to_i915(kdev)->drm; 2103 2089 int ret; 2104 2090 2105 - ret = i915_pm_suspend_late(kdev); 2106 - if (ret) 2107 - return ret; 2091 + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { 2092 + ret = i915_drm_suspend_late(dev, true); 2093 + if (ret) 2094 + return ret; 2095 + } 2108 2096 2109 2097 ret = i915_gem_freeze_late(kdev_to_i915(kdev)); 2110 2098 if (ret) ··· 2503 2485 struct drm_i915_private *dev_priv = to_i915(dev); 2504 2486 int ret; 2505 2487 2506 - if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6()))) 2488 + if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && intel_rc6_enabled()))) 2507 2489 return -ENODEV; 2508 2490 2509 2491 if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) ··· 2545 2527 intel_uncore_suspend(dev_priv); 2546 2528 2547 2529 enable_rpm_wakeref_asserts(dev_priv); 2548 - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); 2530 + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); 2549 2531 2550 2532 if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv)) 2551 2533 DRM_ERROR("Unclaimed access detected prior to suspending\n"); 2552 2534 2553 - dev_priv->pm.suspended = true; 2535 + dev_priv->runtime_pm.suspended = true; 2554 2536 2555 2537 /* 2556 2538 * FIXME: We really should find a document that references the arguments ··· 2596 2578 2597 2579 DRM_DEBUG_KMS("Resuming device\n"); 2598 2580 2599 - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); 2581 + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); 2600 2582 disable_rpm_wakeref_asserts(dev_priv); 2601 2583 2602 2584 intel_opregion_notify_adapter(dev_priv, PCI_D0); 2603 - dev_priv->pm.suspended = false; 2585 + dev_priv->runtime_pm.suspended = false; 2604 2586 if (intel_uncore_unclaimed_mmio(dev_priv)) 2605 2587 
DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n"); 2606 2588
+64 -19
drivers/gpu/drm/i915/i915_drv.h
··· 80 80 81 81 #define DRIVER_NAME "i915" 82 82 #define DRIVER_DESC "Intel Graphics" 83 - #define DRIVER_DATE "20170929" 84 - #define DRIVER_TIMESTAMP 1506682238 83 + #define DRIVER_DATE "20171012" 84 + #define DRIVER_TIMESTAMP 1507831511 85 85 86 86 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and 87 87 * WARN_ON()) for hw state sanity checks to check for unexpected conditions ··· 609 609 610 610 struct intel_rps_client { 611 611 atomic_t boosts; 612 - } rps; 612 + } rps_client; 613 613 614 614 unsigned int bsd_engine; 615 615 ··· 783 783 func(has_l3_dpf); \ 784 784 func(has_llc); \ 785 785 func(has_logical_ring_contexts); \ 786 + func(has_logical_ring_preemption); \ 786 787 func(has_overlay); \ 787 788 func(has_pipe_cxsr); \ 788 789 func(has_pooled_eu); \ ··· 868 867 u8 num_pipes; 869 868 u8 num_sprites[I915_MAX_PIPES]; 870 869 u8 num_scalers[I915_MAX_PIPES]; 870 + 871 + unsigned int page_sizes; /* page sizes supported by the HW */ 871 872 872 873 #define DEFINE_FLAG(name) u8 name:1 873 874 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); ··· 984 981 pid_t pid; 985 982 u32 handle; 986 983 u32 hw_id; 984 + int priority; 987 985 int ban_score; 988 986 int active; 989 987 int guilty; ··· 1007 1003 long jiffies; 1008 1004 pid_t pid; 1009 1005 u32 context; 1006 + int priority; 1010 1007 int ban_score; 1011 1008 u32 seqno; 1012 1009 u32 head; ··· 1317 1312 u32 media_c0; 1318 1313 }; 1319 1314 1320 - struct intel_gen6_power_mgmt { 1315 + struct intel_rps { 1321 1316 /* 1322 1317 * work, interrupts_enabled and pm_iir are protected by 1323 1318 * dev_priv->irq_lock ··· 1358 1353 enum { LOW_POWER, BETWEEN, HIGH_POWER } power; 1359 1354 1360 1355 bool enabled; 1361 - struct delayed_work autoenable_work; 1362 1356 atomic_t num_waiters; 1363 1357 atomic_t boosts; 1364 1358 1365 1359 /* manual wa residency calculations */ 1366 1360 struct intel_rps_ei ei; 1361 + }; 1367 1362 1368 - /* 1369 - * Protects RPS/RC6 register access and PCU communication. 
1370 - * Must be taken after struct_mutex if nested. Note that 1371 - * this lock may be held for long periods of time when 1372 - * talking to hw - so only take it when talking to hw! 1373 - */ 1374 - struct mutex hw_lock; 1363 + struct intel_rc6 { 1364 + bool enabled; 1365 + }; 1366 + 1367 + struct intel_llc_pstate { 1368 + bool enabled; 1369 + }; 1370 + 1371 + struct intel_gen6_power_mgmt { 1372 + struct intel_rps rps; 1373 + struct intel_rc6 rc6; 1374 + struct intel_llc_pstate llc_pstate; 1375 + struct delayed_work autoenable_work; 1375 1376 }; 1376 1377 1377 1378 /* defined intel_pm.c */ ··· 1518 1507 1519 1508 /** Usable portion of the GTT for GEM */ 1520 1509 dma_addr_t stolen_base; /* limited to low memory (32-bit) */ 1510 + 1511 + /** 1512 + * tmpfs instance used for shmem backed objects 1513 + */ 1514 + struct vfsmount *gemfs; 1521 1515 1522 1516 /** PPGTT used for aliasing the PPGTT with the GTT */ 1523 1517 struct i915_hw_ppgtt *aliasing_ppgtt; ··· 2267 2251 wait_queue_head_t gmbus_wait_queue; 2268 2252 2269 2253 struct pci_dev *bridge_dev; 2270 - struct i915_gem_context *kernel_context; 2271 2254 struct intel_engine_cs *engine[I915_NUM_ENGINES]; 2255 + /* Context used internally to idle the GPU and setup initial state */ 2256 + struct i915_gem_context *kernel_context; 2257 + /* Context only to be used for injecting preemption commands */ 2258 + struct i915_gem_context *preempt_context; 2272 2259 struct i915_vma *semaphore; 2273 2260 2274 2261 struct drm_dma_handle *status_page_dmah; ··· 2427 2408 /* Cannot be determined by PCIID. You must always read a register. */ 2428 2409 u32 edram_cap; 2429 2410 2430 - /* gen6+ rps state */ 2431 - struct intel_gen6_power_mgmt rps; 2411 + /* 2412 + * Protects RPS/RC6 register access and PCU communication. 2413 + * Must be taken after struct_mutex if nested. Note that 2414 + * this lock may be held for long periods of time when 2415 + * talking to hw - so only take it when talking to hw! 
2416 + */ 2417 + struct mutex pcu_lock; 2418 + 2419 + /* gen6+ GT PM state */ 2420 + struct intel_gen6_power_mgmt gt_pm; 2432 2421 2433 2422 /* ilk-only ips/rps state. Everything in here is protected by the global 2434 2423 * mchdev_lock in intel_pm.c */ ··· 2547 2520 bool distrust_bios_wm; 2548 2521 } wm; 2549 2522 2550 - struct i915_runtime_pm pm; 2523 + struct i915_runtime_pm runtime_pm; 2551 2524 2552 2525 struct { 2553 2526 bool initialized; ··· 2886 2859 (((__iter).curr += PAGE_SIZE) >= (__iter).max) ? \ 2887 2860 (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0) 2888 2861 2862 + static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg) 2863 + { 2864 + unsigned int page_sizes; 2865 + 2866 + page_sizes = 0; 2867 + while (sg) { 2868 + GEM_BUG_ON(sg->offset); 2869 + GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE)); 2870 + page_sizes |= sg->length; 2871 + sg = __sg_next(sg); 2872 + } 2873 + 2874 + return page_sizes; 2875 + } 2876 + 2889 2877 static inline unsigned int i915_sg_segment_size(void) 2890 2878 { 2891 2879 unsigned int size = swiotlb_max_segment(); ··· 3130 3088 #define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt) 3131 3089 #define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2) 3132 3090 #define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3) 3091 + #define HAS_PAGE_SIZES(dev_priv, sizes) ({ \ 3092 + GEM_BUG_ON((sizes) == 0); \ 3093 + ((sizes) & ~(dev_priv)->info.page_sizes) == 0; \ 3094 + }) 3133 3095 3134 3096 #define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay) 3135 3097 #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \ ··· 3550 3504 unsigned long n); 3551 3505 3552 3506 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 3553 - struct sg_table *pages); 3507 + struct sg_table *pages, 3508 + unsigned int sg_page_sizes); 3554 3509 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); 3555 3510 3556 3511 static inline int __must_check ··· 3773 3726 } 3774 3727 3775 
3728 /* i915_gem_fence_reg.c */ 3776 - int __must_check i915_vma_get_fence(struct i915_vma *vma); 3777 - int __must_check i915_vma_put_fence(struct i915_vma *vma); 3778 3729 struct drm_i915_fence_reg * 3779 3730 i915_reserve_fence(struct drm_i915_private *dev_priv); 3780 3731 void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
+264 -119
drivers/gpu/drm/i915/i915_gem.c
··· 35 35 #include "intel_drv.h" 36 36 #include "intel_frontbuffer.h" 37 37 #include "intel_mocs.h" 38 + #include "i915_gemfs.h" 38 39 #include <linux/dma-fence-array.h> 39 40 #include <linux/kthread.h> 40 41 #include <linux/reservation.h> ··· 162 161 return 0; 163 162 } 164 163 165 - static struct sg_table * 166 - i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 164 + static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 167 165 { 168 166 struct address_space *mapping = obj->base.filp->f_mapping; 169 167 drm_dma_handle_t *phys; ··· 170 170 struct scatterlist *sg; 171 171 char *vaddr; 172 172 int i; 173 + int err; 173 174 174 175 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 175 - return ERR_PTR(-EINVAL); 176 + return -EINVAL; 176 177 177 178 /* Always aligning to the object size, allows a single allocation 178 179 * to handle all possible callers, and given typical object sizes, ··· 183 182 roundup_pow_of_two(obj->base.size), 184 183 roundup_pow_of_two(obj->base.size)); 185 184 if (!phys) 186 - return ERR_PTR(-ENOMEM); 185 + return -ENOMEM; 187 186 188 187 vaddr = phys->vaddr; 189 188 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { ··· 192 191 193 192 page = shmem_read_mapping_page(mapping, i); 194 193 if (IS_ERR(page)) { 195 - st = ERR_CAST(page); 194 + err = PTR_ERR(page); 196 195 goto err_phys; 197 196 } 198 197 ··· 209 208 210 209 st = kmalloc(sizeof(*st), GFP_KERNEL); 211 210 if (!st) { 212 - st = ERR_PTR(-ENOMEM); 211 + err = -ENOMEM; 213 212 goto err_phys; 214 213 } 215 214 216 215 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 217 216 kfree(st); 218 - st = ERR_PTR(-ENOMEM); 217 + err = -ENOMEM; 219 218 goto err_phys; 220 219 } 221 220 ··· 227 226 sg_dma_len(sg) = obj->base.size; 228 227 229 228 obj->phys_handle = phys; 230 - return st; 229 + 230 + __i915_gem_object_set_pages(obj, st, sg->length); 231 + 232 + return 0; 231 233 232 234 err_phys: 233 235 drm_pci_free(obj->base.dev, phys); 234 - return st; 236 + 237 + 
return err; 235 238 } 236 239 237 240 static void __start_cpu_write(struct drm_i915_gem_object *obj) ··· 358 353 i915_gem_object_wait_fence(struct dma_fence *fence, 359 354 unsigned int flags, 360 355 long timeout, 361 - struct intel_rps_client *rps) 356 + struct intel_rps_client *rps_client) 362 357 { 363 358 struct drm_i915_gem_request *rq; 364 359 ··· 391 386 * forcing the clocks too high for the whole system, we only allow 392 387 * each client to waitboost once in a busy period. 393 388 */ 394 - if (rps) { 389 + if (rps_client) { 395 390 if (INTEL_GEN(rq->i915) >= 6) 396 - gen6_rps_boost(rq, rps); 391 + gen6_rps_boost(rq, rps_client); 397 392 else 398 - rps = NULL; 393 + rps_client = NULL; 399 394 } 400 395 401 396 timeout = i915_wait_request(rq, flags, timeout); ··· 411 406 i915_gem_object_wait_reservation(struct reservation_object *resv, 412 407 unsigned int flags, 413 408 long timeout, 414 - struct intel_rps_client *rps) 409 + struct intel_rps_client *rps_client) 415 410 { 416 411 unsigned int seq = __read_seqcount_begin(&resv->seq); 417 412 struct dma_fence *excl; ··· 430 425 for (i = 0; i < count; i++) { 431 426 timeout = i915_gem_object_wait_fence(shared[i], 432 427 flags, timeout, 433 - rps); 428 + rps_client); 434 429 if (timeout < 0) 435 430 break; 436 431 ··· 447 442 } 448 443 449 444 if (excl && timeout >= 0) { 450 - timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); 445 + timeout = i915_gem_object_wait_fence(excl, flags, timeout, 446 + rps_client); 451 447 prune_fences = timeout >= 0; 452 448 } 453 449 ··· 544 538 i915_gem_object_wait(struct drm_i915_gem_object *obj, 545 539 unsigned int flags, 546 540 long timeout, 547 - struct intel_rps_client *rps) 541 + struct intel_rps_client *rps_client) 548 542 { 549 543 might_sleep(); 550 544 #if IS_ENABLED(CONFIG_LOCKDEP) ··· 556 550 557 551 timeout = i915_gem_object_wait_reservation(obj->resv, 558 552 flags, timeout, 559 - rps); 553 + rps_client); 560 554 return timeout < 0 ? 
timeout : 0; 561 555 } 562 556 ··· 564 558 { 565 559 struct drm_i915_file_private *fpriv = file->driver_priv; 566 560 567 - return &fpriv->rps; 561 + return &fpriv->rps_client; 568 562 } 569 563 570 564 static int ··· 1056 1050 1057 1051 intel_runtime_pm_get(i915); 1058 1052 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1059 - PIN_MAPPABLE | PIN_NONBLOCK); 1053 + PIN_MAPPABLE | 1054 + PIN_NONFAULT | 1055 + PIN_NONBLOCK); 1060 1056 if (!IS_ERR(vma)) { 1061 1057 node.start = i915_ggtt_offset(vma); 1062 1058 node.allocated = false; ··· 1242 1234 1243 1235 intel_runtime_pm_get(i915); 1244 1236 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1245 - PIN_MAPPABLE | PIN_NONBLOCK); 1237 + PIN_MAPPABLE | 1238 + PIN_NONFAULT | 1239 + PIN_NONBLOCK); 1246 1240 if (!IS_ERR(vma)) { 1247 1241 node.start = i915_ggtt_offset(vma); 1248 1242 node.allocated = false; ··· 1915 1905 if (ret) 1916 1906 goto err_unpin; 1917 1907 1918 - ret = i915_vma_get_fence(vma); 1908 + ret = i915_vma_pin_fence(vma); 1919 1909 if (ret) 1920 1910 goto err_unpin; 1921 - 1922 - /* Mark as being mmapped into userspace for later revocation */ 1923 - assert_rpm_wakelock_held(dev_priv); 1924 - if (list_empty(&obj->userfault_link)) 1925 - list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 1926 1911 1927 1912 /* Finally, remap it using the new GTT offset */ 1928 1913 ret = remap_io_mapping(area, ··· 1925 1920 (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, 1926 1921 min_t(u64, vma->size, area->vm_end - area->vm_start), 1927 1922 &ggtt->mappable); 1923 + if (ret) 1924 + goto err_fence; 1928 1925 1926 + /* Mark as being mmapped into userspace for later revocation */ 1927 + assert_rpm_wakelock_held(dev_priv); 1928 + if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 1929 + list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 1930 + GEM_BUG_ON(!obj->userfault_count); 1931 + 1932 + err_fence: 1933 + i915_vma_unpin_fence(vma); 1929 1934 err_unpin: 1930 1935 __i915_vma_unpin(vma); 
1931 1936 err_unlock: ··· 1987 1972 return ret; 1988 1973 } 1989 1974 1975 + static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 1976 + { 1977 + struct i915_vma *vma; 1978 + 1979 + GEM_BUG_ON(!obj->userfault_count); 1980 + 1981 + obj->userfault_count = 0; 1982 + list_del(&obj->userfault_link); 1983 + drm_vma_node_unmap(&obj->base.vma_node, 1984 + obj->base.dev->anon_inode->i_mapping); 1985 + 1986 + list_for_each_entry(vma, &obj->vma_list, obj_link) { 1987 + if (!i915_vma_is_ggtt(vma)) 1988 + break; 1989 + 1990 + i915_vma_unset_userfault(vma); 1991 + } 1992 + } 1993 + 1990 1994 /** 1991 1995 * i915_gem_release_mmap - remove physical page mappings 1992 1996 * @obj: obj in question ··· 2036 2002 lockdep_assert_held(&i915->drm.struct_mutex); 2037 2003 intel_runtime_pm_get(i915); 2038 2004 2039 - if (list_empty(&obj->userfault_link)) 2005 + if (!obj->userfault_count) 2040 2006 goto out; 2041 2007 2042 - list_del_init(&obj->userfault_link); 2043 - drm_vma_node_unmap(&obj->base.vma_node, 2044 - obj->base.dev->anon_inode->i_mapping); 2008 + __i915_gem_object_release_mmap(obj); 2045 2009 2046 2010 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2047 2011 * memory transactions from userspace before we return. The TLB ··· 2067 2035 */ 2068 2036 2069 2037 list_for_each_entry_safe(obj, on, 2070 - &dev_priv->mm.userfault_list, userfault_link) { 2071 - list_del_init(&obj->userfault_link); 2072 - drm_vma_node_unmap(&obj->base.vma_node, 2073 - obj->base.dev->anon_inode->i_mapping); 2074 - } 2038 + &dev_priv->mm.userfault_list, userfault_link) 2039 + __i915_gem_object_release_mmap(obj); 2075 2040 2076 2041 /* The fence will be lost when the device powers down. If any were 2077 2042 * in use by hardware (i.e. 
they are pinned), we should not be powering ··· 2091 2062 if (!reg->vma) 2092 2063 continue; 2093 2064 2094 - GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link)); 2065 + GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2095 2066 reg->dirty = true; 2096 2067 } 2097 2068 } ··· 2290 2261 if (!IS_ERR(pages)) 2291 2262 obj->ops->put_pages(obj, pages); 2292 2263 2264 + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2265 + 2293 2266 unlock: 2294 2267 mutex_unlock(&obj->mm.lock); 2295 2268 } ··· 2322 2291 return true; 2323 2292 } 2324 2293 2325 - static struct sg_table * 2326 - i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2294 + static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2327 2295 { 2328 2296 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2329 2297 const unsigned long page_count = obj->base.size / PAGE_SIZE; ··· 2334 2304 struct page *page; 2335 2305 unsigned long last_pfn = 0; /* suppress gcc warning */ 2336 2306 unsigned int max_segment = i915_sg_segment_size(); 2307 + unsigned int sg_page_sizes; 2337 2308 gfp_t noreclaim; 2338 2309 int ret; 2339 2310 ··· 2347 2316 2348 2317 st = kmalloc(sizeof(*st), GFP_KERNEL); 2349 2318 if (st == NULL) 2350 - return ERR_PTR(-ENOMEM); 2319 + return -ENOMEM; 2351 2320 2352 2321 rebuild_st: 2353 2322 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2354 2323 kfree(st); 2355 - return ERR_PTR(-ENOMEM); 2324 + return -ENOMEM; 2356 2325 } 2357 2326 2358 2327 /* Get the list of pages out of our struct file. 
They'll be pinned ··· 2366 2335 2367 2336 sg = st->sgl; 2368 2337 st->nents = 0; 2338 + sg_page_sizes = 0; 2369 2339 for (i = 0; i < page_count; i++) { 2370 2340 const unsigned int shrink[] = { 2371 2341 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, ··· 2419 2387 if (!i || 2420 2388 sg->length >= max_segment || 2421 2389 page_to_pfn(page) != last_pfn + 1) { 2422 - if (i) 2390 + if (i) { 2391 + sg_page_sizes |= sg->length; 2423 2392 sg = sg_next(sg); 2393 + } 2424 2394 st->nents++; 2425 2395 sg_set_page(sg, page, PAGE_SIZE, 0); 2426 2396 } else { ··· 2433 2399 /* Check that the i965g/gm workaround works. */ 2434 2400 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2435 2401 } 2436 - if (sg) /* loop terminated early; short sg table */ 2402 + if (sg) { /* loop terminated early; short sg table */ 2403 + sg_page_sizes |= sg->length; 2437 2404 sg_mark_end(sg); 2405 + } 2438 2406 2439 2407 /* Trim unused sg entries to avoid wasting memory. */ 2440 2408 i915_sg_trim(st); ··· 2465 2429 if (i915_gem_object_needs_bit17_swizzle(obj)) 2466 2430 i915_gem_object_do_bit_17_swizzle(obj, st); 2467 2431 2468 - return st; 2432 + __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2433 + 2434 + return 0; 2469 2435 2470 2436 err_sg: 2471 2437 sg_mark_end(sg); ··· 2488 2450 if (ret == -ENOSPC) 2489 2451 ret = -ENOMEM; 2490 2452 2491 - return ERR_PTR(ret); 2453 + return ret; 2492 2454 } 2493 2455 2494 2456 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2495 - struct sg_table *pages) 2457 + struct sg_table *pages, 2458 + unsigned int sg_page_sizes) 2496 2459 { 2460 + struct drm_i915_private *i915 = to_i915(obj->base.dev); 2461 + unsigned long supported = INTEL_INFO(i915)->page_sizes; 2462 + int i; 2463 + 2497 2464 lockdep_assert_held(&obj->mm.lock); 2498 2465 2499 2466 obj->mm.get_page.sg_pos = pages->sgl; ··· 2512 2469 __i915_gem_object_pin_pages(obj); 2513 2470 obj->mm.quirked = true; 2514 2471 } 2472 + 2473 + GEM_BUG_ON(!sg_page_sizes); 
2474 + obj->mm.page_sizes.phys = sg_page_sizes; 2475 + 2476 + /* 2477 + * Calculate the supported page-sizes which fit into the given 2478 + * sg_page_sizes. This will give us the page-sizes which we may be able 2479 + * to use opportunistically when later inserting into the GTT. For 2480 + * example if phys=2G, then in theory we should be able to use 1G, 2M, 2481 + * 64K or 4K pages, although in practice this will depend on a number of 2482 + * other factors. 2483 + */ 2484 + obj->mm.page_sizes.sg = 0; 2485 + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2486 + if (obj->mm.page_sizes.phys & ~0u << i) 2487 + obj->mm.page_sizes.sg |= BIT(i); 2488 + } 2489 + 2490 + GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2515 2491 } 2516 2492 2517 2493 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2518 2494 { 2519 - struct sg_table *pages; 2495 + int err; 2520 2496 2521 2497 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2522 2498 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2523 2499 return -EFAULT; 2524 2500 } 2525 2501 2526 - pages = obj->ops->get_pages(obj); 2527 - if (unlikely(IS_ERR(pages))) 2528 - return PTR_ERR(pages); 2502 + err = obj->ops->get_pages(obj); 2503 + GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages)); 2529 2504 2530 - __i915_gem_object_set_pages(obj, pages); 2531 - return 0; 2505 + return err; 2532 2506 } 2533 2507 2534 2508 /* Ensure that the associated pages are gathered from the backing storage ··· 2856 2796 { 2857 2797 struct drm_i915_gem_request *request = NULL; 2858 2798 2859 - /* Prevent the signaler thread from updating the request 2799 + /* 2800 + * During the reset sequence, we must prevent the engine from 2801 + * entering RC6. As the context state is undefined until we restart 2802 + * the engine, if it does enter RC6 during the reset, the state 2803 + * written to the powercontext is undefined and so we may lose 2804 + * GPU state upon resume, i.e. 
fail to restart after a reset. 2805 + */ 2806 + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 2807 + 2808 + /* 2809 + * Prevent the signaler thread from updating the request 2860 2810 * state (by calling dma_fence_signal) as we are processing 2861 2811 * the reset. The write from the GPU of the seqno is 2862 2812 * asynchronous and the signaler thread may see a different ··· 2877 2807 */ 2878 2808 kthread_park(engine->breadcrumbs.signaler); 2879 2809 2880 - /* Prevent request submission to the hardware until we have 2810 + /* 2811 + * Prevent request submission to the hardware until we have 2881 2812 * completed the reset in i915_gem_reset_finish(). If a request 2882 2813 * is completed by one engine, it may then queue a request 2883 2814 * to a second via its engine->irq_tasklet *just* as we are ··· 3068 2997 { 3069 2998 tasklet_enable(&engine->execlists.irq_tasklet); 3070 2999 kthread_unpark(engine->breadcrumbs.signaler); 3000 + 3001 + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3071 3002 } 3072 3003 3073 3004 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) ··· 3089 3016 { 3090 3017 GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); 3091 3018 dma_fence_set_error(&request->fence, -EIO); 3019 + 3092 3020 i915_gem_request_submit(request); 3021 + } 3022 + 3023 + static void nop_complete_submit_request(struct drm_i915_gem_request *request) 3024 + { 3025 + unsigned long flags; 3026 + 3027 + GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); 3028 + dma_fence_set_error(&request->fence, -EIO); 3029 + 3030 + spin_lock_irqsave(&request->engine->timeline->lock, flags); 3031 + __i915_gem_request_submit(request); 3093 3032 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3033 + spin_unlock_irqrestore(&request->engine->timeline->lock, flags); 3094 3034 } 3095 3035 3096 - static void engine_set_wedged(struct intel_engine_cs *engine) 3036 + void i915_gem_set_wedged(struct drm_i915_private 
*i915) 3097 3037 { 3098 - /* We need to be sure that no thread is running the old callback as 3099 - * we install the nop handler (otherwise we would submit a request 3100 - * to hardware that will never complete). In order to prevent this 3101 - * race, we wait until the machine is idle before making the swap 3102 - * (using stop_machine()). 3103 - */ 3104 - engine->submit_request = nop_submit_request; 3105 - 3106 - /* Mark all executing requests as skipped */ 3107 - engine->cancel_requests(engine); 3108 - 3109 - /* Mark all pending requests as complete so that any concurrent 3110 - * (lockless) lookup doesn't try and wait upon the request as we 3111 - * reset it. 3112 - */ 3113 - intel_engine_init_global_seqno(engine, 3114 - intel_engine_last_submit(engine)); 3115 - } 3116 - 3117 - static int __i915_gem_set_wedged_BKL(void *data) 3118 - { 3119 - struct drm_i915_private *i915 = data; 3120 3038 struct intel_engine_cs *engine; 3121 3039 enum intel_engine_id id; 3122 3040 3041 + /* 3042 + * First, stop submission to hw, but do not yet complete requests by 3043 + * rolling the global seqno forward (since this would complete requests 3044 + * for which we haven't set the fence error to EIO yet). 3045 + */ 3123 3046 for_each_engine(engine, i915, id) 3124 - engine_set_wedged(engine); 3047 + engine->submit_request = nop_submit_request; 3048 + 3049 + /* 3050 + * Make sure no one is running the old callback before we proceed with 3051 + * cancelling requests and resetting the completion tracking. Otherwise 3052 + * we might submit a request to the hardware which never completes. 3053 + */ 3054 + synchronize_rcu(); 3055 + 3056 + for_each_engine(engine, i915, id) { 3057 + /* Mark all executing requests as skipped */ 3058 + engine->cancel_requests(engine); 3059 + 3060 + /* 3061 + * Only once we've force-cancelled all in-flight requests can we 3062 + * start to complete all requests. 
3063 + */ 3064 + engine->submit_request = nop_complete_submit_request; 3065 + } 3066 + 3067 + /* 3068 + * Make sure no request can slip through without getting completed by 3069 + * either this call here to intel_engine_init_global_seqno, or the one 3070 + * in nop_complete_submit_request. 3071 + */ 3072 + synchronize_rcu(); 3073 + 3074 + for_each_engine(engine, i915, id) { 3075 + unsigned long flags; 3076 + 3077 + /* Mark all pending requests as complete so that any concurrent 3078 + * (lockless) lookup doesn't try and wait upon the request as we 3079 + * reset it. 3080 + */ 3081 + spin_lock_irqsave(&engine->timeline->lock, flags); 3082 + intel_engine_init_global_seqno(engine, 3083 + intel_engine_last_submit(engine)); 3084 + spin_unlock_irqrestore(&engine->timeline->lock, flags); 3085 + } 3125 3086 3126 3087 set_bit(I915_WEDGED, &i915->gpu_error.flags); 3127 3088 wake_up_all(&i915->gpu_error.reset_queue); 3128 - 3129 - return 0; 3130 - } 3131 - 3132 - void i915_gem_set_wedged(struct drm_i915_private *dev_priv) 3133 - { 3134 - stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); 3135 3089 } 3136 3090 3137 3091 bool i915_gem_unset_wedged(struct drm_i915_private *i915) ··· 4059 3959 4060 3960 lockdep_assert_held(&obj->base.dev->struct_mutex); 4061 3961 3962 + if (!view && flags & PIN_MAPPABLE) { 3963 + /* If the required space is larger than the available 3964 + * aperture, we will not able to find a slot for the 3965 + * object and unbinding the object now will be in 3966 + * vain. Worse, doing so may cause us to ping-pong 3967 + * the object in and out of the Global GTT and 3968 + * waste a lot of cycles under the mutex. 3969 + */ 3970 + if (obj->base.size > dev_priv->ggtt.mappable_end) 3971 + return ERR_PTR(-E2BIG); 3972 + 3973 + /* If NONBLOCK is set the caller is optimistically 3974 + * trying to cache the full object within the mappable 3975 + * aperture, and *must* have a fallback in place for 3976 + * situations where we cannot bind the object. 
We 3977 + * can be a little more lax here and use the fallback 3978 + * more often to avoid costly migrations of ourselves 3979 + * and other objects within the aperture. 3980 + * 3981 + * Half-the-aperture is used as a simple heuristic. 3982 + * More interesting would to do search for a free 3983 + * block prior to making the commitment to unbind. 3984 + * That caters for the self-harm case, and with a 3985 + * little more heuristics (e.g. NOFAULT, NOEVICT) 3986 + * we could try to minimise harm to others. 3987 + */ 3988 + if (flags & PIN_NONBLOCK && 3989 + obj->base.size > dev_priv->ggtt.mappable_end / 2) 3990 + return ERR_PTR(-ENOSPC); 3991 + } 3992 + 4062 3993 vma = i915_vma_instance(obj, vm, view); 4063 3994 if (unlikely(IS_ERR(vma))) 4064 3995 return vma; 4065 3996 4066 3997 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4067 - if (flags & PIN_NONBLOCK && 4068 - (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) 4069 - return ERR_PTR(-ENOSPC); 3998 + if (flags & PIN_NONBLOCK) { 3999 + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4000 + return ERR_PTR(-ENOSPC); 4070 4001 4071 - if (flags & PIN_MAPPABLE) { 4072 - /* If the required space is larger than the available 4073 - * aperture, we will not able to find a slot for the 4074 - * object and unbinding the object now will be in 4075 - * vain. Worse, doing so may cause us to ping-pong 4076 - * the object in and out of the Global GTT and 4077 - * waste a lot of cycles under the mutex. 4078 - */ 4079 - if (vma->fence_size > dev_priv->ggtt.mappable_end) 4080 - return ERR_PTR(-E2BIG); 4081 - 4082 - /* If NONBLOCK is set the caller is optimistically 4083 - * trying to cache the full object within the mappable 4084 - * aperture, and *must* have a fallback in place for 4085 - * situations where we cannot bind the object. We 4086 - * can be a little more lax here and use the fallback 4087 - * more often to avoid costly migrations of ourselves 4088 - * and other objects within the aperture. 
4089 - * 4090 - * Half-the-aperture is used as a simple heuristic. 4091 - * More interesting would to do search for a free 4092 - * block prior to making the commitment to unbind. 4093 - * That caters for the self-harm case, and with a 4094 - * little more heuristics (e.g. NOFAULT, NOEVICT) 4095 - * we could try to minimise harm to others. 4096 - */ 4097 - if (flags & PIN_NONBLOCK && 4002 + if (flags & PIN_MAPPABLE && 4098 4003 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4099 4004 return ERR_PTR(-ENOSPC); 4100 4005 } ··· 4326 4221 mutex_init(&obj->mm.lock); 4327 4222 4328 4223 INIT_LIST_HEAD(&obj->global_link); 4329 - INIT_LIST_HEAD(&obj->userfault_link); 4330 4224 INIT_LIST_HEAD(&obj->vma_list); 4331 4225 INIT_LIST_HEAD(&obj->lut_list); 4332 4226 INIT_LIST_HEAD(&obj->batch_pool_link); ··· 4355 4251 .pwrite = i915_gem_object_pwrite_gtt, 4356 4252 }; 4357 4253 4254 + static int i915_gem_object_create_shmem(struct drm_device *dev, 4255 + struct drm_gem_object *obj, 4256 + size_t size) 4257 + { 4258 + struct drm_i915_private *i915 = to_i915(dev); 4259 + unsigned long flags = VM_NORESERVE; 4260 + struct file *filp; 4261 + 4262 + drm_gem_private_object_init(dev, obj, size); 4263 + 4264 + if (i915->mm.gemfs) 4265 + filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4266 + flags); 4267 + else 4268 + filp = shmem_file_setup("i915", size, flags); 4269 + 4270 + if (IS_ERR(filp)) 4271 + return PTR_ERR(filp); 4272 + 4273 + obj->filp = filp; 4274 + 4275 + return 0; 4276 + } 4277 + 4358 4278 struct drm_i915_gem_object * 4359 4279 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4360 4280 { ··· 4403 4275 if (obj == NULL) 4404 4276 return ERR_PTR(-ENOMEM); 4405 4277 4406 - ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size); 4278 + ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4407 4279 if (ret) 4408 4280 goto fail; 4409 4281 ··· 4506 4378 4507 4379 llist_for_each_entry_safe(obj, on, freed, freed) { 4508 4380 
GEM_BUG_ON(obj->bind_count); 4381 + GEM_BUG_ON(obj->userfault_count); 4509 4382 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4510 4383 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4511 4384 ··· 4676 4547 /* As the idle_work is rearming if it detects a race, play safe and 4677 4548 * repeat the flush until it is definitely idle. 4678 4549 */ 4679 - while (flush_delayed_work(&dev_priv->gt.idle_work)) 4680 - ; 4550 + drain_delayed_work(&dev_priv->gt.idle_work); 4681 4551 4682 4552 /* Assert that we sucessfully flushed all the work and 4683 4553 * reset the GPU back to its idle, low power state. ··· 4723 4595 4724 4596 mutex_lock(&dev->struct_mutex); 4725 4597 i915_gem_restore_gtt_mappings(dev_priv); 4598 + i915_gem_restore_fences(dev_priv); 4726 4599 4727 4600 /* As we didn't flush the kernel context before suspend, we cannot 4728 4601 * guarantee that the context image is complete. So let's just reset ··· 4886 4757 4887 4758 mutex_lock(&dev_priv->drm.struct_mutex); 4888 4759 4760 + /* 4761 + * We need to fallback to 4K pages since gvt gtt handling doesn't 4762 + * support huge page entries - we will need to check either hypervisor 4763 + * mm can support huge guest page or just do emulation in gvt. 
4764 + */ 4765 + if (intel_vgpu_active(dev_priv)) 4766 + mkwrite_device_info(dev_priv)->page_sizes = 4767 + I915_GTT_PAGE_SIZE_4K; 4768 + 4889 4769 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 4890 4770 4891 4771 if (!i915_modparams.enable_execlists) { ··· 5052 4914 5053 4915 spin_lock_init(&dev_priv->fb_tracking.lock); 5054 4916 4917 + err = i915_gemfs_init(dev_priv); 4918 + if (err) 4919 + DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 4920 + 5055 4921 return 0; 5056 4922 5057 4923 err_priorities: ··· 5094 4952 5095 4953 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 5096 4954 rcu_barrier(); 4955 + 4956 + i915_gemfs_fini(dev_priv); 5097 4957 } 5098 4958 5099 4959 int i915_gem_freeze(struct drm_i915_private *dev_priv) ··· 5485 5341 #include "selftests/scatterlist.c" 5486 5342 #include "selftests/mock_gem_device.c" 5487 5343 #include "selftests/huge_gem_object.c" 5344 + #include "selftests/huge_pages.c" 5488 5345 #include "selftests/i915_gem_object.c" 5489 5346 #include "selftests/i915_gem_coherency.c" 5490 5347 #endif
+79 -22
drivers/gpu/drm/i915/i915_gem_context.c
··· 416 416 return ctx; 417 417 } 418 418 419 - int i915_gem_contexts_init(struct drm_i915_private *dev_priv) 419 + static struct i915_gem_context * 420 + create_kernel_context(struct drm_i915_private *i915, int prio) 420 421 { 421 422 struct i915_gem_context *ctx; 422 423 423 - /* Init should only be called once per module load. Eventually the 424 - * restriction on the context_disabled check can be loosened. */ 425 - if (WARN_ON(dev_priv->kernel_context)) 426 - return 0; 424 + ctx = i915_gem_create_context(i915, NULL); 425 + if (IS_ERR(ctx)) 426 + return ctx; 427 + 428 + i915_gem_context_clear_bannable(ctx); 429 + ctx->priority = prio; 430 + ctx->ring_size = PAGE_SIZE; 431 + 432 + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); 433 + 434 + return ctx; 435 + } 436 + 437 + static void 438 + destroy_kernel_context(struct i915_gem_context **ctxp) 439 + { 440 + struct i915_gem_context *ctx; 441 + 442 + /* Keep the context ref so that we can free it immediately ourselves */ 443 + ctx = i915_gem_context_get(fetch_and_zero(ctxp)); 444 + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); 445 + 446 + context_close(ctx); 447 + i915_gem_context_free(ctx); 448 + } 449 + 450 + int i915_gem_contexts_init(struct drm_i915_private *dev_priv) 451 + { 452 + struct i915_gem_context *ctx; 453 + int err; 454 + 455 + GEM_BUG_ON(dev_priv->kernel_context); 427 456 428 457 INIT_LIST_HEAD(&dev_priv->contexts.list); 429 458 INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); ··· 470 441 BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); 471 442 ida_init(&dev_priv->contexts.hw_ida); 472 443 473 - ctx = i915_gem_create_context(dev_priv, NULL); 444 + /* lowest priority; idle task */ 445 + ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN); 474 446 if (IS_ERR(ctx)) { 475 - DRM_ERROR("Failed to create default global context (error %ld)\n", 476 - PTR_ERR(ctx)); 477 - return PTR_ERR(ctx); 447 + DRM_ERROR("Failed to create default global context\n"); 448 + err = PTR_ERR(ctx); 449 + goto err; 478 
450 } 479 - 480 - /* For easy recognisablity, we want the kernel context to be 0 and then 451 + /* 452 + * For easy recognisablity, we want the kernel context to be 0 and then 481 453 * all user contexts will have non-zero hw_id. 482 454 */ 483 455 GEM_BUG_ON(ctx->hw_id); 484 - 485 - i915_gem_context_clear_bannable(ctx); 486 - ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ 487 456 dev_priv->kernel_context = ctx; 488 457 489 - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); 458 + /* highest priority; preempting task */ 459 + ctx = create_kernel_context(dev_priv, INT_MAX); 460 + if (IS_ERR(ctx)) { 461 + DRM_ERROR("Failed to create default preempt context\n"); 462 + err = PTR_ERR(ctx); 463 + goto err_kernel_context; 464 + } 465 + dev_priv->preempt_context = ctx; 490 466 491 467 DRM_DEBUG_DRIVER("%s context support initialized\n", 492 468 dev_priv->engine[RCS]->context_size ? "logical" : 493 469 "fake"); 494 470 return 0; 471 + 472 + err_kernel_context: 473 + destroy_kernel_context(&dev_priv->kernel_context); 474 + err: 475 + return err; 495 476 } 496 477 497 478 void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) ··· 546 507 547 508 void i915_gem_contexts_fini(struct drm_i915_private *i915) 548 509 { 549 - struct i915_gem_context *ctx; 550 - 551 510 lockdep_assert_held(&i915->drm.struct_mutex); 552 511 553 - /* Keep the context so that we can free it immediately ourselves */ 554 - ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context)); 555 - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); 556 - context_close(ctx); 557 - i915_gem_context_free(ctx); 512 + destroy_kernel_context(&i915->preempt_context); 513 + destroy_kernel_context(&i915->kernel_context); 558 514 559 515 /* Must free all deferred contexts (via flush_workqueue) first */ 560 516 ida_destroy(&i915->contexts.hw_ida); ··· 1070 1036 case I915_CONTEXT_PARAM_BANNABLE: 1071 1037 args->value = i915_gem_context_is_bannable(ctx); 1072 1038 break; 1039 + case 
I915_CONTEXT_PARAM_PRIORITY: 1040 + args->value = ctx->priority; 1041 + break; 1073 1042 default: 1074 1043 ret = -EINVAL; 1075 1044 break; ··· 1128 1091 else 1129 1092 i915_gem_context_clear_bannable(ctx); 1130 1093 break; 1094 + 1095 + case I915_CONTEXT_PARAM_PRIORITY: 1096 + { 1097 + int priority = args->value; 1098 + 1099 + if (args->size) 1100 + ret = -EINVAL; 1101 + else if (!to_i915(dev)->engine[RCS]->schedule) 1102 + ret = -ENODEV; 1103 + else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || 1104 + priority < I915_CONTEXT_MIN_USER_PRIORITY) 1105 + ret = -EINVAL; 1106 + else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && 1107 + !capable(CAP_SYS_NICE)) 1108 + ret = -EPERM; 1109 + else 1110 + ctx->priority = priority; 1111 + } 1112 + break; 1113 + 1131 1114 default: 1132 1115 ret = -EINVAL; 1133 1116 break;
+14 -4
drivers/gpu/drm/i915/i915_gem_dmabuf.c
··· 256 256 return drm_gem_dmabuf_export(dev, &exp_info); 257 257 } 258 258 259 - static struct sg_table * 260 - i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) 259 + static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) 261 260 { 262 - return dma_buf_map_attachment(obj->base.import_attach, 263 - DMA_BIDIRECTIONAL); 261 + struct sg_table *pages; 262 + unsigned int sg_page_sizes; 263 + 264 + pages = dma_buf_map_attachment(obj->base.import_attach, 265 + DMA_BIDIRECTIONAL); 266 + if (IS_ERR(pages)) 267 + return PTR_ERR(pages); 268 + 269 + sg_page_sizes = i915_sg_page_sizes(pages->sgl); 270 + 271 + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); 272 + 273 + return 0; 264 274 } 265 275 266 276 static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
+6 -1
drivers/gpu/drm/i915/i915_gem_evict.c
··· 82 82 if (i915_vma_is_pinned(vma)) 83 83 return false; 84 84 85 - if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link)) 85 + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) 86 86 return false; 87 87 88 88 list_add(&vma->evict_link, unwind); ··· 311 311 312 312 if (flags & PIN_NONBLOCK && 313 313 (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) { 314 + ret = -ENOSPC; 315 + break; 316 + } 317 + 318 + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) { 314 319 ret = -ENOSPC; 315 320 break; 316 321 }
+8 -6
drivers/gpu/drm/i915/i915_gem_execbuffer.c
··· 367 367 return false; 368 368 369 369 if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { 370 - if (unlikely(i915_vma_get_fence(vma))) { 370 + if (unlikely(i915_vma_pin_fence(vma))) { 371 371 i915_vma_unpin(vma); 372 372 return false; 373 373 } 374 374 375 - if (i915_vma_pin_fence(vma)) 375 + if (vma->fence) 376 376 exec_flags |= __EXEC_OBJECT_HAS_FENCE; 377 377 } 378 378 ··· 385 385 GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); 386 386 387 387 if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) 388 - i915_vma_unpin_fence(vma); 388 + __i915_vma_unpin_fence(vma); 389 389 390 390 __i915_vma_unpin(vma); 391 391 } ··· 563 563 } 564 564 565 565 if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { 566 - err = i915_vma_get_fence(vma); 566 + err = i915_vma_pin_fence(vma); 567 567 if (unlikely(err)) { 568 568 i915_vma_unpin(vma); 569 569 return err; 570 570 } 571 571 572 - if (i915_vma_pin_fence(vma)) 572 + if (vma->fence) 573 573 exec_flags |= __EXEC_OBJECT_HAS_FENCE; 574 574 } 575 575 ··· 974 974 return ERR_PTR(err); 975 975 976 976 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 977 - PIN_MAPPABLE | PIN_NONBLOCK); 977 + PIN_MAPPABLE | 978 + PIN_NONBLOCK | 979 + PIN_NONFAULT); 978 980 if (IS_ERR(vma)) { 979 981 memset(&cache->node, 0, sizeof(cache->node)); 980 982 err = drm_mm_insert_node_in_range
+32 -8
drivers/gpu/drm/i915/i915_gem_fence_reg.c
··· 240 240 /* Ensure that all userspace CPU access is completed before 241 241 * stealing the fence. 242 242 */ 243 - i915_gem_release_mmap(fence->vma->obj); 243 + GEM_BUG_ON(fence->vma->fence != fence); 244 + i915_vma_revoke_mmap(fence->vma); 244 245 245 246 fence->vma->fence = NULL; 246 247 fence->vma = NULL; ··· 281 280 * 282 281 * 0 on success, negative error code on failure. 283 282 */ 284 - int 285 - i915_vma_put_fence(struct i915_vma *vma) 283 + int i915_vma_put_fence(struct i915_vma *vma) 286 284 { 287 285 struct drm_i915_fence_reg *fence = vma->fence; 288 286 ··· 299 299 struct drm_i915_fence_reg *fence; 300 300 301 301 list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { 302 + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); 303 + 302 304 if (fence->pin_count) 303 305 continue; 304 306 ··· 315 313 } 316 314 317 315 /** 318 - * i915_vma_get_fence - set up fencing for a vma 316 + * i915_vma_pin_fence - set up fencing for a vma 319 317 * @vma: vma to map through a fence reg 320 318 * 321 319 * When mapping objects through the GTT, userspace wants to be able to write ··· 333 331 * 0 on success, negative error code on failure. 334 332 */ 335 333 int 336 - i915_vma_get_fence(struct i915_vma *vma) 334 + i915_vma_pin_fence(struct i915_vma *vma) 337 335 { 338 336 struct drm_i915_fence_reg *fence; 339 337 struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; 338 + int err; 340 339 341 340 /* Note that we revoke fences on runtime suspend. Therefore the user 342 341 * must keep the device awake whilst using the fence. ··· 347 344 /* Just update our place in the LRU if our fence is getting reused. 
*/ 348 345 if (vma->fence) { 349 346 fence = vma->fence; 347 + GEM_BUG_ON(fence->vma != vma); 348 + fence->pin_count++; 350 349 if (!fence->dirty) { 351 350 list_move_tail(&fence->link, 352 351 &fence->i915->mm.fence_list); ··· 358 353 fence = fence_find(vma->vm->i915); 359 354 if (IS_ERR(fence)) 360 355 return PTR_ERR(fence); 356 + 357 + GEM_BUG_ON(fence->pin_count); 358 + fence->pin_count++; 361 359 } else 362 360 return 0; 363 361 364 - return fence_update(fence, set); 362 + err = fence_update(fence, set); 363 + if (err) 364 + goto out_unpin; 365 + 366 + GEM_BUG_ON(fence->vma != set); 367 + GEM_BUG_ON(vma->fence != (set ? fence : NULL)); 368 + 369 + if (set) 370 + return 0; 371 + 372 + out_unpin: 373 + fence->pin_count--; 374 + return err; 365 375 } 366 376 367 377 /** ··· 449 429 for (i = 0; i < dev_priv->num_fence_regs; i++) { 450 430 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 451 431 432 + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); 433 + 452 434 if (fence->vma) 453 - i915_gem_release_mmap(fence->vma->obj); 435 + i915_vma_revoke_mmap(fence->vma); 454 436 } 455 437 } 456 438 ··· 472 450 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 473 451 struct i915_vma *vma = reg->vma; 474 452 453 + GEM_BUG_ON(vma && vma->fence != reg); 454 + 475 455 /* 476 456 * Commit delayed tiling changes if we have an object still 477 457 * attached to the fence, otherwise just clear the fence. 478 458 */ 479 459 if (vma && !i915_gem_object_is_tiled(vma->obj)) { 480 460 GEM_BUG_ON(!reg->dirty); 481 - GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); 461 + GEM_BUG_ON(i915_vma_has_userfault(vma)); 482 462 483 463 list_move(&reg->link, &dev_priv->mm.fence_list); 484 464 vma->fence = NULL;
+238 -47
drivers/gpu/drm/i915/i915_gem_gtt.c
··· 135 135 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, 136 136 int enable_ppgtt) 137 137 { 138 - bool has_aliasing_ppgtt; 139 138 bool has_full_ppgtt; 140 139 bool has_full_48bit_ppgtt; 141 140 142 - has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt; 141 + if (!dev_priv->info.has_aliasing_ppgtt) 142 + return 0; 143 + 143 144 has_full_ppgtt = dev_priv->info.has_full_ppgtt; 144 145 has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt; 145 146 ··· 149 148 has_full_ppgtt = false; 150 149 has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv); 151 150 } 152 - 153 - if (!has_aliasing_ppgtt) 154 - return 0; 155 151 156 152 /* 157 153 * We don't allow disabling PPGTT for gen9+ as it's a requirement for ··· 186 188 return 2; 187 189 } 188 190 189 - return has_aliasing_ppgtt ? 1 : 0; 191 + return 1; 190 192 } 191 193 192 194 static int ppgtt_bind_vma(struct i915_vma *vma, ··· 203 205 return ret; 204 206 } 205 207 206 - vma->pages = vma->obj->mm.pages; 207 - 208 208 /* Currently applicable only to VLV */ 209 209 pte_flags = 0; 210 210 if (vma->obj->gt_ro) ··· 216 220 static void ppgtt_unbind_vma(struct i915_vma *vma) 217 221 { 218 222 vma->vm->clear_range(vma->vm, vma->node.start, vma->size); 223 + } 224 + 225 + static int ppgtt_set_pages(struct i915_vma *vma) 226 + { 227 + GEM_BUG_ON(vma->pages); 228 + 229 + vma->pages = vma->obj->mm.pages; 230 + 231 + vma->page_sizes = vma->obj->mm.page_sizes; 232 + 233 + return 0; 234 + } 235 + 236 + static void clear_pages(struct i915_vma *vma) 237 + { 238 + GEM_BUG_ON(!vma->pages); 239 + 240 + if (vma->pages != vma->obj->mm.pages) { 241 + sg_free_table(vma->pages); 242 + kfree(vma->pages); 243 + } 244 + vma->pages = NULL; 245 + 246 + memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); 219 247 } 220 248 221 249 static gen8_pte_t gen8_pte_encode(dma_addr_t addr, ··· 517 497 static int 518 498 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) 519 499 { 520 - struct page *page; 500 + 
struct page *page = NULL; 521 501 dma_addr_t addr; 502 + int order; 522 503 523 - page = alloc_page(gfp | __GFP_ZERO); 524 - if (unlikely(!page)) 525 - return -ENOMEM; 504 + /* 505 + * In order to utilize 64K pages for an object with a size < 2M, we will 506 + * need to support a 64K scratch page, given that every 16th entry for a 507 + * page-table operating in 64K mode must point to a properly aligned 64K 508 + * region, including any PTEs which happen to point to scratch. 509 + * 510 + * This is only relevant for the 48b PPGTT where we support 511 + * huge-gtt-pages, see also i915_vma_insert(). 512 + * 513 + * TODO: we should really consider write-protecting the scratch-page and 514 + * sharing between ppgtt 515 + */ 516 + if (i915_vm_is_48bit(vm) && 517 + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { 518 + order = get_order(I915_GTT_PAGE_SIZE_64K); 519 + page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order); 520 + if (page) { 521 + addr = dma_map_page(vm->dma, page, 0, 522 + I915_GTT_PAGE_SIZE_64K, 523 + PCI_DMA_BIDIRECTIONAL); 524 + if (unlikely(dma_mapping_error(vm->dma, addr))) { 525 + __free_pages(page, order); 526 + page = NULL; 527 + } 526 528 527 - addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, 528 - PCI_DMA_BIDIRECTIONAL); 529 - if (unlikely(dma_mapping_error(vm->dma, addr))) { 530 - __free_page(page); 531 - return -ENOMEM; 529 + if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) { 530 + dma_unmap_page(vm->dma, addr, 531 + I915_GTT_PAGE_SIZE_64K, 532 + PCI_DMA_BIDIRECTIONAL); 533 + __free_pages(page, order); 534 + page = NULL; 535 + } 536 + } 537 + } 538 + 539 + if (!page) { 540 + order = 0; 541 + page = alloc_page(gfp | __GFP_ZERO); 542 + if (unlikely(!page)) 543 + return -ENOMEM; 544 + 545 + addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, 546 + PCI_DMA_BIDIRECTIONAL); 547 + if (unlikely(dma_mapping_error(vm->dma, addr))) { 548 + __free_page(page); 549 + return -ENOMEM; 550 + } 532 551 } 533 552 534 553 vm->scratch_page.page = page; 535 554 
vm->scratch_page.daddr = addr; 555 + vm->scratch_page.order = order; 556 + 536 557 return 0; 537 558 } 538 559 ··· 581 520 { 582 521 struct i915_page_dma *p = &vm->scratch_page; 583 522 584 - dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 585 - __free_page(p->page); 523 + dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT, 524 + PCI_DMA_BIDIRECTIONAL); 525 + __free_pages(p->page, p->order); 586 526 } 587 527 588 528 static struct i915_page_table *alloc_pt(struct i915_address_space *vm) ··· 1051 989 1052 990 gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, 1053 991 cache_level); 992 + 993 + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; 994 + } 995 + 996 + static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, 997 + struct i915_page_directory_pointer **pdps, 998 + struct sgt_dma *iter, 999 + enum i915_cache_level cache_level) 1000 + { 1001 + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); 1002 + u64 start = vma->node.start; 1003 + dma_addr_t rem = iter->sg->length; 1004 + 1005 + do { 1006 + struct gen8_insert_pte idx = gen8_insert_pte(start); 1007 + struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; 1008 + struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; 1009 + unsigned int page_size; 1010 + bool maybe_64K = false; 1011 + gen8_pte_t encode = pte_encode; 1012 + gen8_pte_t *vaddr; 1013 + u16 index, max; 1014 + 1015 + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && 1016 + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && 1017 + rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) { 1018 + index = idx.pde; 1019 + max = I915_PDES; 1020 + page_size = I915_GTT_PAGE_SIZE_2M; 1021 + 1022 + encode |= GEN8_PDE_PS_2M; 1023 + 1024 + vaddr = kmap_atomic_px(pd); 1025 + } else { 1026 + struct i915_page_table *pt = pd->page_table[idx.pde]; 1027 + 1028 + index = idx.pte; 1029 + max = GEN8_PTES; 1030 + page_size = I915_GTT_PAGE_SIZE; 1031 + 1032 + if (!index && 1033 + vma->page_sizes.sg & 
I915_GTT_PAGE_SIZE_64K && 1034 + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && 1035 + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || 1036 + rem >= (max - index) << PAGE_SHIFT)) 1037 + maybe_64K = true; 1038 + 1039 + vaddr = kmap_atomic_px(pt); 1040 + } 1041 + 1042 + do { 1043 + GEM_BUG_ON(iter->sg->length < page_size); 1044 + vaddr[index++] = encode | iter->dma; 1045 + 1046 + start += page_size; 1047 + iter->dma += page_size; 1048 + rem -= page_size; 1049 + if (iter->dma >= iter->max) { 1050 + iter->sg = __sg_next(iter->sg); 1051 + if (!iter->sg) 1052 + break; 1053 + 1054 + rem = iter->sg->length; 1055 + iter->dma = sg_dma_address(iter->sg); 1056 + iter->max = iter->dma + rem; 1057 + 1058 + if (maybe_64K && index < max && 1059 + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && 1060 + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || 1061 + rem >= (max - index) << PAGE_SHIFT))) 1062 + maybe_64K = false; 1063 + 1064 + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) 1065 + break; 1066 + } 1067 + } while (rem >= page_size && index < max); 1068 + 1069 + kunmap_atomic(vaddr); 1070 + 1071 + /* 1072 + * Is it safe to mark the 2M block as 64K? -- Either we have 1073 + * filled whole page-table with 64K entries, or filled part of 1074 + * it and have reached the end of the sg table and we have 1075 + * enough padding. 
1076 + */ 1077 + if (maybe_64K && 1078 + (index == max || 1079 + (i915_vm_has_scratch_64K(vma->vm) && 1080 + !iter->sg && IS_ALIGNED(vma->node.start + 1081 + vma->node.size, 1082 + I915_GTT_PAGE_SIZE_2M)))) { 1083 + vaddr = kmap_atomic_px(pd); 1084 + vaddr[idx.pde] |= GEN8_PDE_IPS_64K; 1085 + kunmap_atomic(vaddr); 1086 + page_size = I915_GTT_PAGE_SIZE_64K; 1087 + } 1088 + 1089 + vma->page_sizes.gtt |= page_size; 1090 + } while (iter->sg); 1054 1091 } 1055 1092 1056 1093 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, ··· 1164 1003 .max = iter.dma + iter.sg->length, 1165 1004 }; 1166 1005 struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; 1167 - struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); 1168 1006 1169 - while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, 1170 - &idx, cache_level)) 1171 - GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); 1007 + if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { 1008 + gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level); 1009 + } else { 1010 + struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); 1011 + 1012 + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], 1013 + &iter, &idx, cache_level)) 1014 + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); 1015 + 1016 + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; 1017 + } 1172 1018 } 1173 1019 1174 1020 static void gen8_free_page_tables(struct i915_address_space *vm, ··· 1620 1452 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1621 1453 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1622 1454 ppgtt->base.bind_vma = ppgtt_bind_vma; 1455 + ppgtt->base.set_pages = ppgtt_set_pages; 1456 + ppgtt->base.clear_pages = clear_pages; 1623 1457 ppgtt->debug_dump = gen8_dump_ppgtt; 1624 1458 1625 1459 return 0; ··· 1896 1726 } 1897 1727 } while (1); 1898 1728 kunmap_atomic(vaddr); 1729 + 1730 + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; 1899 1731 } 1900 1732 1901 1733 static int gen6_alloc_va_range(struct i915_address_space *vm, 
··· 2066 1894 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2067 1895 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2068 1896 ppgtt->base.bind_vma = ppgtt_bind_vma; 1897 + ppgtt->base.set_pages = ppgtt_set_pages; 1898 + ppgtt->base.clear_pages = clear_pages; 2069 1899 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2070 1900 ppgtt->debug_dump = gen6_dump_ppgtt; 2071 1901 ··· 2135 1961 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2136 1962 else if (IS_GEN9_LP(dev_priv)) 2137 1963 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 1964 + 1965 + /* 1966 + * To support 64K PTEs we need to first enable the use of the 1967 + * Intermediate-Page-Size(IPS) bit of the PDE field via some magical 1968 + * mmio, otherwise the page-walker will simply ignore the IPS bit. This 1969 + * shouldn't be needed after GEN10. 1970 + * 1971 + * 64K pages were first introduced from BDW+, although technically they 1972 + * only *work* from gen9+. For pre-BDW we instead have the option for 1973 + * 32K pages, but we don't currently have any support for it in our 1974 + * driver. 
1975 + */ 1976 + if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) && 1977 + INTEL_GEN(dev_priv) <= 10) 1978 + I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA, 1979 + I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) | 1980 + GAMW_ECO_ENABLE_64K_IPS_FIELD); 2138 1981 } 2139 1982 2140 1983 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) ··· 2596 2405 struct drm_i915_gem_object *obj = vma->obj; 2597 2406 u32 pte_flags; 2598 2407 2599 - if (unlikely(!vma->pages)) { 2600 - int ret = i915_get_ggtt_vma_pages(vma); 2601 - if (ret) 2602 - return ret; 2603 - } 2604 - 2605 2408 /* Currently applicable only to VLV */ 2606 2409 pte_flags = 0; 2607 2410 if (obj->gt_ro) ··· 2604 2419 intel_runtime_pm_get(i915); 2605 2420 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); 2606 2421 intel_runtime_pm_put(i915); 2422 + 2423 + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; 2607 2424 2608 2425 /* 2609 2426 * Without aliasing PPGTT there's no difference between ··· 2634 2447 u32 pte_flags; 2635 2448 int ret; 2636 2449 2637 - if (unlikely(!vma->pages)) { 2638 - ret = i915_get_ggtt_vma_pages(vma); 2639 - if (ret) 2640 - return ret; 2641 - } 2642 - 2643 2450 /* Currently applicable only to VLV */ 2644 2451 pte_flags = 0; 2645 2452 if (vma->obj->gt_ro) ··· 2648 2467 vma->node.start, 2649 2468 vma->size); 2650 2469 if (ret) 2651 - goto err_pages; 2470 + return ret; 2652 2471 } 2653 2472 2654 2473 appgtt->base.insert_entries(&appgtt->base, vma, cache_level, ··· 2662 2481 } 2663 2482 2664 2483 return 0; 2665 - 2666 - err_pages: 2667 - if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { 2668 - if (vma->pages != vma->obj->mm.pages) { 2669 - GEM_BUG_ON(!vma->pages); 2670 - sg_free_table(vma->pages); 2671 - kfree(vma->pages); 2672 - } 2673 - vma->pages = NULL; 2674 - } 2675 - return ret; 2676 2484 } 2677 2485 2678 2486 static void aliasing_gtt_unbind_vma(struct i915_vma *vma) ··· 2697 2527 } 2698 2528 2699 2529 dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); 2530 + } 
2531 + 2532 + static int ggtt_set_pages(struct i915_vma *vma) 2533 + { 2534 + int ret; 2535 + 2536 + GEM_BUG_ON(vma->pages); 2537 + 2538 + ret = i915_get_ggtt_vma_pages(vma); 2539 + if (ret) 2540 + return ret; 2541 + 2542 + vma->page_sizes = vma->obj->mm.page_sizes; 2543 + 2544 + return 0; 2700 2545 } 2701 2546 2702 2547 static void i915_gtt_color_adjust(const struct drm_mm_node *node, ··· 3336 3151 ggtt->base.cleanup = gen6_gmch_remove; 3337 3152 ggtt->base.bind_vma = ggtt_bind_vma; 3338 3153 ggtt->base.unbind_vma = ggtt_unbind_vma; 3154 + ggtt->base.set_pages = ggtt_set_pages; 3155 + ggtt->base.clear_pages = clear_pages; 3339 3156 ggtt->base.insert_page = gen8_ggtt_insert_page; 3340 3157 ggtt->base.clear_range = nop_clear_range; 3341 3158 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) ··· 3396 3209 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3397 3210 ggtt->base.bind_vma = ggtt_bind_vma; 3398 3211 ggtt->base.unbind_vma = ggtt_unbind_vma; 3212 + ggtt->base.set_pages = ggtt_set_pages; 3213 + ggtt->base.clear_pages = clear_pages; 3399 3214 ggtt->base.cleanup = gen6_gmch_remove; 3400 3215 3401 3216 ggtt->invalidate = gen6_ggtt_invalidate; ··· 3443 3254 ggtt->base.clear_range = i915_ggtt_clear_range; 3444 3255 ggtt->base.bind_vma = ggtt_bind_vma; 3445 3256 ggtt->base.unbind_vma = ggtt_unbind_vma; 3257 + ggtt->base.set_pages = ggtt_set_pages; 3258 + ggtt->base.clear_pages = clear_pages; 3446 3259 ggtt->base.cleanup = i915_gmch_remove; 3447 3260 3448 3261 ggtt->invalidate = gmch_ggtt_invalidate;
+19 -1
drivers/gpu/drm/i915/i915_gem_gtt.h
··· 42 42 #include "i915_gem_request.h" 43 43 #include "i915_selftest.h" 44 44 45 - #define I915_GTT_PAGE_SIZE 4096UL 45 + #define I915_GTT_PAGE_SIZE_4K BIT(12) 46 + #define I915_GTT_PAGE_SIZE_64K BIT(16) 47 + #define I915_GTT_PAGE_SIZE_2M BIT(21) 48 + 49 + #define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K 50 + #define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M 51 + 46 52 #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE 47 53 48 54 #define I915_FENCE_REG_NONE -1 ··· 154 148 #define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4)) 155 149 #define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6)) 156 150 151 + #define GEN8_PDE_IPS_64K BIT(11) 152 + #define GEN8_PDE_PS_2M BIT(7) 153 + 157 154 struct sg_table; 158 155 159 156 struct intel_rotation_info { ··· 216 207 217 208 struct i915_page_dma { 218 209 struct page *page; 210 + int order; 219 211 union { 220 212 dma_addr_t daddr; 221 213 ··· 339 329 int (*bind_vma)(struct i915_vma *vma, 340 330 enum i915_cache_level cache_level, 341 331 u32 flags); 332 + int (*set_pages)(struct i915_vma *vma); 333 + void (*clear_pages)(struct i915_vma *vma); 342 334 343 335 I915_SELFTEST_DECLARE(struct fault_attr fault_attr); 344 336 }; ··· 351 339 i915_vm_is_48bit(const struct i915_address_space *vm) 352 340 { 353 341 return (vm->total - 1) >> 32; 342 + } 343 + 344 + static inline bool 345 + i915_vm_has_scratch_64K(struct i915_address_space *vm) 346 + { 347 + return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K); 354 348 } 355 349 356 350 /* The Graphics Translation Table is the way in which GEN hardware translates a
+12 -6
drivers/gpu/drm/i915/i915_gem_internal.c
··· 44 44 kfree(st); 45 45 } 46 46 47 - static struct sg_table * 48 - i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) 47 + static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) 49 48 { 50 49 struct drm_i915_private *i915 = to_i915(obj->base.dev); 51 50 struct sg_table *st; 52 51 struct scatterlist *sg; 52 + unsigned int sg_page_sizes; 53 53 unsigned int npages; 54 54 int max_order; 55 55 gfp_t gfp; ··· 78 78 create_st: 79 79 st = kmalloc(sizeof(*st), GFP_KERNEL); 80 80 if (!st) 81 - return ERR_PTR(-ENOMEM); 81 + return -ENOMEM; 82 82 83 83 npages = obj->base.size / PAGE_SIZE; 84 84 if (sg_alloc_table(st, npages, GFP_KERNEL)) { 85 85 kfree(st); 86 - return ERR_PTR(-ENOMEM); 86 + return -ENOMEM; 87 87 } 88 88 89 89 sg = st->sgl; 90 90 st->nents = 0; 91 + sg_page_sizes = 0; 91 92 92 93 do { 93 94 int order = min(fls(npages) - 1, max_order); ··· 106 105 } while (1); 107 106 108 107 sg_set_page(sg, page, PAGE_SIZE << order, 0); 108 + sg_page_sizes |= PAGE_SIZE << order; 109 109 st->nents++; 110 110 111 111 npages -= 1 << order; ··· 134 132 * object are only valid whilst active and pinned. 135 133 */ 136 134 obj->mm.madv = I915_MADV_DONTNEED; 137 - return st; 135 + 136 + __i915_gem_object_set_pages(obj, st, sg_page_sizes); 137 + 138 + return 0; 138 139 139 140 err: 140 141 sg_set_page(sg, NULL, 0, 0); 141 142 sg_mark_end(sg); 142 143 internal_free_pages(st); 143 - return ERR_PTR(-ENOMEM); 144 + 145 + return -ENOMEM; 144 146 } 145 147 146 148 static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
+31 -1
drivers/gpu/drm/i915/i915_gem_object.h
··· 69 69 * being released or under memory pressure (where we attempt to 70 70 * reap pages for the shrinker). 71 71 */ 72 - struct sg_table *(*get_pages)(struct drm_i915_gem_object *); 72 + int (*get_pages)(struct drm_i915_gem_object *); 73 73 void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); 74 74 75 75 int (*pwrite)(struct drm_i915_gem_object *, ··· 123 123 /** 124 124 * Whether the object is currently in the GGTT mmap. 125 125 */ 126 + unsigned int userfault_count; 126 127 struct list_head userfault_link; 127 128 128 129 struct list_head batch_pool_link; ··· 169 168 170 169 struct sg_table *pages; 171 170 void *mapping; 171 + 172 + /* TODO: whack some of this into the error state */ 173 + struct i915_page_sizes { 174 + /** 175 + * The sg mask of the pages sg_table. i.e the mask of 176 + * of the lengths for each sg entry. 177 + */ 178 + unsigned int phys; 179 + 180 + /** 181 + * The gtt page sizes we are allowed to use given the 182 + * sg mask and the supported page sizes. This will 183 + * express the smallest unit we can use for the whole 184 + * object, as well as the larger sizes we may be able 185 + * to use opportunistically. 186 + */ 187 + unsigned int sg; 188 + 189 + /** 190 + * The actual gtt page size usage. Since we can have 191 + * multiple vma associated with this object we need to 192 + * prevent any trampling of state, hence a copy of this 193 + * struct also lives in each vma, therefore the gtt 194 + * value here should only be read/write through the vma. 195 + */ 196 + unsigned int gtt; 197 + } page_sizes; 198 + 199 + I915_SELFTEST_DECLARE(unsigned int page_mask); 172 200 173 201 struct i915_gem_object_page_iter { 174 202 struct scatterlist *sg_pos;
+18 -2
drivers/gpu/drm/i915/i915_gem_request.c
··· 186 186 INIT_LIST_HEAD(&pt->signalers_list); 187 187 INIT_LIST_HEAD(&pt->waiters_list); 188 188 INIT_LIST_HEAD(&pt->link); 189 - pt->priority = INT_MIN; 189 + pt->priority = I915_PRIORITY_INVALID; 190 190 } 191 191 192 192 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) ··· 416 416 417 417 spin_lock_irq(&request->lock); 418 418 if (request->waitboost) 419 - atomic_dec(&request->i915->rps.num_waiters); 419 + atomic_dec(&request->i915->gt_pm.rps.num_waiters); 420 420 dma_fence_signal_locked(&request->fence); 421 421 spin_unlock_irq(&request->lock); 422 422 ··· 556 556 switch (state) { 557 557 case FENCE_COMPLETE: 558 558 trace_i915_gem_request_submit(request); 559 + /* 560 + * We need to serialize use of the submit_request() callback with its 561 + * hotplugging performed during an emergency i915_gem_set_wedged(). 562 + * We use the RCU mechanism to mark the critical section in order to 563 + * force i915_gem_set_wedged() to wait until the submit_request() is 564 + * completed before proceeding. 565 + */ 566 + rcu_read_lock(); 559 567 request->engine->submit_request(request); 568 + rcu_read_unlock(); 560 569 break; 561 570 562 571 case FENCE_FREE: ··· 595 586 int ret; 596 587 597 588 lockdep_assert_held(&dev_priv->drm.struct_mutex); 589 + 590 + /* 591 + * Preempt contexts are reserved for exclusive use to inject a 592 + * preemption context switch. They are never to be used for any trivial 593 + * request! 594 + */ 595 + GEM_BUG_ON(ctx == dev_priv->preempt_context); 598 596 599 597 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report 600 598 * EIO if the GPU is already wedged.
+10 -3
drivers/gpu/drm/i915/i915_gem_request.h
··· 30 30 #include "i915_gem.h" 31 31 #include "i915_sw_fence.h" 32 32 33 + #include <uapi/drm/i915_drm.h> 34 + 33 35 struct drm_file; 34 36 struct drm_i915_gem_object; 35 37 struct drm_i915_gem_request; ··· 71 69 struct list_head waiters_list; /* those after us, they depend upon us */ 72 70 struct list_head link; 73 71 int priority; 74 - #define I915_PRIORITY_MAX 1024 75 - #define I915_PRIORITY_NORMAL 0 76 - #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) 72 + }; 73 + 74 + enum { 75 + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, 76 + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, 77 + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, 78 + 79 + I915_PRIORITY_INVALID = INT_MIN 77 80 }; 78 81 79 82 struct i915_gem_capture_list {
+11 -5
drivers/gpu/drm/i915/i915_gem_stolen.c
··· 539 539 return st; 540 540 } 541 541 542 - static struct sg_table * 543 - i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) 542 + static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) 544 543 { 545 - return i915_pages_create_for_stolen(obj->base.dev, 546 - obj->stolen->start, 547 - obj->stolen->size); 544 + struct sg_table *pages = 545 + i915_pages_create_for_stolen(obj->base.dev, 546 + obj->stolen->start, 547 + obj->stolen->size); 548 + if (IS_ERR(pages)) 549 + return PTR_ERR(pages); 550 + 551 + __i915_gem_object_set_pages(obj, pages, obj->stolen->size); 552 + 553 + return 0; 548 554 } 549 555 550 556 static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
+32 -21
drivers/gpu/drm/i915/i915_gem_userptr.c
··· 164 164 i915_mmu_notifier_create(struct mm_struct *mm) 165 165 { 166 166 struct i915_mmu_notifier *mn; 167 - int ret; 168 167 169 168 mn = kmalloc(sizeof(*mn), GFP_KERNEL); 170 169 if (mn == NULL) ··· 176 177 if (mn->wq == NULL) { 177 178 kfree(mn); 178 179 return ERR_PTR(-ENOMEM); 179 - } 180 - 181 - /* Protected by mmap_sem (write-lock) */ 182 - ret = __mmu_notifier_register(&mn->mn, mm); 183 - if (ret) { 184 - destroy_workqueue(mn->wq); 185 - kfree(mn); 186 - return ERR_PTR(ret); 187 180 } 188 181 189 182 return mn; ··· 201 210 static struct i915_mmu_notifier * 202 211 i915_mmu_notifier_find(struct i915_mm_struct *mm) 203 212 { 204 - struct i915_mmu_notifier *mn = mm->mn; 213 + struct i915_mmu_notifier *mn; 214 + int err = 0; 205 215 206 216 mn = mm->mn; 207 217 if (mn) 208 218 return mn; 209 219 220 + mn = i915_mmu_notifier_create(mm->mm); 221 + if (IS_ERR(mn)) 222 + err = PTR_ERR(mn); 223 + 210 224 down_write(&mm->mm->mmap_sem); 211 225 mutex_lock(&mm->i915->mm_lock); 212 - if ((mn = mm->mn) == NULL) { 213 - mn = i915_mmu_notifier_create(mm->mm); 214 - if (!IS_ERR(mn)) 215 - mm->mn = mn; 226 + if (mm->mn == NULL && !err) { 227 + /* Protected by mmap_sem (write-lock) */ 228 + err = __mmu_notifier_register(&mn->mn, mm->mm); 229 + if (!err) { 230 + /* Protected by mm_lock */ 231 + mm->mn = fetch_and_zero(&mn); 232 + } 233 + } else { 234 + /* someone else raced and successfully installed the mmu 235 + * notifier, we can cancel our own errors */ 236 + err = 0; 216 237 } 217 238 mutex_unlock(&mm->i915->mm_lock); 218 239 up_write(&mm->mm->mmap_sem); 219 240 220 - return mn; 241 + if (mn) { 242 + destroy_workqueue(mn->wq); 243 + kfree(mn); 244 + } 245 + 246 + return err ? 
ERR_PTR(err) : mm->mn; 221 247 } 222 248 223 249 static int ··· 413 405 { 414 406 unsigned int max_segment = i915_sg_segment_size(); 415 407 struct sg_table *st; 408 + unsigned int sg_page_sizes; 416 409 int ret; 417 410 418 411 st = kmalloc(sizeof(*st), GFP_KERNEL); ··· 442 433 kfree(st); 443 434 return ERR_PTR(ret); 444 435 } 436 + 437 + sg_page_sizes = i915_sg_page_sizes(st->sgl); 438 + 439 + __i915_gem_object_set_pages(obj, st, sg_page_sizes); 445 440 446 441 return st; 447 442 } ··· 534 521 pages = __i915_gem_userptr_alloc_pages(obj, pvec, 535 522 npages); 536 523 if (!IS_ERR(pages)) { 537 - __i915_gem_object_set_pages(obj, pages); 538 524 pinned = 0; 539 525 pages = NULL; 540 526 } ··· 594 582 return ERR_PTR(-EAGAIN); 595 583 } 596 584 597 - static struct sg_table * 598 - i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) 585 + static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) 599 586 { 600 587 const int num_pages = obj->base.size >> PAGE_SHIFT; 601 588 struct mm_struct *mm = obj->userptr.mm->mm; ··· 623 612 if (obj->userptr.work) { 624 613 /* active flag should still be held for the pending work */ 625 614 if (IS_ERR(obj->userptr.work)) 626 - return ERR_CAST(obj->userptr.work); 615 + return PTR_ERR(obj->userptr.work); 627 616 else 628 - return ERR_PTR(-EAGAIN); 617 + return -EAGAIN; 629 618 } 630 619 631 620 pvec = NULL; ··· 661 650 release_pages(pvec, pinned, 0); 662 651 kvfree(pvec); 663 652 664 - return pages; 653 + return PTR_ERR_OR_ZERO(pages); 665 654 } 666 655 667 656 static void
+74
drivers/gpu/drm/i915/i915_gemfs.c
··· 1 + /* 2 + * Copyright © 2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + * 23 + */ 24 + 25 + #include <linux/fs.h> 26 + #include <linux/mount.h> 27 + #include <linux/pagemap.h> 28 + 29 + #include "i915_drv.h" 30 + #include "i915_gemfs.h" 31 + 32 + int i915_gemfs_init(struct drm_i915_private *i915) 33 + { 34 + struct file_system_type *type; 35 + struct vfsmount *gemfs; 36 + 37 + type = get_fs_type("tmpfs"); 38 + if (!type) 39 + return -ENODEV; 40 + 41 + gemfs = kern_mount(type); 42 + if (IS_ERR(gemfs)) 43 + return PTR_ERR(gemfs); 44 + 45 + /* 46 + * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most 47 + * likely 2M. 
Note that within_size may overallocate huge-pages, if say 48 + * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under 49 + * memory pressure shmem should split any huge-pages which can be 50 + * shrunk. 51 + */ 52 + 53 + if (has_transparent_hugepage()) { 54 + struct super_block *sb = gemfs->mnt_sb; 55 + char options[] = "huge=within_size"; 56 + int flags = 0; 57 + int err; 58 + 59 + err = sb->s_op->remount_fs(sb, &flags, options); 60 + if (err) { 61 + kern_unmount(gemfs); 62 + return err; 63 + } 64 + } 65 + 66 + i915->mm.gemfs = gemfs; 67 + 68 + return 0; 69 + } 70 + 71 + void i915_gemfs_fini(struct drm_i915_private *i915) 72 + { 73 + kern_unmount(i915->mm.gemfs); 74 + }
+34
drivers/gpu/drm/i915/i915_gemfs.h
··· 1 + /* 2 + * Copyright © 2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + * 23 + */ 24 + 25 + #ifndef __I915_GEMFS_H__ 26 + #define __I915_GEMFS_H__ 27 + 28 + struct drm_i915_private; 29 + 30 + int i915_gemfs_init(struct drm_i915_private *i915); 31 + 32 + void i915_gemfs_fini(struct drm_i915_private *i915); 33 + 34 + #endif
+8 -6
drivers/gpu/drm/i915/i915_gpu_error.c
··· 377 377 if (!erq->seqno) 378 378 return; 379 379 380 - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n", 380 + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", 381 381 prefix, erq->pid, erq->ban_score, 382 - erq->context, erq->seqno, 382 + erq->context, erq->seqno, erq->priority, 383 383 jiffies_to_msecs(jiffies - erq->jiffies), 384 384 erq->head, erq->tail); 385 385 } ··· 388 388 const char *header, 389 389 const struct drm_i915_error_context *ctx) 390 390 { 391 - err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", 391 + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n", 392 392 header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, 393 - ctx->ban_score, ctx->guilty, ctx->active); 393 + ctx->priority, ctx->ban_score, ctx->guilty, ctx->active); 394 394 } 395 395 396 396 static void error_print_engine(struct drm_i915_error_state_buf *m, ··· 1271 1271 struct drm_i915_error_request *erq) 1272 1272 { 1273 1273 erq->context = request->ctx->hw_id; 1274 + erq->priority = request->priotree.priority; 1274 1275 erq->ban_score = atomic_read(&request->ctx->ban_score); 1275 1276 erq->seqno = request->global_seqno; 1276 1277 erq->jiffies = request->emitted_jiffies; ··· 1365 1364 1366 1365 e->handle = ctx->user_handle; 1367 1366 e->hw_id = ctx->hw_id; 1367 + e->priority = ctx->priority; 1368 1368 e->ban_score = atomic_read(&ctx->ban_score); 1369 1369 e->guilty = atomic_read(&ctx->guilty_count); 1370 1370 e->active = atomic_read(&ctx->active_count); ··· 1674 1672 struct i915_gpu_state *error) 1675 1673 { 1676 1674 error->awake = dev_priv->gt.awake; 1677 - error->wakelock = atomic_read(&dev_priv->pm.wakeref_count); 1678 - error->suspended = dev_priv->pm.suspended; 1675 + error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count); 1676 + error->suspended = dev_priv->runtime_pm.suspended; 
1679 1677 1680 1678 error->iommu = -1; 1681 1679 #ifdef CONFIG_INTEL_IOMMU
+16 -107
drivers/gpu/drm/i915/i915_guc_submission.c
··· 21 21 * IN THE SOFTWARE. 22 22 * 23 23 */ 24 - #include <linux/circ_buf.h> 25 - #include "i915_drv.h" 26 - #include "intel_uc.h" 27 24 25 + #include <linux/circ_buf.h> 28 26 #include <trace/events/dma_fence.h> 27 + 28 + #include "i915_guc_submission.h" 29 + #include "i915_drv.h" 29 30 30 31 /** 31 32 * DOC: GuC-based command submission ··· 338 337 339 338 for_each_engine_masked(engine, dev_priv, client->engines, tmp) { 340 339 struct intel_context *ce = &ctx->engine[engine->id]; 341 - uint32_t guc_engine_id = engine->guc_id; 340 + u32 guc_engine_id = engine->guc_id; 342 341 struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; 343 342 344 343 /* TODO: We have a design issue to be solved here. Only when we ··· 388 387 gfx_addr = guc_ggtt_offset(client->vma); 389 388 desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + 390 389 client->doorbell_offset; 391 - desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); 390 + desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); 392 391 desc->db_trigger_uk = gfx_addr + client->doorbell_offset; 393 392 desc->process_desc = gfx_addr + client->proc_desc_offset; 394 393 desc->wq_addr = gfx_addr + GUC_DB_SIZE; 395 394 desc->wq_size = GUC_WQ_SIZE; 396 395 397 - desc->desc_private = (uintptr_t)client; 396 + desc->desc_private = ptr_to_u64(client); 398 397 } 399 398 400 399 static void guc_stage_desc_fini(struct intel_guc *guc, ··· 500 499 const unsigned int engine_id = engine->id; 501 500 unsigned int n; 502 501 503 - for (n = 0; n < ARRAY_SIZE(execlists->port); n++) { 502 + for (n = 0; n < execlists_num_ports(execlists); n++) { 504 503 struct drm_i915_gem_request *rq; 505 504 unsigned int count; 506 505 ··· 644 643 * path of i915_guc_submit() above. 
645 644 */ 646 645 647 - /** 648 - * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage 649 - * @guc: the guc 650 - * @size: size of area to allocate (both virtual space and memory) 651 - * 652 - * This is a wrapper to create an object for use with the GuC. In order to 653 - * use it inside the GuC, an object needs to be pinned lifetime, so we allocate 654 - * both some backing storage and a range inside the Global GTT. We must pin 655 - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that 656 - * range is reserved inside GuC. 657 - * 658 - * Return: A i915_vma if successful, otherwise an ERR_PTR. 659 - */ 660 - struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) 661 - { 662 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 663 - struct drm_i915_gem_object *obj; 664 - struct i915_vma *vma; 665 - int ret; 666 - 667 - obj = i915_gem_object_create(dev_priv, size); 668 - if (IS_ERR(obj)) 669 - return ERR_CAST(obj); 670 - 671 - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); 672 - if (IS_ERR(vma)) 673 - goto err; 674 - 675 - ret = i915_vma_pin(vma, 0, PAGE_SIZE, 676 - PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); 677 - if (ret) { 678 - vma = ERR_PTR(ret); 679 - goto err; 680 - } 681 - 682 - return vma; 683 - 684 - err: 685 - i915_gem_object_put(obj); 686 - return vma; 687 - } 688 - 689 646 /* Check that a doorbell register is in the expected state */ 690 647 static bool doorbell_ok(struct intel_guc *guc, u16 db_id) 691 648 { ··· 755 796 */ 756 797 static struct i915_guc_client * 757 798 guc_client_alloc(struct drm_i915_private *dev_priv, 758 - uint32_t engines, 759 - uint32_t priority, 799 + u32 engines, 800 + u32 priority, 760 801 struct i915_gem_context *ctx) 761 802 { 762 803 struct i915_guc_client *client; ··· 1028 1069 1029 1070 static void guc_interrupts_capture(struct drm_i915_private *dev_priv) 1030 1071 { 1072 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 1031 1073 struct intel_engine_cs 
*engine; 1032 1074 enum intel_engine_id id; 1033 1075 int irqs; ··· 1065 1105 * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will 1066 1106 * result in the register bit being left SET! 1067 1107 */ 1068 - dev_priv->rps.pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; 1069 - dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 1108 + rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; 1109 + rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 1070 1110 } 1071 1111 1072 1112 static void guc_interrupts_release(struct drm_i915_private *dev_priv) 1073 1113 { 1114 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 1074 1115 struct intel_engine_cs *engine; 1075 1116 enum intel_engine_id id; 1076 1117 int irqs; ··· 1090 1129 I915_WRITE(GUC_VCS2_VCS1_IER, 0); 1091 1130 I915_WRITE(GUC_WD_VECS_IER, 0); 1092 1131 1093 - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 1094 - dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; 1132 + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 1133 + rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; 1095 1134 } 1096 1135 1097 1136 int i915_guc_submission_enable(struct drm_i915_private *dev_priv) ··· 1172 1211 1173 1212 guc_client_free(guc->execbuf_client); 1174 1213 guc->execbuf_client = NULL; 1175 - } 1176 - 1177 - /** 1178 - * intel_guc_suspend() - notify GuC entering suspend state 1179 - * @dev_priv: i915 device private 1180 - */ 1181 - int intel_guc_suspend(struct drm_i915_private *dev_priv) 1182 - { 1183 - struct intel_guc *guc = &dev_priv->guc; 1184 - struct i915_gem_context *ctx; 1185 - u32 data[3]; 1186 - 1187 - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) 1188 - return 0; 1189 - 1190 - gen9_disable_guc_interrupts(dev_priv); 1191 - 1192 - ctx = dev_priv->kernel_context; 1193 - 1194 - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; 1195 - /* any value greater than GUC_POWER_D0 */ 1196 - data[1] = GUC_POWER_D1; 1197 - /* first page is shared data with GuC */ 1198 - data[2] = 
guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE; 1199 - 1200 - return intel_guc_send(guc, data, ARRAY_SIZE(data)); 1201 - } 1202 - 1203 - /** 1204 - * intel_guc_resume() - notify GuC resuming from suspend state 1205 - * @dev_priv: i915 device private 1206 - */ 1207 - int intel_guc_resume(struct drm_i915_private *dev_priv) 1208 - { 1209 - struct intel_guc *guc = &dev_priv->guc; 1210 - struct i915_gem_context *ctx; 1211 - u32 data[3]; 1212 - 1213 - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) 1214 - return 0; 1215 - 1216 - if (i915_modparams.guc_log_level >= 0) 1217 - gen9_enable_guc_interrupts(dev_priv); 1218 - 1219 - ctx = dev_priv->kernel_context; 1220 - 1221 - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; 1222 - data[1] = GUC_POWER_D0; 1223 - /* first page is shared data with GuC */ 1224 - data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE; 1225 - 1226 - return intel_guc_send(guc, data, ARRAY_SIZE(data)); 1227 1214 }
+80
drivers/gpu/drm/i915/i915_guc_submission.h
··· 1 + /* 2 + * Copyright © 2014-2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + * 23 + */ 24 + 25 + #ifndef _I915_GUC_SUBMISSION_H_ 26 + #define _I915_GUC_SUBMISSION_H_ 27 + 28 + #include <linux/spinlock.h> 29 + 30 + #include "i915_gem.h" 31 + 32 + struct drm_i915_private; 33 + 34 + /* 35 + * This structure primarily describes the GEM object shared with the GuC. 36 + * The specs sometimes refer to this object as a "GuC context", but we use 37 + * the term "client" to avoid confusion with hardware contexts. This 38 + * GEM object is held for the entire lifetime of our interaction with 39 + * the GuC, being allocated before the GuC is loaded with its firmware. 
40 + * Because there's no way to update the address used by the GuC after 41 + * initialisation, the shared object must stay pinned into the GGTT as 42 + * long as the GuC is in use. We also keep the first page (only) mapped 43 + * into kernel address space, as it includes shared data that must be 44 + * updated on every request submission. 45 + * 46 + * The single GEM object described here is actually made up of several 47 + * separate areas, as far as the GuC is concerned. The first page (kept 48 + * kmap'd) includes the "process descriptor" which holds sequence data for 49 + * the doorbell, and one cacheline which actually *is* the doorbell; a 50 + * write to this will "ring the doorbell" (i.e. send an interrupt to the 51 + * GuC). The subsequent pages of the client object constitute the work 52 + * queue (a circular array of work items), again described in the process 53 + * descriptor. Work queue pages are mapped momentarily as required. 54 + */ 55 + struct i915_guc_client { 56 + struct i915_vma *vma; 57 + void *vaddr; 58 + struct i915_gem_context *owner; 59 + struct intel_guc *guc; 60 + 61 + /* bitmap of (host) engine ids */ 62 + u32 engines; 63 + u32 priority; 64 + u32 stage_id; 65 + u32 proc_desc_offset; 66 + 67 + u16 doorbell_id; 68 + unsigned long doorbell_offset; 69 + 70 + spinlock_t wq_lock; 71 + /* Per-engine counts of GuC submissions */ 72 + u64 submissions[I915_NUM_ENGINES]; 73 + }; 74 + 75 + int i915_guc_submission_init(struct drm_i915_private *dev_priv); 76 + int i915_guc_submission_enable(struct drm_i915_private *dev_priv); 77 + void i915_guc_submission_disable(struct drm_i915_private *dev_priv); 78 + void i915_guc_submission_fini(struct drm_i915_private *dev_priv); 79 + 80 + #endif
+77 -61
drivers/gpu/drm/i915/i915_irq.c
··· 404 404 { 405 405 spin_lock_irq(&dev_priv->irq_lock); 406 406 gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events); 407 - dev_priv->rps.pm_iir = 0; 407 + dev_priv->gt_pm.rps.pm_iir = 0; 408 408 spin_unlock_irq(&dev_priv->irq_lock); 409 409 } 410 410 411 411 void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) 412 412 { 413 - if (READ_ONCE(dev_priv->rps.interrupts_enabled)) 413 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 414 + 415 + if (READ_ONCE(rps->interrupts_enabled)) 414 416 return; 415 417 416 418 spin_lock_irq(&dev_priv->irq_lock); 417 - WARN_ON_ONCE(dev_priv->rps.pm_iir); 419 + WARN_ON_ONCE(rps->pm_iir); 418 420 WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); 419 - dev_priv->rps.interrupts_enabled = true; 421 + rps->interrupts_enabled = true; 420 422 gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); 421 423 422 424 spin_unlock_irq(&dev_priv->irq_lock); ··· 426 424 427 425 void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) 428 426 { 429 - if (!READ_ONCE(dev_priv->rps.interrupts_enabled)) 427 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 428 + 429 + if (!READ_ONCE(rps->interrupts_enabled)) 430 430 return; 431 431 432 432 spin_lock_irq(&dev_priv->irq_lock); 433 - dev_priv->rps.interrupts_enabled = false; 433 + rps->interrupts_enabled = false; 434 434 435 435 I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); 436 436 ··· 446 442 * we will reset the GPU to minimum frequencies, so the current 447 443 * state of the worker can be discarded. 
448 444 */ 449 - cancel_work_sync(&dev_priv->rps.work); 445 + cancel_work_sync(&rps->work); 450 446 gen6_reset_rps_interrupts(dev_priv); 451 447 } 452 448 ··· 1123 1119 1124 1120 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) 1125 1121 { 1126 - memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); 1122 + memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); 1127 1123 } 1128 1124 1129 1125 static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) 1130 1126 { 1131 - const struct intel_rps_ei *prev = &dev_priv->rps.ei; 1127 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 1128 + const struct intel_rps_ei *prev = &rps->ei; 1132 1129 struct intel_rps_ei now; 1133 1130 u32 events = 0; 1134 1131 ··· 1156 1151 c0 = max(render, media); 1157 1152 c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ 1158 1153 1159 - if (c0 > time * dev_priv->rps.up_threshold) 1154 + if (c0 > time * rps->up_threshold) 1160 1155 events = GEN6_PM_RP_UP_THRESHOLD; 1161 - else if (c0 < time * dev_priv->rps.down_threshold) 1156 + else if (c0 < time * rps->down_threshold) 1162 1157 events = GEN6_PM_RP_DOWN_THRESHOLD; 1163 1158 } 1164 1159 1165 - dev_priv->rps.ei = now; 1160 + rps->ei = now; 1166 1161 return events; 1167 1162 } 1168 1163 1169 1164 static void gen6_pm_rps_work(struct work_struct *work) 1170 1165 { 1171 1166 struct drm_i915_private *dev_priv = 1172 - container_of(work, struct drm_i915_private, rps.work); 1167 + container_of(work, struct drm_i915_private, gt_pm.rps.work); 1168 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 1173 1169 bool client_boost = false; 1174 1170 int new_delay, adj, min, max; 1175 1171 u32 pm_iir = 0; 1176 1172 1177 1173 spin_lock_irq(&dev_priv->irq_lock); 1178 - if (dev_priv->rps.interrupts_enabled) { 1179 - pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir); 1180 - client_boost = atomic_read(&dev_priv->rps.num_waiters); 1174 + if (rps->interrupts_enabled) { 1175 + pm_iir = fetch_and_zero(&rps->pm_iir); 1176 + 
client_boost = atomic_read(&rps->num_waiters); 1181 1177 } 1182 1178 spin_unlock_irq(&dev_priv->irq_lock); 1183 1179 ··· 1187 1181 if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) 1188 1182 goto out; 1189 1183 1190 - mutex_lock(&dev_priv->rps.hw_lock); 1184 + mutex_lock(&dev_priv->pcu_lock); 1191 1185 1192 1186 pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); 1193 1187 1194 - adj = dev_priv->rps.last_adj; 1195 - new_delay = dev_priv->rps.cur_freq; 1196 - min = dev_priv->rps.min_freq_softlimit; 1197 - max = dev_priv->rps.max_freq_softlimit; 1188 + adj = rps->last_adj; 1189 + new_delay = rps->cur_freq; 1190 + min = rps->min_freq_softlimit; 1191 + max = rps->max_freq_softlimit; 1198 1192 if (client_boost) 1199 - max = dev_priv->rps.max_freq; 1200 - if (client_boost && new_delay < dev_priv->rps.boost_freq) { 1201 - new_delay = dev_priv->rps.boost_freq; 1193 + max = rps->max_freq; 1194 + if (client_boost && new_delay < rps->boost_freq) { 1195 + new_delay = rps->boost_freq; 1202 1196 adj = 0; 1203 1197 } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { 1204 1198 if (adj > 0) ··· 1206 1200 else /* CHV needs even encode values */ 1207 1201 adj = IS_CHERRYVIEW(dev_priv) ? 
2 : 1; 1208 1202 1209 - if (new_delay >= dev_priv->rps.max_freq_softlimit) 1203 + if (new_delay >= rps->max_freq_softlimit) 1210 1204 adj = 0; 1211 1205 } else if (client_boost) { 1212 1206 adj = 0; 1213 1207 } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { 1214 - if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) 1215 - new_delay = dev_priv->rps.efficient_freq; 1216 - else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) 1217 - new_delay = dev_priv->rps.min_freq_softlimit; 1208 + if (rps->cur_freq > rps->efficient_freq) 1209 + new_delay = rps->efficient_freq; 1210 + else if (rps->cur_freq > rps->min_freq_softlimit) 1211 + new_delay = rps->min_freq_softlimit; 1218 1212 adj = 0; 1219 1213 } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { 1220 1214 if (adj < 0) ··· 1222 1216 else /* CHV needs even encode values */ 1223 1217 adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1; 1224 1218 1225 - if (new_delay <= dev_priv->rps.min_freq_softlimit) 1219 + if (new_delay <= rps->min_freq_softlimit) 1226 1220 adj = 0; 1227 1221 } else { /* unknown event */ 1228 1222 adj = 0; 1229 1223 } 1230 1224 1231 - dev_priv->rps.last_adj = adj; 1225 + rps->last_adj = adj; 1232 1226 1233 1227 /* sysfs frequency interfaces may have snuck in while servicing the 1234 1228 * interrupt ··· 1238 1232 1239 1233 if (intel_set_rps(dev_priv, new_delay)) { 1240 1234 DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); 1241 - dev_priv->rps.last_adj = 0; 1235 + rps->last_adj = 0; 1242 1236 } 1243 1237 1244 - mutex_unlock(&dev_priv->rps.hw_lock); 1238 + mutex_unlock(&dev_priv->pcu_lock); 1245 1239 1246 1240 out: 1247 1241 /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ 1248 1242 spin_lock_irq(&dev_priv->irq_lock); 1249 - if (dev_priv->rps.interrupts_enabled) 1243 + if (rps->interrupts_enabled) 1250 1244 gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); 1251 1245 spin_unlock_irq(&dev_priv->irq_lock); 1252 1246 } ··· 1388 1382 bool tasklet = false; 1389 1383 1390 1384 
if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { 1391 - if (port_count(&execlists->port[0])) { 1392 - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); 1393 - tasklet = true; 1394 - } 1385 + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); 1386 + tasklet = true; 1395 1387 } 1396 1388 1397 1389 if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { ··· 1727 1723 * the work queue. */ 1728 1724 static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) 1729 1725 { 1726 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 1727 + 1730 1728 if (pm_iir & dev_priv->pm_rps_events) { 1731 1729 spin_lock(&dev_priv->irq_lock); 1732 1730 gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); 1733 - if (dev_priv->rps.interrupts_enabled) { 1734 - dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events; 1735 - schedule_work(&dev_priv->rps.work); 1731 + if (rps->interrupts_enabled) { 1732 + rps->pm_iir |= pm_iir & dev_priv->pm_rps_events; 1733 + schedule_work(&rps->work); 1736 1734 } 1737 1735 spin_unlock(&dev_priv->irq_lock); 1738 1736 } ··· 2260 2254 static void cpt_serr_int_handler(struct drm_i915_private *dev_priv) 2261 2255 { 2262 2256 u32 serr_int = I915_READ(SERR_INT); 2257 + enum pipe pipe; 2263 2258 2264 2259 if (serr_int & SERR_INT_POISON) 2265 2260 DRM_ERROR("PCH poison interrupt\n"); 2266 2261 2267 - if (serr_int & SERR_INT_TRANS_A_FIFO_UNDERRUN) 2268 - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_A); 2269 - 2270 - if (serr_int & SERR_INT_TRANS_B_FIFO_UNDERRUN) 2271 - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_B); 2272 - 2273 - if (serr_int & SERR_INT_TRANS_C_FIFO_UNDERRUN) 2274 - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_C); 2262 + for_each_pipe(dev_priv, pipe) 2263 + if (serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pipe)) 2264 + intel_pch_fifo_underrun_irq_handler(dev_priv, pipe); 2275 2265 2276 2266 I915_WRITE(SERR_INT, serr_int); 2277 2267 } ··· 3165 3163 enum pipe pipe; 3166 3164 3167 3165 
spin_lock_irq(&dev_priv->irq_lock); 3166 + 3167 + if (!intel_irqs_enabled(dev_priv)) { 3168 + spin_unlock_irq(&dev_priv->irq_lock); 3169 + return; 3170 + } 3171 + 3168 3172 for_each_pipe_masked(dev_priv, pipe, pipe_mask) 3169 3173 GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, 3170 3174 dev_priv->de_irq_mask[pipe], 3171 3175 ~dev_priv->de_irq_mask[pipe] | extra_ier); 3176 + 3172 3177 spin_unlock_irq(&dev_priv->irq_lock); 3173 3178 } 3174 3179 ··· 3185 3176 enum pipe pipe; 3186 3177 3187 3178 spin_lock_irq(&dev_priv->irq_lock); 3179 + 3180 + if (!intel_irqs_enabled(dev_priv)) { 3181 + spin_unlock_irq(&dev_priv->irq_lock); 3182 + return; 3183 + } 3184 + 3188 3185 for_each_pipe_masked(dev_priv, pipe, pipe_mask) 3189 3186 GEN8_IRQ_RESET_NDX(DE_PIPE, pipe); 3187 + 3190 3188 spin_unlock_irq(&dev_priv->irq_lock); 3191 3189 3192 3190 /* make sure we're done processing display irqs */ ··· 3614 3598 else if (IS_BROADWELL(dev_priv)) 3615 3599 de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; 3616 3600 3617 - dev_priv->de_irq_mask[PIPE_A] = ~de_pipe_masked; 3618 - dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked; 3619 - dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_masked; 3601 + for_each_pipe(dev_priv, pipe) { 3602 + dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; 3620 3603 3621 - for_each_pipe(dev_priv, pipe) 3622 3604 if (intel_display_power_is_enabled(dev_priv, 3623 3605 POWER_DOMAIN_PIPE(pipe))) 3624 3606 GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, 3625 3607 dev_priv->de_irq_mask[pipe], 3626 3608 de_pipe_enables); 3609 + } 3627 3610 3628 3611 GEN3_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables); 3629 3612 GEN3_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked); ··· 4015 4000 void intel_irq_init(struct drm_i915_private *dev_priv) 4016 4001 { 4017 4002 struct drm_device *dev = &dev_priv->drm; 4003 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 4018 4004 int i; 4019 4005 4020 4006 intel_hpd_init_work(dev_priv); 4021 4007 4022 - INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); 4008 + 
INIT_WORK(&rps->work, gen6_pm_rps_work); 4023 4009 4024 4010 INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); 4025 4011 for (i = 0; i < MAX_L3_SLICES; ++i) ··· 4036 4020 else 4037 4021 dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; 4038 4022 4039 - dev_priv->rps.pm_intrmsk_mbz = 0; 4023 + rps->pm_intrmsk_mbz = 0; 4040 4024 4041 4025 /* 4042 4026 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer ··· 4045 4029 * TODO: verify if this can be reproduced on VLV,CHV. 4046 4030 */ 4047 4031 if (INTEL_GEN(dev_priv) <= 7) 4048 - dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; 4032 + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; 4049 4033 4050 4034 if (INTEL_GEN(dev_priv) >= 8) 4051 - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 4035 + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 4052 4036 4053 4037 if (IS_GEN2(dev_priv)) { 4054 4038 /* Gen2 doesn't have a hardware frame counter */ ··· 4182 4166 * interrupts as enabled _before_ actually enabling them to avoid 4183 4167 * special cases in our ordering checks. 
4184 4168 */ 4185 - dev_priv->pm.irqs_enabled = true; 4169 + dev_priv->runtime_pm.irqs_enabled = true; 4186 4170 4187 4171 return drm_irq_install(&dev_priv->drm, dev_priv->drm.pdev->irq); 4188 4172 } ··· 4198 4182 { 4199 4183 drm_irq_uninstall(&dev_priv->drm); 4200 4184 intel_hpd_cancel_work(dev_priv); 4201 - dev_priv->pm.irqs_enabled = false; 4185 + dev_priv->runtime_pm.irqs_enabled = false; 4202 4186 } 4203 4187 4204 4188 /** ··· 4211 4195 void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) 4212 4196 { 4213 4197 dev_priv->drm.driver->irq_uninstall(&dev_priv->drm); 4214 - dev_priv->pm.irqs_enabled = false; 4198 + dev_priv->runtime_pm.irqs_enabled = false; 4215 4199 synchronize_irq(dev_priv->drm.irq); 4216 4200 } 4217 4201 ··· 4224 4208 */ 4225 4209 void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv) 4226 4210 { 4227 - dev_priv->pm.irqs_enabled = true; 4211 + dev_priv->runtime_pm.irqs_enabled = true; 4228 4212 dev_priv->drm.driver->irq_preinstall(&dev_priv->drm); 4229 4213 dev_priv->drm.driver->irq_postinstall(&dev_priv->drm); 4230 4214 }
-3
drivers/gpu/drm/i915/i915_params.c
··· 146 146 i915_param_named_unsafe(enable_cmd_parser, bool, 0400, 147 147 "Enable command parsing (true=enabled [default], false=disabled)"); 148 148 149 - i915_param_named_unsafe(use_mmio_flip, int, 0600, 150 - "use MMIO flips (-1=never, 0=driver discretion [default], 1=always)"); 151 - 152 149 i915_param_named(mmio_debug, int, 0600, 153 150 "Enable the MMIO debug code for the first N failures (default: off). " 154 151 "This may negatively affect performance.");
-1
drivers/gpu/drm/i915/i915_params.h
··· 49 49 param(int, guc_log_level, -1) \ 50 50 param(char *, guc_firmware_path, NULL) \ 51 51 param(char *, huc_firmware_path, NULL) \ 52 - param(int, use_mmio_flip, 0) \ 53 52 param(int, mmio_debug, 0) \ 54 53 param(int, edp_vswing, 0) \ 55 54 param(int, reset, 2) \
+50 -27
drivers/gpu/drm/i915/i915_pci.c
··· 54 54 .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } 55 55 #define CHV_COLORS \ 56 56 .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } 57 + #define GLK_COLORS \ 58 + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } 57 59 58 60 /* Keep in gen based order, and chronological order within a gen */ 61 + 62 + #define GEN_DEFAULT_PAGE_SIZES \ 63 + .page_sizes = I915_GTT_PAGE_SIZE_4K 64 + 59 65 #define GEN2_FEATURES \ 60 66 .gen = 2, .num_pipes = 1, \ 61 67 .has_overlay = 1, .overlay_needs_physical = 1, \ ··· 71 65 .ring_mask = RENDER_RING, \ 72 66 .has_snoop = true, \ 73 67 GEN_DEFAULT_PIPEOFFSETS, \ 68 + GEN_DEFAULT_PAGE_SIZES, \ 74 69 CURSOR_OFFSETS 75 70 76 71 static const struct intel_device_info intel_i830_info __initconst = { ··· 105 98 .ring_mask = RENDER_RING, \ 106 99 .has_snoop = true, \ 107 100 GEN_DEFAULT_PIPEOFFSETS, \ 101 + GEN_DEFAULT_PAGE_SIZES, \ 108 102 CURSOR_OFFSETS 109 103 110 104 static const struct intel_device_info intel_i915g_info __initconst = { ··· 169 161 .ring_mask = RENDER_RING, \ 170 162 .has_snoop = true, \ 171 163 GEN_DEFAULT_PIPEOFFSETS, \ 164 + GEN_DEFAULT_PAGE_SIZES, \ 172 165 CURSOR_OFFSETS 173 166 174 167 static const struct intel_device_info intel_i965g_info __initconst = { ··· 212 203 .ring_mask = RENDER_RING | BSD_RING, \ 213 204 .has_snoop = true, \ 214 205 GEN_DEFAULT_PIPEOFFSETS, \ 206 + GEN_DEFAULT_PAGE_SIZES, \ 215 207 CURSOR_OFFSETS 216 208 217 209 static const struct intel_device_info intel_ironlake_d_info __initconst = { ··· 236 226 .has_rc6p = 1, \ 237 227 .has_aliasing_ppgtt = 1, \ 238 228 GEN_DEFAULT_PIPEOFFSETS, \ 229 + GEN_DEFAULT_PAGE_SIZES, \ 239 230 CURSOR_OFFSETS 240 231 241 232 #define SNB_D_PLATFORM \ ··· 280 269 .has_aliasing_ppgtt = 1, \ 281 270 .has_full_ppgtt = 1, \ 282 271 GEN_DEFAULT_PIPEOFFSETS, \ 272 + GEN_DEFAULT_PAGE_SIZES, \ 283 273 IVB_CURSOR_OFFSETS 284 274 285 275 #define IVB_D_PLATFORM \ ··· 337 325 .has_snoop = true, 338 326 .ring_mask = RENDER_RING | BSD_RING 
| BLT_RING, 339 327 .display_mmio_offset = VLV_DISPLAY_BASE, 328 + GEN_DEFAULT_PAGE_SIZES, 340 329 GEN_DEFAULT_PIPEOFFSETS, 341 330 CURSOR_OFFSETS 342 331 }; 343 332 344 - #define HSW_FEATURES \ 333 + #define G75_FEATURES \ 345 334 GEN7_FEATURES, \ 346 335 .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ 347 336 .has_ddi = 1, \ ··· 354 341 .has_runtime_pm = 1 355 342 356 343 #define HSW_PLATFORM \ 357 - HSW_FEATURES, \ 344 + G75_FEATURES, \ 358 345 .platform = INTEL_HASWELL, \ 359 346 .has_l3_dpf = 1 360 347 ··· 373 360 .gt = 3, 374 361 }; 375 362 376 - #define BDW_FEATURES \ 377 - HSW_FEATURES, \ 363 + #define GEN8_FEATURES \ 364 + G75_FEATURES, \ 378 365 BDW_COLORS, \ 366 + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ 367 + I915_GTT_PAGE_SIZE_2M, \ 379 368 .has_logical_ring_contexts = 1, \ 380 369 .has_full_48bit_ppgtt = 1, \ 381 370 .has_64bit_reloc = 1, \ 382 371 .has_reset_engine = 1 383 372 384 373 #define BDW_PLATFORM \ 385 - BDW_FEATURES, \ 374 + GEN8_FEATURES, \ 386 375 .gen = 8, \ 387 376 .platform = INTEL_BROADWELL 388 377 ··· 430 415 .has_reset_engine = 1, 431 416 .has_snoop = true, 432 417 .display_mmio_offset = VLV_DISPLAY_BASE, 418 + GEN_DEFAULT_PAGE_SIZES, 433 419 GEN_CHV_PIPEOFFSETS, 434 420 CURSOR_OFFSETS, 435 421 CHV_COLORS, 436 422 }; 437 423 438 - #define SKL_PLATFORM \ 439 - BDW_FEATURES, \ 440 - .gen = 9, \ 441 - .platform = INTEL_SKYLAKE, \ 424 + #define GEN9_DEFAULT_PAGE_SIZES \ 425 + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ 426 + I915_GTT_PAGE_SIZE_64K | \ 427 + I915_GTT_PAGE_SIZE_2M 428 + 429 + #define GEN9_FEATURES \ 430 + GEN8_FEATURES, \ 431 + GEN9_DEFAULT_PAGE_SIZES, \ 432 + .has_logical_ring_preemption = 1, \ 442 433 .has_csr = 1, \ 443 434 .has_guc = 1, \ 435 + .has_ipc = 1, \ 444 436 .ddb_size = 896 437 + 438 + #define SKL_PLATFORM \ 439 + GEN9_FEATURES, \ 440 + .gen = 9, \ 441 + .platform = INTEL_SKYLAKE 445 442 446 443 static const struct intel_device_info intel_skylake_gt1_info __initconst = { 447 444 SKL_PLATFORM, ··· 
490 463 .has_ddi = 1, \ 491 464 .has_fpga_dbg = 1, \ 492 465 .has_fbc = 1, \ 466 + .has_psr = 1, \ 493 467 .has_runtime_pm = 1, \ 494 468 .has_pooled_eu = 0, \ 495 469 .has_csr = 1, \ ··· 498 470 .has_rc6 = 1, \ 499 471 .has_dp_mst = 1, \ 500 472 .has_logical_ring_contexts = 1, \ 473 + .has_logical_ring_preemption = 1, \ 501 474 .has_guc = 1, \ 502 475 .has_aliasing_ppgtt = 1, \ 503 476 .has_full_ppgtt = 1, \ ··· 506 477 .has_reset_engine = 1, \ 507 478 .has_snoop = true, \ 508 479 .has_ipc = 1, \ 480 + GEN9_DEFAULT_PAGE_SIZES, \ 509 481 GEN_DEFAULT_PIPEOFFSETS, \ 510 482 IVB_CURSOR_OFFSETS, \ 511 483 BDW_COLORS ··· 521 491 GEN9_LP_FEATURES, 522 492 .platform = INTEL_GEMINILAKE, 523 493 .ddb_size = 1024, 524 - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } 494 + GLK_COLORS, 525 495 }; 526 496 527 497 #define KBL_PLATFORM \ 528 - BDW_FEATURES, \ 498 + GEN9_FEATURES, \ 529 499 .gen = 9, \ 530 - .platform = INTEL_KABYLAKE, \ 531 - .has_csr = 1, \ 532 - .has_guc = 1, \ 533 - .has_ipc = 1, \ 534 - .ddb_size = 896 500 + .platform = INTEL_KABYLAKE 535 501 536 502 static const struct intel_device_info intel_kabylake_gt1_info __initconst = { 537 503 KBL_PLATFORM, ··· 546 520 }; 547 521 548 522 #define CFL_PLATFORM \ 549 - BDW_FEATURES, \ 523 + GEN9_FEATURES, \ 550 524 .gen = 9, \ 551 - .platform = INTEL_COFFEELAKE, \ 552 - .has_csr = 1, \ 553 - .has_guc = 1, \ 554 - .has_ipc = 1, \ 555 - .ddb_size = 896 525 + .platform = INTEL_COFFEELAKE 556 526 557 527 static const struct intel_device_info intel_coffeelake_gt1_info __initconst = { 558 528 CFL_PLATFORM, ··· 566 544 .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, 567 545 }; 568 546 547 + #define GEN10_FEATURES \ 548 + GEN9_FEATURES, \ 549 + .ddb_size = 1024, \ 550 + GLK_COLORS 551 + 569 552 static const struct intel_device_info intel_cannonlake_gt2_info __initconst = { 570 - BDW_FEATURES, 553 + GEN10_FEATURES, 571 554 .is_alpha_support = 1, 572 555 .platform = INTEL_CANNONLAKE, 573 556 
.gen = 10, 574 557 .gt = 2, 575 - .ddb_size = 1024, 576 - .has_csr = 1, 577 - .has_ipc = 1, 578 - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } 579 558 }; 580 559 581 560 /*
+20 -5
drivers/gpu/drm/i915/i915_reg.h
··· 2371 2371 #define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0) 2372 2372 #define GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS (1<<18) 2373 2373 2374 + #define GEN8_GAMW_ECO_DEV_RW_IA _MMIO(0x4080) 2375 + #define GAMW_ECO_ENABLE_64K_IPS_FIELD 0xF 2376 + 2374 2377 #define GAMT_CHKN_BIT_REG _MMIO(0x4ab8) 2375 2378 #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) 2376 2379 #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1<<24) ··· 3821 3818 #define GEN9_CLKGATE_DIS_0 _MMIO(0x46530) 3822 3819 #define PWM2_GATING_DIS (1 << 14) 3823 3820 #define PWM1_GATING_DIS (1 << 13) 3821 + 3822 + #define _CLKGATE_DIS_PSL_A 0x46520 3823 + #define _CLKGATE_DIS_PSL_B 0x46524 3824 + #define _CLKGATE_DIS_PSL_C 0x46528 3825 + #define DPF_GATING_DIS (1 << 10) 3826 + #define DPF_RAM_GATING_DIS (1 << 9) 3827 + #define DPFR_GATING_DIS (1 << 8) 3828 + 3829 + #define CLKGATE_DIS_PSL(pipe) \ 3830 + _MMIO_PIPE(pipe, _CLKGATE_DIS_PSL_A, _CLKGATE_DIS_PSL_B) 3824 3831 3825 3832 /* 3826 3833 * GEN10 clock gating regs ··· 5684 5671 #define CBR_PWM_CLOCK_MUX_SELECT (1<<30) 5685 5672 5686 5673 #define CBR4_VLV _MMIO(VLV_DISPLAY_BASE + 0x70450) 5687 - #define CBR_DPLLBMD_PIPE_C (1<<29) 5688 - #define CBR_DPLLBMD_PIPE_B (1<<18) 5674 + #define CBR_DPLLBMD_PIPE(pipe) (1<<(7+(pipe)*11)) /* pipes B and C */ 5689 5675 5690 5676 /* FIFO watermark sizes etc */ 5691 5677 #define G4X_FIFO_LINE_SIZE 64 ··· 7005 6993 #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) 7006 6994 #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) 7007 6995 #define GEN8_CS_CHICKEN1 _MMIO(0x2580) 6996 + #define GEN9_PREEMPT_3D_OBJECT_LEVEL (1<<0) 6997 + #define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) 6998 + #define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 0) 6999 + #define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 1) 7000 + #define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) 7001 + #define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) 7008 7002 7009 7003 /* 
GEN7 chicken */ 7010 7004 #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) ··· 7182 7164 7183 7165 #define SERR_INT _MMIO(0xc4040) 7184 7166 #define SERR_INT_POISON (1<<31) 7185 - #define SERR_INT_TRANS_C_FIFO_UNDERRUN (1<<6) 7186 - #define SERR_INT_TRANS_B_FIFO_UNDERRUN (1<<3) 7187 - #define SERR_INT_TRANS_A_FIFO_UNDERRUN (1<<0) 7188 7167 #define SERR_INT_TRANS_FIFO_UNDERRUN(pipe) (1<<((pipe)*3)) 7189 7168 7190 7169 /* digital port hotplug */
-2
drivers/gpu/drm/i915/i915_suspend.c
··· 108 108 109 109 mutex_lock(&dev_priv->drm.struct_mutex); 110 110 111 - i915_gem_restore_fences(dev_priv); 112 - 113 111 if (IS_GEN4(dev_priv)) 114 112 pci_write_config_word(pdev, GCDGMBUS, 115 113 dev_priv->regfile.saveGCDGMBUS);
+40 -36
drivers/gpu/drm/i915/i915_sysfs.c
··· 49 49 static ssize_t 50 50 show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf) 51 51 { 52 - return snprintf(buf, PAGE_SIZE, "%x\n", intel_enable_rc6()); 52 + return snprintf(buf, PAGE_SIZE, "%x\n", intel_rc6_enabled()); 53 53 } 54 54 55 55 static ssize_t ··· 246 246 247 247 intel_runtime_pm_get(dev_priv); 248 248 249 - mutex_lock(&dev_priv->rps.hw_lock); 249 + mutex_lock(&dev_priv->pcu_lock); 250 250 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 251 251 u32 freq; 252 252 freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); ··· 261 261 ret = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; 262 262 ret = intel_gpu_freq(dev_priv, ret); 263 263 } 264 - mutex_unlock(&dev_priv->rps.hw_lock); 264 + mutex_unlock(&dev_priv->pcu_lock); 265 265 266 266 intel_runtime_pm_put(dev_priv); 267 267 ··· 275 275 276 276 return snprintf(buf, PAGE_SIZE, "%d\n", 277 277 intel_gpu_freq(dev_priv, 278 - dev_priv->rps.cur_freq)); 278 + dev_priv->gt_pm.rps.cur_freq)); 279 279 } 280 280 281 281 static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) ··· 284 284 285 285 return snprintf(buf, PAGE_SIZE, "%d\n", 286 286 intel_gpu_freq(dev_priv, 287 - dev_priv->rps.boost_freq)); 287 + dev_priv->gt_pm.rps.boost_freq)); 288 288 } 289 289 290 290 static ssize_t gt_boost_freq_mhz_store(struct device *kdev, ··· 292 292 const char *buf, size_t count) 293 293 { 294 294 struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); 295 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 295 296 u32 val; 296 297 ssize_t ret; 297 298 ··· 302 301 303 302 /* Validate against (static) hardware limits */ 304 303 val = intel_freq_opcode(dev_priv, val); 305 - if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq) 304 + if (val < rps->min_freq || val > rps->max_freq) 306 305 return -EINVAL; 307 306 308 - mutex_lock(&dev_priv->rps.hw_lock); 309 - dev_priv->rps.boost_freq = val; 310 - mutex_unlock(&dev_priv->rps.hw_lock); 307 
+ mutex_lock(&dev_priv->pcu_lock); 308 + rps->boost_freq = val; 309 + mutex_unlock(&dev_priv->pcu_lock); 311 310 312 311 return count; 313 312 } ··· 319 318 320 319 return snprintf(buf, PAGE_SIZE, "%d\n", 321 320 intel_gpu_freq(dev_priv, 322 - dev_priv->rps.efficient_freq)); 321 + dev_priv->gt_pm.rps.efficient_freq)); 323 322 } 324 323 325 324 static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) ··· 328 327 329 328 return snprintf(buf, PAGE_SIZE, "%d\n", 330 329 intel_gpu_freq(dev_priv, 331 - dev_priv->rps.max_freq_softlimit)); 330 + dev_priv->gt_pm.rps.max_freq_softlimit)); 332 331 } 333 332 334 333 static ssize_t gt_max_freq_mhz_store(struct device *kdev, ··· 336 335 const char *buf, size_t count) 337 336 { 338 337 struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); 338 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 339 339 u32 val; 340 340 ssize_t ret; 341 341 ··· 346 344 347 345 intel_runtime_pm_get(dev_priv); 348 346 349 - mutex_lock(&dev_priv->rps.hw_lock); 347 + mutex_lock(&dev_priv->pcu_lock); 350 348 351 349 val = intel_freq_opcode(dev_priv, val); 352 350 353 - if (val < dev_priv->rps.min_freq || 354 - val > dev_priv->rps.max_freq || 355 - val < dev_priv->rps.min_freq_softlimit) { 356 - mutex_unlock(&dev_priv->rps.hw_lock); 351 + if (val < rps->min_freq || 352 + val > rps->max_freq || 353 + val < rps->min_freq_softlimit) { 354 + mutex_unlock(&dev_priv->pcu_lock); 357 355 intel_runtime_pm_put(dev_priv); 358 356 return -EINVAL; 359 357 } 360 358 361 - if (val > dev_priv->rps.rp0_freq) 359 + if (val > rps->rp0_freq) 362 360 DRM_DEBUG("User requested overclocking to %d\n", 363 361 intel_gpu_freq(dev_priv, val)); 364 362 365 - dev_priv->rps.max_freq_softlimit = val; 363 + rps->max_freq_softlimit = val; 366 364 367 - val = clamp_t(int, dev_priv->rps.cur_freq, 368 - dev_priv->rps.min_freq_softlimit, 369 - dev_priv->rps.max_freq_softlimit); 365 + val = clamp_t(int, rps->cur_freq, 366 + 
rps->min_freq_softlimit, 367 + rps->max_freq_softlimit); 370 368 371 369 /* We still need *_set_rps to process the new max_delay and 372 370 * update the interrupt limits and PMINTRMSK even though 373 371 * frequency request may be unchanged. */ 374 372 ret = intel_set_rps(dev_priv, val); 375 373 376 - mutex_unlock(&dev_priv->rps.hw_lock); 374 + mutex_unlock(&dev_priv->pcu_lock); 377 375 378 376 intel_runtime_pm_put(dev_priv); 379 377 ··· 386 384 387 385 return snprintf(buf, PAGE_SIZE, "%d\n", 388 386 intel_gpu_freq(dev_priv, 389 - dev_priv->rps.min_freq_softlimit)); 387 + dev_priv->gt_pm.rps.min_freq_softlimit)); 390 388 } 391 389 392 390 static ssize_t gt_min_freq_mhz_store(struct device *kdev, ··· 394 392 const char *buf, size_t count) 395 393 { 396 394 struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); 395 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 397 396 u32 val; 398 397 ssize_t ret; 399 398 ··· 404 401 405 402 intel_runtime_pm_get(dev_priv); 406 403 407 - mutex_lock(&dev_priv->rps.hw_lock); 404 + mutex_lock(&dev_priv->pcu_lock); 408 405 409 406 val = intel_freq_opcode(dev_priv, val); 410 407 411 - if (val < dev_priv->rps.min_freq || 412 - val > dev_priv->rps.max_freq || 413 - val > dev_priv->rps.max_freq_softlimit) { 414 - mutex_unlock(&dev_priv->rps.hw_lock); 408 + if (val < rps->min_freq || 409 + val > rps->max_freq || 410 + val > rps->max_freq_softlimit) { 411 + mutex_unlock(&dev_priv->pcu_lock); 415 412 intel_runtime_pm_put(dev_priv); 416 413 return -EINVAL; 417 414 } 418 415 419 - dev_priv->rps.min_freq_softlimit = val; 416 + rps->min_freq_softlimit = val; 420 417 421 - val = clamp_t(int, dev_priv->rps.cur_freq, 422 - dev_priv->rps.min_freq_softlimit, 423 - dev_priv->rps.max_freq_softlimit); 418 + val = clamp_t(int, rps->cur_freq, 419 + rps->min_freq_softlimit, 420 + rps->max_freq_softlimit); 424 421 425 422 /* We still need *_set_rps to process the new min_delay and 426 423 * update the interrupt limits and PMINTRMSK even though 427 424 
* frequency request may be unchanged. */ 428 425 ret = intel_set_rps(dev_priv, val); 429 426 430 - mutex_unlock(&dev_priv->rps.hw_lock); 427 + mutex_unlock(&dev_priv->pcu_lock); 431 428 432 429 intel_runtime_pm_put(dev_priv); 433 430 ··· 451 448 static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) 452 449 { 453 450 struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); 451 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 454 452 u32 val; 455 453 456 454 if (attr == &dev_attr_gt_RP0_freq_mhz) 457 - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp0_freq); 455 + val = intel_gpu_freq(dev_priv, rps->rp0_freq); 458 456 else if (attr == &dev_attr_gt_RP1_freq_mhz) 459 - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq); 457 + val = intel_gpu_freq(dev_priv, rps->rp1_freq); 460 458 else if (attr == &dev_attr_gt_RPn_freq_mhz) 461 - val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq); 459 + val = intel_gpu_freq(dev_priv, rps->min_freq); 462 460 else 463 461 BUG(); 464 462
+39 -77
drivers/gpu/drm/i915/i915_trace.h
··· 345 345 346 346 TP_STRUCT__entry( 347 347 __field(struct drm_i915_gem_object *, obj) 348 - __field(u32, size) 348 + __field(u64, size) 349 349 ), 350 350 351 351 TP_fast_assign( ··· 353 353 __entry->size = obj->base.size; 354 354 ), 355 355 356 - TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) 356 + TP_printk("obj=%p, size=0x%llx", __entry->obj, __entry->size) 357 357 ); 358 358 359 359 TRACE_EVENT(i915_gem_shrink, ··· 384 384 __field(struct drm_i915_gem_object *, obj) 385 385 __field(struct i915_address_space *, vm) 386 386 __field(u64, offset) 387 - __field(u32, size) 387 + __field(u64, size) 388 388 __field(unsigned, flags) 389 389 ), 390 390 ··· 396 396 __entry->flags = flags; 397 397 ), 398 398 399 - TP_printk("obj=%p, offset=%016llx size=%x%s vm=%p", 399 + TP_printk("obj=%p, offset=0x%016llx size=0x%llx%s vm=%p", 400 400 __entry->obj, __entry->offset, __entry->size, 401 401 __entry->flags & PIN_MAPPABLE ? ", mappable" : "", 402 402 __entry->vm) ··· 410 410 __field(struct drm_i915_gem_object *, obj) 411 411 __field(struct i915_address_space *, vm) 412 412 __field(u64, offset) 413 - __field(u32, size) 413 + __field(u64, size) 414 414 ), 415 415 416 416 TP_fast_assign( ··· 420 420 __entry->size = vma->node.size; 421 421 ), 422 422 423 - TP_printk("obj=%p, offset=%016llx size=%x vm=%p", 423 + TP_printk("obj=%p, offset=0x%016llx size=0x%llx vm=%p", 424 424 __entry->obj, __entry->offset, __entry->size, __entry->vm) 425 425 ); 426 426 427 427 TRACE_EVENT(i915_gem_object_pwrite, 428 - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), 428 + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), 429 429 TP_ARGS(obj, offset, len), 430 430 431 431 TP_STRUCT__entry( 432 432 __field(struct drm_i915_gem_object *, obj) 433 - __field(u32, offset) 434 - __field(u32, len) 433 + __field(u64, offset) 434 + __field(u64, len) 435 435 ), 436 436 437 437 TP_fast_assign( ··· 440 440 __entry->len = len; 441 441 ), 442 442 443 - TP_printk("obj=%p, 
offset=%u, len=%u", 443 + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", 444 444 __entry->obj, __entry->offset, __entry->len) 445 445 ); 446 446 447 447 TRACE_EVENT(i915_gem_object_pread, 448 - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), 448 + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), 449 449 TP_ARGS(obj, offset, len), 450 450 451 451 TP_STRUCT__entry( 452 452 __field(struct drm_i915_gem_object *, obj) 453 - __field(u32, offset) 454 - __field(u32, len) 453 + __field(u64, offset) 454 + __field(u64, len) 455 455 ), 456 456 457 457 TP_fast_assign( ··· 460 460 __entry->len = len; 461 461 ), 462 462 463 - TP_printk("obj=%p, offset=%u, len=%u", 463 + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", 464 464 __entry->obj, __entry->offset, __entry->len) 465 465 ); 466 466 467 467 TRACE_EVENT(i915_gem_object_fault, 468 - TP_PROTO(struct drm_i915_gem_object *obj, u32 index, bool gtt, bool write), 468 + TP_PROTO(struct drm_i915_gem_object *obj, u64 index, bool gtt, bool write), 469 469 TP_ARGS(obj, index, gtt, write), 470 470 471 471 TP_STRUCT__entry( 472 472 __field(struct drm_i915_gem_object *, obj) 473 - __field(u32, index) 473 + __field(u64, index) 474 474 __field(bool, gtt) 475 475 __field(bool, write) 476 476 ), ··· 482 482 __entry->write = write; 483 483 ), 484 484 485 - TP_printk("obj=%p, %s index=%u %s", 485 + TP_printk("obj=%p, %s index=%llu %s", 486 486 __entry->obj, 487 487 __entry->gtt ? 
"GTT" : "CPU", 488 488 __entry->index, ··· 515 515 ); 516 516 517 517 TRACE_EVENT(i915_gem_evict, 518 - TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags), 518 + TP_PROTO(struct i915_address_space *vm, u64 size, u64 align, unsigned int flags), 519 519 TP_ARGS(vm, size, align, flags), 520 520 521 521 TP_STRUCT__entry( 522 522 __field(u32, dev) 523 523 __field(struct i915_address_space *, vm) 524 - __field(u32, size) 525 - __field(u32, align) 524 + __field(u64, size) 525 + __field(u64, align) 526 526 __field(unsigned int, flags) 527 527 ), 528 528 ··· 534 534 __entry->flags = flags; 535 535 ), 536 536 537 - TP_printk("dev=%d, vm=%p, size=%d, align=%d %s", 537 + TP_printk("dev=%d, vm=%p, size=0x%llx, align=0x%llx %s", 538 538 __entry->dev, __entry->vm, __entry->size, __entry->align, 539 539 __entry->flags & PIN_MAPPABLE ? ", mappable" : "") 540 - ); 541 - 542 - TRACE_EVENT(i915_gem_evict_everything, 543 - TP_PROTO(struct drm_device *dev), 544 - TP_ARGS(dev), 545 - 546 - TP_STRUCT__entry( 547 - __field(u32, dev) 548 - ), 549 - 550 - TP_fast_assign( 551 - __entry->dev = dev->primary->index; 552 - ), 553 - 554 - TP_printk("dev=%d", __entry->dev) 555 - ); 556 - 557 - TRACE_EVENT(i915_gem_evict_vm, 558 - TP_PROTO(struct i915_address_space *vm), 559 - TP_ARGS(vm), 560 - 561 - TP_STRUCT__entry( 562 - __field(u32, dev) 563 - __field(struct i915_address_space *, vm) 564 - ), 565 - 566 - TP_fast_assign( 567 - __entry->dev = vm->i915->drm.primary->index; 568 - __entry->vm = vm; 569 - ), 570 - 571 - TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) 572 540 ); 573 541 574 542 TRACE_EVENT(i915_gem_evict_node, ··· 561 593 __entry->flags = flags; 562 594 ), 563 595 564 - TP_printk("dev=%d, vm=%p, start=%llx size=%llx, color=%lx, flags=%x", 596 + TP_printk("dev=%d, vm=%p, start=0x%llx size=0x%llx, color=0x%lx, flags=%x", 565 597 __entry->dev, __entry->vm, 566 598 __entry->start, __entry->size, 567 599 __entry->color, __entry->flags) 600 + ); 601 + 602 
+ TRACE_EVENT(i915_gem_evict_vm, 603 + TP_PROTO(struct i915_address_space *vm), 604 + TP_ARGS(vm), 605 + 606 + TP_STRUCT__entry( 607 + __field(u32, dev) 608 + __field(struct i915_address_space *, vm) 609 + ), 610 + 611 + TP_fast_assign( 612 + __entry->dev = vm->i915->drm.primary->index; 613 + __entry->vm = vm; 614 + ), 615 + 616 + TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) 568 617 ); 569 618 570 619 TRACE_EVENT(i915_gem_ring_sync_to, ··· 632 647 TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", 633 648 __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, 634 649 __entry->flags) 635 - ); 636 - 637 - TRACE_EVENT(i915_gem_ring_flush, 638 - TP_PROTO(struct drm_i915_gem_request *req, u32 invalidate, u32 flush), 639 - TP_ARGS(req, invalidate, flush), 640 - 641 - TP_STRUCT__entry( 642 - __field(u32, dev) 643 - __field(u32, ring) 644 - __field(u32, invalidate) 645 - __field(u32, flush) 646 - ), 647 - 648 - TP_fast_assign( 649 - __entry->dev = req->i915->drm.primary->index; 650 - __entry->ring = req->engine->id; 651 - __entry->invalidate = invalidate; 652 - __entry->flush = flush; 653 - ), 654 - 655 - TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x", 656 - __entry->dev, __entry->ring, 657 - __entry->invalidate, __entry->flush) 658 650 ); 659 651 660 652 DECLARE_EVENT_CLASS(i915_gem_request,
+18
drivers/gpu/drm/i915/i915_utils.h
··· 99 99 __T; \ 100 100 }) 101 101 102 + static inline u64 ptr_to_u64(const void *ptr) 103 + { 104 + return (uintptr_t)ptr; 105 + } 106 + 102 107 #define u64_to_ptr(T, x) ({ \ 103 108 typecheck(u64, x); \ 104 109 (T *)(uintptr_t)(x); \ ··· 122 117 { 123 118 first->prev = head; 124 119 WRITE_ONCE(head->next, first); 120 + } 121 + 122 + /* 123 + * Wait until the work is finally complete, even if it tries to postpone 124 + * by requeueing itself. Note, that if the worker never cancels itself, 125 + * we will spin forever. 126 + */ 127 + static inline void drain_delayed_work(struct delayed_work *dw) 128 + { 129 + do { 130 + while (flush_delayed_work(dw)) 131 + ; 132 + } while (delayed_work_pending(dw)); 125 133 } 126 134 127 135 #endif /* !__I915_UTILS_H */
+105 -16
drivers/gpu/drm/i915/i915_vma.c
··· 266 266 if (bind_flags == 0) 267 267 return 0; 268 268 269 + GEM_BUG_ON(!vma->pages); 270 + 269 271 trace_i915_vma_bind(vma, bind_flags); 270 272 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 271 273 if (ret) ··· 280 278 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) 281 279 { 282 280 void __iomem *ptr; 281 + int err; 283 282 284 283 /* Access through the GTT requires the device to be awake. */ 285 284 assert_rpm_wakelock_held(vma->vm->i915); 286 285 287 286 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 288 - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) 289 - return IO_ERR_PTR(-ENODEV); 287 + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { 288 + err = -ENODEV; 289 + goto err; 290 + } 290 291 291 292 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 292 293 GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); ··· 299 294 ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, 300 295 vma->node.start, 301 296 vma->node.size); 302 - if (ptr == NULL) 303 - return IO_ERR_PTR(-ENOMEM); 297 + if (ptr == NULL) { 298 + err = -ENOMEM; 299 + goto err; 300 + } 304 301 305 302 vma->iomap = ptr; 306 303 } 307 304 308 305 __i915_vma_pin(vma); 306 + 307 + err = i915_vma_pin_fence(vma); 308 + if (err) 309 + goto err_unpin; 310 + 309 311 return ptr; 312 + 313 + err_unpin: 314 + __i915_vma_unpin(vma); 315 + err: 316 + return IO_ERR_PTR(err); 317 + } 318 + 319 + void i915_vma_unpin_iomap(struct i915_vma *vma) 320 + { 321 + lockdep_assert_held(&vma->obj->base.dev->struct_mutex); 322 + 323 + GEM_BUG_ON(vma->iomap == NULL); 324 + 325 + i915_vma_unpin_fence(vma); 326 + i915_vma_unpin(vma); 310 327 } 311 328 312 329 void i915_vma_unpin_and_release(struct i915_vma **p_vma) ··· 498 471 if (ret) 499 472 return ret; 500 473 474 + GEM_BUG_ON(vma->pages); 475 + 476 + ret = vma->vm->set_pages(vma); 477 + if (ret) 478 + goto err_unpin; 479 + 501 480 if (flags & PIN_OFFSET_FIXED) { 502 481 u64 offset = flags & PIN_OFFSET_MASK; 503 482 if (!IS_ALIGNED(offset, 
alignment) || 504 483 range_overflows(offset, size, end)) { 505 484 ret = -EINVAL; 506 - goto err_unpin; 485 + goto err_clear; 507 486 } 508 487 509 488 ret = i915_gem_gtt_reserve(vma->vm, &vma->node, 510 489 size, offset, obj->cache_level, 511 490 flags); 512 491 if (ret) 513 - goto err_unpin; 492 + goto err_clear; 514 493 } else { 494 + /* 495 + * We only support huge gtt pages through the 48b PPGTT, 496 + * however we also don't want to force any alignment for 497 + * objects which need to be tightly packed into the low 32bits. 498 + * 499 + * Note that we assume that GGTT are limited to 4GiB for the 500 + * forseeable future. See also i915_ggtt_offset(). 501 + */ 502 + if (upper_32_bits(end - 1) && 503 + vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { 504 + /* 505 + * We can't mix 64K and 4K PTEs in the same page-table 506 + * (2M block), and so to avoid the ugliness and 507 + * complexity of coloring we opt for just aligning 64K 508 + * objects to 2M. 509 + */ 510 + u64 page_alignment = 511 + rounddown_pow_of_two(vma->page_sizes.sg | 512 + I915_GTT_PAGE_SIZE_2M); 513 + 514 + /* 515 + * Check we don't expand for the limited Global GTT 516 + * (mappable aperture is even more precious!). This 517 + * also checks that we exclude the aliasing-ppgtt. 
518 + */ 519 + GEM_BUG_ON(i915_vma_is_ggtt(vma)); 520 + 521 + alignment = max(alignment, page_alignment); 522 + 523 + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) 524 + size = round_up(size, I915_GTT_PAGE_SIZE_2M); 525 + } 526 + 515 527 ret = i915_gem_gtt_insert(vma->vm, &vma->node, 516 528 size, alignment, obj->cache_level, 517 529 start, end, flags); 518 530 if (ret) 519 - goto err_unpin; 531 + goto err_clear; 520 532 521 533 GEM_BUG_ON(vma->node.start < start); 522 534 GEM_BUG_ON(vma->node.start + vma->node.size > end); ··· 570 504 571 505 return 0; 572 506 507 + err_clear: 508 + vma->vm->clear_pages(vma); 573 509 err_unpin: 574 510 i915_gem_object_unpin_pages(obj); 575 511 return ret; ··· 584 516 585 517 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); 586 518 GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); 519 + 520 + vma->vm->clear_pages(vma); 587 521 588 522 drm_mm_remove_node(&vma->node); 589 523 list_move_tail(&vma->vm_link, &vma->vm->unbound_list); ··· 639 569 640 570 err_remove: 641 571 if ((bound & I915_VMA_BIND_MASK) == 0) { 642 - GEM_BUG_ON(vma->pages); 643 572 i915_vma_remove(vma); 573 + GEM_BUG_ON(vma->pages); 644 574 } 645 575 err_unpin: 646 576 __i915_vma_unpin(vma); ··· 688 618 689 619 io_mapping_unmap(vma->iomap); 690 620 vma->iomap = NULL; 621 + } 622 + 623 + void i915_vma_revoke_mmap(struct i915_vma *vma) 624 + { 625 + struct drm_vma_offset_node *node = &vma->obj->base.vma_node; 626 + u64 vma_offset; 627 + 628 + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 629 + 630 + if (!i915_vma_has_userfault(vma)) 631 + return; 632 + 633 + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); 634 + GEM_BUG_ON(!vma->obj->userfault_count); 635 + 636 + vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; 637 + unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping, 638 + drm_vma_node_offset_addr(node) + vma_offset, 639 + vma->size, 640 + 1); 641 + 642 + i915_vma_unset_userfault(vma); 643 + if 
(!--vma->obj->userfault_count) 644 + list_del(&vma->obj->userfault_link); 691 645 } 692 646 693 647 int i915_vma_unbind(struct i915_vma *vma) ··· 777 683 return ret; 778 684 779 685 /* Force a pagefault for domain tracking on next user access */ 780 - i915_gem_release_mmap(obj); 686 + i915_vma_revoke_mmap(vma); 781 687 782 688 __i915_vma_iounmap(vma); 783 689 vma->flags &= ~I915_VMA_CAN_FENCE; 784 690 } 691 + GEM_BUG_ON(vma->fence); 692 + GEM_BUG_ON(i915_vma_has_userfault(vma)); 785 693 786 694 if (likely(!vma->vm->closed)) { 787 695 trace_i915_vma_unbind(vma); 788 696 vma->vm->unbind_vma(vma); 789 697 } 790 698 vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); 791 - 792 - if (vma->pages != obj->mm.pages) { 793 - GEM_BUG_ON(!vma->pages); 794 - sg_free_table(vma->pages); 795 - kfree(vma->pages); 796 - } 797 - vma->pages = NULL; 798 699 799 700 i915_vma_remove(vma); 800 701
+30 -19
drivers/gpu/drm/i915/i915_vma.h
··· 55 55 void __iomem *iomap; 56 56 u64 size; 57 57 u64 display_alignment; 58 + struct i915_page_sizes page_sizes; 58 59 59 60 u32 fence_size; 60 61 u32 fence_alignment; ··· 66 65 * that exist in the ctx->handle_vmas LUT for this vma. 67 66 */ 68 67 unsigned int open_count; 69 - unsigned int flags; 68 + unsigned long flags; 70 69 /** 71 70 * How many users have pinned this object in GTT space. The following 72 71 * users can each hold at most one reference: pwrite/pread, execbuffer ··· 88 87 #define I915_VMA_GGTT BIT(8) 89 88 #define I915_VMA_CAN_FENCE BIT(9) 90 89 #define I915_VMA_CLOSED BIT(10) 90 + #define I915_VMA_USERFAULT_BIT 11 91 + #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) 91 92 92 93 unsigned int active; 93 94 struct i915_gem_active last_read[I915_NUM_ENGINES]; ··· 146 143 static inline bool i915_vma_is_closed(const struct i915_vma *vma) 147 144 { 148 145 return vma->flags & I915_VMA_CLOSED; 146 + } 147 + 148 + static inline bool i915_vma_set_userfault(struct i915_vma *vma) 149 + { 150 + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); 151 + return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags); 152 + } 153 + 154 + static inline void i915_vma_unset_userfault(struct i915_vma *vma) 155 + { 156 + return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags); 157 + } 158 + 159 + static inline bool i915_vma_has_userfault(const struct i915_vma *vma) 160 + { 161 + return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags); 149 162 } 150 163 151 164 static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) ··· 262 243 bool i915_vma_misplaced(const struct i915_vma *vma, 263 244 u64 size, u64 alignment, u64 flags); 264 245 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); 246 + void i915_vma_revoke_mmap(struct i915_vma *vma); 265 247 int __must_check i915_vma_unbind(struct i915_vma *vma); 266 248 void i915_vma_unlink_ctx(struct i915_vma *vma); 267 249 void i915_vma_close(struct i915_vma *vma); ··· 341 321 * Callers must hold 
the struct_mutex. This function is only valid to be 342 322 * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). 343 323 */ 344 - static inline void i915_vma_unpin_iomap(struct i915_vma *vma) 345 - { 346 - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); 347 - GEM_BUG_ON(vma->iomap == NULL); 348 - i915_vma_unpin(vma); 349 - } 324 + void i915_vma_unpin_iomap(struct i915_vma *vma); 350 325 351 326 static inline struct page *i915_vma_first_page(struct i915_vma *vma) 352 327 { ··· 364 349 * 365 350 * True if the vma has a fence, false otherwise. 366 351 */ 367 - static inline bool 368 - i915_vma_pin_fence(struct i915_vma *vma) 352 + int i915_vma_pin_fence(struct i915_vma *vma); 353 + int __must_check i915_vma_put_fence(struct i915_vma *vma); 354 + 355 + static inline void __i915_vma_unpin_fence(struct i915_vma *vma) 369 356 { 370 - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); 371 - if (vma->fence) { 372 - vma->fence->pin_count++; 373 - return true; 374 - } else 375 - return false; 357 + GEM_BUG_ON(vma->fence->pin_count <= 0); 358 + vma->fence->pin_count--; 376 359 } 377 360 378 361 /** ··· 385 372 i915_vma_unpin_fence(struct i915_vma *vma) 386 373 { 387 374 lockdep_assert_held(&vma->obj->base.dev->struct_mutex); 388 - if (vma->fence) { 389 - GEM_BUG_ON(vma->fence->pin_count <= 0); 390 - vma->fence->pin_count--; 391 - } 375 + if (vma->fence) 376 + __i915_vma_unpin_fence(vma); 392 377 } 393 378 394 379 #endif
+1 -1
drivers/gpu/drm/i915/intel_audio.c
··· 754 754 { 755 755 struct intel_encoder *encoder; 756 756 757 - if (WARN_ON(pipe >= I915_MAX_PIPES)) 757 + if (WARN_ON(pipe >= INTEL_INFO(dev_priv)->num_pipes)) 758 758 return NULL; 759 759 760 760 /* MST */
+41 -89
drivers/gpu/drm/i915/intel_bios.c
··· 431 431 dev_priv->vbt.fdi_rx_polarity_inverted); 432 432 } 433 433 434 - static void 435 - parse_general_definitions(struct drm_i915_private *dev_priv, 436 - const struct bdb_header *bdb) 437 - { 438 - const struct bdb_general_definitions *general; 439 - 440 - general = find_section(bdb, BDB_GENERAL_DEFINITIONS); 441 - if (general) { 442 - u16 block_size = get_blocksize(general); 443 - if (block_size >= sizeof(*general)) { 444 - int bus_pin = general->crt_ddc_gmbus_pin; 445 - DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); 446 - if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) 447 - dev_priv->vbt.crt_ddc_pin = bus_pin; 448 - } else { 449 - DRM_DEBUG_KMS("BDB_GD too small (%d). Invalid.\n", 450 - block_size); 451 - } 452 - } 453 - } 454 - 455 434 static const struct child_device_config * 456 435 child_device_ptr(const struct bdb_general_definitions *defs, int i) 457 436 { ··· 438 459 } 439 460 440 461 static void 441 - parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, 442 - const struct bdb_header *bdb) 462 + parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version) 443 463 { 444 464 struct sdvo_device_mapping *mapping; 445 - const struct bdb_general_definitions *defs; 446 465 const struct child_device_config *child; 447 - int i, child_device_num, count; 448 - u16 block_size; 449 - 450 - defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); 451 - if (!defs) { 452 - DRM_DEBUG_KMS("No general definition block is found, unable to construct sdvo mapping.\n"); 453 - return; 454 - } 466 + int i, count = 0; 455 467 456 468 /* 457 - * Only parse SDVO mappings when the general definitions block child 458 - * device size matches that of the *legacy* child device config 459 - * struct. Thus, SDVO mapping will be skipped for newer VBT. 469 + * Only parse SDVO mappings on gens that could have SDVO. This isn't 470 + * accurate and doesn't have to be, as long as it's not too strict. 
460 471 */ 461 - if (defs->child_dev_size != LEGACY_CHILD_DEVICE_CONFIG_SIZE) { 462 - DRM_DEBUG_KMS("Unsupported child device size for SDVO mapping.\n"); 472 + if (!IS_GEN(dev_priv, 3, 7)) { 473 + DRM_DEBUG_KMS("Skipping SDVO device mapping\n"); 463 474 return; 464 475 } 465 - /* get the block size of general definitions */ 466 - block_size = get_blocksize(defs); 467 - /* get the number of child device */ 468 - child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; 469 - count = 0; 470 - for (i = 0; i < child_device_num; i++) { 471 - child = child_device_ptr(defs, i); 472 - if (!child->device_type) { 473 - /* skip the device block if device type is invalid */ 474 - continue; 475 - } 476 + 477 + for (i = 0, count = 0; i < dev_priv->vbt.child_dev_num; i++) { 478 + child = dev_priv->vbt.child_dev + i; 479 + 476 480 if (child->slave_addr != SLAVE_ADDR1 && 477 481 child->slave_addr != SLAVE_ADDR2) { 478 482 /* ··· 506 544 /* No SDVO device info is found */ 507 545 DRM_DEBUG_KMS("No SDVO device info is found in VBT\n"); 508 546 } 509 - return; 510 547 } 511 548 512 549 static void ··· 1072 1111 } 1073 1112 1074 1113 static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, 1075 - const struct bdb_header *bdb) 1114 + u8 bdb_version) 1076 1115 { 1077 1116 struct child_device_config *it, *child = NULL; 1078 1117 struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; ··· 1176 1215 sanitize_aux_ch(dev_priv, port); 1177 1216 } 1178 1217 1179 - if (bdb->version >= 158) { 1218 + if (bdb_version >= 158) { 1180 1219 /* The VBT HDMI level shift values match the table we have. 
*/ 1181 1220 hdmi_level_shift = child->hdmi_level_shifter_value; 1182 1221 DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", ··· 1186 1225 } 1187 1226 1188 1227 /* Parse the I_boost config for SKL and above */ 1189 - if (bdb->version >= 196 && child->iboost) { 1228 + if (bdb_version >= 196 && child->iboost) { 1190 1229 info->dp_boost_level = translate_iboost(child->dp_iboost_level); 1191 1230 DRM_DEBUG_KMS("VBT (e)DP boost level for port %c: %d\n", 1192 1231 port_name(port), info->dp_boost_level); ··· 1196 1235 } 1197 1236 } 1198 1237 1199 - static void parse_ddi_ports(struct drm_i915_private *dev_priv, 1200 - const struct bdb_header *bdb) 1238 + static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version) 1201 1239 { 1202 1240 enum port port; 1203 1241 1204 - if (!HAS_DDI(dev_priv)) 1242 + if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv)) 1205 1243 return; 1206 1244 1207 1245 if (!dev_priv->vbt.child_dev_num) 1208 1246 return; 1209 1247 1210 - if (bdb->version < 155) 1248 + if (bdb_version < 155) 1211 1249 return; 1212 1250 1213 1251 for (port = PORT_A; port < I915_MAX_PORTS; port++) 1214 - parse_ddi_port(dev_priv, port, bdb); 1252 + parse_ddi_port(dev_priv, port, bdb_version); 1215 1253 } 1216 1254 1217 1255 static void 1218 - parse_device_mapping(struct drm_i915_private *dev_priv, 1219 - const struct bdb_header *bdb) 1256 + parse_general_definitions(struct drm_i915_private *dev_priv, 1257 + const struct bdb_header *bdb) 1220 1258 { 1221 1259 const struct bdb_general_definitions *defs; 1222 1260 const struct child_device_config *child; 1223 - struct child_device_config *child_dev_ptr; 1224 1261 int i, child_device_num, count; 1225 1262 u8 expected_size; 1226 1263 u16 block_size; 1264 + int bus_pin; 1227 1265 1228 1266 defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); 1229 1267 if (!defs) { 1230 1268 DRM_DEBUG_KMS("No general definition block is found, no devices defined.\n"); 1231 1269 return; 1232 1270 } 1271 + 1272 + block_size = 
get_blocksize(defs); 1273 + if (block_size < sizeof(*defs)) { 1274 + DRM_DEBUG_KMS("General definitions block too small (%u)\n", 1275 + block_size); 1276 + return; 1277 + } 1278 + 1279 + bus_pin = defs->crt_ddc_gmbus_pin; 1280 + DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); 1281 + if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) 1282 + dev_priv->vbt.crt_ddc_pin = bus_pin; 1283 + 1233 1284 if (bdb->version < 106) { 1234 1285 expected_size = 22; 1235 1286 } else if (bdb->version < 111) { ··· 1271 1298 return; 1272 1299 } 1273 1300 1274 - /* get the block size of general definitions */ 1275 - block_size = get_blocksize(defs); 1276 1301 /* get the number of child device */ 1277 1302 child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; 1278 1303 count = 0; 1279 1304 /* get the number of child device that is present */ 1280 1305 for (i = 0; i < child_device_num; i++) { 1281 1306 child = child_device_ptr(defs, i); 1282 - if (!child->device_type) { 1283 - /* skip the device block if device type is invalid */ 1307 + if (!child->device_type) 1284 1308 continue; 1285 - } 1286 1309 count++; 1287 1310 } 1288 1311 if (!count) { ··· 1295 1326 count = 0; 1296 1327 for (i = 0; i < child_device_num; i++) { 1297 1328 child = child_device_ptr(defs, i); 1298 - if (!child->device_type) { 1299 - /* skip the device block if device type is invalid */ 1329 + if (!child->device_type) 1300 1330 continue; 1301 - } 1302 - 1303 - child_dev_ptr = dev_priv->vbt.child_dev + count; 1304 - count++; 1305 1331 1306 1332 /* 1307 1333 * Copy as much as we know (sizeof) and is available 1308 1334 * (child_dev_size) of the child device. Accessing the data must 1309 1335 * depend on VBT version. 
1310 1336 */ 1311 - memcpy(child_dev_ptr, child, 1337 + memcpy(dev_priv->vbt.child_dev + count, child, 1312 1338 min_t(size_t, defs->child_dev_size, sizeof(*child))); 1313 - 1314 - /* 1315 - * copied full block, now init values when they are not 1316 - * available in current version 1317 - */ 1318 - if (bdb->version < 196) { 1319 - /* Set default values for bits added from v196 */ 1320 - child_dev_ptr->iboost = 0; 1321 - child_dev_ptr->hpd_invert = 0; 1322 - } 1323 - 1324 - if (bdb->version < 192) 1325 - child_dev_ptr->lspcon = 0; 1339 + count++; 1326 1340 } 1327 - return; 1328 1341 } 1329 1342 1330 1343 /* Common defaults which may be overridden by VBT. */ ··· 1487 1536 parse_lfp_panel_data(dev_priv, bdb); 1488 1537 parse_lfp_backlight(dev_priv, bdb); 1489 1538 parse_sdvo_panel_data(dev_priv, bdb); 1490 - parse_sdvo_device_mapping(dev_priv, bdb); 1491 - parse_device_mapping(dev_priv, bdb); 1492 1539 parse_driver_features(dev_priv, bdb); 1493 1540 parse_edp(dev_priv, bdb); 1494 1541 parse_psr(dev_priv, bdb); 1495 1542 parse_mipi_config(dev_priv, bdb); 1496 1543 parse_mipi_sequence(dev_priv, bdb); 1497 - parse_ddi_ports(dev_priv, bdb); 1544 + 1545 + /* Further processing on pre-parsed data */ 1546 + parse_sdvo_device_mapping(dev_priv, bdb->version); 1547 + parse_ddi_ports(dev_priv, bdb->version); 1498 1548 1499 1549 out: 1500 1550 if (!vbt) {
+20 -20
drivers/gpu/drm/i915/intel_cdclk.c
··· 503 503 else 504 504 cmd = 0; 505 505 506 - mutex_lock(&dev_priv->rps.hw_lock); 506 + mutex_lock(&dev_priv->pcu_lock); 507 507 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 508 508 val &= ~DSPFREQGUAR_MASK; 509 509 val |= (cmd << DSPFREQGUAR_SHIFT); ··· 513 513 50)) { 514 514 DRM_ERROR("timed out waiting for CDclk change\n"); 515 515 } 516 - mutex_unlock(&dev_priv->rps.hw_lock); 516 + mutex_unlock(&dev_priv->pcu_lock); 517 517 518 518 mutex_lock(&dev_priv->sb_lock); 519 519 ··· 590 590 */ 591 591 cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; 592 592 593 - mutex_lock(&dev_priv->rps.hw_lock); 593 + mutex_lock(&dev_priv->pcu_lock); 594 594 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 595 595 val &= ~DSPFREQGUAR_MASK_CHV; 596 596 val |= (cmd << DSPFREQGUAR_SHIFT_CHV); ··· 600 600 50)) { 601 601 DRM_ERROR("timed out waiting for CDclk change\n"); 602 602 } 603 - mutex_unlock(&dev_priv->rps.hw_lock); 603 + mutex_unlock(&dev_priv->pcu_lock); 604 604 605 605 intel_update_cdclk(dev_priv); 606 606 ··· 656 656 "trying to change cdclk frequency with cdclk not enabled\n")) 657 657 return; 658 658 659 - mutex_lock(&dev_priv->rps.hw_lock); 659 + mutex_lock(&dev_priv->pcu_lock); 660 660 ret = sandybridge_pcode_write(dev_priv, 661 661 BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); 662 - mutex_unlock(&dev_priv->rps.hw_lock); 662 + mutex_unlock(&dev_priv->pcu_lock); 663 663 if (ret) { 664 664 DRM_ERROR("failed to inform pcode about cdclk change\n"); 665 665 return; ··· 712 712 LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) 713 713 DRM_ERROR("Switching back to LCPLL failed\n"); 714 714 715 - mutex_lock(&dev_priv->rps.hw_lock); 715 + mutex_lock(&dev_priv->pcu_lock); 716 716 sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); 717 - mutex_unlock(&dev_priv->rps.hw_lock); 717 + mutex_unlock(&dev_priv->pcu_lock); 718 718 719 719 I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); 720 720 ··· 928 928 929 929 WARN_ON((cdclk == 24000) != (vco == 0)); 930 
930 931 - mutex_lock(&dev_priv->rps.hw_lock); 931 + mutex_lock(&dev_priv->pcu_lock); 932 932 ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, 933 933 SKL_CDCLK_PREPARE_FOR_CHANGE, 934 934 SKL_CDCLK_READY_FOR_CHANGE, 935 935 SKL_CDCLK_READY_FOR_CHANGE, 3); 936 - mutex_unlock(&dev_priv->rps.hw_lock); 936 + mutex_unlock(&dev_priv->pcu_lock); 937 937 if (ret) { 938 938 DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", 939 939 ret); ··· 975 975 POSTING_READ(CDCLK_CTL); 976 976 977 977 /* inform PCU of the change */ 978 - mutex_lock(&dev_priv->rps.hw_lock); 978 + mutex_lock(&dev_priv->pcu_lock); 979 979 sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); 980 - mutex_unlock(&dev_priv->rps.hw_lock); 980 + mutex_unlock(&dev_priv->pcu_lock); 981 981 982 982 intel_update_cdclk(dev_priv); 983 983 } ··· 1268 1268 } 1269 1269 1270 1270 /* Inform power controller of upcoming frequency change */ 1271 - mutex_lock(&dev_priv->rps.hw_lock); 1271 + mutex_lock(&dev_priv->pcu_lock); 1272 1272 ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, 1273 1273 0x80000000); 1274 - mutex_unlock(&dev_priv->rps.hw_lock); 1274 + mutex_unlock(&dev_priv->pcu_lock); 1275 1275 1276 1276 if (ret) { 1277 1277 DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", ··· 1300 1300 val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; 1301 1301 I915_WRITE(CDCLK_CTL, val); 1302 1302 1303 - mutex_lock(&dev_priv->rps.hw_lock); 1303 + mutex_lock(&dev_priv->pcu_lock); 1304 1304 ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, 1305 1305 DIV_ROUND_UP(cdclk, 25000)); 1306 - mutex_unlock(&dev_priv->rps.hw_lock); 1306 + mutex_unlock(&dev_priv->pcu_lock); 1307 1307 1308 1308 if (ret) { 1309 1309 DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", ··· 1518 1518 u32 val, divider, pcu_ack; 1519 1519 int ret; 1520 1520 1521 - mutex_lock(&dev_priv->rps.hw_lock); 1521 + mutex_lock(&dev_priv->pcu_lock); 1522 1522 ret = 
skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, 1523 1523 SKL_CDCLK_PREPARE_FOR_CHANGE, 1524 1524 SKL_CDCLK_READY_FOR_CHANGE, 1525 1525 SKL_CDCLK_READY_FOR_CHANGE, 3); 1526 - mutex_unlock(&dev_priv->rps.hw_lock); 1526 + mutex_unlock(&dev_priv->pcu_lock); 1527 1527 if (ret) { 1528 1528 DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", 1529 1529 ret); ··· 1575 1575 I915_WRITE(CDCLK_CTL, val); 1576 1576 1577 1577 /* inform PCU of the change */ 1578 - mutex_lock(&dev_priv->rps.hw_lock); 1578 + mutex_lock(&dev_priv->pcu_lock); 1579 1579 sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); 1580 - mutex_unlock(&dev_priv->rps.hw_lock); 1580 + mutex_unlock(&dev_priv->pcu_lock); 1581 1581 1582 1582 intel_update_cdclk(dev_priv); 1583 1583 }
+7 -9
drivers/gpu/drm/i915/intel_color.c
··· 74 74 #define I9XX_CSC_COEFF_1_0 \ 75 75 ((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) 76 76 77 - static bool crtc_state_is_legacy(struct drm_crtc_state *state) 77 + static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) 78 78 { 79 79 return !state->degamma_lut && 80 80 !state->ctm && ··· 288 288 } 289 289 290 290 mode = (state->ctm ? CGM_PIPE_MODE_CSC : 0); 291 - if (!crtc_state_is_legacy(state)) { 291 + if (!crtc_state_is_legacy_gamma(state)) { 292 292 mode |= (state->degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | 293 293 (state->gamma_lut ? CGM_PIPE_MODE_GAMMA : 0); 294 294 } ··· 469 469 struct intel_crtc_state *intel_state = to_intel_crtc_state(state); 470 470 enum pipe pipe = to_intel_crtc(state->crtc)->pipe; 471 471 472 - if (crtc_state_is_legacy(state)) { 472 + if (crtc_state_is_legacy_gamma(state)) { 473 473 haswell_load_luts(state); 474 474 return; 475 475 } ··· 529 529 530 530 glk_load_degamma_lut(state); 531 531 532 - if (crtc_state_is_legacy(state)) { 532 + if (crtc_state_is_legacy_gamma(state)) { 533 533 haswell_load_luts(state); 534 534 return; 535 535 } ··· 551 551 uint32_t i, lut_size; 552 552 uint32_t word0, word1; 553 553 554 - if (crtc_state_is_legacy(state)) { 554 + if (crtc_state_is_legacy_gamma(state)) { 555 555 /* Turn off degamma/gamma on CGM block. */ 556 556 I915_WRITE(CGM_PIPE_MODE(pipe), 557 557 (state->ctm ? CGM_PIPE_MODE_CSC : 0)); ··· 632 632 return 0; 633 633 634 634 /* 635 - * We also allow no degamma lut and a gamma lut at the legacy 635 + * We also allow no degamma lut/ctm and a gamma lut at the legacy 636 636 * size (256 entries). 637 637 */ 638 - if (!crtc_state->degamma_lut && 639 - crtc_state->gamma_lut && 640 - crtc_state->gamma_lut->length == LEGACY_LUT_LENGTH) 638 + if (crtc_state_is_legacy_gamma(crtc_state)) 641 639 return 0; 642 640 643 641 return -EINVAL;
+78 -8
drivers/gpu/drm/i915/intel_crt.c
··· 213 213 intel_disable_crt(encoder, old_crtc_state, old_conn_state); 214 214 } 215 215 216 + static void hsw_disable_crt(struct intel_encoder *encoder, 217 + const struct intel_crtc_state *old_crtc_state, 218 + const struct drm_connector_state *old_conn_state) 219 + { 220 + struct drm_crtc *crtc = old_crtc_state->base.crtc; 221 + struct drm_i915_private *dev_priv = to_i915(crtc->dev); 222 + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 223 + 224 + WARN_ON(!intel_crtc->config->has_pch_encoder); 225 + 226 + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); 227 + } 228 + 216 229 static void hsw_post_disable_crt(struct intel_encoder *encoder, 217 230 const struct intel_crtc_state *old_crtc_state, 218 231 const struct drm_connector_state *old_conn_state) ··· 238 225 lpt_disable_iclkip(dev_priv); 239 226 240 227 intel_ddi_fdi_post_disable(encoder, old_crtc_state, old_conn_state); 228 + 229 + WARN_ON(!old_crtc_state->has_pch_encoder); 230 + 231 + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); 232 + } 233 + 234 + static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder, 235 + const struct intel_crtc_state *pipe_config, 236 + const struct drm_connector_state *conn_state) 237 + { 238 + struct drm_crtc *crtc = pipe_config->base.crtc; 239 + struct drm_i915_private *dev_priv = to_i915(crtc->dev); 240 + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 241 + 242 + WARN_ON(!intel_crtc->config->has_pch_encoder); 243 + 244 + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); 245 + } 246 + 247 + static void hsw_pre_enable_crt(struct intel_encoder *encoder, 248 + const struct intel_crtc_state *pipe_config, 249 + const struct drm_connector_state *conn_state) 250 + { 251 + struct drm_crtc *crtc = pipe_config->base.crtc; 252 + struct drm_i915_private *dev_priv = to_i915(crtc->dev); 253 + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 254 + int pipe = intel_crtc->pipe; 255 + 256 + 
WARN_ON(!intel_crtc->config->has_pch_encoder); 257 + 258 + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); 259 + 260 + dev_priv->display.fdi_link_train(intel_crtc, pipe_config); 261 + } 262 + 263 + static void hsw_enable_crt(struct intel_encoder *encoder, 264 + const struct intel_crtc_state *pipe_config, 265 + const struct drm_connector_state *conn_state) 266 + { 267 + struct drm_crtc *crtc = pipe_config->base.crtc; 268 + struct drm_i915_private *dev_priv = to_i915(crtc->dev); 269 + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 270 + int pipe = intel_crtc->pipe; 271 + 272 + WARN_ON(!intel_crtc->config->has_pch_encoder); 273 + 274 + intel_crt_set_dpms(encoder, pipe_config, DRM_MODE_DPMS_ON); 275 + 276 + intel_wait_for_vblank(dev_priv, pipe); 277 + intel_wait_for_vblank(dev_priv, pipe); 278 + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); 279 + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); 241 280 } 242 281 243 282 static void intel_enable_crt(struct intel_encoder *encoder, ··· 955 890 956 891 crt->base.power_domain = POWER_DOMAIN_PORT_CRT; 957 892 958 - crt->base.compute_config = intel_crt_compute_config; 959 - if (HAS_PCH_SPLIT(dev_priv)) { 960 - crt->base.disable = pch_disable_crt; 961 - crt->base.post_disable = pch_post_disable_crt; 962 - } else { 963 - crt->base.disable = intel_disable_crt; 964 - } 965 - crt->base.enable = intel_enable_crt; 966 893 if (I915_HAS_HOTPLUG(dev_priv) && 967 894 !dmi_check_system(intel_spurious_crt_detect)) 968 895 crt->base.hpd_pin = HPD_CRT; 896 + 897 + crt->base.compute_config = intel_crt_compute_config; 969 898 if (HAS_DDI(dev_priv)) { 970 899 crt->base.port = PORT_E; 971 900 crt->base.get_config = hsw_crt_get_config; 972 901 crt->base.get_hw_state = intel_ddi_get_hw_state; 902 + crt->base.pre_pll_enable = hsw_pre_pll_enable_crt; 903 + crt->base.pre_enable = hsw_pre_enable_crt; 904 + crt->base.enable = hsw_enable_crt; 905 + crt->base.disable = hsw_disable_crt; 973 906 
crt->base.post_disable = hsw_post_disable_crt; 974 907 } else { 908 + if (HAS_PCH_SPLIT(dev_priv)) { 909 + crt->base.disable = pch_disable_crt; 910 + crt->base.post_disable = pch_post_disable_crt; 911 + } else { 912 + crt->base.disable = intel_disable_crt; 913 + } 975 914 crt->base.port = PORT_NONE; 976 915 crt->base.get_config = intel_crt_get_config; 977 916 crt->base.get_hw_state = intel_crt_get_hw_state; 917 + crt->base.enable = intel_enable_crt; 978 918 } 979 919 intel_connector->get_hw_state = intel_connector_get_hw_state; 980 920
+1 -1
drivers/gpu/drm/i915/intel_csr.c
··· 216 216 217 217 mask = DC_STATE_DEBUG_MASK_MEMORY_UP; 218 218 219 - if (IS_BROXTON(dev_priv)) 219 + if (IS_GEN9_LP(dev_priv)) 220 220 mask |= DC_STATE_DEBUG_MASK_CORES; 221 221 222 222 /* The below bit doesn't need to be cleared ever afterwards */
+36 -8
drivers/gpu/drm/i915/intel_ddi.c
··· 602 602 } else if (voltage == VOLTAGE_INFO_1_05V) { 603 603 *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V); 604 604 return cnl_ddi_translations_hdmi_1_05V; 605 - } else 605 + } else { 606 + *n_entries = 1; /* shut up gcc */ 606 607 MISSING_CASE(voltage); 608 + } 607 609 return NULL; 608 610 } 609 611 ··· 623 621 } else if (voltage == VOLTAGE_INFO_1_05V) { 624 622 *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V); 625 623 return cnl_ddi_translations_dp_1_05V; 626 - } else 624 + } else { 625 + *n_entries = 1; /* shut up gcc */ 627 626 MISSING_CASE(voltage); 627 + } 628 628 return NULL; 629 629 } 630 630 ··· 645 641 } else if (voltage == VOLTAGE_INFO_1_05V) { 646 642 *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V); 647 643 return cnl_ddi_translations_edp_1_05V; 648 - } else 644 + } else { 645 + *n_entries = 1; /* shut up gcc */ 649 646 MISSING_CASE(voltage); 647 + } 650 648 return NULL; 651 649 } else { 652 650 return cnl_get_buf_trans_dp(dev_priv, n_entries); ··· 1220 1214 dco_freq += (((cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> 1221 1215 DPLL_CFGCR0_DCO_FRACTION_SHIFT) * ref_clock) / 0x8000; 1222 1216 1217 + if (WARN_ON(p0 == 0 || p1 == 0 || p2 == 0)) 1218 + return 0; 1219 + 1223 1220 return dco_freq / (p0 * p1 * p2 * 5); 1224 1221 } 1225 1222 ··· 1722 1713 out: 1723 1714 if (ret && IS_GEN9_LP(dev_priv)) { 1724 1715 tmp = I915_READ(BXT_PHY_CTL(port)); 1725 - if ((tmp & (BXT_PHY_LANE_POWERDOWN_ACK | 1716 + if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | 1717 + BXT_PHY_LANE_POWERDOWN_ACK | 1726 1718 BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED) 1727 1719 DRM_ERROR("Port %c enabled but PHY powered down? 
" 1728 1720 "(PHY_CTL %08x)\n", port_name(port), tmp); ··· 2171 2161 intel_prepare_dp_ddi_buffers(encoder); 2172 2162 2173 2163 intel_ddi_init_dp_buf_reg(encoder); 2174 - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); 2164 + if (!link_mst) 2165 + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); 2175 2166 intel_dp_start_link_train(intel_dp); 2176 2167 if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) 2177 2168 intel_dp_stop_link_train(intel_dp); ··· 2216 2205 const struct intel_crtc_state *pipe_config, 2217 2206 const struct drm_connector_state *conn_state) 2218 2207 { 2208 + struct drm_crtc *crtc = pipe_config->base.crtc; 2209 + struct drm_i915_private *dev_priv = to_i915(crtc->dev); 2210 + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 2211 + int pipe = intel_crtc->pipe; 2219 2212 int type = encoder->type; 2213 + 2214 + WARN_ON(intel_crtc->config->has_pch_encoder); 2215 + 2216 + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); 2220 2217 2221 2218 if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { 2222 2219 intel_ddi_pre_enable_dp(encoder, ··· 2254 2235 uint32_t val; 2255 2236 bool wait = false; 2256 2237 2257 - /* old_crtc_state and old_conn_state are NULL when called from DP_MST */ 2258 - 2259 2238 if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { 2239 + /* 2240 + * old_crtc_state and old_conn_state are NULL when called from 2241 + * DP_MST. The main connector associated with this port is never 2242 + * bound to a crtc for MST. 2243 + */ 2244 + bool is_mst = !old_crtc_state; 2260 2245 struct intel_dp *intel_dp = enc_to_intel_dp(encoder); 2261 2246 2262 - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); 2247 + /* 2248 + * Power down sink before disabling the port, otherwise we end 2249 + * up getting interrupts from the sink on detecting link loss. 2250 + */ 2251 + if (!is_mst) 2252 + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); 2263 2253 } 2264 2254 2265 2255 val = I915_READ(DDI_BUF_CTL(port));
+105 -104
drivers/gpu/drm/i915/intel_display.c
··· 1539 1539 * DPLLCMD is AWOL. Use chicken bits to propagate 1540 1540 * the value from DPLLBMD to either pipe B or C. 1541 1541 */ 1542 - I915_WRITE(CBR4_VLV, pipe == PIPE_B ? CBR_DPLLBMD_PIPE_B : CBR_DPLLBMD_PIPE_C); 1542 + I915_WRITE(CBR4_VLV, CBR_DPLLBMD_PIPE(pipe)); 1543 1543 I915_WRITE(DPLL_MD(PIPE_B), pipe_config->dpll_hw_state.dpll_md); 1544 1544 I915_WRITE(CBR4_VLV, 0); 1545 1545 dev_priv->chv_dpll_md[pipe] = pipe_config->dpll_hw_state.dpll_md; ··· 1568 1568 return count; 1569 1569 } 1570 1570 1571 - static void i9xx_enable_pll(struct intel_crtc *crtc) 1571 + static void i9xx_enable_pll(struct intel_crtc *crtc, 1572 + const struct intel_crtc_state *crtc_state) 1572 1573 { 1573 1574 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); 1574 1575 i915_reg_t reg = DPLL(crtc->pipe); 1575 - u32 dpll = crtc->config->dpll_hw_state.dpll; 1576 + u32 dpll = crtc_state->dpll_hw_state.dpll; 1576 1577 int i; 1577 1578 1578 1579 assert_pipe_disabled(dev_priv, crtc->pipe); ··· 1610 1609 1611 1610 if (INTEL_GEN(dev_priv) >= 4) { 1612 1611 I915_WRITE(DPLL_MD(crtc->pipe), 1613 - crtc->config->dpll_hw_state.dpll_md); 1612 + crtc_state->dpll_hw_state.dpll_md); 1614 1613 } else { 1615 1614 /* The pixel multiplier can only be updated once the 1616 1615 * DPLL is enabled and the clocks are stable. ··· 1628 1627 } 1629 1628 } 1630 1629 1631 - /** 1632 - * i9xx_disable_pll - disable a PLL 1633 - * @dev_priv: i915 private structure 1634 - * @pipe: pipe PLL to disable 1635 - * 1636 - * Disable the PLL for @pipe, making sure the pipe is off first. 1637 - * 1638 - * Note! This is for pre-ILK only. 1639 - */ 1640 1630 static void i9xx_disable_pll(struct intel_crtc *crtc) 1641 1631 { 1642 1632 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); ··· 2211 2219 * something and try to run the system in a "less than optimal" 2212 2220 * mode that matches the user configuration. 
2213 2221 */ 2214 - if (i915_vma_get_fence(vma) == 0) 2215 - i915_vma_pin_fence(vma); 2222 + i915_vma_pin_fence(vma); 2216 2223 } 2217 2224 2218 2225 i915_vma_get(vma); ··· 4946 4955 4947 4956 assert_plane_enabled(dev_priv, crtc->plane); 4948 4957 if (IS_BROADWELL(dev_priv)) { 4949 - mutex_lock(&dev_priv->rps.hw_lock); 4958 + mutex_lock(&dev_priv->pcu_lock); 4950 4959 WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 4951 4960 IPS_ENABLE | IPS_PCODE_CONTROL)); 4952 - mutex_unlock(&dev_priv->rps.hw_lock); 4961 + mutex_unlock(&dev_priv->pcu_lock); 4953 4962 /* Quoting Art Runyan: "its not safe to expect any particular 4954 4963 * value in IPS_CTL bit 31 after enabling IPS through the 4955 4964 * mailbox." Moreover, the mailbox may return a bogus state, ··· 4979 4988 4980 4989 assert_plane_enabled(dev_priv, crtc->plane); 4981 4990 if (IS_BROADWELL(dev_priv)) { 4982 - mutex_lock(&dev_priv->rps.hw_lock); 4991 + mutex_lock(&dev_priv->pcu_lock); 4983 4992 WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); 4984 - mutex_unlock(&dev_priv->rps.hw_lock); 4993 + mutex_unlock(&dev_priv->pcu_lock); 4985 4994 /* wait for pcode to finish disabling IPS, which may take up to 42ms */ 4986 4995 if (intel_wait_for_register(dev_priv, 4987 4996 IPS_CTL, IPS_ENABLE, 0, ··· 5450 5459 return HAS_IPS(to_i915(crtc->base.dev)) && crtc->pipe == PIPE_A; 5451 5460 } 5452 5461 5462 + static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv, 5463 + enum pipe pipe, bool apply) 5464 + { 5465 + u32 val = I915_READ(CLKGATE_DIS_PSL(pipe)); 5466 + u32 mask = DPF_GATING_DIS | DPF_RAM_GATING_DIS | DPFR_GATING_DIS; 5467 + 5468 + if (apply) 5469 + val |= mask; 5470 + else 5471 + val &= ~mask; 5472 + 5473 + I915_WRITE(CLKGATE_DIS_PSL(pipe), val); 5474 + } 5475 + 5453 5476 static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, 5454 5477 struct drm_atomic_state *old_state) 5455 5478 { ··· 5474 5469 enum transcoder cpu_transcoder = 
intel_crtc->config->cpu_transcoder; 5475 5470 struct intel_atomic_state *old_intel_state = 5476 5471 to_intel_atomic_state(old_state); 5472 + bool psl_clkgate_wa; 5477 5473 5478 5474 if (WARN_ON(intel_crtc->active)) 5479 5475 return; 5480 - 5481 - if (intel_crtc->config->has_pch_encoder) 5482 - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); 5483 5476 5484 5477 intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); 5485 5478 ··· 5512 5509 5513 5510 intel_crtc->active = true; 5514 5511 5515 - if (intel_crtc->config->has_pch_encoder) 5516 - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); 5517 - else 5518 - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); 5519 - 5520 5512 intel_encoders_pre_enable(crtc, pipe_config, old_state); 5521 - 5522 - if (intel_crtc->config->has_pch_encoder) 5523 - dev_priv->display.fdi_link_train(intel_crtc, pipe_config); 5524 5513 5525 5514 if (!transcoder_is_dsi(cpu_transcoder)) 5526 5515 intel_ddi_enable_pipe_clock(pipe_config); 5516 + 5517 + /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ 5518 + psl_clkgate_wa = (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) && 5519 + intel_crtc->config->pch_pfit.enabled; 5520 + if (psl_clkgate_wa) 5521 + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); 5527 5522 5528 5523 if (INTEL_GEN(dev_priv) >= 9) 5529 5524 skylake_pfit_enable(intel_crtc); ··· 5556 5555 5557 5556 intel_encoders_enable(crtc, pipe_config, old_state); 5558 5557 5559 - if (intel_crtc->config->has_pch_encoder) { 5558 + if (psl_clkgate_wa) { 5560 5559 intel_wait_for_vblank(dev_priv, pipe); 5561 - intel_wait_for_vblank(dev_priv, pipe); 5562 - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); 5563 - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); 5560 + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, false); 5564 5561 } 5565 5562 5566 5563 /* If we change the relative order between pipe/planes enabling, we need ··· 5654 5655 struct intel_crtc 
*intel_crtc = to_intel_crtc(crtc); 5655 5656 enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; 5656 5657 5657 - if (intel_crtc->config->has_pch_encoder) 5658 - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); 5659 - 5660 5658 intel_encoders_disable(crtc, old_crtc_state, old_state); 5661 5659 5662 5660 drm_crtc_vblank_off(crtc); ··· 5678 5682 intel_ddi_disable_pipe_clock(intel_crtc->config); 5679 5683 5680 5684 intel_encoders_post_disable(crtc, old_crtc_state, old_state); 5681 - 5682 - if (old_crtc_state->has_pch_encoder) 5683 - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); 5684 5685 } 5685 5686 5686 5687 static void i9xx_pfit_enable(struct intel_crtc *crtc) ··· 5887 5894 5888 5895 intel_encoders_pre_enable(crtc, pipe_config, old_state); 5889 5896 5890 - i9xx_enable_pll(intel_crtc); 5897 + i9xx_enable_pll(intel_crtc, pipe_config); 5891 5898 5892 5899 i9xx_pfit_enable(intel_crtc); 5893 5900 ··· 8839 8846 static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) 8840 8847 { 8841 8848 if (IS_HASWELL(dev_priv)) { 8842 - mutex_lock(&dev_priv->rps.hw_lock); 8849 + mutex_lock(&dev_priv->pcu_lock); 8843 8850 if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, 8844 8851 val)) 8845 8852 DRM_DEBUG_KMS("Failed to write to D_COMP\n"); 8846 - mutex_unlock(&dev_priv->rps.hw_lock); 8853 + mutex_unlock(&dev_priv->pcu_lock); 8847 8854 } else { 8848 8855 I915_WRITE(D_COMP_BDW, val); 8849 8856 POSTING_READ(D_COMP_BDW); ··· 10238 10245 &pipe_config->fdi_m_n); 10239 10246 } 10240 10247 10241 - /** Returns the currently programmed mode of the given pipe. */ 10242 - struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, 10243 - struct drm_crtc *crtc) 10248 + /* Returns the currently programmed mode of the given encoder. 
*/ 10249 + struct drm_display_mode * 10250 + intel_encoder_current_mode(struct intel_encoder *encoder) 10244 10251 { 10245 - struct drm_i915_private *dev_priv = to_i915(dev); 10246 - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 10247 - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; 10252 + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 10253 + struct intel_crtc_state *crtc_state; 10248 10254 struct drm_display_mode *mode; 10249 - struct intel_crtc_state *pipe_config; 10250 - int htot = I915_READ(HTOTAL(cpu_transcoder)); 10251 - int hsync = I915_READ(HSYNC(cpu_transcoder)); 10252 - int vtot = I915_READ(VTOTAL(cpu_transcoder)); 10253 - int vsync = I915_READ(VSYNC(cpu_transcoder)); 10254 - enum pipe pipe = intel_crtc->pipe; 10255 + struct intel_crtc *crtc; 10256 + enum pipe pipe; 10257 + 10258 + if (!encoder->get_hw_state(encoder, &pipe)) 10259 + return NULL; 10260 + 10261 + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); 10255 10262 10256 10263 mode = kzalloc(sizeof(*mode), GFP_KERNEL); 10257 10264 if (!mode) 10258 10265 return NULL; 10259 10266 10260 - pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL); 10261 - if (!pipe_config) { 10267 + crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL); 10268 + if (!crtc_state) { 10262 10269 kfree(mode); 10263 10270 return NULL; 10264 10271 } 10265 10272 10266 - /* 10267 - * Construct a pipe_config sufficient for getting the clock info 10268 - * back out of crtc_clock_get. 10269 - * 10270 - * Note, if LVDS ever uses a non-1 pixel multiplier, we'll need 10271 - * to use a real value here instead. 
10272 - */ 10273 - pipe_config->cpu_transcoder = (enum transcoder) pipe; 10274 - pipe_config->pixel_multiplier = 1; 10275 - pipe_config->dpll_hw_state.dpll = I915_READ(DPLL(pipe)); 10276 - pipe_config->dpll_hw_state.fp0 = I915_READ(FP0(pipe)); 10277 - pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(pipe)); 10278 - i9xx_crtc_clock_get(intel_crtc, pipe_config); 10273 + crtc_state->base.crtc = &crtc->base; 10279 10274 10280 - mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier; 10281 - mode->hdisplay = (htot & 0xffff) + 1; 10282 - mode->htotal = ((htot & 0xffff0000) >> 16) + 1; 10283 - mode->hsync_start = (hsync & 0xffff) + 1; 10284 - mode->hsync_end = ((hsync & 0xffff0000) >> 16) + 1; 10285 - mode->vdisplay = (vtot & 0xffff) + 1; 10286 - mode->vtotal = ((vtot & 0xffff0000) >> 16) + 1; 10287 - mode->vsync_start = (vsync & 0xffff) + 1; 10288 - mode->vsync_end = ((vsync & 0xffff0000) >> 16) + 1; 10275 + if (!dev_priv->display.get_pipe_config(crtc, crtc_state)) { 10276 + kfree(crtc_state); 10277 + kfree(mode); 10278 + return NULL; 10279 + } 10289 10280 10290 - drm_mode_set_name(mode); 10281 + encoder->get_config(encoder, crtc_state); 10291 10282 10292 - kfree(pipe_config); 10283 + intel_mode_from_pipe_config(mode, crtc_state); 10284 + 10285 + kfree(crtc_state); 10293 10286 10294 10287 return mode; 10295 10288 } ··· 11315 11336 PIPE_CONF_CHECK_X(dpll_hw_state.ctrl1); 11316 11337 PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1); 11317 11338 PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2); 11339 + PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr0); 11340 + PIPE_CONF_CHECK_X(dpll_hw_state.ebb0); 11341 + PIPE_CONF_CHECK_X(dpll_hw_state.ebb4); 11342 + PIPE_CONF_CHECK_X(dpll_hw_state.pll0); 11343 + PIPE_CONF_CHECK_X(dpll_hw_state.pll1); 11344 + PIPE_CONF_CHECK_X(dpll_hw_state.pll2); 11345 + PIPE_CONF_CHECK_X(dpll_hw_state.pll3); 11346 + PIPE_CONF_CHECK_X(dpll_hw_state.pll6); 11347 + PIPE_CONF_CHECK_X(dpll_hw_state.pll8); 11348 + PIPE_CONF_CHECK_X(dpll_hw_state.pll9); 11349 + 
PIPE_CONF_CHECK_X(dpll_hw_state.pll10); 11350 + PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); 11318 11351 11319 11352 PIPE_CONF_CHECK_X(dsi_pll.ctrl); 11320 11353 PIPE_CONF_CHECK_X(dsi_pll.div); ··· 12211 12220 if (updated & cmask || !cstate->base.active) 12212 12221 continue; 12213 12222 12214 - if (skl_ddb_allocation_overlaps(entries, &cstate->wm.skl.ddb, i)) 12223 + if (skl_ddb_allocation_overlaps(dev_priv, 12224 + entries, 12225 + &cstate->wm.skl.ddb, 12226 + i)) 12215 12227 continue; 12216 12228 12217 12229 updated |= cmask; ··· 12509 12515 struct drm_i915_private *dev_priv = to_i915(dev); 12510 12516 int ret = 0; 12511 12517 12512 - ret = drm_atomic_helper_setup_commit(state, nonblock); 12513 - if (ret) 12514 - return ret; 12515 - 12516 12518 drm_atomic_state_get(state); 12517 12519 i915_sw_fence_init(&intel_state->commit_ready, 12518 12520 intel_atomic_commit_ready); 12519 - 12520 - ret = intel_atomic_prepare_commit(dev, state); 12521 - if (ret) { 12522 - DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); 12523 - i915_sw_fence_commit(&intel_state->commit_ready); 12524 - return ret; 12525 - } 12526 12521 12527 12522 /* 12528 12523 * The intel_legacy_cursor_update() fast path takes care ··· 12521 12538 * updates happen during the correct frames. Gen9+ have 12522 12539 * double buffered watermarks and so shouldn't need this. 12523 12540 * 12524 - * Do this after drm_atomic_helper_setup_commit() and 12525 - * intel_atomic_prepare_commit() because we still want 12526 - * to skip the flip and fb cleanup waits. Although that 12527 - * does risk yanking the mapping from under the display 12528 - * engine. 12541 + * Unset state->legacy_cursor_update before the call to 12542 + * drm_atomic_helper_setup_commit() because otherwise 12543 + * drm_atomic_helper_wait_for_flip_done() is a noop and 12544 + * we get FIFO underruns because we didn't wait 12545 + * for vblank. 
12529 12546 * 12530 12547 * FIXME doing watermarks and fb cleanup from a vblank worker 12531 12548 * (assuming we had any) would solve these problems. 12532 12549 */ 12533 - if (INTEL_GEN(dev_priv) < 9) 12534 - state->legacy_cursor_update = false; 12550 + if (INTEL_GEN(dev_priv) < 9 && state->legacy_cursor_update) { 12551 + struct intel_crtc_state *new_crtc_state; 12552 + struct intel_crtc *crtc; 12553 + int i; 12535 12554 12536 - ret = drm_atomic_helper_swap_state(state, true); 12555 + for_each_new_intel_crtc_in_state(intel_state, crtc, new_crtc_state, i) 12556 + if (new_crtc_state->wm.need_postvbl_update || 12557 + new_crtc_state->update_wm_post) 12558 + state->legacy_cursor_update = false; 12559 + } 12560 + 12561 + ret = intel_atomic_prepare_commit(dev, state); 12562 + if (ret) { 12563 + DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); 12564 + i915_sw_fence_commit(&intel_state->commit_ready); 12565 + return ret; 12566 + } 12567 + 12568 + ret = drm_atomic_helper_setup_commit(state, nonblock); 12569 + if (!ret) 12570 + ret = drm_atomic_helper_swap_state(state, true); 12571 + 12537 12572 if (ret) { 12538 12573 i915_sw_fence_commit(&intel_state->commit_ready); 12539 12574 ··· 14749 14748 } 14750 14749 14751 14750 static bool has_pch_trancoder(struct drm_i915_private *dev_priv, 14752 - enum transcoder pch_transcoder) 14751 + enum pipe pch_transcoder) 14753 14752 { 14754 14753 return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || 14755 - (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); 14754 + (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == PIPE_A); 14756 14755 } 14757 14756 14758 14757 static void intel_sanitize_crtc(struct intel_crtc *crtc, ··· 14835 14834 * PCH transcoders B and C would prevent enabling the south 14836 14835 * error interrupt (see cpt_can_enable_serr_int()). 
14837 14836 */ 14838 - if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe)) 14837 + if (has_pch_trancoder(dev_priv, crtc->pipe)) 14839 14838 crtc->pch_fifo_underrun_disabled = true; 14840 14839 } 14841 14840 }
+16 -33
drivers/gpu/drm/i915/intel_dp.c
··· 137 137 enum pipe pipe); 138 138 static void intel_dp_unset_edid(struct intel_dp *intel_dp); 139 139 140 - static int intel_dp_num_rates(u8 link_bw_code) 141 - { 142 - switch (link_bw_code) { 143 - default: 144 - WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n", 145 - link_bw_code); 146 - case DP_LINK_BW_1_62: 147 - return 1; 148 - case DP_LINK_BW_2_7: 149 - return 2; 150 - case DP_LINK_BW_5_4: 151 - return 3; 152 - } 153 - } 154 - 155 140 /* update sink rates from dpcd */ 156 141 static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) 157 142 { 158 - int i, num_rates; 143 + int i, max_rate; 159 144 160 - num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]); 145 + max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); 161 146 162 - for (i = 0; i < num_rates; i++) 147 + for (i = 0; i < ARRAY_SIZE(default_rates); i++) { 148 + if (default_rates[i] > max_rate) 149 + break; 163 150 intel_dp->sink_rates[i] = default_rates[i]; 151 + } 164 152 165 - intel_dp->num_sink_rates = num_rates; 153 + intel_dp->num_sink_rates = i; 166 154 } 167 155 168 156 /* Theoretical max between source and sink */ ··· 242 254 } else if (IS_GEN9_BC(dev_priv)) { 243 255 source_rates = skl_rates; 244 256 size = ARRAY_SIZE(skl_rates); 245 - } else { 257 + } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || 258 + IS_BROADWELL(dev_priv)) { 246 259 source_rates = default_rates; 247 260 size = ARRAY_SIZE(default_rates); 261 + } else { 262 + source_rates = default_rates; 263 + size = ARRAY_SIZE(default_rates) - 1; 248 264 } 249 - 250 - /* This depends on the fact that 5.4 is last value in the array */ 251 - if (!intel_dp_source_supports_hbr2(intel_dp)) 252 - size--; 253 265 254 266 intel_dp->source_rates = source_rates; 255 267 intel_dp->num_source_rates = size; ··· 1470 1482 1471 1483 bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) 1472 1484 { 1473 - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); 1474 - struct 
drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); 1485 + int max_rate = intel_dp->source_rates[intel_dp->num_source_rates - 1]; 1475 1486 1476 - if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || 1477 - IS_BROADWELL(dev_priv) || (INTEL_GEN(dev_priv) >= 9)) 1478 - return true; 1479 - else 1480 - return false; 1487 + return max_rate >= 540000; 1481 1488 } 1482 1489 1483 1490 static void ··· 2291 2308 I915_WRITE(pp_ctrl_reg, pp); 2292 2309 POSTING_READ(pp_ctrl_reg); 2293 2310 2294 - intel_dp->panel_power_off_time = ktime_get_boottime(); 2295 2311 wait_panel_off(intel_dp); 2312 + intel_dp->panel_power_off_time = ktime_get_boottime(); 2296 2313 2297 2314 /* We got a reference when we enabled the VDD. */ 2298 2315 intel_display_power_put(dev_priv, intel_dp->aux_power_domain); ··· 5269 5286 * seems sufficient to avoid this problem. 5270 5287 */ 5271 5288 if (dev_priv->quirks & QUIRK_INCREASE_T12_DELAY) { 5272 - vbt.t11_t12 = max_t(u16, vbt.t11_t12, 900 * 10); 5289 + vbt.t11_t12 = max_t(u16, vbt.t11_t12, 1300 * 10); 5273 5290 DRM_DEBUG_KMS("Increasing T12 panel delay as per the quirk to %d\n", 5274 5291 vbt.t11_t12); 5275 5292 }
+8 -2
drivers/gpu/drm/i915/intel_dp_mst.c
··· 162 162 163 163 drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); 164 164 165 + /* 166 + * Power down mst path before disabling the port, otherwise we end 167 + * up getting interrupts from the sink upon detecting link loss. 168 + */ 169 + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, 170 + false); 171 + 165 172 intel_dp->active_mst_links--; 166 173 167 174 intel_mst->connector = NULL; 168 175 if (intel_dp->active_mst_links == 0) { 169 176 intel_dig_port->base.post_disable(&intel_dig_port->base, 170 177 NULL, NULL); 171 - 172 - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); 173 178 } 174 179 DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); 175 180 } ··· 201 196 202 197 DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); 203 198 199 + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); 204 200 if (intel_dp->active_mst_links == 0) 205 201 intel_dig_port->base.pre_enable(&intel_dig_port->base, 206 202 pipe_config, NULL);
-20
drivers/gpu/drm/i915/intel_dpio_phy.c
··· 208 208 }, 209 209 }; 210 210 211 - static u32 bxt_phy_port_mask(const struct bxt_ddi_phy_info *phy_info) 212 - { 213 - return (phy_info->dual_channel * BIT(phy_info->channel[DPIO_CH1].port)) | 214 - BIT(phy_info->channel[DPIO_CH0].port); 215 - } 216 - 217 211 static const struct bxt_ddi_phy_info * 218 212 bxt_get_phy_list(struct drm_i915_private *dev_priv, int *count) 219 213 { ··· 307 313 enum dpio_phy phy) 308 314 { 309 315 const struct bxt_ddi_phy_info *phy_info; 310 - enum port port; 311 316 312 317 phy_info = bxt_get_phy_info(dev_priv, phy); 313 318 ··· 326 333 phy); 327 334 328 335 return false; 329 - } 330 - 331 - for_each_port_masked(port, bxt_phy_port_mask(phy_info)) { 332 - u32 tmp = I915_READ(BXT_PHY_CTL(port)); 333 - 334 - if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) { 335 - DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane " 336 - "for port %c powered down " 337 - "(PHY_CTL %08x)\n", 338 - phy, port_name(port), tmp); 339 - 340 - return false; 341 - } 342 336 } 343 337 344 338 return true;
+12 -10
drivers/gpu/drm/i915/intel_drv.h
··· 1243 1243 static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, 1244 1244 u32 mask) 1245 1245 { 1246 - return mask & ~i915->rps.pm_intrmsk_mbz; 1246 + return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; 1247 1247 } 1248 1248 1249 1249 void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); ··· 1254 1254 * We only use drm_irq_uninstall() at unload and VT switch, so 1255 1255 * this is the only thing we need to check. 1256 1256 */ 1257 - return dev_priv->pm.irqs_enabled; 1257 + return dev_priv->runtime_pm.irqs_enabled; 1258 1258 } 1259 1259 1260 1260 int intel_get_crtc_scanline(struct intel_crtc *crtc); ··· 1363 1363 bool intel_connector_get_hw_state(struct intel_connector *connector); 1364 1364 void intel_connector_attach_encoder(struct intel_connector *connector, 1365 1365 struct intel_encoder *encoder); 1366 - struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, 1367 - struct drm_crtc *crtc); 1366 + struct drm_display_mode * 1367 + intel_encoder_current_mode(struct intel_encoder *encoder); 1368 + 1368 1369 enum pipe intel_get_pipe_from_connector(struct intel_connector *connector); 1369 1370 int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, 1370 1371 struct drm_file *file_priv); ··· 1791 1790 static inline void 1792 1791 assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) 1793 1792 { 1794 - WARN_ONCE(dev_priv->pm.suspended, 1793 + WARN_ONCE(dev_priv->runtime_pm.suspended, 1795 1794 "Device suspended during HW access\n"); 1796 1795 } 1797 1796 ··· 1799 1798 assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) 1800 1799 { 1801 1800 assert_rpm_device_not_suspended(dev_priv); 1802 - WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count), 1801 + WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count), 1803 1802 "RPM wakelock ref not held during HW access"); 1804 1803 } 1805 1804 ··· 1824 1823 static inline void 1825 1824 disable_rpm_wakeref_asserts(struct 
drm_i915_private *dev_priv) 1826 1825 { 1827 - atomic_inc(&dev_priv->pm.wakeref_count); 1826 + atomic_inc(&dev_priv->runtime_pm.wakeref_count); 1828 1827 } 1829 1828 1830 1829 /** ··· 1841 1840 static inline void 1842 1841 enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) 1843 1842 { 1844 - atomic_dec(&dev_priv->pm.wakeref_count); 1843 + atomic_dec(&dev_priv->runtime_pm.wakeref_count); 1845 1844 } 1846 1845 1847 1846 void intel_runtime_pm_get(struct drm_i915_private *dev_priv); ··· 1894 1893 int intel_disable_sagv(struct drm_i915_private *dev_priv); 1895 1894 bool skl_wm_level_equals(const struct skl_wm_level *l1, 1896 1895 const struct skl_wm_level *l2); 1897 - bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, 1896 + bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, 1897 + const struct skl_ddb_entry **entries, 1898 1898 const struct skl_ddb_entry *ddb, 1899 1899 int ignore); 1900 1900 bool ilk_disable_lp_wm(struct drm_device *dev); ··· 1904 1902 struct intel_crtc_state *cstate); 1905 1903 void intel_init_ipc(struct drm_i915_private *dev_priv); 1906 1904 void intel_enable_ipc(struct drm_i915_private *dev_priv); 1907 - static inline int intel_enable_rc6(void) 1905 + static inline int intel_rc6_enabled(void) 1908 1906 { 1909 1907 return i915_modparams.enable_rc6; 1910 1908 }
+6 -1
drivers/gpu/drm/i915/intel_dsi.c
··· 790 790 const struct intel_crtc_state *pipe_config, 791 791 const struct drm_connector_state *conn_state) 792 792 { 793 - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); 794 793 struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); 794 + struct drm_crtc *crtc = pipe_config->base.crtc; 795 + struct drm_i915_private *dev_priv = to_i915(crtc->dev); 796 + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 797 + int pipe = intel_crtc->pipe; 795 798 enum port port; 796 799 u32 val; 797 800 bool glk_cold_boot = false; 798 801 799 802 DRM_DEBUG_KMS("\n"); 803 + 804 + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); 800 805 801 806 /* 802 807 * The BIOS may leave the PLL in a wonky state where it doesn't
+8 -25
drivers/gpu/drm/i915/intel_dvo.c
··· 379 379 * chip being on DVOB/C and having multiple pipes. 380 380 */ 381 381 static struct drm_display_mode * 382 - intel_dvo_get_current_mode(struct drm_connector *connector) 382 + intel_dvo_get_current_mode(struct intel_encoder *encoder) 383 383 { 384 - struct drm_device *dev = connector->dev; 385 - struct drm_i915_private *dev_priv = to_i915(dev); 386 - struct intel_dvo *intel_dvo = intel_attached_dvo(connector); 387 - uint32_t dvo_val = I915_READ(intel_dvo->dev.dvo_reg); 388 - struct drm_display_mode *mode = NULL; 384 + struct drm_display_mode *mode; 389 385 390 - /* If the DVO port is active, that'll be the LVDS, so we can pull out 391 - * its timings to get how the BIOS set up the panel. 392 - */ 393 - if (dvo_val & DVO_ENABLE) { 394 - struct intel_crtc *crtc; 395 - int pipe = (dvo_val & DVO_PIPE_B_SELECT) ? 1 : 0; 396 - 397 - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); 398 - if (crtc) { 399 - mode = intel_crtc_mode_get(dev, &crtc->base); 400 - if (mode) { 401 - mode->type |= DRM_MODE_TYPE_PREFERRED; 402 - if (dvo_val & DVO_HSYNC_ACTIVE_HIGH) 403 - mode->flags |= DRM_MODE_FLAG_PHSYNC; 404 - if (dvo_val & DVO_VSYNC_ACTIVE_HIGH) 405 - mode->flags |= DRM_MODE_FLAG_PVSYNC; 406 - } 407 - } 386 + mode = intel_encoder_current_mode(encoder); 387 + if (mode) { 388 + DRM_DEBUG_KMS("using current (BIOS) mode: "); 389 + drm_mode_debug_printmodeline(mode); 390 + mode->type |= DRM_MODE_TYPE_PREFERRED; 408 391 } 409 392 410 393 return mode; ··· 534 551 * mode being output through DVO. 535 552 */ 536 553 intel_panel_init(&intel_connector->panel, 537 - intel_dvo_get_current_mode(connector), 554 + intel_dvo_get_current_mode(intel_encoder), 538 555 NULL, NULL); 539 556 intel_dvo->panel_wants_dither = true; 540 557 }
+219 -21
drivers/gpu/drm/i915/intel_engine_cs.c
··· 22 22 * 23 23 */ 24 24 25 + #include <drm/drm_print.h> 26 + 25 27 #include "i915_drv.h" 26 28 #include "intel_ringbuffer.h" 27 29 #include "intel_lrc.h" ··· 41 39 42 40 #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) 43 41 #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) 44 - #define GEN10_LR_CONTEXT_RENDER_SIZE (19 * PAGE_SIZE) 42 + #define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) 45 43 46 44 #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) 47 45 ··· 615 613 if (IS_ERR(ring)) 616 614 return PTR_ERR(ring); 617 615 616 + /* 617 + * Similarly the preempt context must always be available so that 618 + * we can interrupt the engine at any time. 619 + */ 620 + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) { 621 + ring = engine->context_pin(engine, 622 + engine->i915->preempt_context); 623 + if (IS_ERR(ring)) { 624 + ret = PTR_ERR(ring); 625 + goto err_unpin_kernel; 626 + } 627 + } 628 + 618 629 ret = intel_engine_init_breadcrumbs(engine); 619 630 if (ret) 620 - goto err_unpin; 631 + goto err_unpin_preempt; 621 632 622 633 ret = i915_gem_render_state_init(engine); 623 634 if (ret) ··· 649 634 i915_gem_render_state_fini(engine); 650 635 err_breadcrumbs: 651 636 intel_engine_fini_breadcrumbs(engine); 652 - err_unpin: 637 + err_unpin_preempt: 638 + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) 639 + engine->context_unpin(engine, engine->i915->preempt_context); 640 + err_unpin_kernel: 653 641 engine->context_unpin(engine, engine->i915->kernel_context); 654 642 return ret; 655 643 } ··· 678 660 intel_engine_cleanup_cmd_parser(engine); 679 661 i915_gem_batch_pool_fini(&engine->batch_pool); 680 662 663 + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) 664 + engine->context_unpin(engine, engine->i915->preempt_context); 681 665 engine->context_unpin(engine, engine->i915->kernel_context); 682 666 } 683 667 ··· 850 830 #define WA_SET_FIELD_MASKED(addr, mask, value) \ 851 831 WA_REG(addr, mask, _MASKED_FIELD(mask, value)) 852 
832 853 - #define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) 854 - #define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) 855 - 856 - #define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) 857 - 858 833 static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, 859 834 i915_reg_t reg) 860 835 { ··· 860 845 if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) 861 846 return -EINVAL; 862 847 863 - WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), 864 - i915_mmio_reg_offset(reg)); 848 + I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), 849 + i915_mmio_reg_offset(reg)); 865 850 wa->hw_whitelist_count[engine->id]++; 866 851 867 852 return 0; ··· 995 980 GEN9_PBE_COMPRESSED_HASH_SELECTION); 996 981 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, 997 982 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); 998 - WA_SET_BIT(MMCD_MISC_CTRL, MMCD_PCLA | MMCD_HOTSPOT_EN); 983 + 984 + I915_WRITE(MMCD_MISC_CTRL, 985 + I915_READ(MMCD_MISC_CTRL) | 986 + MMCD_PCLA | 987 + MMCD_HOTSPOT_EN); 999 988 } 1000 989 1001 990 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ ··· 1090 1071 I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | 1091 1072 GEN8_LQSC_FLUSH_COHERENT_LINES)); 1092 1073 1074 + /* 1075 + * Supporting preemption with fine-granularity requires changes in the 1076 + * batch buffer programming. Since we can't break old userspace, we 1077 + * need to set our default preemption level to safe value. Userspace is 1078 + * still able to use more fine-grained preemption levels, since in 1079 + * WaEnablePreemptionGranularityControlByUMD we're whitelisting the 1080 + * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are 1081 + * not real HW workarounds, but merely a way to start using preemption 1082 + * while maintaining old contract with userspace. 
1083 + */ 1084 + 1085 + /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ 1086 + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); 1087 + 1088 + /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ 1089 + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, 1090 + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); 1091 + 1093 1092 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ 1094 1093 ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); 1095 1094 if (ret) 1096 1095 return ret; 1097 1096 1098 - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */ 1099 - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); 1097 + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ 1098 + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, 1099 + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); 1100 + ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); 1100 1101 if (ret) 1101 1102 return ret; 1102 1103 ··· 1177 1138 ret = gen9_init_workarounds(engine); 1178 1139 if (ret) 1179 1140 return ret; 1180 - 1181 - /* 1182 - * Actual WA is to disable percontext preemption granularity control 1183 - * until D0 which is the default case so this is equivalent to 1184 - * !WaDisablePerCtxtPreemptionGranularityControl:skl 1185 - */ 1186 - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, 1187 - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); 1188 1141 1189 1142 /* WaEnableGapsTsvCreditFix:skl */ 1190 1143 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | ··· 1309 1278 /* FtrEnableFastAnisoL1BankingFix: cnl */ 1310 1279 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); 1311 1280 1281 + /* WaDisable3DMidCmdPreemption:cnl */ 1282 + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); 1283 + 1284 + /* WaDisableGPGPUMidCmdPreemption:cnl */ 1285 + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, 1286 + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); 1287 + 1312 1288 /* 
WaEnablePreemptionGranularityControlByUMD:cnl */ 1289 + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, 1290 + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); 1313 1291 ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); 1314 1292 if (ret) 1315 1293 return ret; ··· 1616 1576 default: 1617 1577 return true; 1618 1578 } 1579 + } 1580 + 1581 + static void print_request(struct drm_printer *m, 1582 + struct drm_i915_gem_request *rq, 1583 + const char *prefix) 1584 + { 1585 + drm_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix, 1586 + rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno, 1587 + rq->priotree.priority, 1588 + jiffies_to_msecs(jiffies - rq->emitted_jiffies), 1589 + rq->timeline->common->name); 1590 + } 1591 + 1592 + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) 1593 + { 1594 + struct intel_breadcrumbs *b = &engine->breadcrumbs; 1595 + struct i915_gpu_error *error = &engine->i915->gpu_error; 1596 + struct drm_i915_private *dev_priv = engine->i915; 1597 + struct drm_i915_gem_request *rq; 1598 + struct rb_node *rb; 1599 + u64 addr; 1600 + 1601 + drm_printf(m, "%s\n", engine->name); 1602 + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", 1603 + intel_engine_get_seqno(engine), 1604 + intel_engine_last_submit(engine), 1605 + engine->hangcheck.seqno, 1606 + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), 1607 + engine->timeline->inflight_seqnos); 1608 + drm_printf(m, "\tReset count: %d\n", 1609 + i915_reset_engine_count(error, engine)); 1610 + 1611 + rcu_read_lock(); 1612 + 1613 + drm_printf(m, "\tRequests:\n"); 1614 + 1615 + rq = list_first_entry(&engine->timeline->requests, 1616 + struct drm_i915_gem_request, link); 1617 + if (&rq->link != &engine->timeline->requests) 1618 + print_request(m, rq, "\t\tfirst "); 1619 + 1620 + rq = list_last_entry(&engine->timeline->requests, 1621 + struct drm_i915_gem_request, link); 1622 + if (&rq->link != &engine->timeline->requests) 1623 + 
print_request(m, rq, "\t\tlast "); 1624 + 1625 + rq = i915_gem_find_active_request(engine); 1626 + if (rq) { 1627 + print_request(m, rq, "\t\tactive "); 1628 + drm_printf(m, 1629 + "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", 1630 + rq->head, rq->postfix, rq->tail, 1631 + rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, 1632 + rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); 1633 + } 1634 + 1635 + drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", 1636 + I915_READ(RING_START(engine->mmio_base)), 1637 + rq ? i915_ggtt_offset(rq->ring->vma) : 0); 1638 + drm_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", 1639 + I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, 1640 + rq ? rq->ring->head : 0); 1641 + drm_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", 1642 + I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, 1643 + rq ? rq->ring->tail : 0); 1644 + drm_printf(m, "\tRING_CTL: 0x%08x [%s]\n", 1645 + I915_READ(RING_CTL(engine->mmio_base)), 1646 + I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? 
"waiting" : ""); 1647 + 1648 + rcu_read_unlock(); 1649 + 1650 + addr = intel_engine_get_active_head(engine); 1651 + drm_printf(m, "\tACTHD: 0x%08x_%08x\n", 1652 + upper_32_bits(addr), lower_32_bits(addr)); 1653 + addr = intel_engine_get_last_batch_head(engine); 1654 + drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", 1655 + upper_32_bits(addr), lower_32_bits(addr)); 1656 + 1657 + if (i915_modparams.enable_execlists) { 1658 + const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; 1659 + struct intel_engine_execlists * const execlists = &engine->execlists; 1660 + u32 ptr, read, write; 1661 + unsigned int idx; 1662 + 1663 + drm_printf(m, "\tExeclist status: 0x%08x %08x\n", 1664 + I915_READ(RING_EXECLIST_STATUS_LO(engine)), 1665 + I915_READ(RING_EXECLIST_STATUS_HI(engine))); 1666 + 1667 + ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); 1668 + read = GEN8_CSB_READ_PTR(ptr); 1669 + write = GEN8_CSB_WRITE_PTR(ptr); 1670 + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? 
%s\n", 1671 + read, execlists->csb_head, 1672 + write, 1673 + intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), 1674 + yesno(test_bit(ENGINE_IRQ_EXECLIST, 1675 + &engine->irq_posted))); 1676 + if (read >= GEN8_CSB_ENTRIES) 1677 + read = 0; 1678 + if (write >= GEN8_CSB_ENTRIES) 1679 + write = 0; 1680 + if (read > write) 1681 + write += GEN8_CSB_ENTRIES; 1682 + while (read < write) { 1683 + idx = ++read % GEN8_CSB_ENTRIES; 1684 + drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", 1685 + idx, 1686 + I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), 1687 + hws[idx * 2], 1688 + I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), 1689 + hws[idx * 2 + 1]); 1690 + } 1691 + 1692 + rcu_read_lock(); 1693 + for (idx = 0; idx < execlists_num_ports(execlists); idx++) { 1694 + unsigned int count; 1695 + 1696 + rq = port_unpack(&execlists->port[idx], &count); 1697 + if (rq) { 1698 + drm_printf(m, "\t\tELSP[%d] count=%d, ", 1699 + idx, count); 1700 + print_request(m, rq, "rq: "); 1701 + } else { 1702 + drm_printf(m, "\t\tELSP[%d] idle\n", 1703 + idx); 1704 + } 1705 + } 1706 + rcu_read_unlock(); 1707 + 1708 + spin_lock_irq(&engine->timeline->lock); 1709 + for (rb = execlists->first; rb; rb = rb_next(rb)) { 1710 + struct i915_priolist *p = 1711 + rb_entry(rb, typeof(*p), node); 1712 + 1713 + list_for_each_entry(rq, &p->requests, 1714 + priotree.link) 1715 + print_request(m, rq, "\t\tQ "); 1716 + } 1717 + spin_unlock_irq(&engine->timeline->lock); 1718 + } else if (INTEL_GEN(dev_priv) > 6) { 1719 + drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", 1720 + I915_READ(RING_PP_DIR_BASE(engine))); 1721 + drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", 1722 + I915_READ(RING_PP_DIR_BASE_READ(engine))); 1723 + drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", 1724 + I915_READ(RING_PP_DIR_DCLV(engine))); 1725 + } 1726 + 1727 + spin_lock_irq(&b->rb_lock); 1728 + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { 1729 + struct intel_wait *w = 
rb_entry(rb, typeof(*w), node); 1730 + 1731 + drm_printf(m, "\t%s [%d] waiting for %x\n", 1732 + w->tsk->comm, w->tsk->pid, w->seqno); 1733 + } 1734 + spin_unlock_irq(&b->rb_lock); 1735 + 1736 + drm_printf(m, "\n"); 1619 1737 } 1620 1738 1621 1739 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+265
drivers/gpu/drm/i915/intel_guc.c
··· 1 + /* 2 + * Copyright © 2014-2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 
22 + * 23 + */ 24 + 25 + #include "intel_guc.h" 26 + #include "i915_drv.h" 27 + 28 + static void gen8_guc_raise_irq(struct intel_guc *guc) 29 + { 30 + struct drm_i915_private *dev_priv = guc_to_i915(guc); 31 + 32 + I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); 33 + } 34 + 35 + static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) 36 + { 37 + GEM_BUG_ON(!guc->send_regs.base); 38 + GEM_BUG_ON(!guc->send_regs.count); 39 + GEM_BUG_ON(i >= guc->send_regs.count); 40 + 41 + return _MMIO(guc->send_regs.base + 4 * i); 42 + } 43 + 44 + void intel_guc_init_send_regs(struct intel_guc *guc) 45 + { 46 + struct drm_i915_private *dev_priv = guc_to_i915(guc); 47 + enum forcewake_domains fw_domains = 0; 48 + unsigned int i; 49 + 50 + guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); 51 + guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; 52 + 53 + for (i = 0; i < guc->send_regs.count; i++) { 54 + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, 55 + guc_send_reg(guc, i), 56 + FW_REG_READ | FW_REG_WRITE); 57 + } 58 + guc->send_regs.fw_domains = fw_domains; 59 + } 60 + 61 + void intel_guc_init_early(struct intel_guc *guc) 62 + { 63 + intel_guc_ct_init_early(&guc->ct); 64 + 65 + mutex_init(&guc->send_mutex); 66 + guc->send = intel_guc_send_nop; 67 + guc->notify = gen8_guc_raise_irq; 68 + } 69 + 70 + int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) 71 + { 72 + WARN(1, "Unexpected send: action=%#x\n", *action); 73 + return -ENODEV; 74 + } 75 + 76 + /* 77 + * This function implements the MMIO based host to GuC interface. 
78 + */ 79 + int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) 80 + { 81 + struct drm_i915_private *dev_priv = guc_to_i915(guc); 82 + u32 status; 83 + int i; 84 + int ret; 85 + 86 + GEM_BUG_ON(!len); 87 + GEM_BUG_ON(len > guc->send_regs.count); 88 + 89 + /* If CT is available, we expect to use MMIO only during init/fini */ 90 + GEM_BUG_ON(HAS_GUC_CT(dev_priv) && 91 + *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && 92 + *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); 93 + 94 + mutex_lock(&guc->send_mutex); 95 + intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); 96 + 97 + for (i = 0; i < len; i++) 98 + I915_WRITE(guc_send_reg(guc, i), action[i]); 99 + 100 + POSTING_READ(guc_send_reg(guc, i - 1)); 101 + 102 + intel_guc_notify(guc); 103 + 104 + /* 105 + * No GuC command should ever take longer than 10ms. 106 + * Fast commands should still complete in 10us. 107 + */ 108 + ret = __intel_wait_for_register_fw(dev_priv, 109 + guc_send_reg(guc, 0), 110 + INTEL_GUC_RECV_MASK, 111 + INTEL_GUC_RECV_MASK, 112 + 10, 10, &status); 113 + if (status != INTEL_GUC_STATUS_SUCCESS) { 114 + /* 115 + * Either the GuC explicitly returned an error (which 116 + * we convert to -EIO here) or no response at all was 117 + * received within the timeout limit (-ETIMEDOUT) 118 + */ 119 + if (ret != -ETIMEDOUT) 120 + ret = -EIO; 121 + 122 + DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" 123 + " ret=%d status=0x%08X response=0x%08X\n", 124 + action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); 125 + } 126 + 127 + intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); 128 + mutex_unlock(&guc->send_mutex); 129 + 130 + return ret; 131 + } 132 + 133 + int intel_guc_sample_forcewake(struct intel_guc *guc) 134 + { 135 + struct drm_i915_private *dev_priv = guc_to_i915(guc); 136 + u32 action[2]; 137 + 138 + action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; 139 + /* WaRsDisableCoarsePowerGating:skl,bxt */ 140 + if 
(!intel_rc6_enabled() || 141 + NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) 142 + action[1] = 0; 143 + else 144 + /* bit 0 and 1 are for Render and Media domain separately */ 145 + action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; 146 + 147 + return intel_guc_send(guc, action, ARRAY_SIZE(action)); 148 + } 149 + 150 + /** 151 + * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode 152 + * @guc: intel_guc structure 153 + * @rsa_offset: rsa offset w.r.t ggtt base of huc vma 154 + * 155 + * Triggers a HuC firmware authentication request to the GuC via intel_guc_send 156 + * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by 157 + * intel_huc_auth(). 158 + * 159 + * Return: non-zero code on error 160 + */ 161 + int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) 162 + { 163 + u32 action[] = { 164 + INTEL_GUC_ACTION_AUTHENTICATE_HUC, 165 + rsa_offset 166 + }; 167 + 168 + return intel_guc_send(guc, action, ARRAY_SIZE(action)); 169 + } 170 + 171 + /** 172 + * intel_guc_suspend() - notify GuC entering suspend state 173 + * @dev_priv: i915 device private 174 + */ 175 + int intel_guc_suspend(struct drm_i915_private *dev_priv) 176 + { 177 + struct intel_guc *guc = &dev_priv->guc; 178 + struct i915_gem_context *ctx; 179 + u32 data[3]; 180 + 181 + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) 182 + return 0; 183 + 184 + gen9_disable_guc_interrupts(dev_priv); 185 + 186 + ctx = dev_priv->kernel_context; 187 + 188 + data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; 189 + /* any value greater than GUC_POWER_D0 */ 190 + data[1] = GUC_POWER_D1; 191 + /* first page is shared data with GuC */ 192 + data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + 193 + LRC_GUCSHR_PN * PAGE_SIZE; 194 + 195 + return intel_guc_send(guc, data, ARRAY_SIZE(data)); 196 + } 197 + 198 + /** 199 + * intel_guc_resume() - notify GuC resuming from suspend state 200 + * @dev_priv: i915 device private 201 + */ 202 + int intel_guc_resume(struct drm_i915_private 
*dev_priv) 203 + { 204 + struct intel_guc *guc = &dev_priv->guc; 205 + struct i915_gem_context *ctx; 206 + u32 data[3]; 207 + 208 + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) 209 + return 0; 210 + 211 + if (i915_modparams.guc_log_level >= 0) 212 + gen9_enable_guc_interrupts(dev_priv); 213 + 214 + ctx = dev_priv->kernel_context; 215 + 216 + data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; 217 + data[1] = GUC_POWER_D0; 218 + /* first page is shared data with GuC */ 219 + data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + 220 + LRC_GUCSHR_PN * PAGE_SIZE; 221 + 222 + return intel_guc_send(guc, data, ARRAY_SIZE(data)); 223 + } 224 + 225 + /** 226 + * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage 227 + * @guc: the guc 228 + * @size: size of area to allocate (both virtual space and memory) 229 + * 230 + * This is a wrapper to create an object for use with the GuC. In order to 231 + * use it inside the GuC, an object needs to be pinned lifetime, so we allocate 232 + * both some backing storage and a range inside the Global GTT. We must pin 233 + * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that 234 + * range is reserved inside GuC. 235 + * 236 + * Return: A i915_vma if successful, otherwise an ERR_PTR. 237 + */ 238 + struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) 239 + { 240 + struct drm_i915_private *dev_priv = guc_to_i915(guc); 241 + struct drm_i915_gem_object *obj; 242 + struct i915_vma *vma; 243 + int ret; 244 + 245 + obj = i915_gem_object_create(dev_priv, size); 246 + if (IS_ERR(obj)) 247 + return ERR_CAST(obj); 248 + 249 + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); 250 + if (IS_ERR(vma)) 251 + goto err; 252 + 253 + ret = i915_vma_pin(vma, 0, PAGE_SIZE, 254 + PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); 255 + if (ret) { 256 + vma = ERR_PTR(ret); 257 + goto err; 258 + } 259 + 260 + return vma; 261 + 262 + err: 263 + i915_gem_object_put(obj); 264 + return vma; 265 + }
+110
drivers/gpu/drm/i915/intel_guc.h
··· 1 + /* 2 + * Copyright © 2014-2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 
22 + * 23 + */ 24 + 25 + #ifndef _INTEL_GUC_H_ 26 + #define _INTEL_GUC_H_ 27 + 28 + #include "intel_uncore.h" 29 + #include "intel_guc_fwif.h" 30 + #include "intel_guc_ct.h" 31 + #include "intel_guc_log.h" 32 + #include "intel_uc_fw.h" 33 + #include "i915_guc_reg.h" 34 + #include "i915_vma.h" 35 + 36 + struct intel_guc { 37 + struct intel_uc_fw fw; 38 + struct intel_guc_log log; 39 + struct intel_guc_ct ct; 40 + 41 + /* Log snapshot if GuC errors during load */ 42 + struct drm_i915_gem_object *load_err_log; 43 + 44 + /* intel_guc_recv interrupt related state */ 45 + bool interrupts_enabled; 46 + 47 + struct i915_vma *ads_vma; 48 + struct i915_vma *stage_desc_pool; 49 + void *stage_desc_pool_vaddr; 50 + struct ida stage_ids; 51 + 52 + struct i915_guc_client *execbuf_client; 53 + 54 + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); 55 + /* Cyclic counter mod pagesize */ 56 + u32 db_cacheline; 57 + 58 + /* GuC's FW specific registers used in MMIO send */ 59 + struct { 60 + u32 base; 61 + unsigned int count; 62 + enum forcewake_domains fw_domains; 63 + } send_regs; 64 + 65 + /* To serialize the intel_guc_send actions */ 66 + struct mutex send_mutex; 67 + 68 + /* GuC's FW specific send function */ 69 + int (*send)(struct intel_guc *guc, const u32 *data, u32 len); 70 + 71 + /* GuC's FW specific notify function */ 72 + void (*notify)(struct intel_guc *guc); 73 + }; 74 + 75 + static 76 + inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) 77 + { 78 + return guc->send(guc, action, len); 79 + } 80 + 81 + static inline void intel_guc_notify(struct intel_guc *guc) 82 + { 83 + guc->notify(guc); 84 + } 85 + 86 + static inline u32 guc_ggtt_offset(struct i915_vma *vma) 87 + { 88 + u32 offset = i915_ggtt_offset(vma); 89 + 90 + GEM_BUG_ON(offset < GUC_WOPCM_TOP); 91 + GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); 92 + 93 + return offset; 94 + } 95 + 96 + void intel_guc_init_early(struct intel_guc *guc); 97 + void 
intel_guc_init_send_regs(struct intel_guc *guc); 98 + int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); 99 + int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); 100 + int intel_guc_sample_forcewake(struct intel_guc *guc); 101 + int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); 102 + int intel_guc_suspend(struct drm_i915_private *dev_priv); 103 + int intel_guc_resume(struct drm_i915_private *dev_priv); 104 + struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); 105 + 106 + int intel_guc_select_fw(struct intel_guc *guc); 107 + int intel_guc_init_hw(struct intel_guc *guc); 108 + u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); 109 + 110 + #endif
+24 -24
drivers/gpu/drm/i915/intel_guc_fwif.h
··· 178 178 */ 179 179 180 180 struct uc_css_header { 181 - uint32_t module_type; 181 + u32 module_type; 182 182 /* header_size includes all non-uCode bits, including css_header, rsa 183 183 * key, modulus key and exponent data. */ 184 - uint32_t header_size_dw; 185 - uint32_t header_version; 186 - uint32_t module_id; 187 - uint32_t module_vendor; 184 + u32 header_size_dw; 185 + u32 header_version; 186 + u32 module_id; 187 + u32 module_vendor; 188 188 union { 189 189 struct { 190 - uint8_t day; 191 - uint8_t month; 192 - uint16_t year; 190 + u8 day; 191 + u8 month; 192 + u16 year; 193 193 }; 194 - uint32_t date; 194 + u32 date; 195 195 }; 196 - uint32_t size_dw; /* uCode plus header_size_dw */ 197 - uint32_t key_size_dw; 198 - uint32_t modulus_size_dw; 199 - uint32_t exponent_size_dw; 196 + u32 size_dw; /* uCode plus header_size_dw */ 197 + u32 key_size_dw; 198 + u32 modulus_size_dw; 199 + u32 exponent_size_dw; 200 200 union { 201 201 struct { 202 - uint8_t hour; 203 - uint8_t min; 204 - uint16_t sec; 202 + u8 hour; 203 + u8 min; 204 + u16 sec; 205 205 }; 206 - uint32_t time; 206 + u32 time; 207 207 }; 208 208 209 209 char username[8]; 210 210 char buildnumber[12]; 211 211 union { 212 212 struct { 213 - uint32_t branch_client_version; 214 - uint32_t sw_version; 213 + u32 branch_client_version; 214 + u32 sw_version; 215 215 } guc; 216 216 struct { 217 - uint32_t sw_version; 218 - uint32_t reserved; 217 + u32 sw_version; 218 + u32 reserved; 219 219 } huc; 220 220 }; 221 - uint32_t prod_preprod_fw; 222 - uint32_t reserved[12]; 223 - uint32_t header_info; 221 + u32 prod_preprod_fw; 222 + u32 reserved[12]; 223 + u32 header_info; 224 224 } __packed; 225 225 226 226 struct guc_doorbell_info {
+1 -4
drivers/gpu/drm/i915/intel_guc_loader.c
··· 386 386 { 387 387 struct drm_i915_private *dev_priv = guc_to_i915(guc); 388 388 389 - guc->fw.path = NULL; 390 - guc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; 391 - guc->fw.load_status = INTEL_UC_FIRMWARE_NONE; 392 - guc->fw.type = INTEL_UC_FW_TYPE_GUC; 389 + intel_uc_fw_init(&guc->fw, INTEL_UC_FW_TYPE_GUC); 393 390 394 391 if (i915_modparams.guc_firmware_path) { 395 392 guc->fw.path = i915_modparams.guc_firmware_path;
+5 -1
drivers/gpu/drm/i915/intel_guc_log.c
··· 21 21 * IN THE SOFTWARE. 22 22 * 23 23 */ 24 + 24 25 #include <linux/debugfs.h> 25 26 #include <linux/relay.h> 27 + 28 + #include "intel_guc_log.h" 26 29 #include "i915_drv.h" 27 30 28 31 static void guc_log_capture_logs(struct intel_guc *guc); ··· 528 525 { 529 526 struct i915_vma *vma; 530 527 unsigned long offset; 531 - uint32_t size, flags; 528 + u32 flags; 529 + u32 size; 532 530 int ret; 533 531 534 532 GEM_BUG_ON(guc->log.vma);
+59
drivers/gpu/drm/i915/intel_guc_log.h
··· 1 + /* 2 + * Copyright © 2014-2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 
22 + * 23 + */ 24 + 25 + #ifndef _INTEL_GUC_LOG_H_ 26 + #define _INTEL_GUC_LOG_H_ 27 + 28 + #include <linux/workqueue.h> 29 + 30 + #include "intel_guc_fwif.h" 31 + 32 + struct drm_i915_private; 33 + struct intel_guc; 34 + 35 + struct intel_guc_log { 36 + u32 flags; 37 + struct i915_vma *vma; 38 + /* The runtime stuff gets created only when GuC logging gets enabled */ 39 + struct { 40 + void *buf_addr; 41 + struct workqueue_struct *flush_wq; 42 + struct work_struct flush_work; 43 + struct rchan *relay_chan; 44 + } runtime; 45 + /* logging related stats */ 46 + u32 capture_miss_count; 47 + u32 flush_interrupt_count; 48 + u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; 49 + u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; 50 + u32 flush_count[GUC_MAX_LOG_BUFFER]; 51 + }; 52 + 53 + int intel_guc_log_create(struct intel_guc *guc); 54 + void intel_guc_log_destroy(struct intel_guc *guc); 55 + int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); 56 + void i915_guc_log_register(struct drm_i915_private *dev_priv); 57 + void i915_guc_log_unregister(struct drm_i915_private *dev_priv); 58 + 59 + #endif
+5 -6
drivers/gpu/drm/i915/intel_huc.c
··· 21 21 * IN THE SOFTWARE. 22 22 * 23 23 */ 24 - #include <linux/firmware.h> 24 + 25 + #include <linux/types.h> 26 + 27 + #include "intel_huc.h" 25 28 #include "i915_drv.h" 26 - #include "intel_uc.h" 27 29 28 30 /** 29 31 * DOC: HuC Firmware ··· 152 150 { 153 151 struct drm_i915_private *dev_priv = huc_to_i915(huc); 154 152 155 - huc->fw.path = NULL; 156 - huc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; 157 - huc->fw.load_status = INTEL_UC_FIRMWARE_NONE; 158 - huc->fw.type = INTEL_UC_FW_TYPE_HUC; 153 + intel_uc_fw_init(&huc->fw, INTEL_UC_FW_TYPE_HUC); 159 154 160 155 if (i915_modparams.huc_firmware_path) { 161 156 huc->fw.path = i915_modparams.huc_firmware_path;
+41
drivers/gpu/drm/i915/intel_huc.h
··· 1 + /* 2 + * Copyright © 2014-2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + * 23 + */ 24 + 25 + #ifndef _INTEL_HUC_H_ 26 + #define _INTEL_HUC_H_ 27 + 28 + #include "intel_uc_fw.h" 29 + 30 + struct intel_huc { 31 + /* Generic uC firmware management */ 32 + struct intel_uc_fw fw; 33 + 34 + /* HuC-specific additions */ 35 + }; 36 + 37 + void intel_huc_select_fw(struct intel_huc *huc); 38 + void intel_huc_init_hw(struct intel_huc *huc); 39 + void intel_huc_auth(struct intel_huc *huc); 40 + 41 + #endif
+202 -80
drivers/gpu/drm/i915/intel_lrc.c
··· 208 208 209 209 /* Typical size of the average request (2 pipecontrols and a MI_BB) */ 210 210 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ 211 - 212 211 #define WA_TAIL_DWORDS 2 212 + #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) 213 + #define PREEMPT_ID 0x1 213 214 214 215 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, 215 216 struct intel_engine_cs *engine); ··· 244 243 return 0; 245 244 246 245 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && 247 - USES_PPGTT(dev_priv) && 248 - i915_modparams.use_mmio_flip >= 0) 246 + USES_PPGTT(dev_priv)) 249 247 return 1; 250 248 251 249 return 0; ··· 348 348 return ptr_pack_bits(p, first, 1); 349 349 } 350 350 351 + static void unwind_wa_tail(struct drm_i915_gem_request *rq) 352 + { 353 + rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); 354 + assert_ring_tail_valid(rq->ring, rq->tail); 355 + } 356 + 357 + static void unwind_incomplete_requests(struct intel_engine_cs *engine) 358 + { 359 + struct drm_i915_gem_request *rq, *rn; 360 + struct i915_priolist *uninitialized_var(p); 361 + int last_prio = I915_PRIORITY_INVALID; 362 + 363 + lockdep_assert_held(&engine->timeline->lock); 364 + 365 + list_for_each_entry_safe_reverse(rq, rn, 366 + &engine->timeline->requests, 367 + link) { 368 + if (i915_gem_request_completed(rq)) 369 + return; 370 + 371 + __i915_gem_request_unsubmit(rq); 372 + unwind_wa_tail(rq); 373 + 374 + GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); 375 + if (rq->priotree.priority != last_prio) { 376 + p = lookup_priolist(engine, 377 + &rq->priotree, 378 + rq->priotree.priority); 379 + p = ptr_mask_bits(p, 1); 380 + 381 + last_prio = rq->priotree.priority; 382 + } 383 + 384 + list_add(&rq->priotree.link, &p->requests); 385 + } 386 + } 387 + 351 388 static inline void 352 389 execlists_context_status_change(struct drm_i915_gem_request *rq, 353 390 unsigned long status) ··· 429 392 return ce->lrc_desc; 430 393 } 431 394 395 + static inline void 
elsp_write(u64 desc, u32 __iomem *elsp) 396 + { 397 + writel(upper_32_bits(desc), elsp); 398 + writel(lower_32_bits(desc), elsp); 399 + } 400 + 432 401 static void execlists_submit_ports(struct intel_engine_cs *engine) 433 402 { 434 403 struct execlist_port *port = engine->execlists.port; ··· 460 417 desc = 0; 461 418 } 462 419 463 - writel(upper_32_bits(desc), elsp); 464 - writel(lower_32_bits(desc), elsp); 420 + elsp_write(desc, elsp); 465 421 } 466 422 } 467 423 ··· 493 451 port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); 494 452 } 495 453 454 + static void inject_preempt_context(struct intel_engine_cs *engine) 455 + { 456 + struct intel_context *ce = 457 + &engine->i915->preempt_context->engine[engine->id]; 458 + u32 __iomem *elsp = 459 + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); 460 + unsigned int n; 461 + 462 + GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); 463 + GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES)); 464 + 465 + memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES); 466 + ce->ring->tail += WA_TAIL_BYTES; 467 + ce->ring->tail &= (ce->ring->size - 1); 468 + ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; 469 + 470 + for (n = execlists_num_ports(&engine->execlists); --n; ) 471 + elsp_write(0, elsp); 472 + 473 + elsp_write(ce->lrc_desc, elsp); 474 + } 475 + 476 + static bool can_preempt(struct intel_engine_cs *engine) 477 + { 478 + return INTEL_INFO(engine->i915)->has_logical_ring_preemption; 479 + } 480 + 496 481 static void execlists_dequeue(struct intel_engine_cs *engine) 497 482 { 498 - struct drm_i915_gem_request *last; 499 483 struct intel_engine_execlists * const execlists = &engine->execlists; 500 484 struct execlist_port *port = execlists->port; 501 485 const struct execlist_port * const last_port = 502 486 &execlists->port[execlists->port_mask]; 487 + struct drm_i915_gem_request *last = port_request(port); 503 488 struct rb_node *rb; 504 489 bool submit = false; 505 - 
506 - last = port_request(port); 507 - if (last) 508 - /* WaIdleLiteRestore:bdw,skl 509 - * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL 510 - * as we resubmit the request. See gen8_emit_breadcrumb() 511 - * for where we prepare the padding after the end of the 512 - * request. 513 - */ 514 - last->tail = last->wa_tail; 515 490 516 491 /* Hardware submission is through 2 ports. Conceptually each port 517 492 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is ··· 554 495 spin_lock_irq(&engine->timeline->lock); 555 496 rb = execlists->first; 556 497 GEM_BUG_ON(rb_first(&execlists->queue) != rb); 557 - while (rb) { 498 + if (!rb) 499 + goto unlock; 500 + 501 + if (last) { 502 + /* 503 + * Don't resubmit or switch until all outstanding 504 + * preemptions (lite-restore) are seen. Then we 505 + * know the next preemption status we see corresponds 506 + * to this ELSP update. 507 + */ 508 + if (port_count(&port[0]) > 1) 509 + goto unlock; 510 + 511 + if (can_preempt(engine) && 512 + rb_entry(rb, struct i915_priolist, node)->priority > 513 + max(last->priotree.priority, 0)) { 514 + /* 515 + * Switch to our empty preempt context so 516 + * the state of the GPU is known (idle). 517 + */ 518 + inject_preempt_context(engine); 519 + execlists->preempt = true; 520 + goto unlock; 521 + } else { 522 + /* 523 + * In theory, we could coalesce more requests onto 524 + * the second port (the first port is active, with 525 + * no preemptions pending). However, that means we 526 + * then have to deal with the possible lite-restore 527 + * of the second port (as we submit the ELSP, there 528 + * may be a context-switch) but also we may complete 529 + * the resubmission before the context-switch. Ergo, 530 + * coalescing onto the second port will cause a 531 + * preemption event, but we cannot predict whether 532 + * that will affect port[0] or port[1]. 
533 + * 534 + * If the second port is already active, we can wait 535 + * until the next context-switch before contemplating 536 + * new requests. The GPU will be busy and we should be 537 + * able to resubmit the new ELSP before it idles, 538 + * avoiding pipeline bubbles (momentary pauses where 539 + * the driver is unable to keep up the supply of new 540 + * work). 541 + */ 542 + if (port_count(&port[1])) 543 + goto unlock; 544 + 545 + /* WaIdleLiteRestore:bdw,skl 546 + * Apply the wa NOOPs to prevent 547 + * ring:HEAD == req:TAIL as we resubmit the 548 + * request. See gen8_emit_breadcrumb() for 549 + * where we prepare the padding after the 550 + * end of the request. 551 + */ 552 + last->tail = last->wa_tail; 553 + } 554 + } 555 + 556 + do { 558 557 struct i915_priolist *p = rb_entry(rb, typeof(*p), node); 559 558 struct drm_i915_gem_request *rq, *rn; 560 559 ··· 664 547 } 665 548 666 549 INIT_LIST_HEAD(&rq->priotree.link); 667 - rq->priotree.priority = INT_MAX; 668 - 669 550 __i915_gem_request_submit(rq); 670 551 trace_i915_gem_request_in(rq, port_index(port, execlists)); 671 552 last = rq; ··· 675 560 INIT_LIST_HEAD(&p->requests); 676 561 if (p->priority != I915_PRIORITY_NORMAL) 677 562 kmem_cache_free(engine->i915->priorities, p); 678 - } 563 + } while (rb); 679 564 done: 680 565 execlists->first = rb; 681 566 if (submit) 682 567 port_assign(port, last); 568 + unlock: 683 569 spin_unlock_irq(&engine->timeline->lock); 684 570 685 571 if (submit) ··· 691 575 execlist_cancel_port_requests(struct intel_engine_execlists *execlists) 692 576 { 693 577 struct execlist_port *port = execlists->port; 694 - unsigned int num_ports = ARRAY_SIZE(execlists->port); 578 + unsigned int num_ports = execlists_num_ports(execlists); 695 579 696 580 while (num_ports-- && port_isset(port)) { 697 581 struct drm_i915_gem_request *rq = port_request(port); 698 582 699 - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); 583 + execlists_context_status_change(rq, 
INTEL_CONTEXT_SCHEDULE_PREEMPTED); 700 584 i915_gem_request_put(rq); 701 585 702 586 memset(port, 0, sizeof(*port)); ··· 761 645 spin_unlock_irqrestore(&engine->timeline->lock, flags); 762 646 } 763 647 764 - static bool execlists_elsp_ready(const struct intel_engine_cs *engine) 765 - { 766 - const struct execlist_port *port = engine->execlists.port; 767 - 768 - return port_count(&port[0]) + port_count(&port[1]) < 2; 769 - } 770 - 771 648 /* 772 649 * Check the unread Context Status Buffers and manage the submission of new 773 650 * contexts to the ELSP accordingly. ··· 769 660 { 770 661 struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; 771 662 struct intel_engine_execlists * const execlists = &engine->execlists; 772 - struct execlist_port *port = execlists->port; 663 + struct execlist_port * const port = execlists->port; 773 664 struct drm_i915_private *dev_priv = engine->i915; 774 665 775 666 /* We can skip acquiring intel_runtime_pm_get() here as it was taken ··· 854 745 if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) 855 746 continue; 856 747 748 + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE && 749 + buf[2*head + 1] == PREEMPT_ID) { 750 + execlist_cancel_port_requests(execlists); 751 + 752 + spin_lock_irq(&engine->timeline->lock); 753 + unwind_incomplete_requests(engine); 754 + spin_unlock_irq(&engine->timeline->lock); 755 + 756 + GEM_BUG_ON(!execlists->preempt); 757 + execlists->preempt = false; 758 + continue; 759 + } 760 + 761 + if (status & GEN8_CTX_STATUS_PREEMPTED && 762 + execlists->preempt) 763 + continue; 764 + 857 765 /* Check the context/desc id for this event matches */ 858 766 GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); 859 767 ··· 882 756 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); 883 757 884 758 trace_i915_gem_request_out(rq); 759 + rq->priotree.priority = INT_MAX; 885 760 i915_gem_request_put(rq); 886 761 887 762 execlists_port_complete(execlists, port); ··· 902 775 } 903 776 } 904 777 905 - 
if (execlists_elsp_ready(engine)) 778 + if (!execlists->preempt) 906 779 execlists_dequeue(engine); 907 780 908 781 intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); ··· 915 788 struct i915_priolist *p = lookup_priolist(engine, pt, prio); 916 789 917 790 list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); 918 - if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine)) 791 + if (ptr_unmask_bits(p, 1)) 919 792 tasklet_hi_schedule(&engine->execlists.irq_tasklet); 920 793 } 921 794 ··· 935 808 spin_unlock_irqrestore(&engine->timeline->lock, flags); 936 809 } 937 810 811 + static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) 812 + { 813 + return container_of(pt, struct drm_i915_gem_request, priotree); 814 + } 815 + 938 816 static struct intel_engine_cs * 939 817 pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) 940 818 { 941 - struct intel_engine_cs *engine = 942 - container_of(pt, struct drm_i915_gem_request, priotree)->engine; 819 + struct intel_engine_cs *engine = pt_to_request(pt)->engine; 943 820 944 821 GEM_BUG_ON(!locked); 945 822 ··· 961 830 struct i915_dependency *dep, *p; 962 831 struct i915_dependency stack; 963 832 LIST_HEAD(dfs); 833 + 834 + GEM_BUG_ON(prio == I915_PRIORITY_INVALID); 964 835 965 836 if (prio <= READ_ONCE(request->priotree.priority)) 966 837 return; ··· 999 866 * engines. 1000 867 */ 1001 868 list_for_each_entry(p, &pt->signalers_list, signal_link) { 869 + if (i915_gem_request_completed(pt_to_request(p->signaler))) 870 + continue; 871 + 1002 872 GEM_BUG_ON(p->signaler->priority < pt->priority); 1003 873 if (prio > READ_ONCE(p->signaler->priority)) 1004 874 list_move_tail(&p->dfs_link, &dfs); ··· 1015 879 * execlists_submit_request()), we can set our own priority and skip 1016 880 * acquiring the engine locks. 
1017 881 */ 1018 - if (request->priotree.priority == INT_MIN) { 882 + if (request->priotree.priority == I915_PRIORITY_INVALID) { 1019 883 GEM_BUG_ON(!list_empty(&request->priotree.link)); 1020 884 request->priotree.priority = prio; 1021 885 if (stack.dfs_link.next == stack.dfs_link.prev) ··· 1045 909 } 1046 910 1047 911 spin_unlock_irq(&engine->timeline->lock); 1048 - 1049 - /* XXX Do we need to preempt to make room for us and our deps? */ 1050 912 } 1051 913 1052 914 static struct intel_ring * ··· 1240 1106 i915_ggtt_offset(engine->scratch) + 1241 1107 2 * CACHELINE_BYTES); 1242 1108 1109 + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1110 + 1243 1111 /* Pad to end of cacheline */ 1244 1112 while ((unsigned long)batch % CACHELINE_BYTES) 1245 1113 *batch++ = MI_NOOP; ··· 1255 1119 return batch; 1256 1120 } 1257 1121 1258 - /* 1259 - * This batch is started immediately after indirect_ctx batch. Since we ensure 1260 - * that indirect_ctx ends on a cacheline this batch is aligned automatically. 1261 - * 1262 - * The number of DWORDS written are returned using this field. 1263 - * 1264 - * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding 1265 - * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. 
1266 - */ 1267 - static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) 1268 - { 1269 - /* WaDisableCtxRestoreArbitration:bdw,chv */ 1270 - *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1271 - *batch++ = MI_BATCH_BUFFER_END; 1272 - 1273 - return batch; 1274 - } 1275 - 1276 1122 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) 1277 1123 { 1124 + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 1125 + 1278 1126 /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ 1279 1127 batch = gen8_emit_flush_coherentl3_wa(engine, batch); 1280 1128 ··· 1303 1183 *batch++ = 0; 1304 1184 *batch++ = 0; 1305 1185 } 1186 + 1187 + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1306 1188 1307 1189 /* Pad to end of cacheline */ 1308 1190 while ((unsigned long)batch % CACHELINE_BYTES) ··· 1373 1251 break; 1374 1252 case 8: 1375 1253 wa_bb_fn[0] = gen8_init_indirectctx_bb; 1376 - wa_bb_fn[1] = gen8_init_perctx_bb; 1254 + wa_bb_fn[1] = NULL; 1377 1255 break; 1378 1256 default: 1379 1257 MISSING_CASE(INTEL_GEN(engine->i915)); ··· 1459 1337 GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); 1460 1338 clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); 1461 1339 execlists->csb_head = -1; 1340 + execlists->preempt = false; 1462 1341 1463 1342 /* After a GPU reset, we may have requests to replay */ 1464 1343 if (!i915_modparams.enable_guc_submission && execlists->first) ··· 1505 1382 struct drm_i915_gem_request *request) 1506 1383 { 1507 1384 struct intel_engine_execlists * const execlists = &engine->execlists; 1508 - struct drm_i915_gem_request *rq, *rn; 1509 1385 struct intel_context *ce; 1510 1386 unsigned long flags; 1511 1387 ··· 1522 1400 execlist_cancel_port_requests(execlists); 1523 1401 1524 1402 /* Push back any incomplete requests for replay after the reset. 
*/ 1525 - list_for_each_entry_safe_reverse(rq, rn, 1526 - &engine->timeline->requests, link) { 1527 - struct i915_priolist *p; 1528 - 1529 - if (i915_gem_request_completed(rq)) 1530 - break; 1531 - 1532 - __i915_gem_request_unsubmit(rq); 1533 - 1534 - p = lookup_priolist(engine, 1535 - &rq->priotree, 1536 - rq->priotree.priority); 1537 - list_add(&rq->priotree.link, 1538 - &ptr_mask_bits(p, 1)->requests); 1539 - } 1403 + unwind_incomplete_requests(engine); 1540 1404 1541 1405 spin_unlock_irqrestore(&engine->timeline->lock, flags); 1542 1406 ··· 1559 1451 intel_ring_update_space(request->ring); 1560 1452 1561 1453 /* Reset WaIdleLiteRestore:bdw,skl as well */ 1562 - request->tail = 1563 - intel_ring_wrap(request->ring, 1564 - request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); 1565 - assert_ring_tail_valid(request->ring, request->tail); 1454 + unwind_wa_tail(request); 1566 1455 } 1567 1456 1568 1457 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) ··· 1618 1513 if (IS_ERR(cs)) 1619 1514 return PTR_ERR(cs); 1620 1515 1516 + /* 1517 + * WaDisableCtxRestoreArbitration:bdw,chv 1518 + * 1519 + * We don't need to perform MI_ARB_ENABLE as often as we do (in 1520 + * particular all the gen that do not need the w/a at all!), if we 1521 + * took care to make sure that on every switch into this context 1522 + * (both ordinary and for preemption) that arbitrartion was enabled 1523 + * we would be fine. However, there doesn't seem to be a downside to 1524 + * being paranoid and making sure it is set before each batch and 1525 + * every context-switch. 1526 + * 1527 + * Note that if we fail to enable arbitration before the request 1528 + * is complete, then we do not see the context-switch interrupt and 1529 + * the engine hangs (with RING_HEAD == RING_TAIL). 1530 + * 1531 + * That satisfies both the GPGPU w/a and our heavy-handed paranoia. 
1532 + */ 1533 + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1534 + 1621 1535 /* FIXME(BDW): Address space and security selectors. */ 1622 1536 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 1623 1537 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | 1624 1538 (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); 1625 1539 *cs++ = lower_32_bits(offset); 1626 1540 *cs++ = upper_32_bits(offset); 1627 - *cs++ = MI_NOOP; 1628 1541 intel_ring_advance(req, cs); 1629 1542 1630 1543 return 0; ··· 1771 1648 */ 1772 1649 static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) 1773 1650 { 1774 - *cs++ = MI_NOOP; 1651 + /* Ensure there's always at least one preemption point per-request. */ 1652 + *cs++ = MI_ARB_CHECK; 1775 1653 *cs++ = MI_NOOP; 1776 1654 request->wa_tail = intel_ring_offset(request, cs); 1777 1655 } ··· 1793 1669 1794 1670 gen8_emit_wa_tail(request, cs); 1795 1671 } 1796 - 1797 1672 static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; 1798 1673 1799 1674 static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, ··· 1820 1697 1821 1698 gen8_emit_wa_tail(request, cs); 1822 1699 } 1823 - 1824 1700 static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; 1825 1701 1826 1702 static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
+1
drivers/gpu/drm/i915/intel_lrc.h
··· 61 61 enum { 62 62 INTEL_CONTEXT_SCHEDULE_IN = 0, 63 63 INTEL_CONTEXT_SCHEDULE_OUT, 64 + INTEL_CONTEXT_SCHEDULE_PREEMPTED, 64 65 }; 65 66 66 67 /* Logical Rings */
+5 -18
drivers/gpu/drm/i915/intel_lvds.c
··· 939 939 struct drm_display_mode *fixed_mode = NULL; 940 940 struct drm_display_mode *downclock_mode = NULL; 941 941 struct edid *edid; 942 - struct intel_crtc *crtc; 943 942 i915_reg_t lvds_reg; 944 943 u32 lvds; 945 - int pipe; 946 944 u8 pin; 947 945 u32 allowed_scalers; 948 946 ··· 1111 1113 * on. If so, assume that whatever is currently programmed is the 1112 1114 * correct mode. 1113 1115 */ 1114 - 1115 - /* Ironlake: FIXME if still fail, not try pipe mode now */ 1116 - if (HAS_PCH_SPLIT(dev_priv)) 1117 - goto failed; 1118 - 1119 - pipe = (lvds & LVDS_PIPEB_SELECT) ? 1 : 0; 1120 - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); 1121 - 1122 - if (crtc && (lvds & LVDS_PORT_EN)) { 1123 - fixed_mode = intel_crtc_mode_get(dev, &crtc->base); 1124 - if (fixed_mode) { 1125 - DRM_DEBUG_KMS("using current (BIOS) mode: "); 1126 - drm_mode_debug_printmodeline(fixed_mode); 1127 - fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; 1128 - goto out; 1129 - } 1116 + fixed_mode = intel_encoder_current_mode(intel_encoder); 1117 + if (fixed_mode) { 1118 + DRM_DEBUG_KMS("using current (BIOS) mode: "); 1119 + drm_mode_debug_printmodeline(fixed_mode); 1120 + fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; 1130 1121 } 1131 1122 1132 1123 /* If we still don't have a mode after all that, give up. */
+8 -7
drivers/gpu/drm/i915/intel_pipe_crc.c
··· 206 206 static int display_crc_ctl_show(struct seq_file *m, void *data) 207 207 { 208 208 struct drm_i915_private *dev_priv = m->private; 209 - int i; 209 + enum pipe pipe; 210 210 211 - for (i = 0; i < I915_MAX_PIPES; i++) 212 - seq_printf(m, "%c %s\n", pipe_name(i), 213 - pipe_crc_source_name(dev_priv->pipe_crc[i].source)); 211 + for_each_pipe(dev_priv, pipe) 212 + seq_printf(m, "%c %s\n", pipe_name(pipe), 213 + pipe_crc_source_name(dev_priv->pipe_crc[pipe].source)); 214 214 215 215 return 0; 216 216 } ··· 775 775 return -EINVAL; 776 776 } 777 777 778 - static int display_crc_ctl_parse_pipe(const char *buf, enum pipe *pipe) 778 + static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, 779 + const char *buf, enum pipe *pipe) 779 780 { 780 781 const char name = buf[0]; 781 782 782 - if (name < 'A' || name >= pipe_name(I915_MAX_PIPES)) 783 + if (name < 'A' || name >= pipe_name(INTEL_INFO(dev_priv)->num_pipes)) 783 784 return -EINVAL; 784 785 785 786 *pipe = name - 'A'; ··· 829 828 return -EINVAL; 830 829 } 831 830 832 - if (display_crc_ctl_parse_pipe(words[1], &pipe) < 0) { 831 + if (display_crc_ctl_parse_pipe(dev_priv, words[1], &pipe) < 0) { 833 832 DRM_DEBUG_DRIVER("unknown pipe %s\n", words[1]); 834 833 return -EINVAL; 835 834 }
+430 -291
drivers/gpu/drm/i915/intel_pm.c
··· 322 322 { 323 323 u32 val; 324 324 325 - mutex_lock(&dev_priv->rps.hw_lock); 325 + mutex_lock(&dev_priv->pcu_lock); 326 326 327 327 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 328 328 if (enable) ··· 337 337 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) 338 338 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); 339 339 340 - mutex_unlock(&dev_priv->rps.hw_lock); 340 + mutex_unlock(&dev_priv->pcu_lock); 341 341 } 342 342 343 343 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) 344 344 { 345 345 u32 val; 346 346 347 - mutex_lock(&dev_priv->rps.hw_lock); 347 + mutex_lock(&dev_priv->pcu_lock); 348 348 349 349 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 350 350 if (enable) ··· 353 353 val &= ~DSP_MAXFIFO_PM5_ENABLE; 354 354 vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); 355 355 356 - mutex_unlock(&dev_priv->rps.hw_lock); 356 + mutex_unlock(&dev_priv->pcu_lock); 357 357 } 358 358 359 359 #define FW_WM(value, plane) \ ··· 2790 2790 2791 2791 /* read the first set of memory latencies[0:3] */ 2792 2792 val = 0; /* data0 to be programmed to 0 for first set */ 2793 - mutex_lock(&dev_priv->rps.hw_lock); 2793 + mutex_lock(&dev_priv->pcu_lock); 2794 2794 ret = sandybridge_pcode_read(dev_priv, 2795 2795 GEN9_PCODE_READ_MEM_LATENCY, 2796 2796 &val); 2797 - mutex_unlock(&dev_priv->rps.hw_lock); 2797 + mutex_unlock(&dev_priv->pcu_lock); 2798 2798 2799 2799 if (ret) { 2800 2800 DRM_ERROR("SKL Mailbox read error = %d\n", ret); ··· 2811 2811 2812 2812 /* read the second set of memory latencies[4:7] */ 2813 2813 val = 1; /* data0 to be programmed to 1 for second set */ 2814 - mutex_lock(&dev_priv->rps.hw_lock); 2814 + mutex_lock(&dev_priv->pcu_lock); 2815 2815 ret = sandybridge_pcode_read(dev_priv, 2816 2816 GEN9_PCODE_READ_MEM_LATENCY, 2817 2817 &val); 2818 - mutex_unlock(&dev_priv->rps.hw_lock); 2818 + mutex_unlock(&dev_priv->pcu_lock); 2819 2819 if (ret) { 2820 2820 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2821 2821 return; 
··· 3608 3608 return 0; 3609 3609 3610 3610 DRM_DEBUG_KMS("Enabling the SAGV\n"); 3611 - mutex_lock(&dev_priv->rps.hw_lock); 3611 + mutex_lock(&dev_priv->pcu_lock); 3612 3612 3613 3613 ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, 3614 3614 GEN9_SAGV_ENABLE); 3615 3615 3616 3616 /* We don't need to wait for the SAGV when enabling */ 3617 - mutex_unlock(&dev_priv->rps.hw_lock); 3617 + mutex_unlock(&dev_priv->pcu_lock); 3618 3618 3619 3619 /* 3620 3620 * Some skl systems, pre-release machines in particular, ··· 3645 3645 return 0; 3646 3646 3647 3647 DRM_DEBUG_KMS("Disabling the SAGV\n"); 3648 - mutex_lock(&dev_priv->rps.hw_lock); 3648 + mutex_lock(&dev_priv->pcu_lock); 3649 3649 3650 3650 /* bspec says to keep retrying for at least 1 ms */ 3651 3651 ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, 3652 3652 GEN9_SAGV_DISABLE, 3653 3653 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 3654 3654 1); 3655 - mutex_unlock(&dev_priv->rps.hw_lock); 3655 + mutex_unlock(&dev_priv->pcu_lock); 3656 3656 3657 3657 /* 3658 3658 * Some skl systems, pre-release machines in particular, ··· 4820 4820 return a->start < b->end && b->start < a->end; 4821 4821 } 4822 4822 4823 - bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, 4823 + bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, 4824 + const struct skl_ddb_entry **entries, 4824 4825 const struct skl_ddb_entry *ddb, 4825 4826 int ignore) 4826 4827 { 4827 - int i; 4828 + enum pipe pipe; 4828 4829 4829 - for (i = 0; i < I915_MAX_PIPES; i++) 4830 - if (i != ignore && entries[i] && 4831 - skl_ddb_entries_overlap(ddb, entries[i])) 4830 + for_each_pipe(dev_priv, pipe) { 4831 + if (pipe != ignore && entries[pipe] && 4832 + skl_ddb_entries_overlap(ddb, entries[pipe])) 4832 4833 return true; 4834 + } 4833 4835 4834 4836 return false; 4835 4837 } ··· 5621 5619 wm->level = VLV_WM_LEVEL_PM2; 5622 5620 5623 5621 if (IS_CHERRYVIEW(dev_priv)) { 5624 - 
mutex_lock(&dev_priv->rps.hw_lock); 5622 + mutex_lock(&dev_priv->pcu_lock); 5625 5623 5626 5624 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 5627 5625 if (val & DSP_MAXFIFO_PM5_ENABLE) ··· 5651 5649 wm->level = VLV_WM_LEVEL_DDR_DVFS; 5652 5650 } 5653 5651 5654 - mutex_unlock(&dev_priv->rps.hw_lock); 5652 + mutex_unlock(&dev_priv->pcu_lock); 5655 5653 } 5656 5654 5657 5655 for_each_intel_crtc(dev, crtc) { ··· 5829 5827 { 5830 5828 u32 val; 5831 5829 5830 + /* Display WA #0477 WaDisableIPC: skl */ 5831 + if (IS_SKYLAKE(dev_priv)) { 5832 + dev_priv->ipc_enabled = false; 5833 + return; 5834 + } 5835 + 5832 5836 val = I915_READ(DISP_ARB_CTL2); 5833 5837 5834 5838 if (dev_priv->ipc_enabled) ··· 5988 5980 */ 5989 5981 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) 5990 5982 { 5983 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 5991 5984 u32 limits; 5992 5985 5993 5986 /* Only set the down limit when we've reached the lowest level to avoid ··· 5998 5989 * frequency, if the down threshold expires in that window we will not 5999 5990 * receive a down interrupt. 
*/ 6000 5991 if (INTEL_GEN(dev_priv) >= 9) { 6001 - limits = (dev_priv->rps.max_freq_softlimit) << 23; 6002 - if (val <= dev_priv->rps.min_freq_softlimit) 6003 - limits |= (dev_priv->rps.min_freq_softlimit) << 14; 5992 + limits = (rps->max_freq_softlimit) << 23; 5993 + if (val <= rps->min_freq_softlimit) 5994 + limits |= (rps->min_freq_softlimit) << 14; 6004 5995 } else { 6005 - limits = dev_priv->rps.max_freq_softlimit << 24; 6006 - if (val <= dev_priv->rps.min_freq_softlimit) 6007 - limits |= dev_priv->rps.min_freq_softlimit << 16; 5996 + limits = rps->max_freq_softlimit << 24; 5997 + if (val <= rps->min_freq_softlimit) 5998 + limits |= rps->min_freq_softlimit << 16; 6008 5999 } 6009 6000 6010 6001 return limits; ··· 6012 6003 6013 6004 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) 6014 6005 { 6006 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6015 6007 int new_power; 6016 6008 u32 threshold_up = 0, threshold_down = 0; /* in % */ 6017 6009 u32 ei_up = 0, ei_down = 0; 6018 6010 6019 - new_power = dev_priv->rps.power; 6020 - switch (dev_priv->rps.power) { 6011 + new_power = rps->power; 6012 + switch (rps->power) { 6021 6013 case LOW_POWER: 6022 - if (val > dev_priv->rps.efficient_freq + 1 && 6023 - val > dev_priv->rps.cur_freq) 6014 + if (val > rps->efficient_freq + 1 && 6015 + val > rps->cur_freq) 6024 6016 new_power = BETWEEN; 6025 6017 break; 6026 6018 6027 6019 case BETWEEN: 6028 - if (val <= dev_priv->rps.efficient_freq && 6029 - val < dev_priv->rps.cur_freq) 6020 + if (val <= rps->efficient_freq && 6021 + val < rps->cur_freq) 6030 6022 new_power = LOW_POWER; 6031 - else if (val >= dev_priv->rps.rp0_freq && 6032 - val > dev_priv->rps.cur_freq) 6023 + else if (val >= rps->rp0_freq && 6024 + val > rps->cur_freq) 6033 6025 new_power = HIGH_POWER; 6034 6026 break; 6035 6027 6036 6028 case HIGH_POWER: 6037 - if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && 6038 - val < dev_priv->rps.cur_freq) 6029 + if (val < 
(rps->rp1_freq + rps->rp0_freq) >> 1 && 6030 + val < rps->cur_freq) 6039 6031 new_power = BETWEEN; 6040 6032 break; 6041 6033 } 6042 6034 /* Max/min bins are special */ 6043 - if (val <= dev_priv->rps.min_freq_softlimit) 6035 + if (val <= rps->min_freq_softlimit) 6044 6036 new_power = LOW_POWER; 6045 - if (val >= dev_priv->rps.max_freq_softlimit) 6037 + if (val >= rps->max_freq_softlimit) 6046 6038 new_power = HIGH_POWER; 6047 - if (new_power == dev_priv->rps.power) 6039 + if (new_power == rps->power) 6048 6040 return; 6049 6041 6050 6042 /* Note the units here are not exactly 1us, but 1280ns. */ ··· 6108 6098 GEN6_RP_DOWN_IDLE_AVG); 6109 6099 6110 6100 skip_hw_write: 6111 - dev_priv->rps.power = new_power; 6112 - dev_priv->rps.up_threshold = threshold_up; 6113 - dev_priv->rps.down_threshold = threshold_down; 6114 - dev_priv->rps.last_adj = 0; 6101 + rps->power = new_power; 6102 + rps->up_threshold = threshold_up; 6103 + rps->down_threshold = threshold_down; 6104 + rps->last_adj = 0; 6115 6105 } 6116 6106 6117 6107 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) 6118 6108 { 6109 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6119 6110 u32 mask = 0; 6120 6111 6121 6112 /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ 6122 - if (val > dev_priv->rps.min_freq_softlimit) 6113 + if (val > rps->min_freq_softlimit) 6123 6114 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; 6124 - if (val < dev_priv->rps.max_freq_softlimit) 6115 + if (val < rps->max_freq_softlimit) 6125 6116 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 6126 6117 6127 6118 mask &= dev_priv->pm_rps_events; ··· 6135 6124 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. 
*/ 6136 6125 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) 6137 6126 { 6127 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6128 + 6138 6129 /* min/max delay may still have been modified so be sure to 6139 6130 * write the limits value. 6140 6131 */ 6141 - if (val != dev_priv->rps.cur_freq) { 6132 + if (val != rps->cur_freq) { 6142 6133 gen6_set_rps_thresholds(dev_priv, val); 6143 6134 6144 6135 if (INTEL_GEN(dev_priv) >= 9) ··· 6162 6149 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); 6163 6150 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 6164 6151 6165 - dev_priv->rps.cur_freq = val; 6152 + rps->cur_freq = val; 6166 6153 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 6167 6154 6168 6155 return 0; ··· 6178 6165 6179 6166 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 6180 6167 6181 - if (val != dev_priv->rps.cur_freq) { 6168 + if (val != dev_priv->gt_pm.rps.cur_freq) { 6182 6169 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); 6183 6170 if (err) 6184 6171 return err; ··· 6186 6173 gen6_set_rps_thresholds(dev_priv, val); 6187 6174 } 6188 6175 6189 - dev_priv->rps.cur_freq = val; 6176 + dev_priv->gt_pm.rps.cur_freq = val; 6190 6177 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 6191 6178 6192 6179 return 0; ··· 6201 6188 */ 6202 6189 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) 6203 6190 { 6204 - u32 val = dev_priv->rps.idle_freq; 6191 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6192 + u32 val = rps->idle_freq; 6205 6193 int err; 6206 6194 6207 - if (dev_priv->rps.cur_freq <= val) 6195 + if (rps->cur_freq <= val) 6208 6196 return; 6209 6197 6210 6198 /* The punit delays the write of the frequency and voltage until it ··· 6230 6216 6231 6217 void gen6_rps_busy(struct drm_i915_private *dev_priv) 6232 6218 { 6233 - mutex_lock(&dev_priv->rps.hw_lock); 6234 - if (dev_priv->rps.enabled) { 6219 + struct intel_rps *rps = 
&dev_priv->gt_pm.rps; 6220 + 6221 + mutex_lock(&dev_priv->pcu_lock); 6222 + if (rps->enabled) { 6235 6223 u8 freq; 6236 6224 6237 6225 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) 6238 6226 gen6_rps_reset_ei(dev_priv); 6239 6227 I915_WRITE(GEN6_PMINTRMSK, 6240 - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); 6228 + gen6_rps_pm_mask(dev_priv, rps->cur_freq)); 6241 6229 6242 6230 gen6_enable_rps_interrupts(dev_priv); 6243 6231 6244 6232 /* Use the user's desired frequency as a guide, but for better 6245 6233 * performance, jump directly to RPe as our starting frequency. 6246 6234 */ 6247 - freq = max(dev_priv->rps.cur_freq, 6248 - dev_priv->rps.efficient_freq); 6235 + freq = max(rps->cur_freq, 6236 + rps->efficient_freq); 6249 6237 6250 6238 if (intel_set_rps(dev_priv, 6251 6239 clamp(freq, 6252 - dev_priv->rps.min_freq_softlimit, 6253 - dev_priv->rps.max_freq_softlimit))) 6240 + rps->min_freq_softlimit, 6241 + rps->max_freq_softlimit))) 6254 6242 DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); 6255 6243 } 6256 - mutex_unlock(&dev_priv->rps.hw_lock); 6244 + mutex_unlock(&dev_priv->pcu_lock); 6257 6245 } 6258 6246 6259 6247 void gen6_rps_idle(struct drm_i915_private *dev_priv) 6260 6248 { 6249 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6250 + 6261 6251 /* Flush our bottom-half so that it does not race with us 6262 6252 * setting the idle frequency and so that it is bounded by 6263 6253 * our rpm wakeref. 
And then disable the interrupts to stop any ··· 6269 6251 */ 6270 6252 gen6_disable_rps_interrupts(dev_priv); 6271 6253 6272 - mutex_lock(&dev_priv->rps.hw_lock); 6273 - if (dev_priv->rps.enabled) { 6254 + mutex_lock(&dev_priv->pcu_lock); 6255 + if (rps->enabled) { 6274 6256 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 6275 6257 vlv_set_rps_idle(dev_priv); 6276 6258 else 6277 - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); 6278 - dev_priv->rps.last_adj = 0; 6259 + gen6_set_rps(dev_priv, rps->idle_freq); 6260 + rps->last_adj = 0; 6279 6261 I915_WRITE(GEN6_PMINTRMSK, 6280 6262 gen6_sanitize_rps_pm_mask(dev_priv, ~0)); 6281 6263 } 6282 - mutex_unlock(&dev_priv->rps.hw_lock); 6264 + mutex_unlock(&dev_priv->pcu_lock); 6283 6265 } 6284 6266 6285 6267 void gen6_rps_boost(struct drm_i915_gem_request *rq, 6286 - struct intel_rps_client *rps) 6268 + struct intel_rps_client *rps_client) 6287 6269 { 6288 - struct drm_i915_private *i915 = rq->i915; 6270 + struct intel_rps *rps = &rq->i915->gt_pm.rps; 6289 6271 unsigned long flags; 6290 6272 bool boost; 6291 6273 6292 6274 /* This is intentionally racy! We peek at the state here, then 6293 6275 * validate inside the RPS worker. 6294 6276 */ 6295 - if (!i915->rps.enabled) 6277 + if (!rps->enabled) 6296 6278 return; 6297 6279 6298 6280 boost = false; 6299 6281 spin_lock_irqsave(&rq->lock, flags); 6300 6282 if (!rq->waitboost && !i915_gem_request_completed(rq)) { 6301 - atomic_inc(&i915->rps.num_waiters); 6283 + atomic_inc(&rps->num_waiters); 6302 6284 rq->waitboost = true; 6303 6285 boost = true; 6304 6286 } ··· 6306 6288 if (!boost) 6307 6289 return; 6308 6290 6309 - if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq) 6310 - schedule_work(&i915->rps.work); 6291 + if (READ_ONCE(rps->cur_freq) < rps->boost_freq) 6292 + schedule_work(&rps->work); 6311 6293 6312 - atomic_inc(rps ? &rps->boosts : &i915->rps.boosts); 6294 + atomic_inc(rps_client ? 
&rps_client->boosts : &rps->boosts); 6313 6295 } 6314 6296 6315 6297 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) 6316 6298 { 6299 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6317 6300 int err; 6318 6301 6319 - lockdep_assert_held(&dev_priv->rps.hw_lock); 6320 - GEM_BUG_ON(val > dev_priv->rps.max_freq); 6321 - GEM_BUG_ON(val < dev_priv->rps.min_freq); 6302 + lockdep_assert_held(&dev_priv->pcu_lock); 6303 + GEM_BUG_ON(val > rps->max_freq); 6304 + GEM_BUG_ON(val < rps->min_freq); 6322 6305 6323 - if (!dev_priv->rps.enabled) { 6324 - dev_priv->rps.cur_freq = val; 6306 + if (!rps->enabled) { 6307 + rps->cur_freq = val; 6325 6308 return 0; 6326 6309 } 6327 6310 ··· 6345 6326 I915_WRITE(GEN6_RP_CONTROL, 0); 6346 6327 } 6347 6328 6348 - static void gen6_disable_rps(struct drm_i915_private *dev_priv) 6329 + static void gen6_disable_rc6(struct drm_i915_private *dev_priv) 6349 6330 { 6350 6331 I915_WRITE(GEN6_RC_CONTROL, 0); 6332 + } 6333 + 6334 + static void gen6_disable_rps(struct drm_i915_private *dev_priv) 6335 + { 6351 6336 I915_WRITE(GEN6_RPNSWREQ, 1 << 31); 6352 6337 I915_WRITE(GEN6_RP_CONTROL, 0); 6353 6338 } 6354 6339 6355 - static void cherryview_disable_rps(struct drm_i915_private *dev_priv) 6340 + static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) 6356 6341 { 6357 6342 I915_WRITE(GEN6_RC_CONTROL, 0); 6358 6343 } 6359 6344 6360 - static void valleyview_disable_rps(struct drm_i915_private *dev_priv) 6345 + static void cherryview_disable_rps(struct drm_i915_private *dev_priv) 6361 6346 { 6362 - /* we're doing forcewake before Disabling RC6, 6347 + I915_WRITE(GEN6_RP_CONTROL, 0); 6348 + } 6349 + 6350 + static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) 6351 + { 6352 + /* We're doing forcewake before Disabling RC6, 6363 6353 * This what the BIOS expects when going into suspend */ 6364 6354 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6365 6355 6366 6356 I915_WRITE(GEN6_RC_CONTROL, 0); 6367 6357 6368 
6358 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6359 + } 6360 + 6361 + static void valleyview_disable_rps(struct drm_i915_private *dev_priv) 6362 + { 6363 + I915_WRITE(GEN6_RP_CONTROL, 0); 6369 6364 } 6370 6365 6371 6366 static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) ··· 6504 6471 6505 6472 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) 6506 6473 { 6474 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6475 + 6507 6476 /* All of these values are in units of 50MHz */ 6508 6477 6509 6478 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 6510 6479 if (IS_GEN9_LP(dev_priv)) { 6511 6480 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); 6512 - dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; 6513 - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 6514 - dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; 6481 + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; 6482 + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; 6483 + rps->min_freq = (rp_state_cap >> 0) & 0xff; 6515 6484 } else { 6516 6485 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 6517 - dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; 6518 - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 6519 - dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; 6486 + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; 6487 + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; 6488 + rps->min_freq = (rp_state_cap >> 16) & 0xff; 6520 6489 } 6521 6490 /* hw_max = RP0 until we check for overclocking */ 6522 - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; 6491 + rps->max_freq = rps->rp0_freq; 6523 6492 6524 - dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; 6493 + rps->efficient_freq = rps->rp1_freq; 6525 6494 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || 6526 6495 IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { 6527 6496 u32 ddcc_status = 0; ··· 6531 6496 if (sandybridge_pcode_read(dev_priv, 6532 6497 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 6533 
6498 &ddcc_status) == 0) 6534 - dev_priv->rps.efficient_freq = 6499 + rps->efficient_freq = 6535 6500 clamp_t(u8, 6536 6501 ((ddcc_status >> 8) & 0xff), 6537 - dev_priv->rps.min_freq, 6538 - dev_priv->rps.max_freq); 6502 + rps->min_freq, 6503 + rps->max_freq); 6539 6504 } 6540 6505 6541 6506 if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { 6542 6507 /* Store the frequency values in 16.66 MHZ units, which is 6543 6508 * the natural hardware unit for SKL 6544 6509 */ 6545 - dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; 6546 - dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; 6547 - dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; 6548 - dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; 6549 - dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; 6510 + rps->rp0_freq *= GEN9_FREQ_SCALER; 6511 + rps->rp1_freq *= GEN9_FREQ_SCALER; 6512 + rps->min_freq *= GEN9_FREQ_SCALER; 6513 + rps->max_freq *= GEN9_FREQ_SCALER; 6514 + rps->efficient_freq *= GEN9_FREQ_SCALER; 6550 6515 } 6551 6516 } 6552 6517 6553 6518 static void reset_rps(struct drm_i915_private *dev_priv, 6554 6519 int (*set)(struct drm_i915_private *, u8)) 6555 6520 { 6556 - u8 freq = dev_priv->rps.cur_freq; 6521 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6522 + u8 freq = rps->cur_freq; 6557 6523 6558 6524 /* force a reset */ 6559 - dev_priv->rps.power = -1; 6560 - dev_priv->rps.cur_freq = -1; 6525 + rps->power = -1; 6526 + rps->cur_freq = -1; 6561 6527 6562 6528 if (set(dev_priv, freq)) 6563 6529 DRM_ERROR("Failed to reset RPS to initial values\n"); ··· 6571 6535 6572 6536 /* Program defaults and thresholds for RPS*/ 6573 6537 I915_WRITE(GEN6_RC_VIDEO_FREQ, 6574 - GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); 6538 + GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); 6575 6539 6576 6540 /* 1 second timeout*/ 6577 6541 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, ··· 6625 6589 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); 6626 6590 6627 6591 /* 3a: Enable RC6 */ 6628 - if (intel_enable_rc6() & INTEL_RC6_ENABLE) 6592 + if (intel_rc6_enabled() & 
INTEL_RC6_ENABLE) 6629 6593 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 6630 6594 DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); 6631 6595 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ ··· 6645 6609 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6646 6610 } 6647 6611 6648 - static void gen8_enable_rps(struct drm_i915_private *dev_priv) 6612 + static void gen8_enable_rc6(struct drm_i915_private *dev_priv) 6649 6613 { 6650 6614 struct intel_engine_cs *engine; 6651 6615 enum intel_engine_id id; ··· 6654 6618 /* 1a: Software RC state - RC0 */ 6655 6619 I915_WRITE(GEN6_RC_STATE, 0); 6656 6620 6657 - /* 1c & 1d: Get forcewake during program sequence. Although the driver 6621 + /* 1b: Get forcewake during program sequence. Although the driver 6658 6622 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 6659 6623 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6660 6624 ··· 6668 6632 for_each_engine(engine, dev_priv, id) 6669 6633 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 6670 6634 I915_WRITE(GEN6_RC_SLEEP, 0); 6671 - if (IS_BROADWELL(dev_priv)) 6672 - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 6673 - else 6674 - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ 6635 + I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 6675 6636 6676 6637 /* 3: Enable RC6 */ 6677 - if (intel_enable_rc6() & INTEL_RC6_ENABLE) 6638 + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) 6678 6639 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 6679 6640 intel_print_rc6_info(dev_priv, rc6_mask); 6680 - if (IS_BROADWELL(dev_priv)) 6681 - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 6682 - GEN7_RC_CTL_TO_MODE | 6683 - rc6_mask); 6684 - else 6685 - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 6686 - GEN6_RC_CTL_EI_MODE(1) | 6687 - rc6_mask); 6688 6641 6689 - /* 4 Program defaults and thresholds for RPS*/ 6642 + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 6643 + GEN7_RC_CTL_TO_MODE | 6644 + 
rc6_mask); 6645 + 6646 + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6647 + } 6648 + 6649 + static void gen8_enable_rps(struct drm_i915_private *dev_priv) 6650 + { 6651 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6652 + 6653 + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6654 + 6655 + /* 1 Program defaults and thresholds for RPS*/ 6690 6656 I915_WRITE(GEN6_RPNSWREQ, 6691 - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 6657 + HSW_FREQUENCY(rps->rp1_freq)); 6692 6658 I915_WRITE(GEN6_RC_VIDEO_FREQ, 6693 - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 6659 + HSW_FREQUENCY(rps->rp1_freq)); 6694 6660 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ 6695 6661 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ 6696 6662 6697 6663 /* Docs recommend 900MHz, and 300 MHz respectively */ 6698 6664 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 6699 - dev_priv->rps.max_freq_softlimit << 24 | 6700 - dev_priv->rps.min_freq_softlimit << 16); 6665 + rps->max_freq_softlimit << 24 | 6666 + rps->min_freq_softlimit << 16); 6701 6667 6702 6668 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ 6703 6669 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ ··· 6708 6670 6709 6671 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 6710 6672 6711 - /* 5: Enable RPS */ 6673 + /* 2: Enable RPS */ 6712 6674 I915_WRITE(GEN6_RP_CONTROL, 6713 6675 GEN6_RP_MEDIA_TURBO | 6714 6676 GEN6_RP_MEDIA_HW_NORMAL_MODE | ··· 6717 6679 GEN6_RP_UP_BUSY_AVG | 6718 6680 GEN6_RP_DOWN_IDLE_AVG); 6719 6681 6720 - /* 6: Ring frequency + overclocking (our driver does this later */ 6721 - 6722 6682 reset_rps(dev_priv, gen6_set_rps); 6723 6683 6724 6684 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6725 6685 } 6726 6686 6727 - static void gen6_enable_rps(struct drm_i915_private *dev_priv) 6687 + static void gen6_enable_rc6(struct drm_i915_private *dev_priv) 6728 6688 { 6729 6689 struct intel_engine_cs *engine; 6730 6690 enum 
intel_engine_id id; ··· 6731 6695 int rc6_mode; 6732 6696 int ret; 6733 6697 6734 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 6735 - 6736 - /* Here begins a magic sequence of register writes to enable 6737 - * auto-downclocking. 6738 - * 6739 - * Perhaps there might be some value in exposing these to 6740 - * userspace... 6741 - */ 6742 6698 I915_WRITE(GEN6_RC_STATE, 0); 6743 6699 6744 6700 /* Clear the DBG now so we don't confuse earlier errors */ ··· 6764 6736 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 6765 6737 6766 6738 /* Check if we are enabling RC6 */ 6767 - rc6_mode = intel_enable_rc6(); 6739 + rc6_mode = intel_rc6_enabled(); 6768 6740 if (rc6_mode & INTEL_RC6_ENABLE) 6769 6741 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; 6770 6742 ··· 6784 6756 GEN6_RC_CTL_EI_MODE(1) | 6785 6757 GEN6_RC_CTL_HW_ENABLE); 6786 6758 6787 - /* Power down if completely idle for over 50ms */ 6788 - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); 6789 - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 6790 - 6791 - reset_rps(dev_priv, gen6_set_rps); 6792 - 6793 6759 rc6vids = 0; 6794 6760 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 6795 6761 if (IS_GEN6(dev_priv) && ret) { ··· 6801 6779 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6802 6780 } 6803 6781 6782 + static void gen6_enable_rps(struct drm_i915_private *dev_priv) 6783 + { 6784 + /* Here begins a magic sequence of register writes to enable 6785 + * auto-downclocking. 6786 + * 6787 + * Perhaps there might be some value in exposing these to 6788 + * userspace... 
6789 + */ 6790 + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6791 + 6792 + /* Power down if completely idle for over 50ms */ 6793 + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); 6794 + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 6795 + 6796 + reset_rps(dev_priv, gen6_set_rps); 6797 + 6798 + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6799 + } 6800 + 6804 6801 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) 6805 6802 { 6803 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 6806 6804 int min_freq = 15; 6807 6805 unsigned int gpu_freq; 6808 6806 unsigned int max_ia_freq, min_ring_freq; ··· 6830 6788 int scaling_factor = 180; 6831 6789 struct cpufreq_policy *policy; 6832 6790 6833 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 6791 + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); 6834 6792 6835 6793 policy = cpufreq_cpu_get(0); 6836 6794 if (policy) { ··· 6853 6811 6854 6812 if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { 6855 6813 /* Convert GT frequency to 50 HZ units */ 6856 - min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; 6857 - max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; 6814 + min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER; 6815 + max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER; 6858 6816 } else { 6859 - min_gpu_freq = dev_priv->rps.min_freq; 6860 - max_gpu_freq = dev_priv->rps.max_freq; 6817 + min_gpu_freq = rps->min_freq; 6818 + max_gpu_freq = rps->max_freq; 6861 6819 } 6862 6820 6863 6821 /* ··· 7108 7066 7109 7067 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) 7110 7068 { 7111 - dev_priv->rps.gpll_ref_freq = 7069 + dev_priv->gt_pm.rps.gpll_ref_freq = 7112 7070 vlv_get_cck_clock(dev_priv, "GPLL ref", 7113 7071 CCK_GPLL_CLOCK_CONTROL, 7114 7072 dev_priv->czclk_freq); 7115 7073 7116 7074 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", 7117 - dev_priv->rps.gpll_ref_freq); 7075 + dev_priv->gt_pm.rps.gpll_ref_freq); 7118 7076 } 7119 7077 7120 7078 static void 
valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) 7121 7079 { 7080 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 7122 7081 u32 val; 7123 7082 7124 7083 valleyview_setup_pctx(dev_priv); ··· 7141 7098 } 7142 7099 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 7143 7100 7144 - dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); 7145 - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 7101 + rps->max_freq = valleyview_rps_max_freq(dev_priv); 7102 + rps->rp0_freq = rps->max_freq; 7146 7103 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 7147 - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 7148 - dev_priv->rps.max_freq); 7104 + intel_gpu_freq(dev_priv, rps->max_freq), 7105 + rps->max_freq); 7149 7106 7150 - dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); 7107 + rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); 7151 7108 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 7152 - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 7153 - dev_priv->rps.efficient_freq); 7109 + intel_gpu_freq(dev_priv, rps->efficient_freq), 7110 + rps->efficient_freq); 7154 7111 7155 - dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); 7112 + rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); 7156 7113 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 7157 - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 7158 - dev_priv->rps.rp1_freq); 7114 + intel_gpu_freq(dev_priv, rps->rp1_freq), 7115 + rps->rp1_freq); 7159 7116 7160 - dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); 7117 + rps->min_freq = valleyview_rps_min_freq(dev_priv); 7161 7118 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 7162 - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 7163 - dev_priv->rps.min_freq); 7119 + intel_gpu_freq(dev_priv, rps->min_freq), 7120 + rps->min_freq); 7164 7121 } 7165 7122 7166 7123 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) 7167 7124 { 7125 + struct intel_rps *rps = 
&dev_priv->gt_pm.rps; 7168 7126 u32 val; 7169 7127 7170 7128 cherryview_setup_pctx(dev_priv); ··· 7186 7142 } 7187 7143 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 7188 7144 7189 - dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); 7190 - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 7145 + rps->max_freq = cherryview_rps_max_freq(dev_priv); 7146 + rps->rp0_freq = rps->max_freq; 7191 7147 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 7192 - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 7193 - dev_priv->rps.max_freq); 7148 + intel_gpu_freq(dev_priv, rps->max_freq), 7149 + rps->max_freq); 7194 7150 7195 - dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); 7151 + rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); 7196 7152 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 7197 - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 7198 - dev_priv->rps.efficient_freq); 7153 + intel_gpu_freq(dev_priv, rps->efficient_freq), 7154 + rps->efficient_freq); 7199 7155 7200 - dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); 7156 + rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); 7201 7157 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", 7202 - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 7203 - dev_priv->rps.rp1_freq); 7158 + intel_gpu_freq(dev_priv, rps->rp1_freq), 7159 + rps->rp1_freq); 7204 7160 7205 - dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); 7161 + rps->min_freq = cherryview_rps_min_freq(dev_priv); 7206 7162 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 7207 - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 7208 - dev_priv->rps.min_freq); 7163 + intel_gpu_freq(dev_priv, rps->min_freq), 7164 + rps->min_freq); 7209 7165 7210 - WARN_ONCE((dev_priv->rps.max_freq | 7211 - dev_priv->rps.efficient_freq | 7212 - dev_priv->rps.rp1_freq | 7213 - dev_priv->rps.min_freq) & 1, 7166 + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | 7167 + rps->min_freq) & 1, 7214 7168 
"Odd GPU freq values\n"); 7215 7169 } 7216 7170 ··· 7217 7175 valleyview_cleanup_pctx(dev_priv); 7218 7176 } 7219 7177 7220 - static void cherryview_enable_rps(struct drm_i915_private *dev_priv) 7178 + static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) 7221 7179 { 7222 7180 struct intel_engine_cs *engine; 7223 7181 enum intel_engine_id id; 7224 - u32 gtfifodbg, val, rc6_mode = 0, pcbr; 7225 - 7226 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7182 + u32 gtfifodbg, rc6_mode = 0, pcbr; 7227 7183 7228 7184 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | 7229 7185 GT_FIFO_FREE_ENTRIES_CHV); ··· 7252 7212 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ 7253 7213 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); 7254 7214 7255 - /* allows RC6 residency counter to work */ 7215 + /* Allows RC6 residency counter to work */ 7256 7216 I915_WRITE(VLV_COUNTER_CONTROL, 7257 7217 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 7258 7218 VLV_MEDIA_RC6_COUNT_EN | ··· 7262 7222 pcbr = I915_READ(VLV_PCBR); 7263 7223 7264 7224 /* 3: Enable RC6 */ 7265 - if ((intel_enable_rc6() & INTEL_RC6_ENABLE) && 7225 + if ((intel_rc6_enabled() & INTEL_RC6_ENABLE) && 7266 7226 (pcbr >> VLV_PCBR_ADDR_SHIFT)) 7267 7227 rc6_mode = GEN7_RC_CTL_TO_MODE; 7268 7228 7269 7229 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 7270 7230 7271 - /* 4 Program defaults and thresholds for RPS*/ 7231 + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 7232 + } 7233 + 7234 + static void cherryview_enable_rps(struct drm_i915_private *dev_priv) 7235 + { 7236 + u32 val; 7237 + 7238 + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 7239 + 7240 + /* 1: Program defaults and thresholds for RPS*/ 7272 7241 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 7273 7242 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 7274 7243 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); ··· 7286 7237 7287 7238 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 7288 7239 7289 - /* 5: Enable RPS */ 7240 + /* 2: Enable RPS */ 7290 7241 
I915_WRITE(GEN6_RP_CONTROL, 7291 7242 GEN6_RP_MEDIA_HW_NORMAL_MODE | 7292 7243 GEN6_RP_MEDIA_IS_GFX | ··· 7313 7264 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 7314 7265 } 7315 7266 7316 - static void valleyview_enable_rps(struct drm_i915_private *dev_priv) 7267 + static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) 7317 7268 { 7318 7269 struct intel_engine_cs *engine; 7319 7270 enum intel_engine_id id; 7320 - u32 gtfifodbg, val, rc6_mode = 0; 7321 - 7322 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7271 + u32 gtfifodbg, rc6_mode = 0; 7323 7272 7324 7273 valleyview_check_pctx(dev_priv); 7325 7274 ··· 7328 7281 I915_WRITE(GTFIFODBG, gtfifodbg); 7329 7282 } 7330 7283 7331 - /* If VLV, Forcewake all wells, else re-direct to regular path */ 7332 7284 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 7333 7285 7334 7286 /* Disable RC states. */ 7335 7287 I915_WRITE(GEN6_RC_CONTROL, 0); 7288 + 7289 + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); 7290 + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 7291 + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 7292 + 7293 + for_each_engine(engine, dev_priv, id) 7294 + I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 7295 + 7296 + I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); 7297 + 7298 + /* Allows RC6 residency counter to work */ 7299 + I915_WRITE(VLV_COUNTER_CONTROL, 7300 + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 7301 + VLV_MEDIA_RC0_COUNT_EN | 7302 + VLV_RENDER_RC0_COUNT_EN | 7303 + VLV_MEDIA_RC6_COUNT_EN | 7304 + VLV_RENDER_RC6_COUNT_EN)); 7305 + 7306 + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) 7307 + rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; 7308 + 7309 + intel_print_rc6_info(dev_priv, rc6_mode); 7310 + 7311 + I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 7312 + 7313 + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 7314 + } 7315 + 7316 + static void valleyview_enable_rps(struct drm_i915_private *dev_priv) 7317 + { 7318 + u32 val; 7319 + 7320 + 
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 7336 7321 7337 7322 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 7338 7323 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); ··· 7381 7302 GEN6_RP_ENABLE | 7382 7303 GEN6_RP_UP_BUSY_AVG | 7383 7304 GEN6_RP_DOWN_IDLE_CONT); 7384 - 7385 - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); 7386 - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 7387 - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 7388 - 7389 - for_each_engine(engine, dev_priv, id) 7390 - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 7391 - 7392 - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); 7393 - 7394 - /* allows RC6 residency counter to work */ 7395 - I915_WRITE(VLV_COUNTER_CONTROL, 7396 - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 7397 - VLV_MEDIA_RC0_COUNT_EN | 7398 - VLV_RENDER_RC0_COUNT_EN | 7399 - VLV_MEDIA_RC6_COUNT_EN | 7400 - VLV_RENDER_RC6_COUNT_EN)); 7401 - 7402 - if (intel_enable_rc6() & INTEL_RC6_ENABLE) 7403 - rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; 7404 - 7405 - intel_print_rc6_info(dev_priv, rc6_mode); 7406 - 7407 - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 7408 7305 7409 7306 /* Setting Fixed Bias */ 7410 7307 val = VLV_OVERRIDE_EN | ··· 7589 7534 7590 7535 lockdep_assert_held(&mchdev_lock); 7591 7536 7592 - pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); 7537 + pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); 7593 7538 pxvid = (pxvid >> 24) & 0x7f; 7594 7539 ext_v = pvid_to_extvid(dev_priv, pxvid); 7595 7540 ··· 7876 7821 7877 7822 void intel_init_gt_powersave(struct drm_i915_private *dev_priv) 7878 7823 { 7824 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 7825 + 7879 7826 /* 7880 7827 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a 7881 7828 * requirement. 
··· 7888 7831 } 7889 7832 7890 7833 mutex_lock(&dev_priv->drm.struct_mutex); 7891 - mutex_lock(&dev_priv->rps.hw_lock); 7834 + mutex_lock(&dev_priv->pcu_lock); 7892 7835 7893 7836 /* Initialize RPS limits (for userspace) */ 7894 7837 if (IS_CHERRYVIEW(dev_priv)) ··· 7899 7842 gen6_init_rps_frequencies(dev_priv); 7900 7843 7901 7844 /* Derive initial user preferences/limits from the hardware limits */ 7902 - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; 7903 - dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; 7845 + rps->idle_freq = rps->min_freq; 7846 + rps->cur_freq = rps->idle_freq; 7904 7847 7905 - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 7906 - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; 7848 + rps->max_freq_softlimit = rps->max_freq; 7849 + rps->min_freq_softlimit = rps->min_freq; 7907 7850 7908 7851 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 7909 - dev_priv->rps.min_freq_softlimit = 7852 + rps->min_freq_softlimit = 7910 7853 max_t(int, 7911 - dev_priv->rps.efficient_freq, 7854 + rps->efficient_freq, 7912 7855 intel_freq_opcode(dev_priv, 450)); 7913 7856 7914 7857 /* After setting max-softlimit, find the overclock max freq */ ··· 7919 7862 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params); 7920 7863 if (params & BIT(31)) { /* OC supported */ 7921 7864 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", 7922 - (dev_priv->rps.max_freq & 0xff) * 50, 7865 + (rps->max_freq & 0xff) * 50, 7923 7866 (params & 0xff) * 50); 7924 - dev_priv->rps.max_freq = params & 0xff; 7867 + rps->max_freq = params & 0xff; 7925 7868 } 7926 7869 } 7927 7870 7928 7871 /* Finally allow us to boost to max by default */ 7929 - dev_priv->rps.boost_freq = dev_priv->rps.max_freq; 7872 + rps->boost_freq = rps->max_freq; 7930 7873 7931 - mutex_unlock(&dev_priv->rps.hw_lock); 7874 + mutex_unlock(&dev_priv->pcu_lock); 7932 7875 mutex_unlock(&dev_priv->drm.struct_mutex); 7933 7876 7934 7877 
intel_autoenable_gt_powersave(dev_priv); ··· 7956 7899 if (INTEL_GEN(dev_priv) < 6) 7957 7900 return; 7958 7901 7959 - if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) 7902 + if (cancel_delayed_work_sync(&dev_priv->gt_pm.autoenable_work)) 7960 7903 intel_runtime_pm_put(dev_priv); 7961 7904 7962 7905 /* gen6_rps_idle() will be called later to disable interrupts */ ··· 7964 7907 7965 7908 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) 7966 7909 { 7967 - dev_priv->rps.enabled = true; /* force disabling */ 7910 + dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ 7911 + dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ 7968 7912 intel_disable_gt_powersave(dev_priv); 7969 7913 7970 7914 gen6_reset_rps_interrupts(dev_priv); 7971 7915 } 7972 7916 7973 - void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) 7917 + static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) 7974 7918 { 7975 - if (!READ_ONCE(dev_priv->rps.enabled)) 7919 + lockdep_assert_held(&i915->pcu_lock); 7920 + 7921 + if (!i915->gt_pm.llc_pstate.enabled) 7976 7922 return; 7977 7923 7978 - mutex_lock(&dev_priv->rps.hw_lock); 7924 + /* Currently there is no HW configuration to be done to disable. 
*/ 7979 7925 7980 - if (INTEL_GEN(dev_priv) >= 9) { 7981 - gen9_disable_rc6(dev_priv); 7982 - gen9_disable_rps(dev_priv); 7983 - } else if (IS_CHERRYVIEW(dev_priv)) { 7984 - cherryview_disable_rps(dev_priv); 7985 - } else if (IS_VALLEYVIEW(dev_priv)) { 7986 - valleyview_disable_rps(dev_priv); 7987 - } else if (INTEL_GEN(dev_priv) >= 6) { 7988 - gen6_disable_rps(dev_priv); 7989 - } else if (IS_IRONLAKE_M(dev_priv)) { 7990 - ironlake_disable_drps(dev_priv); 7991 - } 7992 - 7993 - dev_priv->rps.enabled = false; 7994 - mutex_unlock(&dev_priv->rps.hw_lock); 7926 + i915->gt_pm.llc_pstate.enabled = false; 7995 7927 } 7996 7928 7997 - void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) 7929 + static void intel_disable_rc6(struct drm_i915_private *dev_priv) 7998 7930 { 7999 - /* We shouldn't be disabling as we submit, so this should be less 8000 - * racy than it appears! 8001 - */ 8002 - if (READ_ONCE(dev_priv->rps.enabled)) 7931 + lockdep_assert_held(&dev_priv->pcu_lock); 7932 + 7933 + if (!dev_priv->gt_pm.rc6.enabled) 8003 7934 return; 8004 7935 8005 - /* Powersaving is controlled by the host when inside a VM */ 8006 - if (intel_vgpu_active(dev_priv)) 7936 + if (INTEL_GEN(dev_priv) >= 9) 7937 + gen9_disable_rc6(dev_priv); 7938 + else if (IS_CHERRYVIEW(dev_priv)) 7939 + cherryview_disable_rc6(dev_priv); 7940 + else if (IS_VALLEYVIEW(dev_priv)) 7941 + valleyview_disable_rc6(dev_priv); 7942 + else if (INTEL_GEN(dev_priv) >= 6) 7943 + gen6_disable_rc6(dev_priv); 7944 + 7945 + dev_priv->gt_pm.rc6.enabled = false; 7946 + } 7947 + 7948 + static void intel_disable_rps(struct drm_i915_private *dev_priv) 7949 + { 7950 + lockdep_assert_held(&dev_priv->pcu_lock); 7951 + 7952 + if (!dev_priv->gt_pm.rps.enabled) 8007 7953 return; 8008 7954 8009 - mutex_lock(&dev_priv->rps.hw_lock); 7955 + if (INTEL_GEN(dev_priv) >= 9) 7956 + gen9_disable_rps(dev_priv); 7957 + else if (IS_CHERRYVIEW(dev_priv)) 7958 + cherryview_disable_rps(dev_priv); 7959 + else if 
(IS_VALLEYVIEW(dev_priv)) 7960 + valleyview_disable_rps(dev_priv); 7961 + else if (INTEL_GEN(dev_priv) >= 6) 7962 + gen6_disable_rps(dev_priv); 7963 + else if (IS_IRONLAKE_M(dev_priv)) 7964 + ironlake_disable_drps(dev_priv); 7965 + 7966 + dev_priv->gt_pm.rps.enabled = false; 7967 + } 7968 + 7969 + void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) 7970 + { 7971 + mutex_lock(&dev_priv->pcu_lock); 7972 + 7973 + intel_disable_rc6(dev_priv); 7974 + intel_disable_rps(dev_priv); 7975 + if (HAS_LLC(dev_priv)) 7976 + intel_disable_llc_pstate(dev_priv); 7977 + 7978 + mutex_unlock(&dev_priv->pcu_lock); 7979 + } 7980 + 7981 + static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) 7982 + { 7983 + lockdep_assert_held(&i915->pcu_lock); 7984 + 7985 + if (i915->gt_pm.llc_pstate.enabled) 7986 + return; 7987 + 7988 + gen6_update_ring_freq(i915); 7989 + 7990 + i915->gt_pm.llc_pstate.enabled = true; 7991 + } 7992 + 7993 + static void intel_enable_rc6(struct drm_i915_private *dev_priv) 7994 + { 7995 + lockdep_assert_held(&dev_priv->pcu_lock); 7996 + 7997 + if (dev_priv->gt_pm.rc6.enabled) 7998 + return; 7999 + 8000 + if (IS_CHERRYVIEW(dev_priv)) 8001 + cherryview_enable_rc6(dev_priv); 8002 + else if (IS_VALLEYVIEW(dev_priv)) 8003 + valleyview_enable_rc6(dev_priv); 8004 + else if (INTEL_GEN(dev_priv) >= 9) 8005 + gen9_enable_rc6(dev_priv); 8006 + else if (IS_BROADWELL(dev_priv)) 8007 + gen8_enable_rc6(dev_priv); 8008 + else if (INTEL_GEN(dev_priv) >= 6) 8009 + gen6_enable_rc6(dev_priv); 8010 + 8011 + dev_priv->gt_pm.rc6.enabled = true; 8012 + } 8013 + 8014 + static void intel_enable_rps(struct drm_i915_private *dev_priv) 8015 + { 8016 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 8017 + 8018 + lockdep_assert_held(&dev_priv->pcu_lock); 8019 + 8020 + if (rps->enabled) 8021 + return; 8010 8022 8011 8023 if (IS_CHERRYVIEW(dev_priv)) { 8012 8024 cherryview_enable_rps(dev_priv); 8013 8025 } else if (IS_VALLEYVIEW(dev_priv)) { 8014 8026 
valleyview_enable_rps(dev_priv); 8015 8027 } else if (INTEL_GEN(dev_priv) >= 9) { 8016 - gen9_enable_rc6(dev_priv); 8017 8028 gen9_enable_rps(dev_priv); 8018 - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) 8019 - gen6_update_ring_freq(dev_priv); 8020 8029 } else if (IS_BROADWELL(dev_priv)) { 8021 8030 gen8_enable_rps(dev_priv); 8022 - gen6_update_ring_freq(dev_priv); 8023 8031 } else if (INTEL_GEN(dev_priv) >= 6) { 8024 8032 gen6_enable_rps(dev_priv); 8025 - gen6_update_ring_freq(dev_priv); 8026 8033 } else if (IS_IRONLAKE_M(dev_priv)) { 8027 8034 ironlake_enable_drps(dev_priv); 8028 8035 intel_init_emon(dev_priv); 8029 8036 } 8030 8037 8031 - WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); 8032 - WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); 8038 + WARN_ON(rps->max_freq < rps->min_freq); 8039 + WARN_ON(rps->idle_freq > rps->max_freq); 8033 8040 8034 - WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); 8035 - WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); 8041 + WARN_ON(rps->efficient_freq < rps->min_freq); 8042 + WARN_ON(rps->efficient_freq > rps->max_freq); 8036 8043 8037 - dev_priv->rps.enabled = true; 8038 - mutex_unlock(&dev_priv->rps.hw_lock); 8044 + rps->enabled = true; 8045 + } 8046 + 8047 + void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) 8048 + { 8049 + /* Powersaving is controlled by the host when inside a VM */ 8050 + if (intel_vgpu_active(dev_priv)) 8051 + return; 8052 + 8053 + mutex_lock(&dev_priv->pcu_lock); 8054 + 8055 + intel_enable_rc6(dev_priv); 8056 + intel_enable_rps(dev_priv); 8057 + if (HAS_LLC(dev_priv)) 8058 + intel_enable_llc_pstate(dev_priv); 8059 + 8060 + mutex_unlock(&dev_priv->pcu_lock); 8039 8061 } 8040 8062 8041 8063 static void __intel_autoenable_gt_powersave(struct work_struct *work) 8042 8064 { 8043 8065 struct drm_i915_private *dev_priv = 8044 - container_of(work, typeof(*dev_priv), rps.autoenable_work.work); 8066 + container_of(work, 8067 + 
typeof(*dev_priv), 8068 + gt_pm.autoenable_work.work); 8045 8069 struct intel_engine_cs *rcs; 8046 8070 struct drm_i915_gem_request *req; 8047 - 8048 - if (READ_ONCE(dev_priv->rps.enabled)) 8049 - goto out; 8050 8071 8051 8072 rcs = dev_priv->engine[RCS]; 8052 8073 if (rcs->last_retired_context) ··· 8153 8018 8154 8019 void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) 8155 8020 { 8156 - if (READ_ONCE(dev_priv->rps.enabled)) 8157 - return; 8158 - 8159 8021 if (IS_IRONLAKE_M(dev_priv)) { 8160 8022 ironlake_enable_drps(dev_priv); 8161 8023 intel_init_emon(dev_priv); ··· 8170 8038 * runtime resume it's necessary). 8171 8039 */ 8172 8040 if (queue_delayed_work(dev_priv->wq, 8173 - &dev_priv->rps.autoenable_work, 8041 + &dev_priv->gt_pm.autoenable_work, 8174 8042 round_jiffies_up_relative(HZ))) 8175 8043 intel_runtime_pm_get_noresume(dev_priv); 8176 8044 } ··· 8579 8447 8580 8448 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) 8581 8449 { 8450 + /* The GTT cache must be disabled if the system is using 2M pages. */ 8451 + bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv, 8452 + I915_GTT_PAGE_SIZE_2M); 8582 8453 enum pipe pipe; 8583 8454 8584 8455 ilk_init_lp_watermarks(dev_priv); ··· 8616 8481 /* WaProgramL3SqcReg1Default:bdw */ 8617 8482 gen8_set_l3sqc_credits(dev_priv, 30, 2); 8618 8483 8619 - /* 8620 - * WaGttCachingOffByDefault:bdw 8621 - * GTT cache may not work with big pages, so if those 8622 - * are ever enabled GTT cache may need to be disabled. 8623 - */ 8624 - I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); 8484 + /* WaGttCachingOffByDefault:bdw */ 8485 + I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? 
GTT_CACHE_EN_ALL : 0); 8625 8486 8626 8487 /* WaKVMNotificationOnConfigChange:bdw */ 8627 8488 I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) ··· 9198 9067 { 9199 9068 int status; 9200 9069 9201 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 9070 + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); 9202 9071 9203 9072 /* GEN6_PCODE_* are outside of the forcewake domain, we can 9204 9073 * use te fw I915_READ variants to reduce the amount of work ··· 9245 9114 { 9246 9115 int status; 9247 9116 9248 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 9117 + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); 9249 9118 9250 9119 /* GEN6_PCODE_* are outside of the forcewake domain, we can 9251 9120 * use te fw I915_READ variants to reduce the amount of work ··· 9322 9191 u32 status; 9323 9192 int ret; 9324 9193 9325 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 9194 + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); 9326 9195 9327 9196 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ 9328 9197 &status) ··· 9364 9233 9365 9234 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) 9366 9235 { 9236 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 9237 + 9367 9238 /* 9368 9239 * N = val - 0xb7 9369 9240 * Slow = Fast = GPLL ref * N 9370 9241 */ 9371 - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000); 9242 + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); 9372 9243 } 9373 9244 9374 9245 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) 9375 9246 { 9376 - return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7; 9247 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 9248 + 9249 + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; 9377 9250 } 9378 9251 9379 9252 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) 9380 9253 { 9254 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 9255 + 9381 9256 /* 9382 9257 * N = 
val / 2 9383 9258 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 9384 9259 */ 9385 - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000); 9260 + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); 9386 9261 } 9387 9262 9388 9263 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) 9389 9264 { 9265 + struct intel_rps *rps = &dev_priv->gt_pm.rps; 9266 + 9390 9267 /* CHV needs even values */ 9391 - return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2; 9268 + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; 9392 9269 } 9393 9270 9394 9271 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) ··· 9427 9288 9428 9289 void intel_pm_setup(struct drm_i915_private *dev_priv) 9429 9290 { 9430 - mutex_init(&dev_priv->rps.hw_lock); 9291 + mutex_init(&dev_priv->pcu_lock); 9431 9292 9432 - INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, 9293 + INIT_DELAYED_WORK(&dev_priv->gt_pm.autoenable_work, 9433 9294 __intel_autoenable_gt_powersave); 9434 - atomic_set(&dev_priv->rps.num_waiters, 0); 9295 + atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); 9435 9296 9436 - dev_priv->pm.suspended = false; 9437 - atomic_set(&dev_priv->pm.wakeref_count, 0); 9297 + dev_priv->runtime_pm.suspended = false; 9298 + atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); 9438 9299 } 9439 9300 9440 9301 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, ··· 9487 9348 { 9488 9349 u64 time_hw, units, div; 9489 9350 9490 - if (!intel_enable_rc6()) 9351 + if (!intel_rc6_enabled()) 9491 9352 return 0; 9492 9353 9493 9354 intel_runtime_pm_get(dev_priv);
+10 -1
drivers/gpu/drm/i915/intel_ringbuffer.c
··· 579 579 static void reset_ring_common(struct intel_engine_cs *engine, 580 580 struct drm_i915_gem_request *request) 581 581 { 582 - /* Try to restore the logical GPU state to match the continuation 582 + /* 583 + * RC6 must be prevented until the reset is complete and the engine 584 + * reinitialised. If it occurs in the middle of this sequence, the 585 + * state written to/loaded from the power context is ill-defined (e.g. 586 + * the PP_BASE_DIR may be lost). 587 + */ 588 + assert_forcewakes_active(engine->i915, FORCEWAKE_ALL); 589 + 590 + /* 591 + * Try to restore the logical GPU state to match the continuation 583 592 * of the request queue. If we skip the context/PD restore, then 584 593 * the next request may try to execute assuming that its context 585 594 * is valid and loaded on the GPU and so may try to access invalid
+9
drivers/gpu/drm/i915/intel_ringbuffer.h
··· 7 7 #include "i915_gem_timeline.h" 8 8 #include "i915_selftest.h" 9 9 10 + struct drm_printer; 11 + 10 12 #define I915_CMD_HASH_ORDER 9 11 13 12 14 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, ··· 239 237 240 238 #define EXECLIST_MAX_PORTS 2 241 239 } port[EXECLIST_MAX_PORTS]; 240 + 241 + /** 242 + * @preempt: are we currently handling a preempting context switch? 243 + */ 244 + bool preempt; 242 245 243 246 /** 244 247 * @port_mask: number of execlist ports - 1 ··· 840 833 void intel_engines_reset_default_submission(struct drm_i915_private *i915); 841 834 842 835 bool intel_engine_can_store_dword(struct intel_engine_cs *engine); 836 + 837 + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p); 843 838 844 839 #endif /* _INTEL_RINGBUFFER_H_ */
+17 -14
drivers/gpu/drm/i915/intel_runtime_pm.c
··· 187 187 struct i915_power_well *power_well; 188 188 bool is_enabled; 189 189 190 - if (dev_priv->pm.suspended) 190 + if (dev_priv->runtime_pm.suspended) 191 191 return false; 192 192 193 193 is_enabled = true; ··· 368 368 { 369 369 enum i915_power_well_id id = power_well->id; 370 370 bool wait_fuses = power_well->hsw.has_fuses; 371 - enum skl_power_gate pg; 371 + enum skl_power_gate uninitialized_var(pg); 372 372 u32 val; 373 373 374 374 if (wait_fuses) { ··· 785 785 state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) : 786 786 PUNIT_PWRGT_PWR_GATE(power_well_id); 787 787 788 - mutex_lock(&dev_priv->rps.hw_lock); 788 + mutex_lock(&dev_priv->pcu_lock); 789 789 790 790 #define COND \ 791 791 ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) ··· 806 806 #undef COND 807 807 808 808 out: 809 - mutex_unlock(&dev_priv->rps.hw_lock); 809 + mutex_unlock(&dev_priv->pcu_lock); 810 810 } 811 811 812 812 static void vlv_power_well_enable(struct drm_i915_private *dev_priv, ··· 833 833 mask = PUNIT_PWRGT_MASK(power_well_id); 834 834 ctrl = PUNIT_PWRGT_PWR_ON(power_well_id); 835 835 836 - mutex_lock(&dev_priv->rps.hw_lock); 836 + mutex_lock(&dev_priv->pcu_lock); 837 837 838 838 state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; 839 839 /* ··· 852 852 ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; 853 853 WARN_ON(ctrl != state); 854 854 855 - mutex_unlock(&dev_priv->rps.hw_lock); 855 + mutex_unlock(&dev_priv->pcu_lock); 856 856 857 857 return enabled; 858 858 } ··· 1364 1364 bool enabled; 1365 1365 u32 state, ctrl; 1366 1366 1367 - mutex_lock(&dev_priv->rps.hw_lock); 1367 + mutex_lock(&dev_priv->pcu_lock); 1368 1368 1369 1369 state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe); 1370 1370 /* ··· 1381 1381 ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe); 1382 1382 WARN_ON(ctrl << 16 != state); 1383 1383 1384 - mutex_unlock(&dev_priv->rps.hw_lock); 1384 + mutex_unlock(&dev_priv->pcu_lock); 
1385 1385 1386 1386 return enabled; 1387 1387 } ··· 1396 1396 1397 1397 state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); 1398 1398 1399 - mutex_lock(&dev_priv->rps.hw_lock); 1399 + mutex_lock(&dev_priv->pcu_lock); 1400 1400 1401 1401 #define COND \ 1402 1402 ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state) ··· 1417 1417 #undef COND 1418 1418 1419 1419 out: 1420 - mutex_unlock(&dev_priv->rps.hw_lock); 1420 + mutex_unlock(&dev_priv->pcu_lock); 1421 1421 } 1422 1422 1423 1423 static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, ··· 2809 2809 2810 2810 /* 6. Enable DBUF */ 2811 2811 gen9_dbuf_enable(dev_priv); 2812 + 2813 + if (resume && dev_priv->csr.dmc_payload) 2814 + intel_csr_load_program(dev_priv); 2812 2815 } 2813 2816 2814 2817 static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) ··· 3128 3125 ret = pm_runtime_get_sync(kdev); 3129 3126 WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); 3130 3127 3131 - atomic_inc(&dev_priv->pm.wakeref_count); 3128 + atomic_inc(&dev_priv->runtime_pm.wakeref_count); 3132 3129 assert_rpm_wakelock_held(dev_priv); 3133 3130 } 3134 3131 ··· 3162 3159 return false; 3163 3160 } 3164 3161 3165 - atomic_inc(&dev_priv->pm.wakeref_count); 3162 + atomic_inc(&dev_priv->runtime_pm.wakeref_count); 3166 3163 assert_rpm_wakelock_held(dev_priv); 3167 3164 3168 3165 return true; ··· 3193 3190 assert_rpm_wakelock_held(dev_priv); 3194 3191 pm_runtime_get_noresume(kdev); 3195 3192 3196 - atomic_inc(&dev_priv->pm.wakeref_count); 3193 + atomic_inc(&dev_priv->runtime_pm.wakeref_count); 3197 3194 } 3198 3195 3199 3196 /** ··· 3210 3207 struct device *kdev = &pdev->dev; 3211 3208 3212 3209 assert_rpm_wakelock_held(dev_priv); 3213 - atomic_dec(&dev_priv->pm.wakeref_count); 3210 + atomic_dec(&dev_priv->runtime_pm.wakeref_count); 3214 3211 3215 3212 pm_runtime_mark_last_busy(kdev); 3216 3213 pm_runtime_put_autosuspend(kdev);
+3 -3
drivers/gpu/drm/i915/intel_sideband.c
··· 81 81 { 82 82 u32 val = 0; 83 83 84 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 84 + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); 85 85 86 86 mutex_lock(&dev_priv->sb_lock); 87 87 vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, ··· 95 95 { 96 96 int err; 97 97 98 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 98 + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); 99 99 100 100 mutex_lock(&dev_priv->sb_lock); 101 101 err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, ··· 125 125 { 126 126 u32 val = 0; 127 127 128 - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 128 + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); 129 129 130 130 mutex_lock(&dev_priv->sb_lock); 131 131 vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
+6
drivers/gpu/drm/i915/intel_sprite.c
··· 66 66 1000 * adjusted_mode->crtc_htotal); 67 67 } 68 68 69 + /* FIXME: We should instead only take spinlocks once for the entire update 70 + * instead of once per mmio. */ 71 + #if IS_ENABLED(CONFIG_PROVE_LOCKING) 72 + #define VBLANK_EVASION_TIME_US 250 73 + #else 69 74 #define VBLANK_EVASION_TIME_US 100 75 + #endif 70 76 71 77 /** 72 78 * intel_pipe_update_start() - start update of a set of display registers
+17 -296
drivers/gpu/drm/i915/intel_uc.c
··· 22 22 * 23 23 */ 24 24 25 - #include "i915_drv.h" 26 25 #include "intel_uc.h" 27 - #include <linux/firmware.h> 28 - 29 - /* Cleans up uC firmware by releasing the firmware GEM obj. 30 - */ 31 - static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw) 32 - { 33 - struct drm_i915_gem_object *obj; 34 - 35 - obj = fetch_and_zero(&uc_fw->obj); 36 - if (obj) 37 - i915_gem_object_put(obj); 38 - 39 - uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; 40 - } 26 + #include "i915_drv.h" 27 + #include "i915_guc_submission.h" 41 28 42 29 /* Reset GuC providing us with fresh state for both GuC and HuC. 43 30 */ ··· 81 94 i915_modparams.enable_guc_submission = HAS_GUC_SCHED(dev_priv); 82 95 } 83 96 84 - static void gen8_guc_raise_irq(struct intel_guc *guc) 85 - { 86 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 87 - 88 - I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); 89 - } 90 - 91 97 void intel_uc_init_early(struct drm_i915_private *dev_priv) 92 98 { 93 - struct intel_guc *guc = &dev_priv->guc; 94 - 95 - intel_guc_ct_init_early(&guc->ct); 96 - 97 - mutex_init(&guc->send_mutex); 98 - guc->send = intel_guc_send_nop; 99 - guc->notify = gen8_guc_raise_irq; 100 - } 101 - 102 - static void fetch_uc_fw(struct drm_i915_private *dev_priv, 103 - struct intel_uc_fw *uc_fw) 104 - { 105 - struct pci_dev *pdev = dev_priv->drm.pdev; 106 - struct drm_i915_gem_object *obj; 107 - const struct firmware *fw = NULL; 108 - struct uc_css_header *css; 109 - size_t size; 110 - int err; 111 - 112 - if (!uc_fw->path) 113 - return; 114 - 115 - uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; 116 - 117 - DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", 118 - intel_uc_fw_status_repr(uc_fw->fetch_status)); 119 - 120 - err = request_firmware(&fw, uc_fw->path, &pdev->dev); 121 - if (err) 122 - goto fail; 123 - if (!fw) 124 - goto fail; 125 - 126 - DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", 127 - uc_fw->path, fw); 128 - 129 - /* Check the size of the blob 
before examining buffer contents */ 130 - if (fw->size < sizeof(struct uc_css_header)) { 131 - DRM_NOTE("Firmware header is missing\n"); 132 - goto fail; 133 - } 134 - 135 - css = (struct uc_css_header *)fw->data; 136 - 137 - /* Firmware bits always start from header */ 138 - uc_fw->header_offset = 0; 139 - uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - 140 - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); 141 - 142 - if (uc_fw->header_size != sizeof(struct uc_css_header)) { 143 - DRM_NOTE("CSS header definition mismatch\n"); 144 - goto fail; 145 - } 146 - 147 - /* then, uCode */ 148 - uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; 149 - uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); 150 - 151 - /* now RSA */ 152 - if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { 153 - DRM_NOTE("RSA key size is bad\n"); 154 - goto fail; 155 - } 156 - uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; 157 - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); 158 - 159 - /* At least, it should have header, uCode and RSA. Size of all three. */ 160 - size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; 161 - if (fw->size < size) { 162 - DRM_NOTE("Missing firmware components\n"); 163 - goto fail; 164 - } 165 - 166 - /* 167 - * The GuC firmware image has the version number embedded at a 168 - * well-known offset within the firmware blob; note that major / minor 169 - * version are TWO bytes each (i.e. u16), although all pointers and 170 - * offsets are defined in terms of bytes (u8). 171 - */ 172 - switch (uc_fw->type) { 173 - case INTEL_UC_FW_TYPE_GUC: 174 - /* Header and uCode will be loaded to WOPCM. Size of the two. */ 175 - size = uc_fw->header_size + uc_fw->ucode_size; 176 - 177 - /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). 
*/ 178 - if (size > intel_guc_wopcm_size(dev_priv)) { 179 - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); 180 - goto fail; 181 - } 182 - uc_fw->major_ver_found = css->guc.sw_version >> 16; 183 - uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; 184 - break; 185 - 186 - case INTEL_UC_FW_TYPE_HUC: 187 - uc_fw->major_ver_found = css->huc.sw_version >> 16; 188 - uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; 189 - break; 190 - 191 - default: 192 - DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); 193 - err = -ENOEXEC; 194 - goto fail; 195 - } 196 - 197 - if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { 198 - DRM_NOTE("Skipping %s firmware version check\n", 199 - intel_uc_fw_type_repr(uc_fw->type)); 200 - } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || 201 - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { 202 - DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", 203 - intel_uc_fw_type_repr(uc_fw->type), 204 - uc_fw->major_ver_found, uc_fw->minor_ver_found, 205 - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); 206 - err = -ENOEXEC; 207 - goto fail; 208 - } 209 - 210 - DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", 211 - uc_fw->major_ver_found, uc_fw->minor_ver_found, 212 - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); 213 - 214 - obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); 215 - if (IS_ERR(obj)) { 216 - err = PTR_ERR(obj); 217 - goto fail; 218 - } 219 - 220 - uc_fw->obj = obj; 221 - uc_fw->size = fw->size; 222 - 223 - DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", 224 - uc_fw->obj); 225 - 226 - release_firmware(fw); 227 - uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; 228 - return; 229 - 230 - fail: 231 - DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", 232 - uc_fw->path, err); 233 - DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", 234 - err, fw, uc_fw->obj); 235 - 236 - release_firmware(fw); /* OK even 
if fw is NULL */ 237 - uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; 99 + intel_guc_init_early(&dev_priv->guc); 238 100 } 239 101 240 102 void intel_uc_init_fw(struct drm_i915_private *dev_priv) 241 103 { 242 - fetch_uc_fw(dev_priv, &dev_priv->huc.fw); 243 - fetch_uc_fw(dev_priv, &dev_priv->guc.fw); 104 + intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); 105 + intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); 244 106 } 245 107 246 108 void intel_uc_fini_fw(struct drm_i915_private *dev_priv) 247 109 { 248 - __intel_uc_fw_fini(&dev_priv->guc.fw); 249 - __intel_uc_fw_fini(&dev_priv->huc.fw); 110 + intel_uc_fw_fini(&dev_priv->guc.fw); 111 + intel_uc_fw_fini(&dev_priv->huc.fw); 250 112 } 251 113 252 - static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) 114 + /** 115 + * intel_uc_init_mmio - setup uC MMIO access 116 + * 117 + * @dev_priv: device private 118 + * 119 + * Setup minimal state necessary for MMIO accesses later in the 120 + * initialization sequence. 121 + */ 122 + void intel_uc_init_mmio(struct drm_i915_private *dev_priv) 253 123 { 254 - GEM_BUG_ON(!guc->send_regs.base); 255 - GEM_BUG_ON(!guc->send_regs.count); 256 - GEM_BUG_ON(i >= guc->send_regs.count); 257 - 258 - return _MMIO(guc->send_regs.base + 4 * i); 259 - } 260 - 261 - static void guc_init_send_regs(struct intel_guc *guc) 262 - { 263 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 264 - enum forcewake_domains fw_domains = 0; 265 - unsigned int i; 266 - 267 - guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); 268 - guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; 269 - 270 - for (i = 0; i < guc->send_regs.count; i++) { 271 - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, 272 - guc_send_reg(guc, i), 273 - FW_REG_READ | FW_REG_WRITE); 274 - } 275 - guc->send_regs.fw_domains = fw_domains; 124 + intel_guc_init_send_regs(&dev_priv->guc); 276 125 } 277 126 278 127 static void guc_capture_load_err_log(struct intel_guc *guc) ··· 132 309 { 133 310 struct drm_i915_private 
*dev_priv = guc_to_i915(guc); 134 311 135 - guc_init_send_regs(guc); 136 - 137 312 if (HAS_GUC_CT(dev_priv)) 138 313 return intel_guc_enable_ct(guc); 139 314 ··· 147 326 intel_guc_disable_ct(guc); 148 327 149 328 guc->send = intel_guc_send_nop; 150 - } 151 - 152 - /** 153 - * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode 154 - * @guc: intel_guc structure 155 - * @rsa_offset: rsa offset w.r.t ggtt base of huc vma 156 - * 157 - * Triggers a HuC firmware authentication request to the GuC via intel_guc_send 158 - * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by 159 - * intel_huc_auth(). 160 - * 161 - * Return: non-zero code on error 162 - */ 163 - int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) 164 - { 165 - u32 action[] = { 166 - INTEL_GUC_ACTION_AUTHENTICATE_HUC, 167 - rsa_offset 168 - }; 169 - 170 - return intel_guc_send(guc, action, ARRAY_SIZE(action)); 171 329 } 172 330 173 331 int intel_uc_init_hw(struct drm_i915_private *dev_priv) ··· 279 479 } 280 480 281 481 i915_ggtt_disable_guc(dev_priv); 282 - } 283 - 284 - int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) 285 - { 286 - WARN(1, "Unexpected send: action=%#x\n", *action); 287 - return -ENODEV; 288 - } 289 - 290 - /* 291 - * This function implements the MMIO based host to GuC interface. 
292 - */ 293 - int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) 294 - { 295 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 296 - u32 status; 297 - int i; 298 - int ret; 299 - 300 - GEM_BUG_ON(!len); 301 - GEM_BUG_ON(len > guc->send_regs.count); 302 - 303 - /* If CT is available, we expect to use MMIO only during init/fini */ 304 - GEM_BUG_ON(HAS_GUC_CT(dev_priv) && 305 - *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && 306 - *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); 307 - 308 - mutex_lock(&guc->send_mutex); 309 - intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); 310 - 311 - for (i = 0; i < len; i++) 312 - I915_WRITE(guc_send_reg(guc, i), action[i]); 313 - 314 - POSTING_READ(guc_send_reg(guc, i - 1)); 315 - 316 - intel_guc_notify(guc); 317 - 318 - /* 319 - * No GuC command should ever take longer than 10ms. 320 - * Fast commands should still complete in 10us. 321 - */ 322 - ret = __intel_wait_for_register_fw(dev_priv, 323 - guc_send_reg(guc, 0), 324 - INTEL_GUC_RECV_MASK, 325 - INTEL_GUC_RECV_MASK, 326 - 10, 10, &status); 327 - if (status != INTEL_GUC_STATUS_SUCCESS) { 328 - /* 329 - * Either the GuC explicitly returned an error (which 330 - * we convert to -EIO here) or no response at all was 331 - * received within the timeout limit (-ETIMEDOUT) 332 - */ 333 - if (ret != -ETIMEDOUT) 334 - ret = -EIO; 335 - 336 - DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" 337 - " ret=%d status=0x%08X response=0x%08X\n", 338 - action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); 339 - } 340 - 341 - intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); 342 - mutex_unlock(&guc->send_mutex); 343 - 344 - return ret; 345 - } 346 - 347 - int intel_guc_sample_forcewake(struct intel_guc *guc) 348 - { 349 - struct drm_i915_private *dev_priv = guc_to_i915(guc); 350 - u32 action[2]; 351 - 352 - action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; 353 - /* WaRsDisableCoarsePowerGating:skl,bxt */ 
354 - if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) 355 - action[1] = 0; 356 - else 357 - /* bit 0 and 1 are for Render and Media domain separately */ 358 - action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; 359 - 360 - return intel_guc_send(guc, action, ARRAY_SIZE(action)); 361 482 }
+3 -225
drivers/gpu/drm/i915/intel_uc.h
··· 24 24 #ifndef _INTEL_UC_H_ 25 25 #define _INTEL_UC_H_ 26 26 27 - #include "intel_guc_fwif.h" 28 - #include "i915_guc_reg.h" 29 - #include "intel_ringbuffer.h" 30 - #include "intel_guc_ct.h" 31 - #include "i915_vma.h" 27 + #include "intel_guc.h" 28 + #include "intel_huc.h" 32 29 33 - struct drm_i915_gem_request; 34 - 35 - /* 36 - * This structure primarily describes the GEM object shared with the GuC. 37 - * The specs sometimes refer to this object as a "GuC context", but we use 38 - * the term "client" to avoid confusion with hardware contexts. This 39 - * GEM object is held for the entire lifetime of our interaction with 40 - * the GuC, being allocated before the GuC is loaded with its firmware. 41 - * Because there's no way to update the address used by the GuC after 42 - * initialisation, the shared object must stay pinned into the GGTT as 43 - * long as the GuC is in use. We also keep the first page (only) mapped 44 - * into kernel address space, as it includes shared data that must be 45 - * updated on every request submission. 46 - * 47 - * The single GEM object described here is actually made up of several 48 - * separate areas, as far as the GuC is concerned. The first page (kept 49 - * kmap'd) includes the "process descriptor" which holds sequence data for 50 - * the doorbell, and one cacheline which actually *is* the doorbell; a 51 - * write to this will "ring the doorbell" (i.e. send an interrupt to the 52 - * GuC). The subsequent pages of the client object constitute the work 53 - * queue (a circular array of work items), again described in the process 54 - * descriptor. Work queue pages are mapped momentarily as required. 
55 - */ 56 - struct i915_guc_client { 57 - struct i915_vma *vma; 58 - void *vaddr; 59 - struct i915_gem_context *owner; 60 - struct intel_guc *guc; 61 - 62 - uint32_t engines; /* bitmap of (host) engine ids */ 63 - uint32_t priority; 64 - u32 stage_id; 65 - uint32_t proc_desc_offset; 66 - 67 - u16 doorbell_id; 68 - unsigned long doorbell_offset; 69 - 70 - spinlock_t wq_lock; 71 - /* Per-engine counts of GuC submissions */ 72 - uint64_t submissions[I915_NUM_ENGINES]; 73 - }; 74 - 75 - enum intel_uc_fw_status { 76 - INTEL_UC_FIRMWARE_FAIL = -1, 77 - INTEL_UC_FIRMWARE_NONE = 0, 78 - INTEL_UC_FIRMWARE_PENDING, 79 - INTEL_UC_FIRMWARE_SUCCESS 80 - }; 81 - 82 - /* User-friendly representation of an enum */ 83 - static inline 84 - const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) 85 - { 86 - switch (status) { 87 - case INTEL_UC_FIRMWARE_FAIL: 88 - return "FAIL"; 89 - case INTEL_UC_FIRMWARE_NONE: 90 - return "NONE"; 91 - case INTEL_UC_FIRMWARE_PENDING: 92 - return "PENDING"; 93 - case INTEL_UC_FIRMWARE_SUCCESS: 94 - return "SUCCESS"; 95 - } 96 - return "<invalid>"; 97 - } 98 - 99 - enum intel_uc_fw_type { 100 - INTEL_UC_FW_TYPE_GUC, 101 - INTEL_UC_FW_TYPE_HUC 102 - }; 103 - 104 - /* User-friendly representation of an enum */ 105 - static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) 106 - { 107 - switch (type) { 108 - case INTEL_UC_FW_TYPE_GUC: 109 - return "GuC"; 110 - case INTEL_UC_FW_TYPE_HUC: 111 - return "HuC"; 112 - } 113 - return "uC"; 114 - } 115 - 116 - /* 117 - * This structure encapsulates all the data needed during the process 118 - * of fetching, caching, and loading the firmware image into the GuC. 
119 - */ 120 - struct intel_uc_fw { 121 - const char *path; 122 - size_t size; 123 - struct drm_i915_gem_object *obj; 124 - enum intel_uc_fw_status fetch_status; 125 - enum intel_uc_fw_status load_status; 126 - 127 - uint16_t major_ver_wanted; 128 - uint16_t minor_ver_wanted; 129 - uint16_t major_ver_found; 130 - uint16_t minor_ver_found; 131 - 132 - enum intel_uc_fw_type type; 133 - uint32_t header_size; 134 - uint32_t header_offset; 135 - uint32_t rsa_size; 136 - uint32_t rsa_offset; 137 - uint32_t ucode_size; 138 - uint32_t ucode_offset; 139 - }; 140 - 141 - struct intel_guc_log { 142 - uint32_t flags; 143 - struct i915_vma *vma; 144 - /* The runtime stuff gets created only when GuC logging gets enabled */ 145 - struct { 146 - void *buf_addr; 147 - struct workqueue_struct *flush_wq; 148 - struct work_struct flush_work; 149 - struct rchan *relay_chan; 150 - } runtime; 151 - /* logging related stats */ 152 - u32 capture_miss_count; 153 - u32 flush_interrupt_count; 154 - u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; 155 - u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; 156 - u32 flush_count[GUC_MAX_LOG_BUFFER]; 157 - }; 158 - 159 - struct intel_guc { 160 - struct intel_uc_fw fw; 161 - struct intel_guc_log log; 162 - struct intel_guc_ct ct; 163 - 164 - /* Log snapshot if GuC errors during load */ 165 - struct drm_i915_gem_object *load_err_log; 166 - 167 - /* intel_guc_recv interrupt related state */ 168 - bool interrupts_enabled; 169 - 170 - struct i915_vma *ads_vma; 171 - struct i915_vma *stage_desc_pool; 172 - void *stage_desc_pool_vaddr; 173 - struct ida stage_ids; 174 - 175 - struct i915_guc_client *execbuf_client; 176 - 177 - DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); 178 - uint32_t db_cacheline; /* Cyclic counter mod pagesize */ 179 - 180 - /* GuC's FW specific registers used in MMIO send */ 181 - struct { 182 - u32 base; 183 - unsigned int count; 184 - enum forcewake_domains fw_domains; 185 - } send_regs; 186 - 187 - /* To serialize the intel_guc_send 
actions */ 188 - struct mutex send_mutex; 189 - 190 - /* GuC's FW specific send function */ 191 - int (*send)(struct intel_guc *guc, const u32 *data, u32 len); 192 - 193 - /* GuC's FW specific notify function */ 194 - void (*notify)(struct intel_guc *guc); 195 - }; 196 - 197 - struct intel_huc { 198 - /* Generic uC firmware management */ 199 - struct intel_uc_fw fw; 200 - 201 - /* HuC-specific additions */ 202 - }; 203 - 204 - /* intel_uc.c */ 205 30 void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); 206 31 void intel_uc_init_early(struct drm_i915_private *dev_priv); 32 + void intel_uc_init_mmio(struct drm_i915_private *dev_priv); 207 33 void intel_uc_init_fw(struct drm_i915_private *dev_priv); 208 34 void intel_uc_fini_fw(struct drm_i915_private *dev_priv); 209 35 int intel_uc_init_hw(struct drm_i915_private *dev_priv); 210 36 void intel_uc_fini_hw(struct drm_i915_private *dev_priv); 211 - int intel_guc_sample_forcewake(struct intel_guc *guc); 212 - int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); 213 - int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); 214 - int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); 215 - 216 - static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) 217 - { 218 - return guc->send(guc, action, len); 219 - } 220 - 221 - static inline void intel_guc_notify(struct intel_guc *guc) 222 - { 223 - guc->notify(guc); 224 - } 225 - 226 - /* intel_guc_loader.c */ 227 - int intel_guc_select_fw(struct intel_guc *guc); 228 - int intel_guc_init_hw(struct intel_guc *guc); 229 - int intel_guc_suspend(struct drm_i915_private *dev_priv); 230 - int intel_guc_resume(struct drm_i915_private *dev_priv); 231 - u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); 232 - 233 - /* i915_guc_submission.c */ 234 - int i915_guc_submission_init(struct drm_i915_private *dev_priv); 235 - int i915_guc_submission_enable(struct drm_i915_private *dev_priv); 236 - 
void i915_guc_submission_disable(struct drm_i915_private *dev_priv); 237 - void i915_guc_submission_fini(struct drm_i915_private *dev_priv); 238 - struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); 239 - 240 - /* intel_guc_log.c */ 241 - int intel_guc_log_create(struct intel_guc *guc); 242 - void intel_guc_log_destroy(struct intel_guc *guc); 243 - int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); 244 - void i915_guc_log_register(struct drm_i915_private *dev_priv); 245 - void i915_guc_log_unregister(struct drm_i915_private *dev_priv); 246 - 247 - static inline u32 guc_ggtt_offset(struct i915_vma *vma) 248 - { 249 - u32 offset = i915_ggtt_offset(vma); 250 - GEM_BUG_ON(offset < GUC_WOPCM_TOP); 251 - GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); 252 - return offset; 253 - } 254 - 255 - /* intel_huc.c */ 256 - void intel_huc_select_fw(struct intel_huc *huc); 257 - void intel_huc_init_hw(struct intel_huc *huc); 258 - void intel_huc_auth(struct intel_huc *huc); 259 37 260 38 #endif
+193
drivers/gpu/drm/i915/intel_uc_fw.c
··· 1 + /* 2 + * Copyright © 2016-2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + * 23 + */ 24 + 25 + #include <linux/firmware.h> 26 + 27 + #include "intel_uc_fw.h" 28 + #include "i915_drv.h" 29 + 30 + /** 31 + * intel_uc_fw_fetch - fetch uC firmware 32 + * 33 + * @dev_priv: device private 34 + * @uc_fw: uC firmware 35 + * 36 + * Fetch uC firmware into GEM obj. 
37 + */ 38 + void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, 39 + struct intel_uc_fw *uc_fw) 40 + { 41 + struct pci_dev *pdev = dev_priv->drm.pdev; 42 + struct drm_i915_gem_object *obj; 43 + const struct firmware *fw = NULL; 44 + struct uc_css_header *css; 45 + size_t size; 46 + int err; 47 + 48 + if (!uc_fw->path) 49 + return; 50 + 51 + uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; 52 + 53 + DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", 54 + intel_uc_fw_status_repr(uc_fw->fetch_status)); 55 + 56 + err = request_firmware(&fw, uc_fw->path, &pdev->dev); 57 + if (err) 58 + goto fail; 59 + if (!fw) 60 + goto fail; 61 + 62 + DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", 63 + uc_fw->path, fw); 64 + 65 + /* Check the size of the blob before examining buffer contents */ 66 + if (fw->size < sizeof(struct uc_css_header)) { 67 + DRM_NOTE("Firmware header is missing\n"); 68 + goto fail; 69 + } 70 + 71 + css = (struct uc_css_header *)fw->data; 72 + 73 + /* Firmware bits always start from header */ 74 + uc_fw->header_offset = 0; 75 + uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - 76 + css->key_size_dw - css->exponent_size_dw) * 77 + sizeof(u32); 78 + 79 + if (uc_fw->header_size != sizeof(struct uc_css_header)) { 80 + DRM_NOTE("CSS header definition mismatch\n"); 81 + goto fail; 82 + } 83 + 84 + /* then, uCode */ 85 + uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; 86 + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); 87 + 88 + /* now RSA */ 89 + if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { 90 + DRM_NOTE("RSA key size is bad\n"); 91 + goto fail; 92 + } 93 + uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; 94 + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); 95 + 96 + /* At least, it should have header, uCode and RSA. Size of all three. 
*/ 97 + size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; 98 + if (fw->size < size) { 99 + DRM_NOTE("Missing firmware components\n"); 100 + goto fail; 101 + } 102 + 103 + /* 104 + * The GuC firmware image has the version number embedded at a 105 + * well-known offset within the firmware blob; note that major / minor 106 + * version are TWO bytes each (i.e. u16), although all pointers and 107 + * offsets are defined in terms of bytes (u8). 108 + */ 109 + switch (uc_fw->type) { 110 + case INTEL_UC_FW_TYPE_GUC: 111 + /* Header and uCode will be loaded to WOPCM. Size of the two. */ 112 + size = uc_fw->header_size + uc_fw->ucode_size; 113 + 114 + /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ 115 + if (size > intel_guc_wopcm_size(dev_priv)) { 116 + DRM_ERROR("Firmware is too large to fit in WOPCM\n"); 117 + goto fail; 118 + } 119 + uc_fw->major_ver_found = css->guc.sw_version >> 16; 120 + uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; 121 + break; 122 + 123 + case INTEL_UC_FW_TYPE_HUC: 124 + uc_fw->major_ver_found = css->huc.sw_version >> 16; 125 + uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; 126 + break; 127 + 128 + default: 129 + DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); 130 + err = -ENOEXEC; 131 + goto fail; 132 + } 133 + 134 + if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { 135 + DRM_NOTE("Skipping %s firmware version check\n", 136 + intel_uc_fw_type_repr(uc_fw->type)); 137 + } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || 138 + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { 139 + DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", 140 + intel_uc_fw_type_repr(uc_fw->type), 141 + uc_fw->major_ver_found, uc_fw->minor_ver_found, 142 + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); 143 + err = -ENOEXEC; 144 + goto fail; 145 + } 146 + 147 + DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", 148 + uc_fw->major_ver_found, uc_fw->minor_ver_found, 149 
+ uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); 150 + 151 + obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); 152 + if (IS_ERR(obj)) { 153 + err = PTR_ERR(obj); 154 + goto fail; 155 + } 156 + 157 + uc_fw->obj = obj; 158 + uc_fw->size = fw->size; 159 + 160 + DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", 161 + uc_fw->obj); 162 + 163 + release_firmware(fw); 164 + uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; 165 + return; 166 + 167 + fail: 168 + DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", 169 + uc_fw->path, err); 170 + DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", 171 + err, fw, uc_fw->obj); 172 + 173 + release_firmware(fw); /* OK even if fw is NULL */ 174 + uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; 175 + } 176 + 177 + /** 178 + * intel_uc_fw_fini - cleanup uC firmware 179 + * 180 + * @uc_fw: uC firmware 181 + * 182 + * Cleans up uC firmware by releasing the firmware GEM obj. 183 + */ 184 + void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) 185 + { 186 + struct drm_i915_gem_object *obj; 187 + 188 + obj = fetch_and_zero(&uc_fw->obj); 189 + if (obj) 190 + i915_gem_object_put(obj); 191 + 192 + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; 193 + }
+107
drivers/gpu/drm/i915/intel_uc_fw.h
··· 1 + /* 2 + * Copyright © 2014-2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + * 23 + */ 24 + 25 + #ifndef _INTEL_UC_FW_H_ 26 + #define _INTEL_UC_FW_H_ 27 + 28 + struct drm_i915_private; 29 + 30 + enum intel_uc_fw_status { 31 + INTEL_UC_FIRMWARE_FAIL = -1, 32 + INTEL_UC_FIRMWARE_NONE = 0, 33 + INTEL_UC_FIRMWARE_PENDING, 34 + INTEL_UC_FIRMWARE_SUCCESS 35 + }; 36 + 37 + enum intel_uc_fw_type { 38 + INTEL_UC_FW_TYPE_GUC, 39 + INTEL_UC_FW_TYPE_HUC 40 + }; 41 + 42 + /* 43 + * This structure encapsulates all the data needed during the process 44 + * of fetching, caching, and loading the firmware image into the uC. 
45 + */ 46 + struct intel_uc_fw { 47 + const char *path; 48 + size_t size; 49 + struct drm_i915_gem_object *obj; 50 + enum intel_uc_fw_status fetch_status; 51 + enum intel_uc_fw_status load_status; 52 + 53 + u16 major_ver_wanted; 54 + u16 minor_ver_wanted; 55 + u16 major_ver_found; 56 + u16 minor_ver_found; 57 + 58 + enum intel_uc_fw_type type; 59 + u32 header_size; 60 + u32 header_offset; 61 + u32 rsa_size; 62 + u32 rsa_offset; 63 + u32 ucode_size; 64 + u32 ucode_offset; 65 + }; 66 + 67 + static inline 68 + const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) 69 + { 70 + switch (status) { 71 + case INTEL_UC_FIRMWARE_FAIL: 72 + return "FAIL"; 73 + case INTEL_UC_FIRMWARE_NONE: 74 + return "NONE"; 75 + case INTEL_UC_FIRMWARE_PENDING: 76 + return "PENDING"; 77 + case INTEL_UC_FIRMWARE_SUCCESS: 78 + return "SUCCESS"; 79 + } 80 + return "<invalid>"; 81 + } 82 + 83 + static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) 84 + { 85 + switch (type) { 86 + case INTEL_UC_FW_TYPE_GUC: 87 + return "GuC"; 88 + case INTEL_UC_FW_TYPE_HUC: 89 + return "HuC"; 90 + } 91 + return "uC"; 92 + } 93 + 94 + static inline 95 + void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) 96 + { 97 + uc_fw->path = NULL; 98 + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; 99 + uc_fw->load_status = INTEL_UC_FIRMWARE_NONE; 100 + uc_fw->type = type; 101 + } 102 + 103 + void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, 104 + struct intel_uc_fw *uc_fw); 105 + void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); 106 + 107 + #endif
+17 -1
drivers/gpu/drm/i915/intel_uncore.c
··· 626 626 if (!dev_priv->uncore.funcs.force_wake_get) 627 627 return; 628 628 629 - WARN_ON(dev_priv->uncore.fw_domains_active); 629 + WARN(dev_priv->uncore.fw_domains_active, 630 + "Expected all fw_domains to be inactive, but %08x are still on\n", 631 + dev_priv->uncore.fw_domains_active); 632 + } 633 + 634 + void assert_forcewakes_active(struct drm_i915_private *dev_priv, 635 + enum forcewake_domains fw_domains) 636 + { 637 + if (!dev_priv->uncore.funcs.force_wake_get) 638 + return; 639 + 640 + assert_rpm_wakelock_held(dev_priv); 641 + 642 + fw_domains &= dev_priv->uncore.fw_domains; 643 + WARN(fw_domains & ~dev_priv->uncore.fw_domains_active, 644 + "Expected %08x fw_domains to be active, but %08x are off\n", 645 + fw_domains, fw_domains & ~dev_priv->uncore.fw_domains_active); 630 646 } 631 647 632 648 /* We give fast paths for the really cool registers */
+8
drivers/gpu/drm/i915/intel_uncore.h
··· 25 25 #ifndef __INTEL_UNCORE_H__ 26 26 #define __INTEL_UNCORE_H__ 27 27 28 + #include <linux/spinlock.h> 29 + #include <linux/notifier.h> 30 + #include <linux/hrtimer.h> 31 + 32 + #include "i915_reg.h" 33 + 28 34 struct drm_i915_private; 29 35 30 36 enum forcewake_domain_id { ··· 137 131 138 132 u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); 139 133 void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); 134 + void assert_forcewakes_active(struct drm_i915_private *dev_priv, 135 + enum forcewake_domains fw_domains); 140 136 const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); 141 137 142 138 enum forcewake_domains
+8 -6
drivers/gpu/drm/i915/selftests/huge_gem_object.c
··· 37 37 kfree(pages); 38 38 } 39 39 40 - static struct sg_table * 41 - huge_get_pages(struct drm_i915_gem_object *obj) 40 + static int huge_get_pages(struct drm_i915_gem_object *obj) 42 41 { 43 42 #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) 44 43 const unsigned long nreal = obj->scratch / PAGE_SIZE; ··· 48 49 49 50 pages = kmalloc(sizeof(*pages), GFP); 50 51 if (!pages) 51 - return ERR_PTR(-ENOMEM); 52 + return -ENOMEM; 52 53 53 54 if (sg_alloc_table(pages, npages, GFP)) { 54 55 kfree(pages); 55 - return ERR_PTR(-ENOMEM); 56 + return -ENOMEM; 56 57 } 57 58 58 59 sg = pages->sgl; ··· 80 81 if (i915_gem_gtt_prepare_pages(obj, pages)) 81 82 goto err; 82 83 83 - return pages; 84 + __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); 85 + 86 + return 0; 84 87 85 88 err: 86 89 huge_free_pages(obj, pages); 87 - return ERR_PTR(-ENOMEM); 90 + 91 + return -ENOMEM; 88 92 #undef GFP 89 93 } 90 94
+1734
drivers/gpu/drm/i915/selftests/huge_pages.c
··· 1 + /* 2 + * Copyright © 2017 Intel Corporation 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 
22 + * 23 + */ 24 + 25 + #include "../i915_selftest.h" 26 + 27 + #include <linux/prime_numbers.h> 28 + 29 + #include "mock_drm.h" 30 + 31 + static const unsigned int page_sizes[] = { 32 + I915_GTT_PAGE_SIZE_2M, 33 + I915_GTT_PAGE_SIZE_64K, 34 + I915_GTT_PAGE_SIZE_4K, 35 + }; 36 + 37 + static unsigned int get_largest_page_size(struct drm_i915_private *i915, 38 + u64 rem) 39 + { 40 + int i; 41 + 42 + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { 43 + unsigned int page_size = page_sizes[i]; 44 + 45 + if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size) 46 + return page_size; 47 + } 48 + 49 + return 0; 50 + } 51 + 52 + static void huge_pages_free_pages(struct sg_table *st) 53 + { 54 + struct scatterlist *sg; 55 + 56 + for (sg = st->sgl; sg; sg = __sg_next(sg)) { 57 + if (sg_page(sg)) 58 + __free_pages(sg_page(sg), get_order(sg->length)); 59 + } 60 + 61 + sg_free_table(st); 62 + kfree(st); 63 + } 64 + 65 + static int get_huge_pages(struct drm_i915_gem_object *obj) 66 + { 67 + #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) 68 + unsigned int page_mask = obj->mm.page_mask; 69 + struct sg_table *st; 70 + struct scatterlist *sg; 71 + unsigned int sg_page_sizes; 72 + u64 rem; 73 + 74 + st = kmalloc(sizeof(*st), GFP); 75 + if (!st) 76 + return -ENOMEM; 77 + 78 + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { 79 + kfree(st); 80 + return -ENOMEM; 81 + } 82 + 83 + rem = obj->base.size; 84 + sg = st->sgl; 85 + st->nents = 0; 86 + sg_page_sizes = 0; 87 + 88 + /* 89 + * Our goal here is simple, we want to greedily fill the object from 90 + * largest to smallest page-size, while ensuring that we use *every* 91 + * page-size as per the given page-mask. 
92 + */ 93 + do { 94 + unsigned int bit = ilog2(page_mask); 95 + unsigned int page_size = BIT(bit); 96 + int order = get_order(page_size); 97 + 98 + do { 99 + struct page *page; 100 + 101 + GEM_BUG_ON(order >= MAX_ORDER); 102 + page = alloc_pages(GFP | __GFP_ZERO, order); 103 + if (!page) 104 + goto err; 105 + 106 + sg_set_page(sg, page, page_size, 0); 107 + sg_page_sizes |= page_size; 108 + st->nents++; 109 + 110 + rem -= page_size; 111 + if (!rem) { 112 + sg_mark_end(sg); 113 + break; 114 + } 115 + 116 + sg = __sg_next(sg); 117 + } while ((rem - ((page_size-1) & page_mask)) >= page_size); 118 + 119 + page_mask &= (page_size-1); 120 + } while (page_mask); 121 + 122 + if (i915_gem_gtt_prepare_pages(obj, st)) 123 + goto err; 124 + 125 + obj->mm.madv = I915_MADV_DONTNEED; 126 + 127 + GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); 128 + __i915_gem_object_set_pages(obj, st, sg_page_sizes); 129 + 130 + return 0; 131 + 132 + err: 133 + sg_set_page(sg, NULL, 0, 0); 134 + sg_mark_end(sg); 135 + huge_pages_free_pages(st); 136 + 137 + return -ENOMEM; 138 + } 139 + 140 + static void put_huge_pages(struct drm_i915_gem_object *obj, 141 + struct sg_table *pages) 142 + { 143 + i915_gem_gtt_finish_pages(obj, pages); 144 + huge_pages_free_pages(pages); 145 + 146 + obj->mm.dirty = false; 147 + obj->mm.madv = I915_MADV_WILLNEED; 148 + } 149 + 150 + static const struct drm_i915_gem_object_ops huge_page_ops = { 151 + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 152 + I915_GEM_OBJECT_IS_SHRINKABLE, 153 + .get_pages = get_huge_pages, 154 + .put_pages = put_huge_pages, 155 + }; 156 + 157 + static struct drm_i915_gem_object * 158 + huge_pages_object(struct drm_i915_private *i915, 159 + u64 size, 160 + unsigned int page_mask) 161 + { 162 + struct drm_i915_gem_object *obj; 163 + 164 + GEM_BUG_ON(!size); 165 + GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); 166 + 167 + if (size >> PAGE_SHIFT > INT_MAX) 168 + return ERR_PTR(-E2BIG); 169 + 170 + if (overflows_type(size, obj->base.size)) 
171 + return ERR_PTR(-E2BIG); 172 + 173 + obj = i915_gem_object_alloc(i915); 174 + if (!obj) 175 + return ERR_PTR(-ENOMEM); 176 + 177 + drm_gem_private_object_init(&i915->drm, &obj->base, size); 178 + i915_gem_object_init(obj, &huge_page_ops); 179 + 180 + obj->base.write_domain = I915_GEM_DOMAIN_CPU; 181 + obj->base.read_domains = I915_GEM_DOMAIN_CPU; 182 + obj->cache_level = I915_CACHE_NONE; 183 + 184 + obj->mm.page_mask = page_mask; 185 + 186 + return obj; 187 + } 188 + 189 + static int fake_get_huge_pages(struct drm_i915_gem_object *obj) 190 + { 191 + struct drm_i915_private *i915 = to_i915(obj->base.dev); 192 + const u64 max_len = rounddown_pow_of_two(UINT_MAX); 193 + struct sg_table *st; 194 + struct scatterlist *sg; 195 + unsigned int sg_page_sizes; 196 + u64 rem; 197 + 198 + st = kmalloc(sizeof(*st), GFP); 199 + if (!st) 200 + return -ENOMEM; 201 + 202 + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { 203 + kfree(st); 204 + return -ENOMEM; 205 + } 206 + 207 + /* Use optimal page sized chunks to fill in the sg table */ 208 + rem = obj->base.size; 209 + sg = st->sgl; 210 + st->nents = 0; 211 + sg_page_sizes = 0; 212 + do { 213 + unsigned int page_size = get_largest_page_size(i915, rem); 214 + unsigned int len = min(page_size * div_u64(rem, page_size), 215 + max_len); 216 + 217 + GEM_BUG_ON(!page_size); 218 + 219 + sg->offset = 0; 220 + sg->length = len; 221 + sg_dma_len(sg) = len; 222 + sg_dma_address(sg) = page_size; 223 + 224 + sg_page_sizes |= len; 225 + 226 + st->nents++; 227 + 228 + rem -= len; 229 + if (!rem) { 230 + sg_mark_end(sg); 231 + break; 232 + } 233 + 234 + sg = sg_next(sg); 235 + } while (1); 236 + 237 + obj->mm.madv = I915_MADV_DONTNEED; 238 + 239 + __i915_gem_object_set_pages(obj, st, sg_page_sizes); 240 + 241 + return 0; 242 + } 243 + 244 + static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) 245 + { 246 + struct drm_i915_private *i915 = to_i915(obj->base.dev); 247 + struct sg_table *st; 248 + struct 
scatterlist *sg; 249 + unsigned int page_size; 250 + 251 + st = kmalloc(sizeof(*st), GFP); 252 + if (!st) 253 + return -ENOMEM; 254 + 255 + if (sg_alloc_table(st, 1, GFP)) { 256 + kfree(st); 257 + return -ENOMEM; 258 + } 259 + 260 + sg = st->sgl; 261 + st->nents = 1; 262 + 263 + page_size = get_largest_page_size(i915, obj->base.size); 264 + GEM_BUG_ON(!page_size); 265 + 266 + sg->offset = 0; 267 + sg->length = obj->base.size; 268 + sg_dma_len(sg) = obj->base.size; 269 + sg_dma_address(sg) = page_size; 270 + 271 + obj->mm.madv = I915_MADV_DONTNEED; 272 + 273 + __i915_gem_object_set_pages(obj, st, sg->length); 274 + 275 + return 0; 276 + #undef GFP 277 + } 278 + 279 + static void fake_free_huge_pages(struct drm_i915_gem_object *obj, 280 + struct sg_table *pages) 281 + { 282 + sg_free_table(pages); 283 + kfree(pages); 284 + } 285 + 286 + static void fake_put_huge_pages(struct drm_i915_gem_object *obj, 287 + struct sg_table *pages) 288 + { 289 + fake_free_huge_pages(obj, pages); 290 + obj->mm.dirty = false; 291 + obj->mm.madv = I915_MADV_WILLNEED; 292 + } 293 + 294 + static const struct drm_i915_gem_object_ops fake_ops = { 295 + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, 296 + .get_pages = fake_get_huge_pages, 297 + .put_pages = fake_put_huge_pages, 298 + }; 299 + 300 + static const struct drm_i915_gem_object_ops fake_ops_single = { 301 + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, 302 + .get_pages = fake_get_huge_pages_single, 303 + .put_pages = fake_put_huge_pages, 304 + }; 305 + 306 + static struct drm_i915_gem_object * 307 + fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) 308 + { 309 + struct drm_i915_gem_object *obj; 310 + 311 + GEM_BUG_ON(!size); 312 + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); 313 + 314 + if (size >> PAGE_SHIFT > UINT_MAX) 315 + return ERR_PTR(-E2BIG); 316 + 317 + if (overflows_type(size, obj->base.size)) 318 + return ERR_PTR(-E2BIG); 319 + 320 + obj = i915_gem_object_alloc(i915); 321 + if (!obj) 322 + return 
ERR_PTR(-ENOMEM); 323 + 324 + drm_gem_private_object_init(&i915->drm, &obj->base, size); 325 + 326 + if (single) 327 + i915_gem_object_init(obj, &fake_ops_single); 328 + else 329 + i915_gem_object_init(obj, &fake_ops); 330 + 331 + obj->base.write_domain = I915_GEM_DOMAIN_CPU; 332 + obj->base.read_domains = I915_GEM_DOMAIN_CPU; 333 + obj->cache_level = I915_CACHE_NONE; 334 + 335 + return obj; 336 + } 337 + 338 + static int igt_check_page_sizes(struct i915_vma *vma) 339 + { 340 + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); 341 + unsigned int supported = INTEL_INFO(i915)->page_sizes; 342 + struct drm_i915_gem_object *obj = vma->obj; 343 + int err = 0; 344 + 345 + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { 346 + pr_err("unsupported page_sizes.sg=%u, supported=%u\n", 347 + vma->page_sizes.sg & ~supported, supported); 348 + err = -EINVAL; 349 + } 350 + 351 + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { 352 + pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", 353 + vma->page_sizes.gtt & ~supported, supported); 354 + err = -EINVAL; 355 + } 356 + 357 + if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { 358 + pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", 359 + vma->page_sizes.phys, obj->mm.page_sizes.phys); 360 + err = -EINVAL; 361 + } 362 + 363 + if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { 364 + pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", 365 + vma->page_sizes.sg, obj->mm.page_sizes.sg); 366 + err = -EINVAL; 367 + } 368 + 369 + if (obj->mm.page_sizes.gtt) { 370 + pr_err("obj->page_sizes.gtt(%u) should never be set\n", 371 + obj->mm.page_sizes.gtt); 372 + err = -EINVAL; 373 + } 374 + 375 + return err; 376 + } 377 + 378 + static int igt_mock_exhaust_device_supported_pages(void *arg) 379 + { 380 + struct i915_hw_ppgtt *ppgtt = arg; 381 + struct drm_i915_private *i915 = ppgtt->base.i915; 382 + unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; 383 + struct drm_i915_gem_object *obj; 384 + 
struct i915_vma *vma; 385 + int i, j, single; 386 + int err; 387 + 388 + /* 389 + * Sanity check creating objects with every valid page support 390 + * combination for our mock device. 391 + */ 392 + 393 + for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { 394 + unsigned int combination = 0; 395 + 396 + for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { 397 + if (i & BIT(j)) 398 + combination |= page_sizes[j]; 399 + } 400 + 401 + mkwrite_device_info(i915)->page_sizes = combination; 402 + 403 + for (single = 0; single <= 1; ++single) { 404 + obj = fake_huge_pages_object(i915, combination, !!single); 405 + if (IS_ERR(obj)) { 406 + err = PTR_ERR(obj); 407 + goto out_device; 408 + } 409 + 410 + if (obj->base.size != combination) { 411 + pr_err("obj->base.size=%zu, expected=%u\n", 412 + obj->base.size, combination); 413 + err = -EINVAL; 414 + goto out_put; 415 + } 416 + 417 + vma = i915_vma_instance(obj, &ppgtt->base, NULL); 418 + if (IS_ERR(vma)) { 419 + err = PTR_ERR(vma); 420 + goto out_put; 421 + } 422 + 423 + err = i915_vma_pin(vma, 0, 0, PIN_USER); 424 + if (err) 425 + goto out_close; 426 + 427 + err = igt_check_page_sizes(vma); 428 + 429 + if (vma->page_sizes.sg != combination) { 430 + pr_err("page_sizes.sg=%u, expected=%u\n", 431 + vma->page_sizes.sg, combination); 432 + err = -EINVAL; 433 + } 434 + 435 + i915_vma_unpin(vma); 436 + i915_vma_close(vma); 437 + 438 + i915_gem_object_put(obj); 439 + 440 + if (err) 441 + goto out_device; 442 + } 443 + } 444 + 445 + goto out_device; 446 + 447 + out_close: 448 + i915_vma_close(vma); 449 + out_put: 450 + i915_gem_object_put(obj); 451 + out_device: 452 + mkwrite_device_info(i915)->page_sizes = saved_mask; 453 + 454 + return err; 455 + } 456 + 457 + static int igt_mock_ppgtt_misaligned_dma(void *arg) 458 + { 459 + struct i915_hw_ppgtt *ppgtt = arg; 460 + struct drm_i915_private *i915 = ppgtt->base.i915; 461 + unsigned long supported = INTEL_INFO(i915)->page_sizes; 462 + struct drm_i915_gem_object *obj; 463 + int bit; 464 + 
int err; 465 + 466 + /* 467 + * Sanity check dma misalignment for huge pages -- the dma addresses we 468 + * insert into the paging structures need to always respect the page 469 + * size alignment. 470 + */ 471 + 472 + bit = ilog2(I915_GTT_PAGE_SIZE_64K); 473 + 474 + for_each_set_bit_from(bit, &supported, 475 + ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 476 + IGT_TIMEOUT(end_time); 477 + unsigned int page_size = BIT(bit); 478 + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; 479 + unsigned int offset; 480 + unsigned int size = 481 + round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; 482 + struct i915_vma *vma; 483 + 484 + obj = fake_huge_pages_object(i915, size, true); 485 + if (IS_ERR(obj)) 486 + return PTR_ERR(obj); 487 + 488 + if (obj->base.size != size) { 489 + pr_err("obj->base.size=%zu, expected=%u\n", 490 + obj->base.size, size); 491 + err = -EINVAL; 492 + goto out_put; 493 + } 494 + 495 + err = i915_gem_object_pin_pages(obj); 496 + if (err) 497 + goto out_put; 498 + 499 + /* Force the page size for this object */ 500 + obj->mm.page_sizes.sg = page_size; 501 + 502 + vma = i915_vma_instance(obj, &ppgtt->base, NULL); 503 + if (IS_ERR(vma)) { 504 + err = PTR_ERR(vma); 505 + goto out_unpin; 506 + } 507 + 508 + err = i915_vma_pin(vma, 0, 0, flags); 509 + if (err) { 510 + i915_vma_close(vma); 511 + goto out_unpin; 512 + } 513 + 514 + 515 + err = igt_check_page_sizes(vma); 516 + 517 + if (vma->page_sizes.gtt != page_size) { 518 + pr_err("page_sizes.gtt=%u, expected %u\n", 519 + vma->page_sizes.gtt, page_size); 520 + err = -EINVAL; 521 + } 522 + 523 + i915_vma_unpin(vma); 524 + 525 + if (err) { 526 + i915_vma_close(vma); 527 + goto out_unpin; 528 + } 529 + 530 + /* 531 + * Try all the other valid offsets until the next 532 + * boundary -- should always fall back to using 4K 533 + * pages. 
534 + */ 535 + for (offset = 4096; offset < page_size; offset += 4096) { 536 + err = i915_vma_unbind(vma); 537 + if (err) { 538 + i915_vma_close(vma); 539 + goto out_unpin; 540 + } 541 + 542 + err = i915_vma_pin(vma, 0, 0, flags | offset); 543 + if (err) { 544 + i915_vma_close(vma); 545 + goto out_unpin; 546 + } 547 + 548 + err = igt_check_page_sizes(vma); 549 + 550 + if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { 551 + pr_err("page_sizes.gtt=%u, expected %lu\n", 552 + vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); 553 + err = -EINVAL; 554 + } 555 + 556 + i915_vma_unpin(vma); 557 + 558 + if (err) { 559 + i915_vma_close(vma); 560 + goto out_unpin; 561 + } 562 + 563 + if (igt_timeout(end_time, 564 + "%s timed out at offset %x with page-size %x\n", 565 + __func__, offset, page_size)) 566 + break; 567 + } 568 + 569 + i915_vma_close(vma); 570 + 571 + i915_gem_object_unpin_pages(obj); 572 + i915_gem_object_put(obj); 573 + } 574 + 575 + return 0; 576 + 577 + out_unpin: 578 + i915_gem_object_unpin_pages(obj); 579 + out_put: 580 + i915_gem_object_put(obj); 581 + 582 + return err; 583 + } 584 + 585 + static void close_object_list(struct list_head *objects, 586 + struct i915_hw_ppgtt *ppgtt) 587 + { 588 + struct drm_i915_gem_object *obj, *on; 589 + 590 + list_for_each_entry_safe(obj, on, objects, st_link) { 591 + struct i915_vma *vma; 592 + 593 + vma = i915_vma_instance(obj, &ppgtt->base, NULL); 594 + if (!IS_ERR(vma)) 595 + i915_vma_close(vma); 596 + 597 + list_del(&obj->st_link); 598 + i915_gem_object_unpin_pages(obj); 599 + i915_gem_object_put(obj); 600 + } 601 + } 602 + 603 + static int igt_mock_ppgtt_huge_fill(void *arg) 604 + { 605 + struct i915_hw_ppgtt *ppgtt = arg; 606 + struct drm_i915_private *i915 = ppgtt->base.i915; 607 + unsigned long max_pages = ppgtt->base.total >> PAGE_SHIFT; 608 + unsigned long page_num; 609 + bool single = false; 610 + LIST_HEAD(objects); 611 + IGT_TIMEOUT(end_time); 612 + int err; 613 + 614 + for_each_prime_number_from(page_num, 1, 
max_pages) { 615 + struct drm_i915_gem_object *obj; 616 + u64 size = page_num << PAGE_SHIFT; 617 + struct i915_vma *vma; 618 + unsigned int expected_gtt = 0; 619 + int i; 620 + 621 + obj = fake_huge_pages_object(i915, size, single); 622 + if (IS_ERR(obj)) { 623 + err = PTR_ERR(obj); 624 + break; 625 + } 626 + 627 + if (obj->base.size != size) { 628 + pr_err("obj->base.size=%zd, expected=%llu\n", 629 + obj->base.size, size); 630 + i915_gem_object_put(obj); 631 + err = -EINVAL; 632 + break; 633 + } 634 + 635 + err = i915_gem_object_pin_pages(obj); 636 + if (err) { 637 + i915_gem_object_put(obj); 638 + break; 639 + } 640 + 641 + list_add(&obj->st_link, &objects); 642 + 643 + vma = i915_vma_instance(obj, &ppgtt->base, NULL); 644 + if (IS_ERR(vma)) { 645 + err = PTR_ERR(vma); 646 + break; 647 + } 648 + 649 + err = i915_vma_pin(vma, 0, 0, PIN_USER); 650 + if (err) 651 + break; 652 + 653 + err = igt_check_page_sizes(vma); 654 + if (err) { 655 + i915_vma_unpin(vma); 656 + break; 657 + } 658 + 659 + /* 660 + * Figure out the expected gtt page size knowing that we go from 661 + * largest to smallest page size sg chunks, and that we align to 662 + * the largest page size. 
663 + */ 664 + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { 665 + unsigned int page_size = page_sizes[i]; 666 + 667 + if (HAS_PAGE_SIZES(i915, page_size) && 668 + size >= page_size) { 669 + expected_gtt |= page_size; 670 + size &= page_size-1; 671 + } 672 + } 673 + 674 + GEM_BUG_ON(!expected_gtt); 675 + GEM_BUG_ON(size); 676 + 677 + if (expected_gtt & I915_GTT_PAGE_SIZE_4K) 678 + expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; 679 + 680 + i915_vma_unpin(vma); 681 + 682 + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { 683 + if (!IS_ALIGNED(vma->node.start, 684 + I915_GTT_PAGE_SIZE_2M)) { 685 + pr_err("node.start(%llx) not aligned to 2M\n", 686 + vma->node.start); 687 + err = -EINVAL; 688 + break; 689 + } 690 + 691 + if (!IS_ALIGNED(vma->node.size, 692 + I915_GTT_PAGE_SIZE_2M)) { 693 + pr_err("node.size(%llx) not aligned to 2M\n", 694 + vma->node.size); 695 + err = -EINVAL; 696 + break; 697 + } 698 + } 699 + 700 + if (vma->page_sizes.gtt != expected_gtt) { 701 + pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", 702 + vma->page_sizes.gtt, expected_gtt, 703 + obj->base.size, yesno(!!single)); 704 + err = -EINVAL; 705 + break; 706 + } 707 + 708 + if (igt_timeout(end_time, 709 + "%s timed out at size %zd\n", 710 + __func__, obj->base.size)) 711 + break; 712 + 713 + single = !single; 714 + } 715 + 716 + close_object_list(&objects, ppgtt); 717 + 718 + if (err == -ENOMEM || err == -ENOSPC) 719 + err = 0; 720 + 721 + return err; 722 + } 723 + 724 + static int igt_mock_ppgtt_64K(void *arg) 725 + { 726 + struct i915_hw_ppgtt *ppgtt = arg; 727 + struct drm_i915_private *i915 = ppgtt->base.i915; 728 + struct drm_i915_gem_object *obj; 729 + const struct object_info { 730 + unsigned int size; 731 + unsigned int gtt; 732 + unsigned int offset; 733 + } objects[] = { 734 + /* Cases with forced padding/alignment */ 735 + { 736 + .size = SZ_64K, 737 + .gtt = I915_GTT_PAGE_SIZE_64K, 738 + .offset = 0, 739 + }, 740 + { 741 + .size = SZ_64K + SZ_4K, 742 + .gtt = I915_GTT_PAGE_SIZE_4K, 
743 + .offset = 0, 744 + }, 745 + { 746 + .size = SZ_64K - SZ_4K, 747 + .gtt = I915_GTT_PAGE_SIZE_4K, 748 + .offset = 0, 749 + }, 750 + { 751 + .size = SZ_2M, 752 + .gtt = I915_GTT_PAGE_SIZE_64K, 753 + .offset = 0, 754 + }, 755 + { 756 + .size = SZ_2M - SZ_4K, 757 + .gtt = I915_GTT_PAGE_SIZE_4K, 758 + .offset = 0, 759 + }, 760 + { 761 + .size = SZ_2M + SZ_4K, 762 + .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K, 763 + .offset = 0, 764 + }, 765 + { 766 + .size = SZ_2M + SZ_64K, 767 + .gtt = I915_GTT_PAGE_SIZE_64K, 768 + .offset = 0, 769 + }, 770 + { 771 + .size = SZ_2M - SZ_64K, 772 + .gtt = I915_GTT_PAGE_SIZE_64K, 773 + .offset = 0, 774 + }, 775 + /* Try without any forced padding/alignment */ 776 + { 777 + .size = SZ_64K, 778 + .offset = SZ_2M, 779 + .gtt = I915_GTT_PAGE_SIZE_4K, 780 + }, 781 + { 782 + .size = SZ_128K, 783 + .offset = SZ_2M - SZ_64K, 784 + .gtt = I915_GTT_PAGE_SIZE_4K, 785 + }, 786 + }; 787 + struct i915_vma *vma; 788 + int i, single; 789 + int err; 790 + 791 + /* 792 + * Sanity check some of the trickiness with 64K pages -- either we can 793 + * safely mark the whole page-table(2M block) as 64K, or we have to 794 + * always fallback to 4K. 795 + */ 796 + 797 + if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) 798 + return 0; 799 + 800 + for (i = 0; i < ARRAY_SIZE(objects); ++i) { 801 + unsigned int size = objects[i].size; 802 + unsigned int expected_gtt = objects[i].gtt; 803 + unsigned int offset = objects[i].offset; 804 + unsigned int flags = PIN_USER; 805 + 806 + for (single = 0; single <= 1; single++) { 807 + obj = fake_huge_pages_object(i915, size, !!single); 808 + if (IS_ERR(obj)) 809 + return PTR_ERR(obj); 810 + 811 + err = i915_gem_object_pin_pages(obj); 812 + if (err) 813 + goto out_object_put; 814 + 815 + /* 816 + * Disable 2M pages -- We only want to use 64K/4K pages 817 + * for this test. 
818 + */ 819 + obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; 820 + 821 + vma = i915_vma_instance(obj, &ppgtt->base, NULL); 822 + if (IS_ERR(vma)) { 823 + err = PTR_ERR(vma); 824 + goto out_object_unpin; 825 + } 826 + 827 + if (offset) 828 + flags |= PIN_OFFSET_FIXED | offset; 829 + 830 + err = i915_vma_pin(vma, 0, 0, flags); 831 + if (err) 832 + goto out_vma_close; 833 + 834 + err = igt_check_page_sizes(vma); 835 + if (err) 836 + goto out_vma_unpin; 837 + 838 + if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { 839 + if (!IS_ALIGNED(vma->node.start, 840 + I915_GTT_PAGE_SIZE_2M)) { 841 + pr_err("node.start(%llx) not aligned to 2M\n", 842 + vma->node.start); 843 + err = -EINVAL; 844 + goto out_vma_unpin; 845 + } 846 + 847 + if (!IS_ALIGNED(vma->node.size, 848 + I915_GTT_PAGE_SIZE_2M)) { 849 + pr_err("node.size(%llx) not aligned to 2M\n", 850 + vma->node.size); 851 + err = -EINVAL; 852 + goto out_vma_unpin; 853 + } 854 + } 855 + 856 + if (vma->page_sizes.gtt != expected_gtt) { 857 + pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", 858 + vma->page_sizes.gtt, expected_gtt, i, 859 + yesno(!!single)); 860 + err = -EINVAL; 861 + goto out_vma_unpin; 862 + } 863 + 864 + i915_vma_unpin(vma); 865 + i915_vma_close(vma); 866 + 867 + i915_gem_object_unpin_pages(obj); 868 + i915_gem_object_put(obj); 869 + } 870 + } 871 + 872 + return 0; 873 + 874 + out_vma_unpin: 875 + i915_vma_unpin(vma); 876 + out_vma_close: 877 + i915_vma_close(vma); 878 + out_object_unpin: 879 + i915_gem_object_unpin_pages(obj); 880 + out_object_put: 881 + i915_gem_object_put(obj); 882 + 883 + return err; 884 + } 885 + 886 + static struct i915_vma * 887 + gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) 888 + { 889 + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); 890 + const int gen = INTEL_GEN(vma->vm->i915); 891 + unsigned int count = vma->size >> PAGE_SHIFT; 892 + struct drm_i915_gem_object *obj; 893 + struct i915_vma *batch; 894 + unsigned int size; 895 + u32 *cmd; 896 + 
int n; 897 + int err; 898 + 899 + size = (1 + 4 * count) * sizeof(u32); 900 + size = round_up(size, PAGE_SIZE); 901 + obj = i915_gem_object_create_internal(i915, size); 902 + if (IS_ERR(obj)) 903 + return ERR_CAST(obj); 904 + 905 + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); 906 + if (IS_ERR(cmd)) { 907 + err = PTR_ERR(cmd); 908 + goto err; 909 + } 910 + 911 + offset += vma->node.start; 912 + 913 + for (n = 0; n < count; n++) { 914 + if (gen >= 8) { 915 + *cmd++ = MI_STORE_DWORD_IMM_GEN4; 916 + *cmd++ = lower_32_bits(offset); 917 + *cmd++ = upper_32_bits(offset); 918 + *cmd++ = val; 919 + } else if (gen >= 4) { 920 + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | 921 + (gen < 6 ? 1 << 22 : 0); 922 + *cmd++ = 0; 923 + *cmd++ = offset; 924 + *cmd++ = val; 925 + } else { 926 + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; 927 + *cmd++ = offset; 928 + *cmd++ = val; 929 + } 930 + 931 + offset += PAGE_SIZE; 932 + } 933 + 934 + *cmd = MI_BATCH_BUFFER_END; 935 + 936 + i915_gem_object_unpin_map(obj); 937 + 938 + err = i915_gem_object_set_to_gtt_domain(obj, false); 939 + if (err) 940 + goto err; 941 + 942 + batch = i915_vma_instance(obj, vma->vm, NULL); 943 + if (IS_ERR(batch)) { 944 + err = PTR_ERR(batch); 945 + goto err; 946 + } 947 + 948 + err = i915_vma_pin(batch, 0, 0, PIN_USER); 949 + if (err) 950 + goto err; 951 + 952 + return batch; 953 + 954 + err: 955 + i915_gem_object_put(obj); 956 + 957 + return ERR_PTR(err); 958 + } 959 + 960 + static int gpu_write(struct i915_vma *vma, 961 + struct i915_gem_context *ctx, 962 + struct intel_engine_cs *engine, 963 + u32 dword, 964 + u32 value) 965 + { 966 + struct drm_i915_gem_request *rq; 967 + struct i915_vma *batch; 968 + int flags = 0; 969 + int err; 970 + 971 + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); 972 + 973 + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); 974 + if (err) 975 + return err; 976 + 977 + rq = i915_gem_request_alloc(engine, ctx); 978 + if (IS_ERR(rq)) 979 + return PTR_ERR(rq); 980 + 981 + batch = 
gpu_write_dw(vma, dword * sizeof(u32), value); 982 + if (IS_ERR(batch)) { 983 + err = PTR_ERR(batch); 984 + goto err_request; 985 + } 986 + 987 + i915_vma_move_to_active(batch, rq, 0); 988 + i915_gem_object_set_active_reference(batch->obj); 989 + i915_vma_unpin(batch); 990 + i915_vma_close(batch); 991 + 992 + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 993 + if (err) 994 + goto err_request; 995 + 996 + err = i915_switch_context(rq); 997 + if (err) 998 + goto err_request; 999 + 1000 + err = rq->engine->emit_bb_start(rq, 1001 + batch->node.start, batch->node.size, 1002 + flags); 1003 + if (err) 1004 + goto err_request; 1005 + 1006 + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 1007 + 1008 + reservation_object_lock(vma->resv, NULL); 1009 + reservation_object_add_excl_fence(vma->resv, &rq->fence); 1010 + reservation_object_unlock(vma->resv); 1011 + 1012 + err_request: 1013 + __i915_add_request(rq, err == 0); 1014 + 1015 + return err; 1016 + } 1017 + 1018 + static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) 1019 + { 1020 + unsigned int needs_flush; 1021 + unsigned long n; 1022 + int err; 1023 + 1024 + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); 1025 + if (err) 1026 + return err; 1027 + 1028 + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { 1029 + u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); 1030 + 1031 + if (needs_flush & CLFLUSH_BEFORE) 1032 + drm_clflush_virt_range(ptr, PAGE_SIZE); 1033 + 1034 + if (ptr[dword] != val) { 1035 + pr_err("n=%lu ptr[%u]=%u, val=%u\n", 1036 + n, dword, ptr[dword], val); 1037 + kunmap_atomic(ptr); 1038 + err = -EINVAL; 1039 + break; 1040 + } 1041 + 1042 + kunmap_atomic(ptr); 1043 + } 1044 + 1045 + i915_gem_obj_finish_shmem_access(obj); 1046 + 1047 + return err; 1048 + } 1049 + 1050 + static int igt_write_huge(struct i915_gem_context *ctx, 1051 + struct drm_i915_gem_object *obj) 1052 + { 1053 + struct drm_i915_private *i915 = to_i915(obj->base.dev); 1054 + struct 
i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; 1055 + struct intel_engine_cs *engine; 1056 + struct i915_vma *vma; 1057 + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; 1058 + unsigned int max_page_size; 1059 + unsigned int id; 1060 + u64 max; 1061 + u64 num; 1062 + u64 size; 1063 + int err = 0; 1064 + 1065 + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1066 + 1067 + size = obj->base.size; 1068 + if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) 1069 + size = round_up(size, I915_GTT_PAGE_SIZE_2M); 1070 + 1071 + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); 1072 + max = div_u64((vm->total - size), max_page_size); 1073 + 1074 + vma = i915_vma_instance(obj, vm, NULL); 1075 + if (IS_ERR(vma)) 1076 + return PTR_ERR(vma); 1077 + 1078 + for_each_engine(engine, i915, id) { 1079 + IGT_TIMEOUT(end_time); 1080 + 1081 + if (!intel_engine_can_store_dword(engine)) { 1082 + pr_info("store-dword-imm not supported on engine=%u\n", 1083 + id); 1084 + continue; 1085 + } 1086 + 1087 + /* 1088 + * Try various offsets until we timeout -- we want to avoid 1089 + * issues hidden by effectively always using offset = 0. 1090 + */ 1091 + for_each_prime_number_from(num, 0, max) { 1092 + u64 offset = num * max_page_size; 1093 + u32 dword; 1094 + 1095 + err = i915_vma_unbind(vma); 1096 + if (err) 1097 + goto out_vma_close; 1098 + 1099 + err = i915_vma_pin(vma, size, max_page_size, flags | offset); 1100 + if (err) { 1101 + /* 1102 + * The ggtt may have some pages reserved so 1103 + * refrain from erroring out. 
1104 + */ 1105 + if (err == -ENOSPC && i915_is_ggtt(vm)) { 1106 + err = 0; 1107 + continue; 1108 + } 1109 + 1110 + goto out_vma_close; 1111 + } 1112 + 1113 + err = igt_check_page_sizes(vma); 1114 + if (err) 1115 + goto out_vma_unpin; 1116 + 1117 + dword = offset_in_page(num) / 4; 1118 + 1119 + err = gpu_write(vma, ctx, engine, dword, num + 1); 1120 + if (err) { 1121 + pr_err("gpu-write failed at offset=%llx", offset); 1122 + goto out_vma_unpin; 1123 + } 1124 + 1125 + err = cpu_check(obj, dword, num + 1); 1126 + if (err) { 1127 + pr_err("cpu-check failed at offset=%llx", offset); 1128 + goto out_vma_unpin; 1129 + } 1130 + 1131 + i915_vma_unpin(vma); 1132 + 1133 + if (num > 0 && 1134 + igt_timeout(end_time, 1135 + "%s timed out on engine=%u at offset=%llx, max_page_size=%x\n", 1136 + __func__, id, offset, max_page_size)) 1137 + break; 1138 + } 1139 + } 1140 + 1141 + out_vma_unpin: 1142 + if (i915_vma_is_pinned(vma)) 1143 + i915_vma_unpin(vma); 1144 + out_vma_close: 1145 + i915_vma_close(vma); 1146 + 1147 + return err; 1148 + } 1149 + 1150 + static int igt_ppgtt_exhaust_huge(void *arg) 1151 + { 1152 + struct i915_gem_context *ctx = arg; 1153 + struct drm_i915_private *i915 = ctx->i915; 1154 + unsigned long supported = INTEL_INFO(i915)->page_sizes; 1155 + static unsigned int pages[ARRAY_SIZE(page_sizes)]; 1156 + struct drm_i915_gem_object *obj; 1157 + unsigned int size_mask; 1158 + unsigned int page_mask; 1159 + int n, i; 1160 + int err; 1161 + 1162 + /* 1163 + * Sanity check creating objects with a varying mix of page sizes -- 1164 + * ensuring that our writes lands in the right place. 
1165 + */ 1166 + 1167 + n = 0; 1168 + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) 1169 + pages[n++] = BIT(i); 1170 + 1171 + for (size_mask = 2; size_mask < BIT(n); size_mask++) { 1172 + unsigned int size = 0; 1173 + 1174 + for (i = 0; i < n; i++) { 1175 + if (size_mask & BIT(i)) 1176 + size |= pages[i]; 1177 + } 1178 + 1179 + /* 1180 + * For our page mask we want to enumerate all the page-size 1181 + * combinations which will fit into our chosen object size. 1182 + */ 1183 + for (page_mask = 2; page_mask <= size_mask; page_mask++) { 1184 + unsigned int page_sizes = 0; 1185 + 1186 + for (i = 0; i < n; i++) { 1187 + if (page_mask & BIT(i)) 1188 + page_sizes |= pages[i]; 1189 + } 1190 + 1191 + /* 1192 + * Ensure that we can actually fill the given object 1193 + * with our chosen page mask. 1194 + */ 1195 + if (!IS_ALIGNED(size, BIT(__ffs(page_sizes)))) 1196 + continue; 1197 + 1198 + obj = huge_pages_object(i915, size, page_sizes); 1199 + if (IS_ERR(obj)) { 1200 + err = PTR_ERR(obj); 1201 + goto out_device; 1202 + } 1203 + 1204 + err = i915_gem_object_pin_pages(obj); 1205 + if (err) { 1206 + i915_gem_object_put(obj); 1207 + 1208 + if (err == -ENOMEM) { 1209 + pr_info("unable to get pages, size=%u, pages=%u\n", 1210 + size, page_sizes); 1211 + err = 0; 1212 + break; 1213 + } 1214 + 1215 + pr_err("pin_pages failed, size=%u, pages=%u\n", 1216 + size_mask, page_mask); 1217 + 1218 + goto out_device; 1219 + } 1220 + 1221 + /* Force the page-size for the gtt insertion */ 1222 + obj->mm.page_sizes.sg = page_sizes; 1223 + 1224 + err = igt_write_huge(ctx, obj); 1225 + if (err) { 1226 + pr_err("exhaust write-huge failed with size=%u\n", 1227 + size); 1228 + goto out_unpin; 1229 + } 1230 + 1231 + i915_gem_object_unpin_pages(obj); 1232 + i915_gem_object_put(obj); 1233 + } 1234 + } 1235 + 1236 + goto out_device; 1237 + 1238 + out_unpin: 1239 + i915_gem_object_unpin_pages(obj); 1240 + i915_gem_object_put(obj); 1241 + out_device: 1242 + 
mkwrite_device_info(i915)->page_sizes = supported; 1243 + 1244 + return err; 1245 + } 1246 + 1247 + static int igt_ppgtt_internal_huge(void *arg) 1248 + { 1249 + struct i915_gem_context *ctx = arg; 1250 + struct drm_i915_private *i915 = ctx->i915; 1251 + struct drm_i915_gem_object *obj; 1252 + static const unsigned int sizes[] = { 1253 + SZ_64K, 1254 + SZ_128K, 1255 + SZ_256K, 1256 + SZ_512K, 1257 + SZ_1M, 1258 + SZ_2M, 1259 + }; 1260 + int i; 1261 + int err; 1262 + 1263 + /* 1264 + * Sanity check that the HW uses huge pages correctly through internal 1265 + * -- ensure that our writes land in the right place. 1266 + */ 1267 + 1268 + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { 1269 + unsigned int size = sizes[i]; 1270 + 1271 + obj = i915_gem_object_create_internal(i915, size); 1272 + if (IS_ERR(obj)) 1273 + return PTR_ERR(obj); 1274 + 1275 + err = i915_gem_object_pin_pages(obj); 1276 + if (err) 1277 + goto out_put; 1278 + 1279 + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { 1280 + pr_info("internal unable to allocate huge-page(s) with size=%u\n", 1281 + size); 1282 + goto out_unpin; 1283 + } 1284 + 1285 + err = igt_write_huge(ctx, obj); 1286 + if (err) { 1287 + pr_err("internal write-huge failed with size=%u\n", 1288 + size); 1289 + goto out_unpin; 1290 + } 1291 + 1292 + i915_gem_object_unpin_pages(obj); 1293 + i915_gem_object_put(obj); 1294 + } 1295 + 1296 + return 0; 1297 + 1298 + out_unpin: 1299 + i915_gem_object_unpin_pages(obj); 1300 + out_put: 1301 + i915_gem_object_put(obj); 1302 + 1303 + return err; 1304 + } 1305 + 1306 + static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) 1307 + { 1308 + return i915->mm.gemfs && has_transparent_hugepage(); 1309 + } 1310 + 1311 + static int igt_ppgtt_gemfs_huge(void *arg) 1312 + { 1313 + struct i915_gem_context *ctx = arg; 1314 + struct drm_i915_private *i915 = ctx->i915; 1315 + struct drm_i915_gem_object *obj; 1316 + static const unsigned int sizes[] = { 1317 + SZ_2M, 1318 + SZ_4M, 1319 + SZ_8M, 
1320 + SZ_16M, 1321 + SZ_32M, 1322 + }; 1323 + int i; 1324 + int err; 1325 + 1326 + /* 1327 + * Sanity check that the HW uses huge pages correctly through gemfs -- 1328 + * ensure that our writes land in the right place. 1329 + */ 1330 + 1331 + if (!igt_can_allocate_thp(i915)) { 1332 + pr_info("missing THP support, skipping\n"); 1333 + return 0; 1334 + } 1335 + 1336 + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { 1337 + unsigned int size = sizes[i]; 1338 + 1339 + obj = i915_gem_object_create(i915, size); 1340 + if (IS_ERR(obj)) 1341 + return PTR_ERR(obj); 1342 + 1343 + err = i915_gem_object_pin_pages(obj); 1344 + if (err) 1345 + goto out_put; 1346 + 1347 + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { 1348 + pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", 1349 + size); 1350 + goto out_unpin; 1351 + } 1352 + 1353 + err = igt_write_huge(ctx, obj); 1354 + if (err) { 1355 + pr_err("gemfs write-huge failed with size=%u\n", 1356 + size); 1357 + goto out_unpin; 1358 + } 1359 + 1360 + i915_gem_object_unpin_pages(obj); 1361 + i915_gem_object_put(obj); 1362 + } 1363 + 1364 + return 0; 1365 + 1366 + out_unpin: 1367 + i915_gem_object_unpin_pages(obj); 1368 + out_put: 1369 + i915_gem_object_put(obj); 1370 + 1371 + return err; 1372 + } 1373 + 1374 + static int igt_ppgtt_pin_update(void *arg) 1375 + { 1376 + struct i915_gem_context *ctx = arg; 1377 + struct drm_i915_private *dev_priv = ctx->i915; 1378 + unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; 1379 + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; 1380 + struct drm_i915_gem_object *obj; 1381 + struct i915_vma *vma; 1382 + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; 1383 + int first, last; 1384 + int err; 1385 + 1386 + /* 1387 + * Make sure there's no funny business when doing a PIN_UPDATE -- in the 1388 + * past we had a subtle issue with being able to incorrectly do multiple 1389 + * alloc va ranges on the same object when doing a PIN_UPDATE, which 1390 + * resulted 
in some pretty nasty bugs, though only when using 1391 + * huge-gtt-pages. 1392 + */ 1393 + 1394 + if (!USES_FULL_48BIT_PPGTT(dev_priv)) { 1395 + pr_info("48b PPGTT not supported, skipping\n"); 1396 + return 0; 1397 + } 1398 + 1399 + first = ilog2(I915_GTT_PAGE_SIZE_64K); 1400 + last = ilog2(I915_GTT_PAGE_SIZE_2M); 1401 + 1402 + for_each_set_bit_from(first, &supported, last + 1) { 1403 + unsigned int page_size = BIT(first); 1404 + 1405 + obj = i915_gem_object_create_internal(dev_priv, page_size); 1406 + if (IS_ERR(obj)) 1407 + return PTR_ERR(obj); 1408 + 1409 + vma = i915_vma_instance(obj, &ppgtt->base, NULL); 1410 + if (IS_ERR(vma)) { 1411 + err = PTR_ERR(vma); 1412 + goto out_put; 1413 + } 1414 + 1415 + err = i915_vma_pin(vma, SZ_2M, 0, flags); 1416 + if (err) 1417 + goto out_close; 1418 + 1419 + if (vma->page_sizes.sg < page_size) { 1420 + pr_info("Unable to allocate page-size %x, finishing test early\n", 1421 + page_size); 1422 + goto out_unpin; 1423 + } 1424 + 1425 + err = igt_check_page_sizes(vma); 1426 + if (err) 1427 + goto out_unpin; 1428 + 1429 + if (vma->page_sizes.gtt != page_size) { 1430 + dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0); 1431 + 1432 + /* 1433 + * The only valid reason for this to ever fail would be 1434 + * if the dma-mapper screwed us over when we did the 1435 + * dma_map_sg(), since it has the final say over the dma 1436 + * address. 
1437 + */ 1438 + if (IS_ALIGNED(addr, page_size)) { 1439 + pr_err("page_sizes.gtt=%u, expected=%u\n", 1440 + vma->page_sizes.gtt, page_size); 1441 + err = -EINVAL; 1442 + } else { 1443 + pr_info("dma address misaligned, finishing test early\n"); 1444 + } 1445 + 1446 + goto out_unpin; 1447 + } 1448 + 1449 + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); 1450 + if (err) 1451 + goto out_unpin; 1452 + 1453 + i915_vma_unpin(vma); 1454 + i915_vma_close(vma); 1455 + 1456 + i915_gem_object_put(obj); 1457 + } 1458 + 1459 + obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); 1460 + if (IS_ERR(obj)) 1461 + return PTR_ERR(obj); 1462 + 1463 + vma = i915_vma_instance(obj, &ppgtt->base, NULL); 1464 + if (IS_ERR(vma)) { 1465 + err = PTR_ERR(vma); 1466 + goto out_put; 1467 + } 1468 + 1469 + err = i915_vma_pin(vma, 0, 0, flags); 1470 + if (err) 1471 + goto out_close; 1472 + 1473 + /* 1474 + * Make sure we don't end up with something like where the pde is still 1475 + * pointing to the 2M page, and the pt we just filled-in is dangling -- 1476 + * we can check this by writing to the first page where it would then 1477 + * land in the now stale 2M page. 1478 + */ 1479 + 1480 + err = gpu_write(vma, ctx, dev_priv->engine[RCS], 0, 0xdeadbeaf); 1481 + if (err) 1482 + goto out_unpin; 1483 + 1484 + err = cpu_check(obj, 0, 0xdeadbeaf); 1485 + 1486 + out_unpin: 1487 + i915_vma_unpin(vma); 1488 + out_close: 1489 + i915_vma_close(vma); 1490 + out_put: 1491 + i915_gem_object_put(obj); 1492 + 1493 + return err; 1494 + } 1495 + 1496 + static int igt_tmpfs_fallback(void *arg) 1497 + { 1498 + struct i915_gem_context *ctx = arg; 1499 + struct drm_i915_private *i915 = ctx->i915; 1500 + struct vfsmount *gemfs = i915->mm.gemfs; 1501 + struct i915_address_space *vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &i915->ggtt.base; 1502 + struct drm_i915_gem_object *obj; 1503 + struct i915_vma *vma; 1504 + u32 *vaddr; 1505 + int err = 0; 1506 + 1507 + /* 1508 + * Make sure that we don't burst into a ball of flames upon falling back 1509 + * to tmpfs, which we rely on if on the off-chance we encouter a failure 1510 + * when setting up gemfs. 1511 + */ 1512 + 1513 + i915->mm.gemfs = NULL; 1514 + 1515 + obj = i915_gem_object_create(i915, PAGE_SIZE); 1516 + if (IS_ERR(obj)) { 1517 + err = PTR_ERR(obj); 1518 + goto out_restore; 1519 + } 1520 + 1521 + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); 1522 + if (IS_ERR(vaddr)) { 1523 + err = PTR_ERR(vaddr); 1524 + goto out_put; 1525 + } 1526 + *vaddr = 0xdeadbeaf; 1527 + 1528 + i915_gem_object_unpin_map(obj); 1529 + 1530 + vma = i915_vma_instance(obj, vm, NULL); 1531 + if (IS_ERR(vma)) { 1532 + err = PTR_ERR(vma); 1533 + goto out_put; 1534 + } 1535 + 1536 + err = i915_vma_pin(vma, 0, 0, PIN_USER); 1537 + if (err) 1538 + goto out_close; 1539 + 1540 + err = igt_check_page_sizes(vma); 1541 + 1542 + i915_vma_unpin(vma); 1543 + out_close: 1544 + i915_vma_close(vma); 1545 + out_put: 1546 + i915_gem_object_put(obj); 1547 + out_restore: 1548 + i915->mm.gemfs = gemfs; 1549 + 1550 + return err; 1551 + } 1552 + 1553 + static int igt_shrink_thp(void *arg) 1554 + { 1555 + struct i915_gem_context *ctx = arg; 1556 + struct drm_i915_private *i915 = ctx->i915; 1557 + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; 1558 + struct drm_i915_gem_object *obj; 1559 + struct i915_vma *vma; 1560 + unsigned int flags = PIN_USER; 1561 + int err; 1562 + 1563 + /* 1564 + * Sanity check shrinking huge-paged object -- make sure nothing blows 1565 + * up. 
1566 + */ 1567 + 1568 + if (!igt_can_allocate_thp(i915)) { 1569 + pr_info("missing THP support, skipping\n"); 1570 + return 0; 1571 + } 1572 + 1573 + obj = i915_gem_object_create(i915, SZ_2M); 1574 + if (IS_ERR(obj)) 1575 + return PTR_ERR(obj); 1576 + 1577 + vma = i915_vma_instance(obj, vm, NULL); 1578 + if (IS_ERR(vma)) { 1579 + err = PTR_ERR(vma); 1580 + goto out_put; 1581 + } 1582 + 1583 + err = i915_vma_pin(vma, 0, 0, flags); 1584 + if (err) 1585 + goto out_close; 1586 + 1587 + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { 1588 + pr_info("failed to allocate THP, finishing test early\n"); 1589 + goto out_unpin; 1590 + } 1591 + 1592 + err = igt_check_page_sizes(vma); 1593 + if (err) 1594 + goto out_unpin; 1595 + 1596 + err = gpu_write(vma, ctx, i915->engine[RCS], 0, 0xdeadbeaf); 1597 + if (err) 1598 + goto out_unpin; 1599 + 1600 + i915_vma_unpin(vma); 1601 + 1602 + /* 1603 + * Now that the pages are *unpinned* shrink-all should invoke 1604 + * shmem to truncate our pages. 1605 + */ 1606 + i915_gem_shrink_all(i915); 1607 + if (!IS_ERR_OR_NULL(obj->mm.pages)) { 1608 + pr_err("shrink-all didn't truncate the pages\n"); 1609 + err = -EINVAL; 1610 + goto out_close; 1611 + } 1612 + 1613 + if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { 1614 + pr_err("residual page-size bits left\n"); 1615 + err = -EINVAL; 1616 + goto out_close; 1617 + } 1618 + 1619 + err = i915_vma_pin(vma, 0, 0, flags); 1620 + if (err) 1621 + goto out_close; 1622 + 1623 + err = cpu_check(obj, 0, 0xdeadbeaf); 1624 + 1625 + out_unpin: 1626 + i915_vma_unpin(vma); 1627 + out_close: 1628 + i915_vma_close(vma); 1629 + out_put: 1630 + i915_gem_object_put(obj); 1631 + 1632 + return err; 1633 + } 1634 + 1635 + int i915_gem_huge_page_mock_selftests(void) 1636 + { 1637 + static const struct i915_subtest tests[] = { 1638 + SUBTEST(igt_mock_exhaust_device_supported_pages), 1639 + SUBTEST(igt_mock_ppgtt_misaligned_dma), 1640 + SUBTEST(igt_mock_ppgtt_huge_fill), 1641 + SUBTEST(igt_mock_ppgtt_64K), 
1642 + }; 1643 + int saved_ppgtt = i915_modparams.enable_ppgtt; 1644 + struct drm_i915_private *dev_priv; 1645 + struct pci_dev *pdev; 1646 + struct i915_hw_ppgtt *ppgtt; 1647 + int err; 1648 + 1649 + dev_priv = mock_gem_device(); 1650 + if (!dev_priv) 1651 + return -ENOMEM; 1652 + 1653 + /* Pretend to be a device which supports the 48b PPGTT */ 1654 + i915_modparams.enable_ppgtt = 3; 1655 + 1656 + pdev = dev_priv->drm.pdev; 1657 + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)); 1658 + 1659 + mutex_lock(&dev_priv->drm.struct_mutex); 1660 + ppgtt = i915_ppgtt_create(dev_priv, ERR_PTR(-ENODEV), "mock"); 1661 + if (IS_ERR(ppgtt)) { 1662 + err = PTR_ERR(ppgtt); 1663 + goto out_unlock; 1664 + } 1665 + 1666 + if (!i915_vm_is_48bit(&ppgtt->base)) { 1667 + pr_err("failed to create 48b PPGTT\n"); 1668 + err = -EINVAL; 1669 + goto out_close; 1670 + } 1671 + 1672 + /* If we were ever hit this then it's time to mock the 64K scratch */ 1673 + if (!i915_vm_has_scratch_64K(&ppgtt->base)) { 1674 + pr_err("PPGTT missing 64K scratch page\n"); 1675 + err = -EINVAL; 1676 + goto out_close; 1677 + } 1678 + 1679 + err = i915_subtests(tests, ppgtt); 1680 + 1681 + out_close: 1682 + i915_ppgtt_close(&ppgtt->base); 1683 + i915_ppgtt_put(ppgtt); 1684 + 1685 + out_unlock: 1686 + mutex_unlock(&dev_priv->drm.struct_mutex); 1687 + 1688 + i915_modparams.enable_ppgtt = saved_ppgtt; 1689 + 1690 + drm_dev_unref(&dev_priv->drm); 1691 + 1692 + return err; 1693 + } 1694 + 1695 + int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) 1696 + { 1697 + static const struct i915_subtest tests[] = { 1698 + SUBTEST(igt_shrink_thp), 1699 + SUBTEST(igt_ppgtt_pin_update), 1700 + SUBTEST(igt_tmpfs_fallback), 1701 + SUBTEST(igt_ppgtt_exhaust_huge), 1702 + SUBTEST(igt_ppgtt_gemfs_huge), 1703 + SUBTEST(igt_ppgtt_internal_huge), 1704 + }; 1705 + struct drm_file *file; 1706 + struct i915_gem_context *ctx; 1707 + int err; 1708 + 1709 + if (!USES_PPGTT(dev_priv)) { 1710 + pr_info("PPGTT not 
supported, skipping live-selftests\n"); 1711 + return 0; 1712 + } 1713 + 1714 + file = mock_file(dev_priv); 1715 + if (IS_ERR(file)) 1716 + return PTR_ERR(file); 1717 + 1718 + mutex_lock(&dev_priv->drm.struct_mutex); 1719 + 1720 + ctx = live_context(dev_priv, file); 1721 + if (IS_ERR(ctx)) { 1722 + err = PTR_ERR(ctx); 1723 + goto out_unlock; 1724 + } 1725 + 1726 + err = i915_subtests(tests, ctx); 1727 + 1728 + out_unlock: 1729 + mutex_unlock(&dev_priv->drm.struct_mutex); 1730 + 1731 + mock_file_free(dev_priv, file); 1732 + 1733 + return err; 1734 + }
+10 -5
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
··· 39 39 kfree(pages); 40 40 } 41 41 42 - static struct sg_table * 43 - fake_get_pages(struct drm_i915_gem_object *obj) 42 + static int fake_get_pages(struct drm_i915_gem_object *obj) 44 43 { 45 44 #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) 46 45 #define PFN_BIAS 0x1000 47 46 struct sg_table *pages; 48 47 struct scatterlist *sg; 48 + unsigned int sg_page_sizes; 49 49 typeof(obj->base.size) rem; 50 50 51 51 pages = kmalloc(sizeof(*pages), GFP); 52 52 if (!pages) 53 - return ERR_PTR(-ENOMEM); 53 + return -ENOMEM; 54 54 55 55 rem = round_up(obj->base.size, BIT(31)) >> 31; 56 56 if (sg_alloc_table(pages, rem, GFP)) { 57 57 kfree(pages); 58 - return ERR_PTR(-ENOMEM); 58 + return -ENOMEM; 59 59 } 60 60 61 + sg_page_sizes = 0; 61 62 rem = obj->base.size; 62 63 for (sg = pages->sgl; sg; sg = sg_next(sg)) { 63 64 unsigned long len = min_t(typeof(rem), rem, BIT(31)); ··· 67 66 sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); 68 67 sg_dma_address(sg) = page_to_phys(sg_page(sg)); 69 68 sg_dma_len(sg) = len; 69 + sg_page_sizes |= len; 70 70 71 71 rem -= len; 72 72 } 73 73 GEM_BUG_ON(rem); 74 74 75 75 obj->mm.madv = I915_MADV_DONTNEED; 76 - return pages; 76 + 77 + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); 78 + 79 + return 0; 77 80 #undef GFP 78 81 } 79 82
-8
drivers/gpu/drm/i915/selftests/i915_gem_object.c
··· 251 251 return PTR_ERR(io); 252 252 } 253 253 254 - err = i915_vma_get_fence(vma); 255 - if (err) { 256 - pr_err("Failed to get fence for partial view: offset=%lu\n", 257 - page); 258 - i915_vma_unpin_iomap(vma); 259 - return err; 260 - } 261 - 262 254 iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); 263 255 i915_vma_unpin_iomap(vma); 264 256
+9 -3
drivers/gpu/drm/i915/selftests/i915_gem_request.c
··· 215 215 } 216 216 i915_gem_request_get(vip); 217 217 i915_add_request(vip); 218 + rcu_read_lock(); 218 219 request->engine->submit_request(request); 220 + rcu_read_unlock(); 219 221 220 222 mutex_unlock(&i915->drm.struct_mutex); 221 223 ··· 420 418 err = PTR_ERR(cmd); 421 419 goto err; 422 420 } 421 + 423 422 *cmd = MI_BATCH_BUFFER_END; 423 + i915_gem_chipset_flush(i915); 424 + 424 425 i915_gem_object_unpin_map(obj); 425 426 426 427 err = i915_gem_object_set_to_gtt_domain(obj, false); ··· 610 605 *cmd++ = lower_32_bits(vma->node.start); 611 606 } 612 607 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ 608 + i915_gem_chipset_flush(i915); 613 609 614 - wmb(); 615 610 i915_gem_object_unpin_map(obj); 616 611 617 612 return vma; ··· 630 625 return PTR_ERR(cmd); 631 626 632 627 *cmd = MI_BATCH_BUFFER_END; 633 - wmb(); 628 + i915_gem_chipset_flush(batch->vm->i915); 634 629 635 630 i915_gem_object_unpin_map(batch->obj); 636 631 ··· 863 858 I915_MAP_WC); 864 859 if (!IS_ERR(cmd)) { 865 860 *cmd = MI_BATCH_BUFFER_END; 866 - wmb(); 861 + i915_gem_chipset_flush(i915); 862 + 867 863 i915_gem_object_unpin_map(request[id]->batch->obj); 868 864 } 869 865
+1
drivers/gpu/drm/i915/selftests/i915_live_selftests.h
··· 15 15 selftest(dmabuf, i915_gem_dmabuf_live_selftests) 16 16 selftest(coherency, i915_gem_coherency_live_selftests) 17 17 selftest(gtt, i915_gem_gtt_live_selftests) 18 + selftest(hugepages, i915_gem_huge_page_live_selftests) 18 19 selftest(contexts, i915_gem_context_live_selftests) 19 20 selftest(hangcheck, intel_hangcheck_live_selftests)
+1
drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
··· 21 21 selftest(vma, i915_vma_mock_selftests) 22 22 selftest(evict, i915_gem_evict_mock_selftests) 23 23 selftest(gtt, i915_gem_gtt_mock_selftests) 24 + selftest(hugepages, i915_gem_huge_page_mock_selftests)
+23 -3
drivers/gpu/drm/i915/selftests/intel_hangcheck.c
··· 165 165 *batch++ = lower_32_bits(vma->node.start); 166 166 } 167 167 *batch++ = MI_BATCH_BUFFER_END; /* not reached */ 168 + i915_gem_chipset_flush(h->i915); 168 169 169 170 flags = 0; 170 171 if (INTEL_GEN(vm->i915) <= 5) ··· 232 231 static void hang_fini(struct hang *h) 233 232 { 234 233 *h->batch = MI_BATCH_BUFFER_END; 235 - wmb(); 234 + i915_gem_chipset_flush(h->i915); 236 235 237 236 i915_gem_object_unpin_map(h->obj); 238 237 i915_gem_object_put(h->obj); ··· 276 275 i915_gem_request_get(rq); 277 276 278 277 *h.batch = MI_BATCH_BUFFER_END; 278 + i915_gem_chipset_flush(i915); 279 + 279 280 __i915_add_request(rq, true); 280 281 281 282 timeout = i915_wait_request(rq, ··· 624 621 __i915_add_request(rq, true); 625 622 626 623 if (!wait_for_hang(&h, rq)) { 624 + struct drm_printer p = drm_info_printer(i915->drm.dev); 625 + 627 626 pr_err("Failed to start request %x, at %x\n", 628 627 rq->fence.seqno, hws_seqno(&h, rq)); 628 + intel_engine_dump(rq->engine, &p); 629 629 630 630 i915_reset(i915, 0); 631 631 i915_gem_set_wedged(i915); ··· 719 713 __i915_add_request(rq, true); 720 714 721 715 if (!wait_for_hang(&h, prev)) { 716 + struct drm_printer p = drm_info_printer(i915->drm.dev); 717 + 722 718 pr_err("Failed to start request %x, at %x\n", 723 719 prev->fence.seqno, hws_seqno(&h, prev)); 720 + intel_engine_dump(rq->engine, &p); 721 + 724 722 i915_gem_request_put(rq); 725 723 i915_gem_request_put(prev); 726 724 ··· 775 765 pr_info("%s: Completed %d resets\n", engine->name, count); 776 766 777 767 *h.batch = MI_BATCH_BUFFER_END; 778 - wmb(); 768 + i915_gem_chipset_flush(i915); 779 769 780 770 i915_gem_request_put(prev); 781 771 } ··· 825 815 __i915_add_request(rq, true); 826 816 827 817 if (!wait_for_hang(&h, rq)) { 818 + struct drm_printer p = drm_info_printer(i915->drm.dev); 819 + 828 820 pr_err("Failed to start request %x, at %x\n", 829 821 rq->fence.seqno, hws_seqno(&h, rq)); 822 + intel_engine_dump(rq->engine, &p); 830 823 831 824 i915_reset(i915, 0); 832 825 
i915_gem_set_wedged(i915); ··· 878 865 SUBTEST(igt_reset_queue), 879 866 SUBTEST(igt_handle_error), 880 867 }; 868 + int err; 881 869 882 870 if (!intel_has_gpu_reset(i915)) 883 871 return 0; 884 872 885 - return i915_subtests(tests, i915); 873 + intel_runtime_pm_get(i915); 874 + 875 + err = i915_subtests(tests, i915); 876 + 877 + intel_runtime_pm_put(i915); 878 + 879 + return err; 886 880 }
+16 -1
drivers/gpu/drm/i915/selftests/mock_gem_device.c
··· 83 83 kmem_cache_destroy(i915->vmas); 84 84 kmem_cache_destroy(i915->objects); 85 85 86 + i915_gemfs_fini(i915); 87 + 86 88 drm_dev_fini(&i915->drm); 87 89 put_device(&i915->drm.pdev->dev); 88 90 } ··· 148 146 dev_set_name(&pdev->dev, "mock"); 149 147 dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); 150 148 151 - #if IS_ENABLED(CONFIG_IOMMU_API) 149 + #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) 152 150 /* hack to disable iommu for the fake device; force identity mapping */ 153 151 pdev->dev.archdata.iommu = (void *)-1; 154 152 #endif ··· 173 171 drm_mode_config_init(&i915->drm); 174 172 175 173 mkwrite_device_info(i915)->gen = -1; 174 + 175 + mkwrite_device_info(i915)->page_sizes = 176 + I915_GTT_PAGE_SIZE_4K | 177 + I915_GTT_PAGE_SIZE_64K | 178 + I915_GTT_PAGE_SIZE_2M; 176 179 177 180 spin_lock_init(&i915->mm.object_stat_lock); 178 181 mock_uncore_init(i915); ··· 246 239 if (!i915->kernel_context) 247 240 goto err_engine; 248 241 242 + i915->preempt_context = mock_context(i915, NULL); 243 + if (!i915->preempt_context) 244 + goto err_kernel_context; 245 + 246 + WARN_ON(i915_gemfs_init(i915)); 247 + 249 248 return i915; 250 249 250 + err_kernel_context: 251 + i915_gem_context_put(i915->kernel_context); 251 252 err_engine: 252 253 for_each_engine(engine, i915, id) 253 254 mock_engine_free(engine);
+4 -7
drivers/gpu/drm/i915/selftests/mock_gtt.c
··· 43 43 u32 flags) 44 44 { 45 45 GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); 46 - vma->pages = vma->obj->mm.pages; 47 46 vma->flags |= I915_VMA_LOCAL_BIND; 48 47 return 0; 49 48 } ··· 83 84 ppgtt->base.insert_entries = mock_insert_entries; 84 85 ppgtt->base.bind_vma = mock_bind_ppgtt; 85 86 ppgtt->base.unbind_vma = mock_unbind_ppgtt; 87 + ppgtt->base.set_pages = ppgtt_set_pages; 88 + ppgtt->base.clear_pages = clear_pages; 86 89 ppgtt->base.cleanup = mock_cleanup; 87 90 88 91 return ppgtt; ··· 94 93 enum i915_cache_level cache_level, 95 94 u32 flags) 96 95 { 97 - int err; 98 - 99 - err = i915_get_ggtt_vma_pages(vma); 100 - if (err) 101 - return err; 102 - 103 96 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 104 97 return 0; 105 98 } ··· 119 124 ggtt->base.insert_entries = mock_insert_entries; 120 125 ggtt->base.bind_vma = mock_bind_ggtt; 121 126 ggtt->base.unbind_vma = mock_unbind_ggtt; 127 + ggtt->base.set_pages = ggtt_set_pages; 128 + ggtt->base.clear_pages = clear_pages; 122 129 ggtt->base.cleanup = mock_cleanup; 123 130 124 131 i915_address_space_init(&ggtt->base, i915, "global");
+15
drivers/gpu/drm/i915/selftests/scatterlist.c
··· 189 189 return 1 + (prandom_u32_state(rnd) % 1024); 190 190 } 191 191 192 + static unsigned int random_page_size_pages(unsigned long n, 193 + unsigned long count, 194 + struct rnd_state *rnd) 195 + { 196 + /* 4K, 64K, 2M */ 197 + static unsigned int page_count[] = { 198 + BIT(12) >> PAGE_SHIFT, 199 + BIT(16) >> PAGE_SHIFT, 200 + BIT(21) >> PAGE_SHIFT, 201 + }; 202 + 203 + return page_count[(prandom_u32_state(rnd) % 3)]; 204 + } 205 + 192 206 static inline bool page_contiguous(struct page *first, 193 207 struct page *last, 194 208 unsigned long npages) ··· 266 252 grow, 267 253 shrink, 268 254 random, 255 + random_page_size_pages, 269 256 NULL, 270 257 }; 271 258
+2
include/linux/shmem_fs.h
··· 53 53 loff_t size, unsigned long flags); 54 54 extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, 55 55 unsigned long flags); 56 + extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, 57 + const char *name, loff_t size, unsigned long flags); 56 58 extern int shmem_zero_setup(struct vm_area_struct *); 57 59 extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, 58 60 unsigned long len, unsigned long pgoff, unsigned long flags);
+16 -2
include/uapi/drm/i915_drm.h
··· 397 397 #define I915_PARAM_MIN_EU_IN_POOL 39 398 398 #define I915_PARAM_MMAP_GTT_VERSION 40 399 399 400 - /* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution 400 + /* 401 + * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution 401 402 * priorities and the driver will attempt to execute batches in priority order. 403 + * The param returns a capability bitmask, nonzero implies that the scheduler 404 + * is enabled, with different features present according to the mask. 405 + * 406 + * The initial priority for each batch is supplied by the context and is 407 + * controlled via I915_CONTEXT_PARAM_PRIORITY. 402 408 */ 403 409 #define I915_PARAM_HAS_SCHEDULER 41 410 + #define I915_SCHEDULER_CAP_ENABLED (1ul << 0) 411 + #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) 412 + #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) 413 + 404 414 #define I915_PARAM_HUC_STATUS 42 405 415 406 416 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of ··· 1318 1308 * be specified 1319 1309 */ 1320 1310 __u64 offset; 1321 - #define I915_REG_READ_8B_WA BIT(0) 1311 + #define I915_REG_READ_8B_WA (1ul << 0) 1322 1312 1323 1313 __u64 val; /* Return value */ 1324 1314 }; ··· 1370 1360 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 1371 1361 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 1372 1362 #define I915_CONTEXT_PARAM_BANNABLE 0x5 1363 + #define I915_CONTEXT_PARAM_PRIORITY 0x6 1364 + #define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ 1365 + #define I915_CONTEXT_DEFAULT_PRIORITY 0 1366 + #define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ 1373 1367 __u64 value; 1374 1368 }; 1375 1369
+22 -8
mm/shmem.c
··· 4183 4183 .d_dname = simple_dname 4184 4184 }; 4185 4185 4186 - static struct file *__shmem_file_setup(const char *name, loff_t size, 4186 + static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size, 4187 4187 unsigned long flags, unsigned int i_flags) 4188 4188 { 4189 4189 struct file *res; ··· 4192 4192 struct super_block *sb; 4193 4193 struct qstr this; 4194 4194 4195 - if (IS_ERR(shm_mnt)) 4196 - return ERR_CAST(shm_mnt); 4195 + if (IS_ERR(mnt)) 4196 + return ERR_CAST(mnt); 4197 4197 4198 4198 if (size < 0 || size > MAX_LFS_FILESIZE) 4199 4199 return ERR_PTR(-EINVAL); ··· 4205 4205 this.name = name; 4206 4206 this.len = strlen(name); 4207 4207 this.hash = 0; /* will go */ 4208 - sb = shm_mnt->mnt_sb; 4209 - path.mnt = mntget(shm_mnt); 4208 + sb = mnt->mnt_sb; 4209 + path.mnt = mntget(mnt); 4210 4210 path.dentry = d_alloc_pseudo(sb, &this); 4211 4211 if (!path.dentry) 4212 4212 goto put_memory; ··· 4251 4251 */ 4252 4252 struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) 4253 4253 { 4254 - return __shmem_file_setup(name, size, flags, S_PRIVATE); 4254 + return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE); 4255 4255 } 4256 4256 4257 4257 /** ··· 4262 4262 */ 4263 4263 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) 4264 4264 { 4265 - return __shmem_file_setup(name, size, flags, 0); 4265 + return __shmem_file_setup(shm_mnt, name, size, flags, 0); 4266 4266 } 4267 4267 EXPORT_SYMBOL_GPL(shmem_file_setup); 4268 + 4269 + /** 4270 + * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs 4271 + * @mnt: the tmpfs mount where the file will be created 4272 + * @name: name for dentry (to be seen in /proc/<pid>/maps 4273 + * @size: size to be set for the file 4274 + * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size 4275 + */ 4276 + struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, 4277 + 
loff_t size, unsigned long flags) 4278 + { 4279 + return __shmem_file_setup(mnt, name, size, flags, 0); 4280 + } 4281 + EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); 4268 4282 4269 4283 /** 4270 4284 * shmem_zero_setup - setup a shared anonymous mapping ··· 4295 4281 * accessible to the user through its mapping, use S_PRIVATE flag to 4296 4282 * bypass file security, in the same way as shmem_kernel_file_setup(). 4297 4283 */ 4298 - file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE); 4284 + file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags); 4299 4285 if (IS_ERR(file)) 4300 4286 return PTR_ERR(file); 4301 4287