/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

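/*
 * Thermal sensor readback. The CTF_TEMP field of CG_MULT_THERMAL_STATUS
 * appears to hold a 9-bit temperature in degrees C, with bit 9 acting as
 * an over-range flag; over-range readings are clamped to 255 C below.
 */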
85/* get temperature in millidegrees */
86int ci_get_temp(struct radeon_device *rdev)
87{
88 u32 temp;
89 int actual_temp = 0;
90
91 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
92 CTF_TEMP_SHIFT;
93
94 if (temp & 0x200)
95 actual_temp = 255;
96 else
97 actual_temp = temp & 0x1ff;
98
99 actual_temp = actual_temp * 1000;
100
101 return actual_temp;
102}
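
/*
 * On Kaveri/Kabini the SMC exposes the sensor as a raw count at SMC
 * address 0xC0300E0C; the (temp / 8) - 49 conversion below appears to
 * map that count to degrees C.
 */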

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect registers accessor
 */
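/*
 * PCIE port registers live behind an index/data pair: the offset is
 * written to PCIE_INDEX and the payload moves through PCIE_DATA. The
 * (void) readbacks below flush the posted writes while the lock is
 * held, so two accessors cannot interleave their index/data cycles.
 */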
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

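/*
 * RLC save/restore register lists. Each entry appears to encode an
 * instance selector in the high 16 bits and a dword register offset
 * (byte offset >> 2) in the low 16 bits, followed by a placeholder
 * dword for the saved value; the short count-prefixed runs near the
 * end (0x3, 0x5) list registers handled as a batch.
 */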
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

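/*
 * "Golden" register tables: three dwords per entry, {offset, and_mask,
 * or_value}. radeon_program_register_sequence() read-modify-writes each
 * register with the mask, or writes or_value outright when the mask is
 * 0xffffffff.
 */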
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
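
/*
 * Doorbell aperture accessors. On CIK, compute queues are kicked by
 * writing ring pointers into a per-queue doorbell BAR; rdev->doorbell.ptr
 * is the kernel mapping of that aperture and offsets are in bytes.
 */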

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

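/*
 * MC sequencer I/O setup: each entry below is an
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair that
 * ci_mc_load_microcode() programs before uploading the MC ucode itself.
 */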
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
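
/*
 * A minimal usage sketch (assuming the caller serializes selection with
 * rdev->srbm_mutex, as the rest of the driver does):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... access the per-instance registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */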

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		/* also drop the mec/sdma images requested above */
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes. Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
1967 break;
1968 case 13:
1969 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1972 NUM_BANKS(ADDR_SURF_4_BANK));
1973 break;
1974 case 14:
1975 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1978 NUM_BANKS(ADDR_SURF_2_BANK));
1979 break;
1980 default:
1981 gb_tile_moden = 0;
1982 break;
1983 }
1984 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1985 }
1986 } else if (num_pipe_configs == 4) {
1987 if (num_rbs == 4) {
1988 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1989 switch (reg_offset) {
1990 case 0:
1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1995 break;
1996 case 1:
1997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2001 break;
2002 case 2:
2003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2007 break;
2008 case 3:
2009 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2011 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2012 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2013 break;
2014 case 4:
2015 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2017 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2018 TILE_SPLIT(split_equal_to_row_size));
2019 break;
2020 case 5:
2021 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2023 break;
2024 case 6:
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2028 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2029 break;
2030 case 7:
2031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2032 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2033 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2034 TILE_SPLIT(split_equal_to_row_size));
2035 break;
2036 case 8:
2037 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2038 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2039 break;
2040 case 9:
2041 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2042 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2043 break;
2044 case 10:
2045 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2049 break;
2050 case 11:
2051 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2053 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2055 break;
2056 case 12:
2057 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2061 break;
2062 case 13:
2063 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2064 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2065 break;
2066 case 14:
2067 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071 break;
2072 case 16:
2073 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 break;
2078 case 17:
2079 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2081 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083 break;
2084 case 27:
2085 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2087 break;
2088 case 28:
2089 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 break;
2094 case 29:
2095 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2097 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099 break;
2100 case 30:
2101 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2102 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2103 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2105 break;
2106 default:
2107 gb_tile_moden = 0;
2108 break;
2109 }
2110 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2111 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2112 }
2113 } else if (num_rbs < 4) {
2114 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2115 switch (reg_offset) {
2116 case 0:
2117 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2121 break;
2122 case 1:
2123 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2127 break;
2128 case 2:
2129 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2133 break;
2134 case 3:
2135 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2137 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2138 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2139 break;
2140 case 4:
2141 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2143 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2144 TILE_SPLIT(split_equal_to_row_size));
2145 break;
2146 case 5:
2147 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2148 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2149 break;
2150 case 6:
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2153 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2155 break;
2156 case 7:
2157 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2159 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2160 TILE_SPLIT(split_equal_to_row_size));
2161 break;
2162 case 8:
2163 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2164 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2165 break;
2166 case 9:
2167 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2168 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2169 break;
2170 case 10:
2171 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2175 break;
2176 case 11:
2177 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2178 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2179 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181 break;
2182 case 12:
2183 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2184 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187 break;
2188 case 13:
2189 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2190 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2191 break;
2192 case 14:
2193 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197 break;
2198 case 16:
2199 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2200 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203 break;
2204 case 17:
2205 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2206 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209 break;
2210 case 27:
2211 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2212 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2213 break;
2214 case 28:
2215 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2219 break;
2220 case 29:
2221 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2222 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2223 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2225 break;
2226 case 30:
2227 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2228 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2229 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2231 break;
2232 default:
2233 gb_tile_moden = 0;
2234 break;
2235 }
2236 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2237 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2238 }
2239 }
2240 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2241 switch (reg_offset) {
2242 case 0:
2243 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2246 NUM_BANKS(ADDR_SURF_16_BANK));
2247 break;
2248 case 1:
2249 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252 NUM_BANKS(ADDR_SURF_16_BANK));
2253 break;
2254 case 2:
2255 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 NUM_BANKS(ADDR_SURF_16_BANK));
2259 break;
2260 case 3:
2261 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 NUM_BANKS(ADDR_SURF_16_BANK));
2265 break;
2266 case 4:
2267 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270 NUM_BANKS(ADDR_SURF_16_BANK));
2271 break;
2272 case 5:
2273 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276 NUM_BANKS(ADDR_SURF_8_BANK));
2277 break;
2278 case 6:
2279 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2282 NUM_BANKS(ADDR_SURF_4_BANK));
2283 break;
2284 case 8:
2285 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 NUM_BANKS(ADDR_SURF_16_BANK));
2289 break;
2290 case 9:
2291 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 NUM_BANKS(ADDR_SURF_16_BANK));
2295 break;
2296 case 10:
2297 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300 NUM_BANKS(ADDR_SURF_16_BANK));
2301 break;
2302 case 11:
2303 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306 NUM_BANKS(ADDR_SURF_16_BANK));
2307 break;
2308 case 12:
2309 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312 NUM_BANKS(ADDR_SURF_16_BANK));
2313 break;
2314 case 13:
2315 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2318 NUM_BANKS(ADDR_SURF_8_BANK));
2319 break;
2320 case 14:
2321 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2324 NUM_BANKS(ADDR_SURF_4_BANK));
2325 break;
2326 default:
2327 gb_tile_moden = 0;
2328 break;
2329 }
2330 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2331 }
2332 } else if (num_pipe_configs == 2) {
2333 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2334 switch (reg_offset) {
2335 case 0:
2336 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 PIPE_CONFIG(ADDR_SURF_P2) |
2339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2340 break;
2341 case 1:
2342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 PIPE_CONFIG(ADDR_SURF_P2) |
2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2346 break;
2347 case 2:
2348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350 PIPE_CONFIG(ADDR_SURF_P2) |
2351 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2352 break;
2353 case 3:
2354 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2356 PIPE_CONFIG(ADDR_SURF_P2) |
2357 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2358 break;
2359 case 4:
2360 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2362 PIPE_CONFIG(ADDR_SURF_P2) |
2363 TILE_SPLIT(split_equal_to_row_size));
2364 break;
2365 case 5:
2366 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2368 break;
2369 case 6:
2370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 PIPE_CONFIG(ADDR_SURF_P2) |
2373 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2374 break;
2375 case 7:
2376 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2377 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2378 PIPE_CONFIG(ADDR_SURF_P2) |
2379 TILE_SPLIT(split_equal_to_row_size));
2380 break;
2381 case 8:
2382 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2383 break;
2384 case 9:
2385 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2387 break;
2388 case 10:
2389 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 PIPE_CONFIG(ADDR_SURF_P2) |
2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393 break;
2394 case 11:
2395 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2397 PIPE_CONFIG(ADDR_SURF_P2) |
2398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399 break;
2400 case 12:
2401 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403 PIPE_CONFIG(ADDR_SURF_P2) |
2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 break;
2406 case 13:
2407 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2408 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2409 break;
2410 case 14:
2411 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413 PIPE_CONFIG(ADDR_SURF_P2) |
2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415 break;
2416 case 16:
2417 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419 PIPE_CONFIG(ADDR_SURF_P2) |
2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 break;
2422 case 17:
2423 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2425 PIPE_CONFIG(ADDR_SURF_P2) |
2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 break;
2428 case 27:
2429 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2431 break;
2432 case 28:
2433 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 PIPE_CONFIG(ADDR_SURF_P2) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 break;
2438 case 29:
2439 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441 PIPE_CONFIG(ADDR_SURF_P2) |
2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443 break;
2444 case 30:
2445 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2447 PIPE_CONFIG(ADDR_SURF_P2) |
2448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449 break;
2450 default:
2451 gb_tile_moden = 0;
2452 break;
2453 }
2454 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2455 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2456 }
2457 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2458 switch (reg_offset) {
2459 case 0:
2460 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463 NUM_BANKS(ADDR_SURF_16_BANK));
2464 break;
2465 case 1:
2466 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469 NUM_BANKS(ADDR_SURF_16_BANK));
2470 break;
2471 case 2:
2472 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475 NUM_BANKS(ADDR_SURF_16_BANK));
2476 break;
2477 case 3:
2478 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481 NUM_BANKS(ADDR_SURF_16_BANK));
2482 break;
2483 case 4:
2484 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2487 NUM_BANKS(ADDR_SURF_16_BANK));
2488 break;
2489 case 5:
2490 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493 NUM_BANKS(ADDR_SURF_16_BANK));
2494 break;
2495 case 6:
2496 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499 NUM_BANKS(ADDR_SURF_8_BANK));
2500 break;
2501 case 8:
2502 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505 NUM_BANKS(ADDR_SURF_16_BANK));
2506 break;
2507 case 9:
2508 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511 NUM_BANKS(ADDR_SURF_16_BANK));
2512 break;
2513 case 10:
2514 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517 NUM_BANKS(ADDR_SURF_16_BANK));
2518 break;
2519 case 11:
2520 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523 NUM_BANKS(ADDR_SURF_16_BANK));
2524 break;
2525 case 12:
2526 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529 NUM_BANKS(ADDR_SURF_16_BANK));
2530 break;
2531 case 13:
2532 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2535 NUM_BANKS(ADDR_SURF_16_BANK));
2536 break;
2537 case 14:
2538 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541 NUM_BANKS(ADDR_SURF_8_BANK));
2542 break;
2543 default:
2544 gb_tile_moden = 0;
2545 break;
2546 }
2547 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2548 }
2549 } else
2550 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2551}
2552
2553/**
2554 * cik_select_se_sh - select which SE, SH to address
2555 *
2556 * @rdev: radeon_device pointer
2557 * @se_num: shader engine to address
2558 * @sh_num: sh block to address
2559 *
2560 * Select which SE, SH combinations to address. Certain
2561 * registers are instanced per SE or SH. 0xffffffff means
2562 * broadcast to all SEs or SHs (CIK).
2563 */
2564static void cik_select_se_sh(struct radeon_device *rdev,
2565 u32 se_num, u32 sh_num)
2566{
2567 u32 data = INSTANCE_BROADCAST_WRITES;
2568
2569 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2570 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2571 else if (se_num == 0xffffffff)
2572 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2573 else if (sh_num == 0xffffffff)
2574 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2575 else
2576 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2577 WREG32(GRBM_GFX_INDEX, data);
2578}
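/*
 * Illustrative usage (a sketch, not part of the driver flow): to touch
 * an instanced register on SE 1 only, select it, do the access, then
 * restore broadcast mode so later writes reach every SE/SH again:
 *
 *   cik_select_se_sh(rdev, 1, 0xffffffff);
 *   ...access the per-SE register...
 *   cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */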
2579
2580/**
2581 * cik_create_bitmask - create a bitmask
2582 *
2583 * @bit_width: length of the mask
2584 *
2585 * create a variable length bit mask (CIK).
2586 * Returns the bitmask.
2587 */
2588static u32 cik_create_bitmask(u32 bit_width)
2589{
2590 u32 i, mask = 0;
2591
2592 for (i = 0; i < bit_width; i++) {
2593 mask <<= 1;
2594 mask |= 1;
2595 }
2596 return mask;
2597}
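/*
 * Worked example: cik_create_bitmask(4) builds 0x1 -> 0x3 -> 0x7 -> 0xf.
 * Unlike the one-liner ((1 << bit_width) - 1), the loop stays well
 * defined in C for bit_width == 32, returning 0xffffffff.
 */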
2598
2599/**
 2600 * cik_get_rb_disabled - compute the bitmask of disabled RBs
2601 *
2602 * @rdev: radeon_device pointer
2603 * @max_rb_num: max RBs (render backends) for the asic
2604 * @se_num: number of SEs (shader engines) for the asic
2605 * @sh_per_se: number of SH blocks per SE for the asic
2606 *
2607 * Calculates the bitmask of disabled RBs (CIK).
2608 * Returns the disabled RB bitmask.
2609 */
2610static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2611 u32 max_rb_num, u32 se_num,
2612 u32 sh_per_se)
2613{
2614 u32 data, mask;
2615
2616 data = RREG32(CC_RB_BACKEND_DISABLE);
2617 if (data & 1)
2618 data &= BACKEND_DISABLE_MASK;
2619 else
2620 data = 0;
2621 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2622
2623 data >>= BACKEND_DISABLE_SHIFT;
2624
2625 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2626
2627 return data & mask;
2628}
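/*
 * Example with assumed numbers: for max_rb_num = 4, se_num = 2 and
 * sh_per_se = 1 the mask is cik_create_bitmask(2) = 0x3, so only the
 * two RB-disable bits belonging to the currently selected SE/SH
 * survive from the combined fuse and user registers.
 */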
2629
2630/**
2631 * cik_setup_rb - setup the RBs on the asic
2632 *
2633 * @rdev: radeon_device pointer
2634 * @se_num: number of SEs (shader engines) for the asic
2635 * @sh_per_se: number of SH blocks per SE for the asic
2636 * @max_rb_num: max RBs (render backends) for the asic
2637 *
2638 * Configures per-SE/SH RB registers (CIK).
2639 */
2640static void cik_setup_rb(struct radeon_device *rdev,
2641 u32 se_num, u32 sh_per_se,
2642 u32 max_rb_num)
2643{
2644 int i, j;
2645 u32 data, mask;
2646 u32 disabled_rbs = 0;
2647 u32 enabled_rbs = 0;
2648
2649 for (i = 0; i < se_num; i++) {
2650 for (j = 0; j < sh_per_se; j++) {
2651 cik_select_se_sh(rdev, i, j);
2652 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2653 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2654 }
2655 }
2656 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2657
2658 mask = 1;
2659 for (i = 0; i < max_rb_num; i++) {
2660 if (!(disabled_rbs & mask))
2661 enabled_rbs |= mask;
2662 mask <<= 1;
2663 }
2664
2665 for (i = 0; i < se_num; i++) {
2666 cik_select_se_sh(rdev, i, 0xffffffff);
2667 data = 0;
2668 for (j = 0; j < sh_per_se; j++) {
2669 switch (enabled_rbs & 3) {
2670 case 1:
2671 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2672 break;
2673 case 2:
2674 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2675 break;
2676 case 3:
2677 default:
2678 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2679 break;
2680 }
2681 enabled_rbs >>= 2;
2682 }
2683 WREG32(PA_SC_RASTER_CONFIG, data);
2684 }
2685 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2686}
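/*
 * Packing example for the loops above (assumed numbers): with se_num = 2,
 * sh_per_se = 1 and CIK_RB_BITMAP_WIDTH_PER_SH = 2, the per-SH disable
 * masks land at bit offsets 0 and 2 of disabled_rbs; enabled_rbs is then
 * consumed two bits at a time to pick a RASTER_CONFIG_RB_MAP_* value per
 * SH.
 */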
2687
2688/**
2689 * cik_gpu_init - setup the 3D engine
2690 *
2691 * @rdev: radeon_device pointer
2692 *
2693 * Configures the 3D engine and tiling configuration
2694 * registers so that the 3D engine is usable.
2695 */
2696static void cik_gpu_init(struct radeon_device *rdev)
2697{
2698 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2699 u32 mc_shared_chmap, mc_arb_ramcfg;
2700 u32 hdp_host_path_cntl;
2701 u32 tmp;
2702 int i, j;
2703
2704 switch (rdev->family) {
2705 case CHIP_BONAIRE:
2706 rdev->config.cik.max_shader_engines = 2;
2707 rdev->config.cik.max_tile_pipes = 4;
2708 rdev->config.cik.max_cu_per_sh = 7;
2709 rdev->config.cik.max_sh_per_se = 1;
2710 rdev->config.cik.max_backends_per_se = 2;
2711 rdev->config.cik.max_texture_channel_caches = 4;
2712 rdev->config.cik.max_gprs = 256;
2713 rdev->config.cik.max_gs_threads = 32;
2714 rdev->config.cik.max_hw_contexts = 8;
2715
2716 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2717 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2718 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2719 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2720 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2721 break;
2722 case CHIP_KAVERI:
2723 rdev->config.cik.max_shader_engines = 1;
2724 rdev->config.cik.max_tile_pipes = 4;
2725 if ((rdev->pdev->device == 0x1304) ||
2726 (rdev->pdev->device == 0x1305) ||
2727 (rdev->pdev->device == 0x130C) ||
2728 (rdev->pdev->device == 0x130F) ||
2729 (rdev->pdev->device == 0x1310) ||
2730 (rdev->pdev->device == 0x1311) ||
2731 (rdev->pdev->device == 0x131C)) {
2732 rdev->config.cik.max_cu_per_sh = 8;
2733 rdev->config.cik.max_backends_per_se = 2;
2734 } else if ((rdev->pdev->device == 0x1309) ||
2735 (rdev->pdev->device == 0x130A) ||
2736 (rdev->pdev->device == 0x130D) ||
2737 (rdev->pdev->device == 0x1313) ||
2738 (rdev->pdev->device == 0x131D)) {
2739 rdev->config.cik.max_cu_per_sh = 6;
2740 rdev->config.cik.max_backends_per_se = 2;
2741 } else if ((rdev->pdev->device == 0x1306) ||
2742 (rdev->pdev->device == 0x1307) ||
2743 (rdev->pdev->device == 0x130B) ||
2744 (rdev->pdev->device == 0x130E) ||
2745 (rdev->pdev->device == 0x1315) ||
2746 (rdev->pdev->device == 0x131B)) {
2747 rdev->config.cik.max_cu_per_sh = 4;
2748 rdev->config.cik.max_backends_per_se = 1;
2749 } else {
2750 rdev->config.cik.max_cu_per_sh = 3;
2751 rdev->config.cik.max_backends_per_se = 1;
2752 }
2753 rdev->config.cik.max_sh_per_se = 1;
2754 rdev->config.cik.max_texture_channel_caches = 4;
2755 rdev->config.cik.max_gprs = 256;
2756 rdev->config.cik.max_gs_threads = 16;
2757 rdev->config.cik.max_hw_contexts = 8;
2758
2759 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2760 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2761 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2762 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2763 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2764 break;
2765 case CHIP_KABINI:
2766 default:
2767 rdev->config.cik.max_shader_engines = 1;
2768 rdev->config.cik.max_tile_pipes = 2;
2769 rdev->config.cik.max_cu_per_sh = 2;
2770 rdev->config.cik.max_sh_per_se = 1;
2771 rdev->config.cik.max_backends_per_se = 1;
2772 rdev->config.cik.max_texture_channel_caches = 2;
2773 rdev->config.cik.max_gprs = 256;
2774 rdev->config.cik.max_gs_threads = 16;
2775 rdev->config.cik.max_hw_contexts = 8;
2776
2777 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2778 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2779 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2780 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2781 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2782 break;
2783 }
2784
2785 /* Initialize HDP */
2786 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2787 WREG32((0x2c14 + j), 0x00000000);
2788 WREG32((0x2c18 + j), 0x00000000);
2789 WREG32((0x2c1c + j), 0x00000000);
2790 WREG32((0x2c20 + j), 0x00000000);
2791 WREG32((0x2c24 + j), 0x00000000);
2792 }
2793
2794 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2795
2796 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2797
2798 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2799 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2800
2801 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2802 rdev->config.cik.mem_max_burst_length_bytes = 256;
2803 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2804 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2805 if (rdev->config.cik.mem_row_size_in_kb > 4)
2806 rdev->config.cik.mem_row_size_in_kb = 4;
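	/* e.g. a NOOFCOLS field of 0 gives (4 * (1 << 8)) / 1024 = 1 KB
	 * rows, a field of 2 gives 4 KB, and larger values are clamped
	 * to 4 KB by the check above
	 */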
2807 /* XXX use MC settings? */
2808 rdev->config.cik.shader_engine_tile_size = 32;
2809 rdev->config.cik.num_gpus = 1;
2810 rdev->config.cik.multi_gpu_tile_size = 64;
2811
2812 /* fix up row size */
2813 gb_addr_config &= ~ROW_SIZE_MASK;
2814 switch (rdev->config.cik.mem_row_size_in_kb) {
2815 case 1:
2816 default:
2817 gb_addr_config |= ROW_SIZE(0);
2818 break;
2819 case 2:
2820 gb_addr_config |= ROW_SIZE(1);
2821 break;
2822 case 4:
2823 gb_addr_config |= ROW_SIZE(2);
2824 break;
2825 }
2826
2827 /* setup tiling info dword. gb_addr_config is not adequate since it does
2828 * not have bank info, so create a custom tiling dword.
2829 * bits 3:0 num_pipes
2830 * bits 7:4 num_banks
2831 * bits 11:8 group_size
2832 * bits 15:12 row_size
2833 */
2834 rdev->config.cik.tile_config = 0;
2835 switch (rdev->config.cik.num_tile_pipes) {
2836 case 1:
2837 rdev->config.cik.tile_config |= (0 << 0);
2838 break;
2839 case 2:
2840 rdev->config.cik.tile_config |= (1 << 0);
2841 break;
2842 case 4:
2843 rdev->config.cik.tile_config |= (2 << 0);
2844 break;
2845 case 8:
2846 default:
2847 /* XXX what about 12? */
2848 rdev->config.cik.tile_config |= (3 << 0);
2849 break;
2850 }
2851 rdev->config.cik.tile_config |=
2852 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2853 rdev->config.cik.tile_config |=
2854 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2855 rdev->config.cik.tile_config |=
2856 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
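	/* e.g. num_tile_pipes = 4 puts 2 in the low nibble; the raw
	 * NOOFBANK, PIPE_INTERLEAVE_SIZE and ROW_SIZE register fields are
	 * then copied unchanged into nibbles 1, 2 and 3 respectively
	 */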
2857
2858 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2859 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2860 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2861 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2862 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2863 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2864 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2865 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2866
2867 cik_tiling_mode_table_init(rdev);
2868
2869 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2870 rdev->config.cik.max_sh_per_se,
2871 rdev->config.cik.max_backends_per_se);
2872
2873 /* set HW defaults for 3D engine */
2874 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2875
2876 WREG32(SX_DEBUG_1, 0x20);
2877
2878 WREG32(TA_CNTL_AUX, 0x00010000);
2879
2880 tmp = RREG32(SPI_CONFIG_CNTL);
2881 tmp |= 0x03000000;
2882 WREG32(SPI_CONFIG_CNTL, tmp);
2883
2884 WREG32(SQ_CONFIG, 1);
2885
2886 WREG32(DB_DEBUG, 0);
2887
2888 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2889 tmp |= 0x00000400;
2890 WREG32(DB_DEBUG2, tmp);
2891
2892 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2893 tmp |= 0x00020200;
2894 WREG32(DB_DEBUG3, tmp);
2895
2896 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2897 tmp |= 0x00018208;
2898 WREG32(CB_HW_CONTROL, tmp);
2899
2900 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2901
2902 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2903 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2904 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2905 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2906
2907 WREG32(VGT_NUM_INSTANCES, 1);
2908
2909 WREG32(CP_PERFMON_CNTL, 0);
2910
2911 WREG32(SQ_CONFIG, 0);
2912
2913 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2914 FORCE_EOV_MAX_REZ_CNT(255)));
2915
2916 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2917 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2918
2919 WREG32(VGT_GS_VERTEX_REUSE, 16);
2920 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2921
2922 tmp = RREG32(HDP_MISC_CNTL);
2923 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2924 WREG32(HDP_MISC_CNTL, tmp);
2925
2926 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2927 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2928
2929 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2930 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2931
2932 udelay(50);
2933}
2934
2935/*
 2936 * GPU scratch register helper functions.
2937 */
2938/**
2939 * cik_scratch_init - setup driver info for CP scratch regs
2940 *
2941 * @rdev: radeon_device pointer
2942 *
2943 * Set up the number and offset of the CP scratch registers.
 2944 * NOTE: use of CP scratch registers is a legacy interface and
2945 * is not used by default on newer asics (r6xx+). On newer asics,
2946 * memory buffers are used for fences rather than scratch regs.
2947 */
2948static void cik_scratch_init(struct radeon_device *rdev)
2949{
2950 int i;
2951
2952 rdev->scratch.num_reg = 7;
2953 rdev->scratch.reg_base = SCRATCH_REG0;
2954 for (i = 0; i < rdev->scratch.num_reg; i++) {
2955 rdev->scratch.free[i] = true;
2956 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2957 }
2958}
2959
2960/**
2961 * cik_ring_test - basic gfx ring test
2962 *
2963 * @rdev: radeon_device pointer
2964 * @ring: radeon_ring structure holding ring information
2965 *
2966 * Allocate a scratch register and write to it using the gfx ring (CIK).
2967 * Provides a basic gfx ring test to verify that the ring is working.
 2968 * Used by cik_cp_gfx_resume().
2969 * Returns 0 on success, error on failure.
2970 */
2971int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2972{
2973 uint32_t scratch;
2974 uint32_t tmp = 0;
2975 unsigned i;
2976 int r;
2977
2978 r = radeon_scratch_get(rdev, &scratch);
2979 if (r) {
2980 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2981 return r;
2982 }
2983 WREG32(scratch, 0xCAFEDEAD);
2984 r = radeon_ring_lock(rdev, ring, 3);
2985 if (r) {
2986 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2987 radeon_scratch_free(rdev, scratch);
2988 return r;
2989 }
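	/* 3-dword test packet: header, register offset, value; this asks
	 * the CP itself to write 0xDEADBEEF into the scratch register
	 */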
2990 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2991 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2992 radeon_ring_write(ring, 0xDEADBEEF);
2993 radeon_ring_unlock_commit(rdev, ring);
2994
2995 for (i = 0; i < rdev->usec_timeout; i++) {
2996 tmp = RREG32(scratch);
2997 if (tmp == 0xDEADBEEF)
2998 break;
2999 DRM_UDELAY(1);
3000 }
3001 if (i < rdev->usec_timeout) {
3002 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3003 } else {
3004 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3005 ring->idx, scratch, tmp);
3006 r = -EINVAL;
3007 }
3008 radeon_scratch_free(rdev, scratch);
3009 return r;
3010}
3011
3012/**
3013 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3014 *
3015 * @rdev: radeon_device pointer
3016 * @fence: radeon fence object
3017 *
 3018 * Emits a fence sequence number on the gfx ring and flushes
3019 * GPU caches.
3020 */
3021void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3022 struct radeon_fence *fence)
3023{
3024 struct radeon_ring *ring = &rdev->ring[fence->ring];
3025 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3026
3027 /* EVENT_WRITE_EOP - flush caches, send int */
3028 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3029 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3030 EOP_TC_ACTION_EN |
3031 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3032 EVENT_INDEX(5)));
3033 radeon_ring_write(ring, addr & 0xfffffffc);
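	/* DATA_SEL(1) requests a 32-bit data write (the fence seq below)
	 * and INT_SEL(2) an interrupt once that write has landed; field
	 * meanings as commonly documented for EVENT_WRITE_EOP
	 */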
3034 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3035 radeon_ring_write(ring, fence->seq);
3036 radeon_ring_write(ring, 0);
3037 /* HDP flush */
3038 /* We should be using the new WAIT_REG_MEM special op packet here
3039 * but it causes the CP to hang
3040 */
3041 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3042 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3043 WRITE_DATA_DST_SEL(0)));
3044 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3045 radeon_ring_write(ring, 0);
3046 radeon_ring_write(ring, 0);
3047}
3048
3049/**
3050 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3051 *
3052 * @rdev: radeon_device pointer
3053 * @fence: radeon fence object
3054 *
 3055 * Emits a fence sequence number on the compute ring and flushes
3056 * GPU caches.
3057 */
3058void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3059 struct radeon_fence *fence)
3060{
3061 struct radeon_ring *ring = &rdev->ring[fence->ring];
3062 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3063
3064 /* RELEASE_MEM - flush caches, send int */
3065 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3066 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3067 EOP_TC_ACTION_EN |
3068 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3069 EVENT_INDEX(5)));
3070 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3071 radeon_ring_write(ring, addr & 0xfffffffc);
3072 radeon_ring_write(ring, upper_32_bits(addr));
3073 radeon_ring_write(ring, fence->seq);
3074 radeon_ring_write(ring, 0);
3075 /* HDP flush */
3076 /* We should be using the new WAIT_REG_MEM special op packet here
3077 * but it causes the CP to hang
3078 */
3079 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3080 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3081 WRITE_DATA_DST_SEL(0)));
3082 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3083 radeon_ring_write(ring, 0);
3084 radeon_ring_write(ring, 0);
3085}
3086
3087void cik_semaphore_ring_emit(struct radeon_device *rdev,
3088 struct radeon_ring *ring,
3089 struct radeon_semaphore *semaphore,
3090 bool emit_wait)
3091{
3092 uint64_t addr = semaphore->gpu_addr;
3093 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3094
3095 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3096 radeon_ring_write(ring, addr & 0xffffffff);
3097 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3098}
3099
3100/*
3101 * IB stuff
3102 */
3103/**
3104 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3105 *
3106 * @rdev: radeon_device pointer
3107 * @ib: radeon indirect buffer object
3108 *
 3109 * Emits a DE (drawing engine) or CE (constant engine) IB
3110 * on the gfx ring. IBs are usually generated by userspace
3111 * acceleration drivers and submitted to the kernel for
 3112 * scheduling on the ring. This function schedules the IB
3113 * on the gfx ring for execution by the GPU.
3114 */
3115void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3116{
3117 struct radeon_ring *ring = &rdev->ring[ib->ring];
3118 u32 header, control = INDIRECT_BUFFER_VALID;
3119
3120 if (ib->is_const_ib) {
3121 /* set switch buffer packet before const IB */
3122 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3123 radeon_ring_write(ring, 0);
3124
3125 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3126 } else {
3127 u32 next_rptr;
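		/* next_rptr must point past the rptr-save packet emitted
		 * here (3 dwords for SET_UCONFIG_REG, 5 for WRITE_DATA)
		 * plus the 4-dword INDIRECT_BUFFER packet emitted at the
		 * end of this function
		 */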
3128 if (ring->rptr_save_reg) {
3129 next_rptr = ring->wptr + 3 + 4;
3130 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3131 radeon_ring_write(ring, ((ring->rptr_save_reg -
3132 PACKET3_SET_UCONFIG_REG_START) >> 2));
3133 radeon_ring_write(ring, next_rptr);
3134 } else if (rdev->wb.enabled) {
3135 next_rptr = ring->wptr + 5 + 4;
3136 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3137 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3138 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3139 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3140 radeon_ring_write(ring, next_rptr);
3141 }
3142
3143 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3144 }
3145
3146 control |= ib->length_dw |
3147 (ib->vm ? (ib->vm->id << 24) : 0);
3148
3149 radeon_ring_write(ring, header);
3150 radeon_ring_write(ring,
3151#ifdef __BIG_ENDIAN
3152 (2 << 0) |
3153#endif
3154 (ib->gpu_addr & 0xFFFFFFFC));
3155 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3156 radeon_ring_write(ring, control);
3157}
3158
3159/**
3160 * cik_ib_test - basic gfx ring IB test
3161 *
3162 * @rdev: radeon_device pointer
3163 * @ring: radeon_ring structure holding ring information
3164 *
3165 * Allocate an IB and execute it on the gfx ring (CIK).
3166 * Provides a basic gfx ring test to verify that IBs are working.
3167 * Returns 0 on success, error on failure.
3168 */
3169int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3170{
3171 struct radeon_ib ib;
3172 uint32_t scratch;
3173 uint32_t tmp = 0;
3174 unsigned i;
3175 int r;
3176
3177 r = radeon_scratch_get(rdev, &scratch);
3178 if (r) {
3179 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3180 return r;
3181 }
3182 WREG32(scratch, 0xCAFEDEAD);
3183 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3184 if (r) {
3185 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3186 radeon_scratch_free(rdev, scratch);
3187 return r;
3188 }
3189 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3190 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3191 ib.ptr[2] = 0xDEADBEEF;
3192 ib.length_dw = 3;
3193 r = radeon_ib_schedule(rdev, &ib, NULL);
3194 if (r) {
3195 radeon_scratch_free(rdev, scratch);
3196 radeon_ib_free(rdev, &ib);
3197 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3198 return r;
3199 }
3200 r = radeon_fence_wait(ib.fence, false);
3201 if (r) {
3202 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3203 radeon_scratch_free(rdev, scratch);
3204 radeon_ib_free(rdev, &ib);
3205 return r;
3206 }
3207 for (i = 0; i < rdev->usec_timeout; i++) {
3208 tmp = RREG32(scratch);
3209 if (tmp == 0xDEADBEEF)
3210 break;
3211 DRM_UDELAY(1);
3212 }
3213 if (i < rdev->usec_timeout) {
3214 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3215 } else {
3216 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3217 scratch, tmp);
3218 r = -EINVAL;
3219 }
3220 radeon_scratch_free(rdev, scratch);
3221 radeon_ib_free(rdev, &ib);
3222 return r;
3223}
3224
3225/*
3226 * CP.
 3227 * On CIK, gfx and compute now have independent command processors.
3228 *
3229 * GFX
3230 * Gfx consists of a single ring and can process both gfx jobs and
3231 * compute jobs. The gfx CP consists of three microengines (ME):
3232 * PFP - Pre-Fetch Parser
3233 * ME - Micro Engine
3234 * CE - Constant Engine
3235 * The PFP and ME make up what is considered the Drawing Engine (DE).
 3236 * The CE is an asynchronous engine used for updating buffer descriptors
3237 * used by the DE so that they can be loaded into cache in parallel
3238 * while the DE is processing state update packets.
3239 *
3240 * Compute
3241 * The compute CP consists of two microengines (ME):
3242 * MEC1 - Compute MicroEngine 1
3243 * MEC2 - Compute MicroEngine 2
3244 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3245 * The queues are exposed to userspace and are programmed directly
3246 * by the compute runtime.
3247 */
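/*
 * Of those queues, this driver itself only drives two compute rings
 * (the CAYMAN_RING_TYPE_CP1/CP2 indices used below); the remainder are
 * left for the compute runtime as described above.
 */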
3248/**
3249 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3250 *
3251 * @rdev: radeon_device pointer
3252 * @enable: enable or disable the MEs
3253 *
3254 * Halts or unhalts the gfx MEs.
3255 */
3256static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3257{
3258 if (enable)
3259 WREG32(CP_ME_CNTL, 0);
3260 else {
3261 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3262 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3263 }
3264 udelay(50);
3265}
3266
3267/**
3268 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3269 *
3270 * @rdev: radeon_device pointer
3271 *
3272 * Loads the gfx PFP, ME, and CE ucode.
3273 * Returns 0 for success, -EINVAL if the ucode is not available.
3274 */
3275static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3276{
3277 const __be32 *fw_data;
3278 int i;
3279
3280 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3281 return -EINVAL;
3282
3283 cik_cp_gfx_enable(rdev, false);
3284
3285 /* PFP */
3286 fw_data = (const __be32 *)rdev->pfp_fw->data;
3287 WREG32(CP_PFP_UCODE_ADDR, 0);
3288 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3289 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3290 WREG32(CP_PFP_UCODE_ADDR, 0);
3291
3292 /* CE */
3293 fw_data = (const __be32 *)rdev->ce_fw->data;
3294 WREG32(CP_CE_UCODE_ADDR, 0);
3295 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3296 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3297 WREG32(CP_CE_UCODE_ADDR, 0);
3298
3299 /* ME */
3300 fw_data = (const __be32 *)rdev->me_fw->data;
3301 WREG32(CP_ME_RAM_WADDR, 0);
3302 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3303 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3304 WREG32(CP_ME_RAM_WADDR, 0);
3305
3306 WREG32(CP_PFP_UCODE_ADDR, 0);
3307 WREG32(CP_CE_UCODE_ADDR, 0);
3308 WREG32(CP_ME_RAM_WADDR, 0);
3309 WREG32(CP_ME_RAM_RADDR, 0);
3310 return 0;
3311}
3312
3313/**
3314 * cik_cp_gfx_start - start the gfx ring
3315 *
3316 * @rdev: radeon_device pointer
3317 *
3318 * Enables the ring and loads the clear state context and other
3319 * packets required to init the ring.
3320 * Returns 0 for success, error for failure.
3321 */
3322static int cik_cp_gfx_start(struct radeon_device *rdev)
3323{
3324 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3325 int r, i;
3326
3327 /* init the CP */
3328 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3329 WREG32(CP_ENDIAN_SWAP, 0);
3330 WREG32(CP_DEVICE_ID, 1);
3331
3332 cik_cp_gfx_enable(rdev, true);
3333
3334 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3335 if (r) {
3336 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3337 return r;
3338 }
3339
3340 /* init the CE partitions. CE only used for gfx on CIK */
3341 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3342 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3343 radeon_ring_write(ring, 0xc000);
3344 radeon_ring_write(ring, 0xc000);
3345
3346 /* setup clear context state */
3347 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3348 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3349
3350 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3351 radeon_ring_write(ring, 0x80000000);
3352 radeon_ring_write(ring, 0x80000000);
3353
3354 for (i = 0; i < cik_default_size; i++)
3355 radeon_ring_write(ring, cik_default_state[i]);
3356
3357 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3358 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3359
3360 /* set clear context state */
3361 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3362 radeon_ring_write(ring, 0);
3363
3364 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3365 radeon_ring_write(ring, 0x00000316);
3366 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3367 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3368
3369 radeon_ring_unlock_commit(rdev, ring);
3370
3371 return 0;
3372}
3373
3374/**
3375 * cik_cp_gfx_fini - stop the gfx ring
3376 *
3377 * @rdev: radeon_device pointer
3378 *
3379 * Stop the gfx ring and tear down the driver ring
3380 * info.
3381 */
3382static void cik_cp_gfx_fini(struct radeon_device *rdev)
3383{
3384 cik_cp_gfx_enable(rdev, false);
3385 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3386}
3387
3388/**
3389 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3390 *
3391 * @rdev: radeon_device pointer
3392 *
3393 * Program the location and size of the gfx ring buffer
3394 * and test it to make sure it's working.
3395 * Returns 0 for success, error for failure.
3396 */
3397static int cik_cp_gfx_resume(struct radeon_device *rdev)
3398{
3399 struct radeon_ring *ring;
3400 u32 tmp;
3401 u32 rb_bufsz;
3402 u64 rb_addr;
3403 int r;
3404
3405 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3406 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3407
3408 /* Set the write pointer delay */
3409 WREG32(CP_RB_WPTR_DELAY, 0);
3410
3411 /* set the RB to use vmid 0 */
3412 WREG32(CP_RB_VMID, 0);
3413
3414 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3415
3416 /* ring 0 - compute and gfx */
3417 /* Set ring buffer size */
3418 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
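	/* tmp packs log2(ring size in 8-byte units) into RB_BUFSZ and
	 * log2(page size in 8-byte units) into the block-size field at
	 * bits 8+ (register field name assumed from the r600-era layout)
	 */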
3419 rb_bufsz = order_base_2(ring->ring_size / 8);
3420 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3421#ifdef __BIG_ENDIAN
3422 tmp |= BUF_SWAP_32BIT;
3423#endif
3424 WREG32(CP_RB0_CNTL, tmp);
3425
3426 /* Initialize the ring buffer's read and write pointers */
3427 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3428 ring->wptr = 0;
3429 WREG32(CP_RB0_WPTR, ring->wptr);
3430
 3431 /* set the wb address whether it's enabled or not */
3432 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3433 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3434
3435 /* scratch register shadowing is no longer supported */
3436 WREG32(SCRATCH_UMSK, 0);
3437
3438 if (!rdev->wb.enabled)
3439 tmp |= RB_NO_UPDATE;
3440
3441 mdelay(1);
3442 WREG32(CP_RB0_CNTL, tmp);
3443
3444 rb_addr = ring->gpu_addr >> 8;
3445 WREG32(CP_RB0_BASE, rb_addr);
3446 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3447
3448 ring->rptr = RREG32(CP_RB0_RPTR);
3449
3450 /* start the ring */
3451 cik_cp_gfx_start(rdev);
3452 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3453 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3454 if (r) {
3455 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3456 return r;
3457 }
3458 return 0;
3459}
3460
3461u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3462 struct radeon_ring *ring)
3463{
3464 u32 rptr;
 3465
3468 if (rdev->wb.enabled) {
3469 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3470 } else {
3471 mutex_lock(&rdev->srbm_mutex);
3472 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3473 rptr = RREG32(CP_HQD_PQ_RPTR);
3474 cik_srbm_select(rdev, 0, 0, 0, 0);
3475 mutex_unlock(&rdev->srbm_mutex);
3476 }
3477
3478 return rptr;
3479}
3480
3481u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3482 struct radeon_ring *ring)
3483{
3484 u32 wptr;
3485
3486 if (rdev->wb.enabled) {
3487 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3488 } else {
3489 mutex_lock(&rdev->srbm_mutex);
3490 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3491 wptr = RREG32(CP_HQD_PQ_WPTR);
3492 cik_srbm_select(rdev, 0, 0, 0, 0);
3493 mutex_unlock(&rdev->srbm_mutex);
3494 }
3495
3496 return wptr;
3497}
3498
3499void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3500 struct radeon_ring *ring)
3501{
3502 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3503 WDOORBELL32(ring->doorbell_offset, ring->wptr);
3504}
3505
3506/**
3507 * cik_cp_compute_enable - enable/disable the compute CP MEs
3508 *
3509 * @rdev: radeon_device pointer
3510 * @enable: enable or disable the MEs
3511 *
3512 * Halts or unhalts the compute MEs.
3513 */
3514static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3515{
3516 if (enable)
3517 WREG32(CP_MEC_CNTL, 0);
3518 else
3519 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3520 udelay(50);
3521}
3522
3523/**
3524 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3525 *
3526 * @rdev: radeon_device pointer
3527 *
3528 * Loads the compute MEC1&2 ucode.
3529 * Returns 0 for success, -EINVAL if the ucode is not available.
3530 */
3531static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3532{
3533 const __be32 *fw_data;
3534 int i;
3535
3536 if (!rdev->mec_fw)
3537 return -EINVAL;
3538
3539 cik_cp_compute_enable(rdev, false);
3540
3541 /* MEC1 */
3542 fw_data = (const __be32 *)rdev->mec_fw->data;
3543 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3544 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3545 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3546 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3547
3548 if (rdev->family == CHIP_KAVERI) {
3549 /* MEC2 */
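		/* the same firmware image is loaded into both compute MEs */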
3550 fw_data = (const __be32 *)rdev->mec_fw->data;
3551 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3552 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3553 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3554 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3555 }
3556
3557 return 0;
3558}
3559
3560/**
3561 * cik_cp_compute_start - start the compute queues
3562 *
3563 * @rdev: radeon_device pointer
3564 *
3565 * Enable the compute queues.
3566 * Returns 0 for success, error for failure.
3567 */
3568static int cik_cp_compute_start(struct radeon_device *rdev)
3569{
3570 cik_cp_compute_enable(rdev, true);
3571
3572 return 0;
3573}
3574
3575/**
3576 * cik_cp_compute_fini - stop the compute queues
3577 *
3578 * @rdev: radeon_device pointer
3579 *
3580 * Stop the compute queues and tear down the driver queue
3581 * info.
3582 */
3583static void cik_cp_compute_fini(struct radeon_device *rdev)
3584{
3585 int i, idx, r;
3586
3587 cik_cp_compute_enable(rdev, false);
3588
3589 for (i = 0; i < 2; i++) {
3590 if (i == 0)
3591 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3592 else
3593 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3594
3595 if (rdev->ring[idx].mqd_obj) {
3596 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3597 if (unlikely(r != 0))
3598 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3599
3600 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3601 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3602
3603 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3604 rdev->ring[idx].mqd_obj = NULL;
3605 }
3606 }
3607}
3608
3609static void cik_mec_fini(struct radeon_device *rdev)
3610{
3611 int r;
3612
3613 if (rdev->mec.hpd_eop_obj) {
3614 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3615 if (unlikely(r != 0))
3616 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3617 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3618 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3619
3620 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3621 rdev->mec.hpd_eop_obj = NULL;
3622 }
3623}
3624
3625#define MEC_HPD_SIZE 2048
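/* Sizing sketch (illustrative): cik_mec_init() below allocates
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes of HPD EOP space, i.e.
 * 2 * 4 * 2048 * 2 = 32 KiB on KAVERI and 1 * 4 * 2048 * 2 = 16 KiB on
 * BONAIRE/KABINI.
 */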
3626
3627static int cik_mec_init(struct radeon_device *rdev)
3628{
3629 int r;
3630 u32 *hpd;
3631
3632 /*
3633 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3634 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3635 */
3636 if (rdev->family == CHIP_KAVERI)
3637 rdev->mec.num_mec = 2;
3638 else
3639 rdev->mec.num_mec = 1;
3640 rdev->mec.num_pipe = 4;
3641 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3642
3643 if (rdev->mec.hpd_eop_obj == NULL) {
3644 r = radeon_bo_create(rdev,
			     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3646 PAGE_SIZE, true,
3647 RADEON_GEM_DOMAIN_GTT, NULL,
3648 &rdev->mec.hpd_eop_obj);
3649 if (r) {
			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3651 return r;
3652 }
3653 }
3654
3655 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3656 if (unlikely(r != 0)) {
3657 cik_mec_fini(rdev);
3658 return r;
3659 }
3660 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3661 &rdev->mec.hpd_eop_gpu_addr);
3662 if (r) {
		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3664 cik_mec_fini(rdev);
3665 return r;
3666 }
3667 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3668 if (r) {
		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3670 cik_mec_fini(rdev);
3671 return r;
3672 }
3673
	/* clear the memory; possibly not strictly required, but it
	 * guarantees the queues start from a known state
	 */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3676
3677 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3678 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3679
3680 return 0;
3681}
3682
3683struct hqd_registers
3684{
3685 u32 cp_mqd_base_addr;
3686 u32 cp_mqd_base_addr_hi;
3687 u32 cp_hqd_active;
3688 u32 cp_hqd_vmid;
3689 u32 cp_hqd_persistent_state;
3690 u32 cp_hqd_pipe_priority;
3691 u32 cp_hqd_queue_priority;
3692 u32 cp_hqd_quantum;
3693 u32 cp_hqd_pq_base;
3694 u32 cp_hqd_pq_base_hi;
3695 u32 cp_hqd_pq_rptr;
3696 u32 cp_hqd_pq_rptr_report_addr;
3697 u32 cp_hqd_pq_rptr_report_addr_hi;
3698 u32 cp_hqd_pq_wptr_poll_addr;
3699 u32 cp_hqd_pq_wptr_poll_addr_hi;
3700 u32 cp_hqd_pq_doorbell_control;
3701 u32 cp_hqd_pq_wptr;
3702 u32 cp_hqd_pq_control;
3703 u32 cp_hqd_ib_base_addr;
3704 u32 cp_hqd_ib_base_addr_hi;
3705 u32 cp_hqd_ib_rptr;
3706 u32 cp_hqd_ib_control;
3707 u32 cp_hqd_iq_timer;
3708 u32 cp_hqd_iq_rptr;
3709 u32 cp_hqd_dequeue_request;
3710 u32 cp_hqd_dma_offload;
3711 u32 cp_hqd_sema_cmd;
3712 u32 cp_hqd_msg_type;
3713 u32 cp_hqd_atomic0_preop_lo;
3714 u32 cp_hqd_atomic0_preop_hi;
3715 u32 cp_hqd_atomic1_preop_lo;
3716 u32 cp_hqd_atomic1_preop_hi;
3717 u32 cp_hqd_hq_scheduler0;
3718 u32 cp_hqd_hq_scheduler1;
3719 u32 cp_mqd_control;
3720};
3721
3722struct bonaire_mqd
3723{
3724 u32 header;
3725 u32 dispatch_initiator;
3726 u32 dimensions[3];
3727 u32 start_idx[3];
3728 u32 num_threads[3];
3729 u32 pipeline_stat_enable;
3730 u32 perf_counter_enable;
3731 u32 pgm[2];
3732 u32 tba[2];
3733 u32 tma[2];
3734 u32 pgm_rsrc[2];
3735 u32 vmid;
3736 u32 resource_limits;
3737 u32 static_thread_mgmt01[2];
3738 u32 tmp_ring_size;
3739 u32 static_thread_mgmt23[2];
3740 u32 restart[3];
3741 u32 thread_trace_enable;
3742 u32 reserved1;
3743 u32 user_data[16];
3744 u32 vgtcs_invoke_count[2];
3745 struct hqd_registers queue_state;
3746 u32 dequeue_cntr;
3747 u32 interrupt_queue[64];
3748};
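/* Background note (illustrative): the MQD ("memory queue descriptor") is
 * the in-memory image of one compute queue; queue_state mirrors the
 * CP_HQD_* registers so the queue can be programmed from (and saved back
 * to) memory.  cik_cp_compute_resume() below allocates one MQD per
 * compute ring.
 */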
3749
3750/**
3751 * cik_cp_compute_resume - setup the compute queue registers
3752 *
3753 * @rdev: radeon_device pointer
3754 *
3755 * Program the compute queues and test them to make sure they
3756 * are working.
3757 * Returns 0 for success, error for failure.
3758 */
3759static int cik_cp_compute_resume(struct radeon_device *rdev)
3760{
	int r, i, j, idx;
3762 u32 tmp;
3763 bool use_doorbell = true;
3764 u64 hqd_gpu_addr;
3765 u64 mqd_gpu_addr;
3766 u64 eop_gpu_addr;
3767 u64 wb_gpu_addr;
3768 u32 *buf;
3769 struct bonaire_mqd *mqd;
3770
3771 r = cik_cp_compute_start(rdev);
3772 if (r)
3773 return r;
3774
3775 /* fix up chicken bits */
3776 tmp = RREG32(CP_CPF_DEBUG);
3777 tmp |= (1 << 23);
3778 WREG32(CP_CPF_DEBUG, tmp);
3779
3780 /* init the pipes */
3781 mutex_lock(&rdev->srbm_mutex);
3782 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3783 int me = (i < 4) ? 1 : 2;
3784 int pipe = (i < 4) ? i : (i - 4);
3785
3786 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3787
3788 cik_srbm_select(rdev, me, pipe, 0, 0);
3789
3790 /* write the EOP addr */
3791 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3792 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3793
3794 /* set the VMID assigned */
3795 WREG32(CP_HPD_EOP_VMID, 0);
3796
3797 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3798 tmp = RREG32(CP_HPD_EOP_CONTROL);
3799 tmp &= ~EOP_SIZE_MASK;
3800 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3801 WREG32(CP_HPD_EOP_CONTROL, tmp);
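		/* Worked example (illustrative): MEC_HPD_SIZE / 8 = 256, so
		 * order_base_2() yields 8, which the hardware decodes as
		 * 2^(8+1) = 512 dwords = 2048 bytes, matching MEC_HPD_SIZE.
		 */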
3802 }
3803 cik_srbm_select(rdev, 0, 0, 0, 0);
3804 mutex_unlock(&rdev->srbm_mutex);
3805
3806 /* init the queues. Just two for now. */
3807 for (i = 0; i < 2; i++) {
3808 if (i == 0)
3809 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3810 else
3811 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3812
3813 if (rdev->ring[idx].mqd_obj == NULL) {
3814 r = radeon_bo_create(rdev,
3815 sizeof(struct bonaire_mqd),
3816 PAGE_SIZE, true,
3817 RADEON_GEM_DOMAIN_GTT, NULL,
3818 &rdev->ring[idx].mqd_obj);
3819 if (r) {
3820 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3821 return r;
3822 }
3823 }
3824
3825 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3826 if (unlikely(r != 0)) {
3827 cik_cp_compute_fini(rdev);
3828 return r;
3829 }
3830 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3831 &mqd_gpu_addr);
3832 if (r) {
3833 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3834 cik_cp_compute_fini(rdev);
3835 return r;
3836 }
3837 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3838 if (r) {
3839 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3840 cik_cp_compute_fini(rdev);
3841 return r;
3842 }
3843
3844 /* doorbell offset */
3845 rdev->ring[idx].doorbell_offset =
3846 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
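		/* Note (illustrative): doorbell_offset is a byte offset into
		 * the doorbell aperture; the DOORBELL_OFFSET() field written
		 * further down takes a dword index, hence the "/ 4" there.
		 */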
3847
3848 /* init the mqd struct */
3849 memset(buf, 0, sizeof(struct bonaire_mqd));
3850
3851 mqd = (struct bonaire_mqd *)buf;
3852 mqd->header = 0xC0310800;
3853 mqd->static_thread_mgmt01[0] = 0xffffffff;
3854 mqd->static_thread_mgmt01[1] = 0xffffffff;
3855 mqd->static_thread_mgmt23[0] = 0xffffffff;
3856 mqd->static_thread_mgmt23[1] = 0xffffffff;
3857
3858 mutex_lock(&rdev->srbm_mutex);
3859 cik_srbm_select(rdev, rdev->ring[idx].me,
3860 rdev->ring[idx].pipe,
3861 rdev->ring[idx].queue, 0);
3862
3863 /* disable wptr polling */
3864 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3865 tmp &= ~WPTR_POLL_EN;
3866 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3867
3868 /* enable doorbell? */
3869 mqd->queue_state.cp_hqd_pq_doorbell_control =
3870 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3871 if (use_doorbell)
3872 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3873 else
3874 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3875 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3876 mqd->queue_state.cp_hqd_pq_doorbell_control);
3877
3878 /* disable the queue if it's active */
3879 mqd->queue_state.cp_hqd_dequeue_request = 0;
3880 mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
3882 if (RREG32(CP_HQD_ACTIVE) & 1) {
3883 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* use a separate counter here so the outer queue
			 * loop index is not clobbered
			 */
			for (j = 0; j < rdev->usec_timeout; j++) {
3885 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3886 break;
3887 udelay(1);
3888 }
3889 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3890 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3891 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3892 }
3893
3894 /* set the pointer to the MQD */
3895 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3896 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3897 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3898 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3899 /* set MQD vmid to 0 */
3900 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3901 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3902 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3903
		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3905 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3906 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3907 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3908 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3909 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3910
3911 /* set up the HQD, this is similar to CP_RB0_CNTL */
3912 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3913 mqd->queue_state.cp_hqd_pq_control &=
3914 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3915
3916 mqd->queue_state.cp_hqd_pq_control |=
3917 order_base_2(rdev->ring[idx].ring_size / 8);
3918 mqd->queue_state.cp_hqd_pq_control |=
3919 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3920#ifdef __BIG_ENDIAN
3921 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3922#endif
3923 mqd->queue_state.cp_hqd_pq_control &=
3924 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3925 mqd->queue_state.cp_hqd_pq_control |=
3926 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3927 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
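		/* Worked example (illustrative): assuming a 1 MiB ring,
		 * order_base_2(ring_size / 8) = order_base_2(131072) = 17
		 * fills QUEUE_SIZE, and order_base_2(4096 / 8) = 9, shifted
		 * left by 8, fills RPTR_BLOCK_SIZE (RADEON_GPU_PAGE_SIZE is
		 * 4096).
		 */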
3928
3929 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3930 if (i == 0)
3931 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3932 else
3933 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3934 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3935 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3936 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3937 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3938 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3939
		/* set the wb address whether it's enabled or not */
3941 if (i == 0)
3942 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3943 else
3944 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3945 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3946 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3947 upper_32_bits(wb_gpu_addr) & 0xffff;
3948 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3949 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3950 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3951 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3952
3953 /* enable the doorbell if requested */
3954 if (use_doorbell) {
3955 mqd->queue_state.cp_hqd_pq_doorbell_control =
3956 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3957 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3958 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3959 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3960 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3961 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3962 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3963
3964 } else {
3965 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3966 }
3967 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3968 mqd->queue_state.cp_hqd_pq_doorbell_control);
3969
3970 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3971 rdev->ring[idx].wptr = 0;
3972 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3973 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3974 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3975 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3976
3977 /* set the vmid for the queue */
3978 mqd->queue_state.cp_hqd_vmid = 0;
3979 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3980
3981 /* activate the queue */
3982 mqd->queue_state.cp_hqd_active = 1;
3983 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3984
3985 cik_srbm_select(rdev, 0, 0, 0, 0);
3986 mutex_unlock(&rdev->srbm_mutex);
3987
3988 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3989 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3990
3991 rdev->ring[idx].ready = true;
3992 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3993 if (r)
3994 rdev->ring[idx].ready = false;
3995 }
3996
3997 return 0;
3998}
3999
4000static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4001{
4002 cik_cp_gfx_enable(rdev, enable);
4003 cik_cp_compute_enable(rdev, enable);
4004}
4005
4006static int cik_cp_load_microcode(struct radeon_device *rdev)
4007{
4008 int r;
4009
4010 r = cik_cp_gfx_load_microcode(rdev);
4011 if (r)
4012 return r;
4013 r = cik_cp_compute_load_microcode(rdev);
4014 if (r)
4015 return r;
4016
4017 return 0;
4018}
4019
4020static void cik_cp_fini(struct radeon_device *rdev)
4021{
4022 cik_cp_gfx_fini(rdev);
4023 cik_cp_compute_fini(rdev);
4024}
4025
4026static int cik_cp_resume(struct radeon_device *rdev)
4027{
4028 int r;
4029
4030 cik_enable_gui_idle_interrupt(rdev, false);
4031
4032 r = cik_cp_load_microcode(rdev);
4033 if (r)
4034 return r;
4035
4036 r = cik_cp_gfx_resume(rdev);
4037 if (r)
4038 return r;
4039 r = cik_cp_compute_resume(rdev);
4040 if (r)
4041 return r;
4042
4043 cik_enable_gui_idle_interrupt(rdev, true);
4044
4045 return 0;
4046}
4047
4048static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4049{
4050 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4051 RREG32(GRBM_STATUS));
4052 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4053 RREG32(GRBM_STATUS2));
4054 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4055 RREG32(GRBM_STATUS_SE0));
4056 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4057 RREG32(GRBM_STATUS_SE1));
4058 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4059 RREG32(GRBM_STATUS_SE2));
4060 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4061 RREG32(GRBM_STATUS_SE3));
4062 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4063 RREG32(SRBM_STATUS));
4064 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4065 RREG32(SRBM_STATUS2));
4066 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4067 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4068 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4069 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4070 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4071 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4072 RREG32(CP_STALLED_STAT1));
4073 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4074 RREG32(CP_STALLED_STAT2));
4075 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4076 RREG32(CP_STALLED_STAT3));
4077 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4078 RREG32(CP_CPF_BUSY_STAT));
4079 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4080 RREG32(CP_CPF_STALLED_STAT1));
4081 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4082 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4083 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4084 RREG32(CP_CPC_STALLED_STAT1));
4085 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4086}
4087
4088/**
4089 * cik_gpu_check_soft_reset - check which blocks are busy
4090 *
4091 * @rdev: radeon_device pointer
4092 *
4093 * Check which blocks are busy and return the relevant reset
4094 * mask to be used by cik_gpu_soft_reset().
4095 * Returns a mask of the blocks to be reset.
4096 */
4097u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4098{
4099 u32 reset_mask = 0;
4100 u32 tmp;
4101
4102 /* GRBM_STATUS */
4103 tmp = RREG32(GRBM_STATUS);
4104 if (tmp & (PA_BUSY | SC_BUSY |
4105 BCI_BUSY | SX_BUSY |
4106 TA_BUSY | VGT_BUSY |
4107 DB_BUSY | CB_BUSY |
4108 GDS_BUSY | SPI_BUSY |
4109 IA_BUSY | IA_BUSY_NO_DMA))
4110 reset_mask |= RADEON_RESET_GFX;
4111
4112 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4113 reset_mask |= RADEON_RESET_CP;
4114
4115 /* GRBM_STATUS2 */
4116 tmp = RREG32(GRBM_STATUS2);
4117 if (tmp & RLC_BUSY)
4118 reset_mask |= RADEON_RESET_RLC;
4119
4120 /* SDMA0_STATUS_REG */
4121 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4122 if (!(tmp & SDMA_IDLE))
4123 reset_mask |= RADEON_RESET_DMA;
4124
4125 /* SDMA1_STATUS_REG */
4126 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4127 if (!(tmp & SDMA_IDLE))
4128 reset_mask |= RADEON_RESET_DMA1;
4129
4130 /* SRBM_STATUS2 */
4131 tmp = RREG32(SRBM_STATUS2);
4132 if (tmp & SDMA_BUSY)
4133 reset_mask |= RADEON_RESET_DMA;
4134
4135 if (tmp & SDMA1_BUSY)
4136 reset_mask |= RADEON_RESET_DMA1;
4137
4138 /* SRBM_STATUS */
4139 tmp = RREG32(SRBM_STATUS);
4140
4141 if (tmp & IH_BUSY)
4142 reset_mask |= RADEON_RESET_IH;
4143
4144 if (tmp & SEM_BUSY)
4145 reset_mask |= RADEON_RESET_SEM;
4146
4147 if (tmp & GRBM_RQ_PENDING)
4148 reset_mask |= RADEON_RESET_GRBM;
4149
4150 if (tmp & VMC_BUSY)
4151 reset_mask |= RADEON_RESET_VMC;
4152
4153 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4154 MCC_BUSY | MCD_BUSY))
4155 reset_mask |= RADEON_RESET_MC;
4156
4157 if (evergreen_is_display_hung(rdev))
4158 reset_mask |= RADEON_RESET_DISPLAY;
4159
	/* Skip MC reset as it's most likely not hung, just busy */
4161 if (reset_mask & RADEON_RESET_MC) {
4162 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4163 reset_mask &= ~RADEON_RESET_MC;
4164 }
4165
4166 return reset_mask;
4167}
4168
4169/**
4170 * cik_gpu_soft_reset - soft reset GPU
4171 *
4172 * @rdev: radeon_device pointer
4173 * @reset_mask: mask of which blocks to reset
4174 *
4175 * Soft reset the blocks specified in @reset_mask.
4176 */
4177static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4178{
4179 struct evergreen_mc_save save;
4180 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4181 u32 tmp;
4182
4183 if (reset_mask == 0)
4184 return;
4185
4186 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4187
4188 cik_print_gpu_status_regs(rdev);
4189 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4190 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4191 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4192 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4193
4194 /* disable CG/PG */
4195 cik_fini_pg(rdev);
4196 cik_fini_cg(rdev);
4197
4198 /* stop the rlc */
4199 cik_rlc_stop(rdev);
4200
4201 /* Disable GFX parsing/prefetching */
4202 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4203
4204 /* Disable MEC parsing/prefetching */
4205 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4206
4207 if (reset_mask & RADEON_RESET_DMA) {
4208 /* sdma0 */
4209 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4210 tmp |= SDMA_HALT;
4211 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4212 }
4213 if (reset_mask & RADEON_RESET_DMA1) {
4214 /* sdma1 */
4215 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4216 tmp |= SDMA_HALT;
4217 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4218 }
4219
4220 evergreen_mc_stop(rdev, &save);
4221 if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4223 }
4224
4225 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4226 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4227
4228 if (reset_mask & RADEON_RESET_CP) {
4229 grbm_soft_reset |= SOFT_RESET_CP;
4230
4231 srbm_soft_reset |= SOFT_RESET_GRBM;
4232 }
4233
4234 if (reset_mask & RADEON_RESET_DMA)
4235 srbm_soft_reset |= SOFT_RESET_SDMA;
4236
4237 if (reset_mask & RADEON_RESET_DMA1)
4238 srbm_soft_reset |= SOFT_RESET_SDMA1;
4239
4240 if (reset_mask & RADEON_RESET_DISPLAY)
4241 srbm_soft_reset |= SOFT_RESET_DC;
4242
4243 if (reset_mask & RADEON_RESET_RLC)
4244 grbm_soft_reset |= SOFT_RESET_RLC;
4245
4246 if (reset_mask & RADEON_RESET_SEM)
4247 srbm_soft_reset |= SOFT_RESET_SEM;
4248
4249 if (reset_mask & RADEON_RESET_IH)
4250 srbm_soft_reset |= SOFT_RESET_IH;
4251
4252 if (reset_mask & RADEON_RESET_GRBM)
4253 srbm_soft_reset |= SOFT_RESET_GRBM;
4254
4255 if (reset_mask & RADEON_RESET_VMC)
4256 srbm_soft_reset |= SOFT_RESET_VMC;
4257
4258 if (!(rdev->flags & RADEON_IS_IGP)) {
4259 if (reset_mask & RADEON_RESET_MC)
4260 srbm_soft_reset |= SOFT_RESET_MC;
4261 }
4262
4263 if (grbm_soft_reset) {
4264 tmp = RREG32(GRBM_SOFT_RESET);
4265 tmp |= grbm_soft_reset;
4266 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4267 WREG32(GRBM_SOFT_RESET, tmp);
4268 tmp = RREG32(GRBM_SOFT_RESET);
4269
4270 udelay(50);
4271
4272 tmp &= ~grbm_soft_reset;
4273 WREG32(GRBM_SOFT_RESET, tmp);
4274 tmp = RREG32(GRBM_SOFT_RESET);
4275 }
4276
4277 if (srbm_soft_reset) {
4278 tmp = RREG32(SRBM_SOFT_RESET);
4279 tmp |= srbm_soft_reset;
4280 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4281 WREG32(SRBM_SOFT_RESET, tmp);
4282 tmp = RREG32(SRBM_SOFT_RESET);
4283
4284 udelay(50);
4285
4286 tmp &= ~srbm_soft_reset;
4287 WREG32(SRBM_SOFT_RESET, tmp);
4288 tmp = RREG32(SRBM_SOFT_RESET);
4289 }
4290
4291 /* Wait a little for things to settle down */
4292 udelay(50);
4293
4294 evergreen_mc_resume(rdev, &save);
4295 udelay(50);
4296
4297 cik_print_gpu_status_regs(rdev);
4298}
4299
4300/**
4301 * cik_asic_reset - soft reset GPU
4302 *
4303 * @rdev: radeon_device pointer
4304 *
4305 * Look up which blocks are hung and attempt
4306 * to reset them.
4307 * Returns 0 for success.
4308 */
4309int cik_asic_reset(struct radeon_device *rdev)
4310{
4311 u32 reset_mask;
4312
4313 reset_mask = cik_gpu_check_soft_reset(rdev);
4314
4315 if (reset_mask)
4316 r600_set_bios_scratch_engine_hung(rdev, true);
4317
4318 cik_gpu_soft_reset(rdev, reset_mask);
4319
4320 reset_mask = cik_gpu_check_soft_reset(rdev);
4321
4322 if (!reset_mask)
4323 r600_set_bios_scratch_engine_hung(rdev, false);
4324
4325 return 0;
4326}
4327
4328/**
4329 * cik_gfx_is_lockup - check if the 3D engine is locked up
4330 *
4331 * @rdev: radeon_device pointer
4332 * @ring: radeon_ring structure holding ring information
4333 *
4334 * Check if the 3D engine is locked up (CIK).
4335 * Returns true if the engine is locked, false if not.
4336 */
4337bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4338{
4339 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4340
4341 if (!(reset_mask & (RADEON_RESET_GFX |
4342 RADEON_RESET_COMPUTE |
4343 RADEON_RESET_CP))) {
4344 radeon_ring_lockup_update(ring);
4345 return false;
4346 }
	/* force CP activity */
4348 radeon_ring_force_activity(rdev, ring);
4349 return radeon_ring_test_lockup(rdev, ring);
4350}
4351
4352/* MC */
4353/**
4354 * cik_mc_program - program the GPU memory controller
4355 *
4356 * @rdev: radeon_device pointer
4357 *
4358 * Set the location of vram, gart, and AGP in the GPU's
4359 * physical address space (CIK).
4360 */
4361static void cik_mc_program(struct radeon_device *rdev)
4362{
4363 struct evergreen_mc_save save;
4364 u32 tmp;
4365 int i, j;
4366
4367 /* Initialize HDP */
4368 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4369 WREG32((0x2c14 + j), 0x00000000);
4370 WREG32((0x2c18 + j), 0x00000000);
4371 WREG32((0x2c1c + j), 0x00000000);
4372 WREG32((0x2c20 + j), 0x00000000);
4373 WREG32((0x2c24 + j), 0x00000000);
4374 }
4375 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4376
4377 evergreen_mc_stop(rdev, &save);
4378 if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4380 }
	/* Lock out access through the VGA aperture */
4382 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4383 /* Update configuration */
4384 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4385 rdev->mc.vram_start >> 12);
4386 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4387 rdev->mc.vram_end >> 12);
4388 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4389 rdev->vram_scratch.gpu_addr >> 12);
4390 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4391 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4392 WREG32(MC_VM_FB_LOCATION, tmp);
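	/* Packing sketch (illustrative): MC_VM_FB_LOCATION holds the VRAM
	 * range in 16 MiB units (addr >> 24), end in bits 31:16 and start in
	 * bits 15:0; e.g. 1 GiB of VRAM at offset 0 gives
	 * tmp = (0x3F << 16) | 0x00.
	 */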
4393 /* XXX double check these! */
4394 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4395 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4396 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4397 WREG32(MC_VM_AGP_BASE, 0);
4398 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4399 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4400 if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4402 }
4403 evergreen_mc_resume(rdev, &save);
4404 /* we need to own VRAM, so turn off the VGA renderer here
4405 * to stop it overwriting our objects */
4406 rv515_vga_render_disable(rdev);
4407}
4408
4409/**
4410 * cik_mc_init - initialize the memory controller driver params
4411 *
4412 * @rdev: radeon_device pointer
4413 *
4414 * Look up the amount of vram, vram width, and decide how to place
4415 * vram and gart within the GPU's physical address space (CIK).
4416 * Returns 0 for success.
4417 */
4418static int cik_mc_init(struct radeon_device *rdev)
4419{
4420 u32 tmp;
4421 int chansize, numchan;
4422
	/* Get VRAM information */
4424 rdev->mc.vram_is_ddr = true;
4425 tmp = RREG32(MC_ARB_RAMCFG);
4426 if (tmp & CHANSIZE_MASK) {
4427 chansize = 64;
4428 } else {
4429 chansize = 32;
4430 }
4431 tmp = RREG32(MC_SHARED_CHMAP);
4432 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4433 case 0:
4434 default:
4435 numchan = 1;
4436 break;
4437 case 1:
4438 numchan = 2;
4439 break;
4440 case 2:
4441 numchan = 4;
4442 break;
4443 case 3:
4444 numchan = 8;
4445 break;
4446 case 4:
4447 numchan = 3;
4448 break;
4449 case 5:
4450 numchan = 6;
4451 break;
4452 case 6:
4453 numchan = 10;
4454 break;
4455 case 7:
4456 numchan = 12;
4457 break;
4458 case 8:
4459 numchan = 16;
4460 break;
4461 }
4462 rdev->mc.vram_width = numchan * chansize;
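	/* Worked example (illustrative): with CHANSIZE set (64-bit channels)
	 * and NOOFCHAN = 3 (8 channels), vram_width = 8 * 64 = 512 bits.
	 */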
	/* Could the aperture size report 0? */
4464 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4465 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on CIK */
4467 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4468 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4469 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4470 si_vram_gtt_location(rdev, &rdev->mc);
4471 radeon_update_bandwidth_info(rdev);
4472
4473 return 0;
4474}
4475
4476/*
4477 * GART
4478 * VMID 0 is the physical GPU addresses as used by the kernel.
4479 * VMIDs 1-15 are used for userspace clients and are handled
4480 * by the radeon vm/hsa code.
4481 */
4482/**
4483 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4484 *
4485 * @rdev: radeon_device pointer
4486 *
4487 * Flush the TLB for the VMID 0 page table (CIK).
4488 */
4489void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4490{
4491 /* flush hdp cache */
4492 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4493
4494 /* bits 0-15 are the VM contexts0-15 */
4495 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4496}
4497
4498/**
4499 * cik_pcie_gart_enable - gart enable
4500 *
4501 * @rdev: radeon_device pointer
4502 *
4503 * This sets up the TLBs, programs the page tables for VMID0,
4504 * sets up the hw for VMIDs 1-15 which are allocated on
4505 * demand, and sets up the global locations for the LDS, GDS,
4506 * and GPUVM for FSA64 clients (CIK).
4507 * Returns 0 for success, errors for failure.
4508 */
4509static int cik_pcie_gart_enable(struct radeon_device *rdev)
4510{
4511 int r, i;
4512
4513 if (rdev->gart.robj == NULL) {
4514 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4515 return -EINVAL;
4516 }
4517 r = radeon_gart_table_vram_pin(rdev);
4518 if (r)
4519 return r;
4520 radeon_gart_restore(rdev);
4521 /* Setup TLB control */
4522 WREG32(MC_VM_MX_L1_TLB_CNTL,
4523 (0xA << 7) |
4524 ENABLE_L1_TLB |
4525 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4526 ENABLE_ADVANCED_DRIVER_MODEL |
4527 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4528 /* Setup L2 cache */
4529 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4530 ENABLE_L2_FRAGMENT_PROCESSING |
4531 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4532 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4533 EFFECTIVE_L2_QUEUE_SIZE(7) |
4534 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4535 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4536 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4537 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4538 /* setup context0 */
4539 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4540 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4541 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4542 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4543 (u32)(rdev->dummy_page.addr >> 12));
4544 WREG32(VM_CONTEXT0_CNTL2, 0);
4545 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4546 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4547
4548 WREG32(0x15D4, 0);
4549 WREG32(0x15D8, 0);
4550 WREG32(0x15DC, 0);
4551
4552 /* empty context1-15 */
	/* FIXME: start with 4G; once we use a 2-level page table, switch
	 * to the full VM address space
	 */
4556 /* set vm size, must be a multiple of 4 */
4557 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4558 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4559 for (i = 1; i < 16; i++) {
4560 if (i < 8)
4561 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4562 rdev->gart.table_addr >> 12);
4563 else
4564 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4565 rdev->gart.table_addr >> 12);
4566 }
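	/* Layout note (illustrative): the per-VMID page table base registers
	 * form two banks of eight consecutive 32-bit registers, so VMIDs 1-7
	 * index off VM_CONTEXT0_PAGE_TABLE_BASE_ADDR and VMIDs 8-15 off
	 * VM_CONTEXT8_PAGE_TABLE_BASE_ADDR, four bytes apart each.
	 */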
4567
4568 /* enable context1-15 */
4569 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4570 (u32)(rdev->dummy_page.addr >> 12));
4571 WREG32(VM_CONTEXT1_CNTL2, 4);
4572 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4573 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4575 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4576 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4577 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4578 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4579 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4580 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4581 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4582 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4583 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4584 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4585
4586 /* TC cache setup ??? */
4587 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4588 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4589 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4590
4591 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4592 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4593 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4594 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4595 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4596
4597 WREG32(TC_CFG_L1_VOLATILE, 0);
4598 WREG32(TC_CFG_L2_VOLATILE, 0);
4599
4600 if (rdev->family == CHIP_KAVERI) {
4601 u32 tmp = RREG32(CHUB_CONTROL);
4602 tmp &= ~BYPASS_VM;
4603 WREG32(CHUB_CONTROL, tmp);
4604 }
4605
4606 /* XXX SH_MEM regs */
4607 /* where to put LDS, scratch, GPUVM in FSA64 space */
4608 mutex_lock(&rdev->srbm_mutex);
4609 for (i = 0; i < 16; i++) {
4610 cik_srbm_select(rdev, 0, 0, 0, i);
4611 /* CP and shaders */
4612 WREG32(SH_MEM_CONFIG, 0);
4613 WREG32(SH_MEM_APE1_BASE, 1);
4614 WREG32(SH_MEM_APE1_LIMIT, 0);
4615 WREG32(SH_MEM_BASES, 0);
4616 /* SDMA GFX */
4617 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4618 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4619 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4620 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4621 /* XXX SDMA RLC - todo */
4622 }
4623 cik_srbm_select(rdev, 0, 0, 0, 0);
4624 mutex_unlock(&rdev->srbm_mutex);
4625
4626 cik_pcie_gart_tlb_flush(rdev);
4627 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4628 (unsigned)(rdev->mc.gtt_size >> 20),
4629 (unsigned long long)rdev->gart.table_addr);
4630 rdev->gart.ready = true;
4631 return 0;
4632}
4633
4634/**
4635 * cik_pcie_gart_disable - gart disable
4636 *
4637 * @rdev: radeon_device pointer
4638 *
 * This disables all VM page tables (CIK).
4640 */
4641static void cik_pcie_gart_disable(struct radeon_device *rdev)
4642{
4643 /* Disable all tables */
4644 WREG32(VM_CONTEXT0_CNTL, 0);
4645 WREG32(VM_CONTEXT1_CNTL, 0);
4646 /* Setup TLB control */
4647 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4648 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4649 /* Setup L2 cache */
4650 WREG32(VM_L2_CNTL,
4651 ENABLE_L2_FRAGMENT_PROCESSING |
4652 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4653 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4654 EFFECTIVE_L2_QUEUE_SIZE(7) |
4655 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4656 WREG32(VM_L2_CNTL2, 0);
4657 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4658 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4659 radeon_gart_table_vram_unpin(rdev);
4660}
4661
4662/**
4663 * cik_pcie_gart_fini - vm fini callback
4664 *
4665 * @rdev: radeon_device pointer
4666 *
4667 * Tears down the driver GART/VM setup (CIK).
4668 */
4669static void cik_pcie_gart_fini(struct radeon_device *rdev)
4670{
4671 cik_pcie_gart_disable(rdev);
4672 radeon_gart_table_vram_free(rdev);
4673 radeon_gart_fini(rdev);
4674}
4675
4676/* vm parser */
4677/**
4678 * cik_ib_parse - vm ib_parse callback
4679 *
4680 * @rdev: radeon_device pointer
4681 * @ib: indirect buffer pointer
4682 *
4683 * CIK uses hw IB checking so this is a nop (CIK).
4684 */
4685int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4686{
4687 return 0;
4688}
4689
4690/*
4691 * vm
4692 * VMID 0 is the physical GPU addresses as used by the kernel.
4693 * VMIDs 1-15 are used for userspace clients and are handled
4694 * by the radeon vm/hsa code.
4695 */
4696/**
4697 * cik_vm_init - cik vm init callback
4698 *
4699 * @rdev: radeon_device pointer
4700 *
 * Inits CIK-specific VM parameters (number of VMs, base of VRAM for
4702 * VMIDs 1-15) (CIK).
4703 * Returns 0 for success.
4704 */
4705int cik_vm_init(struct radeon_device *rdev)
4706{
4707 /* number of VMs */
4708 rdev->vm_manager.nvm = 16;
4709 /* base offset of vram pages */
4710 if (rdev->flags & RADEON_IS_IGP) {
4711 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4712 tmp <<= 22;
4713 rdev->vm_manager.vram_base_offset = tmp;
4714 } else
4715 rdev->vm_manager.vram_base_offset = 0;
4716
4717 return 0;
4718}
4719
4720/**
4721 * cik_vm_fini - cik vm fini callback
4722 *
4723 * @rdev: radeon_device pointer
4724 *
4725 * Tear down any asic specific VM setup (CIK).
4726 */
4727void cik_vm_fini(struct radeon_device *rdev)
4728{
4729}
4730
4731/**
4732 * cik_vm_decode_fault - print human readable fault info
4733 *
4734 * @rdev: radeon_device pointer
4735 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4737 *
4738 * Print human readable fault information (CIK).
4739 */
4740static void cik_vm_decode_fault(struct radeon_device *rdev,
4741 u32 status, u32 addr, u32 mc_client)
4742{
4743 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4744 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4745 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4746 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4747 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4748
4749 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4750 protections, vmid, addr,
4751 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4752 block, mc_client, mc_id);
4753}
4754
4755/**
4756 * cik_vm_flush - cik vm flush using the CP
4757 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: radeon_vm pointer, may be NULL
4759 *
4760 * Update the page table base and flush the VM TLB
4761 * using the CP (CIK).
4762 */
4763void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4764{
4765 struct radeon_ring *ring = &rdev->ring[ridx];
4766
4767 if (vm == NULL)
4768 return;
4769
4770 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4771 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4772 WRITE_DATA_DST_SEL(0)));
4773 if (vm->id < 8) {
4774 radeon_ring_write(ring,
4775 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4776 } else {
4777 radeon_ring_write(ring,
4778 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4779 }
4780 radeon_ring_write(ring, 0);
4781 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4782
4783 /* update SH_MEM_* regs */
4784 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4785 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4786 WRITE_DATA_DST_SEL(0)));
4787 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4788 radeon_ring_write(ring, 0);
4789 radeon_ring_write(ring, VMID(vm->id));
4790
4791 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4792 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4793 WRITE_DATA_DST_SEL(0)));
4794 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4795 radeon_ring_write(ring, 0);
4796
4797 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4798 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4799 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4800 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4801
4802 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4803 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4804 WRITE_DATA_DST_SEL(0)));
4805 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4806 radeon_ring_write(ring, 0);
4807 radeon_ring_write(ring, VMID(0));
4808
4809 /* HDP flush */
4810 /* We should be using the WAIT_REG_MEM packet here like in
4811 * cik_fence_ring_emit(), but it causes the CP to hang in this
4812 * context...
4813 */
4814 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4815 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4816 WRITE_DATA_DST_SEL(0)));
4817 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4818 radeon_ring_write(ring, 0);
4819 radeon_ring_write(ring, 0);
4820
4821 /* bits 0-15 are the VM contexts0-15 */
4822 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4823 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4824 WRITE_DATA_DST_SEL(0)));
4825 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4826 radeon_ring_write(ring, 0);
4827 radeon_ring_write(ring, 1 << vm->id);
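	/* e.g. (illustrative): vm->id == 3 writes 0x8 here, invalidating the
	 * TLB entries of VM context 3 only.
	 */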
4828
4829 /* compute doesn't have PFP */
4830 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4831 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4832 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4833 radeon_ring_write(ring, 0x0);
4834 }
4835}
4836
4837/**
 * cik_vm_set_page - update the page tables using CP or sDMA
4839 *
4840 * @rdev: radeon_device pointer
4841 * @ib: indirect buffer to fill with commands
4842 * @pe: addr of the page entry
4843 * @addr: dst addr to write into pe
4844 * @count: number of page entries to update
4845 * @incr: increase next addr by incr bytes
4846 * @flags: access flags
4847 *
4848 * Update the page tables using CP or sDMA (CIK).
4849 */
4850void cik_vm_set_page(struct radeon_device *rdev,
4851 struct radeon_ib *ib,
4852 uint64_t pe,
4853 uint64_t addr, unsigned count,
4854 uint32_t incr, uint32_t flags)
4855{
4856 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4857 uint64_t value;
4858 unsigned ndw;
4859
4860 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4861 /* CP */
4862 while (count) {
4863 ndw = 2 + count * 2;
4864 if (ndw > 0x3FFE)
4865 ndw = 0x3FFE;
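			/* Sizing example (illustrative): each PTE takes two
			 * dwords plus a two-dword header, so 100 pages give
			 * ndw = 202; the 0x3FFE cap limits one WRITE_DATA
			 * packet to 8190 PTEs, and the outer loop emits more
			 * packets for any remainder.
			 */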
4866
4867 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4868 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4869 WRITE_DATA_DST_SEL(1));
4870 ib->ptr[ib->length_dw++] = pe;
4871 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4872 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4873 if (flags & RADEON_VM_PAGE_SYSTEM) {
4874 value = radeon_vm_map_gart(rdev, addr);
4875 value &= 0xFFFFFFFFFFFFF000ULL;
4876 } else if (flags & RADEON_VM_PAGE_VALID) {
4877 value = addr;
4878 } else {
4879 value = 0;
4880 }
4881 addr += incr;
4882 value |= r600_flags;
4883 ib->ptr[ib->length_dw++] = value;
4884 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4885 }
4886 }
4887 } else {
4888 /* DMA */
4889 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4890 }
4891}
4892
4893/*
4894 * RLC
4895 * The RLC is a multi-purpose microengine that handles a
4896 * variety of functions, the most important of which is
4897 * the interrupt controller.
4898 */
4899static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4900 bool enable)
4901{
4902 u32 tmp = RREG32(CP_INT_CNTL_RING0);
4903
4904 if (enable)
4905 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4906 else
4907 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4908 WREG32(CP_INT_CNTL_RING0, tmp);
4909}
4910
4911static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4912{
4913 u32 tmp;
4914
4915 tmp = RREG32(RLC_LB_CNTL);
4916 if (enable)
4917 tmp |= LOAD_BALANCE_ENABLE;
4918 else
4919 tmp &= ~LOAD_BALANCE_ENABLE;
4920 WREG32(RLC_LB_CNTL, tmp);
4921}
4922
4923static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4924{
4925 u32 i, j, k;
4926 u32 mask;
4927
4928 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4929 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4930 cik_select_se_sh(rdev, i, j);
4931 for (k = 0; k < rdev->usec_timeout; k++) {
4932 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4933 break;
4934 udelay(1);
4935 }
4936 }
4937 }
4938 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4939
4940 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4941 for (k = 0; k < rdev->usec_timeout; k++) {
4942 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4943 break;
4944 udelay(1);
4945 }
4946}
4947
4948static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4949{
4950 u32 tmp;
4951
4952 tmp = RREG32(RLC_CNTL);
4953 if (tmp != rlc)
4954 WREG32(RLC_CNTL, rlc);
4955}
4956
4957static u32 cik_halt_rlc(struct radeon_device *rdev)
4958{
4959 u32 data, orig;
4960
4961 orig = data = RREG32(RLC_CNTL);
4962
4963 if (data & RLC_ENABLE) {
4964 u32 i;
4965
4966 data &= ~RLC_ENABLE;
4967 WREG32(RLC_CNTL, data);
4968
4969 for (i = 0; i < rdev->usec_timeout; i++) {
4970 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4971 break;
4972 udelay(1);
4973 }
4974
4975 cik_wait_for_rlc_serdes(rdev);
4976 }
4977
4978 return orig;
4979}
4980
4981void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4982{
4983 u32 tmp, i, mask;
4984
4985 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4986 WREG32(RLC_GPR_REG2, tmp);
4987
4988 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4989 for (i = 0; i < rdev->usec_timeout; i++) {
4990 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4991 break;
4992 udelay(1);
4993 }
4994
4995 for (i = 0; i < rdev->usec_timeout; i++) {
4996 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
4997 break;
4998 udelay(1);
4999 }
5000}
5001
5002void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5003{
5004 u32 tmp;
5005
5006 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5007 WREG32(RLC_GPR_REG2, tmp);
5008}
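/* Handshake sketch (illustrative): entering safe mode posts REQ plus a
 * message id via RLC_GPR_REG2, then polls RLC_GPM_STAT until both
 * GFX_POWER_STATUS and GFX_CLOCK_STATUS are set, and finally waits for
 * the RLC to clear REQ.  Exiting only posts the message and does not wait.
 */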
5009
5010/**
5011 * cik_rlc_stop - stop the RLC ME
5012 *
5013 * @rdev: radeon_device pointer
5014 *
5015 * Halt the RLC ME (MicroEngine) (CIK).
5016 */
5017static void cik_rlc_stop(struct radeon_device *rdev)
5018{
5019 WREG32(RLC_CNTL, 0);
5020
5021 cik_enable_gui_idle_interrupt(rdev, false);
5022
5023 cik_wait_for_rlc_serdes(rdev);
5024}
5025
5026/**
5027 * cik_rlc_start - start the RLC ME
5028 *
5029 * @rdev: radeon_device pointer
5030 *
5031 * Unhalt the RLC ME (MicroEngine) (CIK).
5032 */
5033static void cik_rlc_start(struct radeon_device *rdev)
5034{
5035 WREG32(RLC_CNTL, RLC_ENABLE);
5036
5037 cik_enable_gui_idle_interrupt(rdev, true);
5038
5039 udelay(50);
5040}
5041
5042/**
5043 * cik_rlc_resume - setup the RLC hw
5044 *
5045 * @rdev: radeon_device pointer
5046 *
5047 * Initialize the RLC registers, load the ucode,
5048 * and start the RLC (CIK).
5049 * Returns 0 for success, -EINVAL if the ucode is not available.
5050 */
5051static int cik_rlc_resume(struct radeon_device *rdev)
5052{
5053 u32 i, size, tmp;
5054 const __be32 *fw_data;
5055
5056 if (!rdev->rlc_fw)
5057 return -EINVAL;
5058
5059 switch (rdev->family) {
5060 case CHIP_BONAIRE:
5061 default:
5062 size = BONAIRE_RLC_UCODE_SIZE;
5063 break;
5064 case CHIP_KAVERI:
5065 size = KV_RLC_UCODE_SIZE;
5066 break;
5067 case CHIP_KABINI:
5068 size = KB_RLC_UCODE_SIZE;
5069 break;
5070 }
5071
5072 cik_rlc_stop(rdev);
5073
5074 /* disable CG */
5075 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5076 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5077
5078 si_rlc_reset(rdev);
5079
5080 cik_init_pg(rdev);
5081
5082 cik_init_cg(rdev);
5083
5084 WREG32(RLC_LB_CNTR_INIT, 0);
5085 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5086
5087 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5088 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5089 WREG32(RLC_LB_PARAMS, 0x00600408);
5090 WREG32(RLC_LB_CNTL, 0x80000004);
5091
5092 WREG32(RLC_MC_CNTL, 0);
5093 WREG32(RLC_UCODE_CNTL, 0);
5094
5095 fw_data = (const __be32 *)rdev->rlc_fw->data;
5096 WREG32(RLC_GPM_UCODE_ADDR, 0);
5097 for (i = 0; i < size; i++)
5098 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5099 WREG32(RLC_GPM_UCODE_ADDR, 0);
5100
5101 /* XXX - find out what chips support lbpw */
5102 cik_enable_lbpw(rdev, false);
5103
5104 if (rdev->family == CHIP_BONAIRE)
5105 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5106
5107 cik_rlc_start(rdev);
5108
5109 return 0;
5110}
5111
5112static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5113{
5114 u32 data, orig, tmp, tmp2;
5115
5116 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5117
5118 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5119 cik_enable_gui_idle_interrupt(rdev, true);
5120
5121 tmp = cik_halt_rlc(rdev);
5122
5123 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5124 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5125 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5126 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5127 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5128
5129 cik_update_rlc(rdev, tmp);
5130
5131 data |= CGCG_EN | CGLS_EN;
5132 } else {
5133 cik_enable_gui_idle_interrupt(rdev, false);
5134
5135 RREG32(CB_CGTT_SCLK_CTRL);
5136 RREG32(CB_CGTT_SCLK_CTRL);
5137 RREG32(CB_CGTT_SCLK_CTRL);
5138 RREG32(CB_CGTT_SCLK_CTRL);
5139
5140 data &= ~(CGCG_EN | CGLS_EN);
5141 }
5142
5143 if (orig != data)
5144 WREG32(RLC_CGCG_CGLS_CTRL, data);
5146}
5147
5148static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5149{
5150 u32 data, orig, tmp = 0;
5151
5152 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5153 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5154 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5155 orig = data = RREG32(CP_MEM_SLP_CNTL);
5156 data |= CP_MEM_LS_EN;
5157 if (orig != data)
5158 WREG32(CP_MEM_SLP_CNTL, data);
5159 }
5160 }
5161
5162 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5163 data &= 0xfffffffd;
5164 if (orig != data)
5165 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5166
5167 tmp = cik_halt_rlc(rdev);
5168
5169 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5170 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5171 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5172 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5173 WREG32(RLC_SERDES_WR_CTRL, data);
5174
5175 cik_update_rlc(rdev, tmp);
5176
5177 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5178 orig = data = RREG32(CGTS_SM_CTRL_REG);
5179 data &= ~SM_MODE_MASK;
5180 data |= SM_MODE(0x2);
5181 data |= SM_MODE_ENABLE;
5182 data &= ~CGTS_OVERRIDE;
5183 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5184 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5185 data &= ~CGTS_LS_OVERRIDE;
5186 data &= ~ON_MONITOR_ADD_MASK;
5187 data |= ON_MONITOR_ADD_EN;
5188 data |= ON_MONITOR_ADD(0x96);
5189 if (orig != data)
5190 WREG32(CGTS_SM_CTRL_REG, data);
5191 }
5192 } else {
5193 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5194 data |= 0x00000002;
5195 if (orig != data)
5196 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5197
5198 data = RREG32(RLC_MEM_SLP_CNTL);
5199 if (data & RLC_MEM_LS_EN) {
5200 data &= ~RLC_MEM_LS_EN;
5201 WREG32(RLC_MEM_SLP_CNTL, data);
5202 }
5203
5204 data = RREG32(CP_MEM_SLP_CNTL);
5205 if (data & CP_MEM_LS_EN) {
5206 data &= ~CP_MEM_LS_EN;
5207 WREG32(CP_MEM_SLP_CNTL, data);
5208 }
5209
5210 orig = data = RREG32(CGTS_SM_CTRL_REG);
5211 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5212 if (orig != data)
5213 WREG32(CGTS_SM_CTRL_REG, data);
5214
5215 tmp = cik_halt_rlc(rdev);
5216
5217 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5218 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5219 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5220 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5221 WREG32(RLC_SERDES_WR_CTRL, data);
5222
5223 cik_update_rlc(rdev, tmp);
5224 }
5225}
5226
5227static const u32 mc_cg_registers[] =
5228{
5229 MC_HUB_MISC_HUB_CG,
5230 MC_HUB_MISC_SIP_CG,
5231 MC_HUB_MISC_VM_CG,
5232 MC_XPB_CLK_GAT,
5233 ATC_MISC_CG,
5234 MC_CITF_MISC_WR_CG,
5235 MC_CITF_MISC_RD_CG,
5236 MC_CITF_MISC_VM_CG,
5237 VM_L2_CG,
5238};
5239
5240static void cik_enable_mc_ls(struct radeon_device *rdev,
5241 bool enable)
5242{
5243 int i;
5244 u32 orig, data;
5245
5246 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5247 orig = data = RREG32(mc_cg_registers[i]);
5248 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5249 data |= MC_LS_ENABLE;
5250 else
5251 data &= ~MC_LS_ENABLE;
5252 if (data != orig)
5253 WREG32(mc_cg_registers[i], data);
5254 }
5255}
5256
5257static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5258 bool enable)
5259{
5260 int i;
5261 u32 orig, data;
5262
5263 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5264 orig = data = RREG32(mc_cg_registers[i]);
5265 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5266 data |= MC_CG_ENABLE;
5267 else
5268 data &= ~MC_CG_ENABLE;
5269 if (data != orig)
5270 WREG32(mc_cg_registers[i], data);
5271 }
5272}
5273
5274static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5275 bool enable)
5276{
5277 u32 orig, data;
5278
5279 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5280 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5281 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5282 } else {
5283 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5284 data |= 0xff000000;
5285 if (data != orig)
5286 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5287
5288 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5289 data |= 0xff000000;
5290 if (data != orig)
5291 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5292 }
5293}
5294
5295static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5296 bool enable)
5297{
5298 u32 orig, data;
5299
5300 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5301 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5302 data |= 0x100;
5303 if (orig != data)
5304 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5305
5306 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5307 data |= 0x100;
5308 if (orig != data)
5309 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5310 } else {
5311 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5312 data &= ~0x100;
5313 if (orig != data)
5314 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5315
5316 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5317 data &= ~0x100;
5318 if (orig != data)
5319 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5320 }
5321}
5322
5323static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5324 bool enable)
5325{
5326 u32 orig, data;
5327
5328 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5329 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5330 data = 0xfff;
5331 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5332
5333 orig = data = RREG32(UVD_CGC_CTRL);
5334 data |= DCM;
5335 if (orig != data)
5336 WREG32(UVD_CGC_CTRL, data);
5337 } else {
5338 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5339 data &= ~0xfff;
5340 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5341
5342 orig = data = RREG32(UVD_CGC_CTRL);
5343 data &= ~DCM;
5344 if (orig != data)
5345 WREG32(UVD_CGC_CTRL, data);
5346 }
5347}
5348
5349static void cik_enable_bif_mgls(struct radeon_device *rdev,
5350 bool enable)
5351{
5352 u32 orig, data;
5353
5354 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5355
5356 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5357 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5358 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5359 else
5360 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5361 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5362
5363 if (orig != data)
5364 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5365}
5366
5367static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5368 bool enable)
5369{
5370 u32 orig, data;
5371
5372 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5373
5374 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5375 data &= ~CLOCK_GATING_DIS;
5376 else
5377 data |= CLOCK_GATING_DIS;
5378
5379 if (orig != data)
5380 WREG32(HDP_HOST_PATH_CNTL, data);
5381}
5382
5383static void cik_enable_hdp_ls(struct radeon_device *rdev,
5384 bool enable)
5385{
5386 u32 orig, data;
5387
5388 orig = data = RREG32(HDP_MEM_POWER_LS);
5389
5390 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5391 data |= HDP_LS_ENABLE;
5392 else
5393 data &= ~HDP_LS_ENABLE;
5394
5395 if (orig != data)
5396 WREG32(HDP_MEM_POWER_LS, data);
5397}
5398
5399void cik_update_cg(struct radeon_device *rdev,
5400 u32 block, bool enable)
5401{
5403 if (block & RADEON_CG_BLOCK_GFX) {
5404 cik_enable_gui_idle_interrupt(rdev, false);
5405 /* order matters! */
5406 if (enable) {
5407 cik_enable_mgcg(rdev, true);
5408 cik_enable_cgcg(rdev, true);
5409 } else {
5410 cik_enable_cgcg(rdev, false);
5411 cik_enable_mgcg(rdev, false);
5412 }
5413 cik_enable_gui_idle_interrupt(rdev, true);
5414 }
5415
5416 if (block & RADEON_CG_BLOCK_MC) {
5417 if (!(rdev->flags & RADEON_IS_IGP)) {
5418 cik_enable_mc_mgcg(rdev, enable);
5419 cik_enable_mc_ls(rdev, enable);
5420 }
5421 }
5422
5423 if (block & RADEON_CG_BLOCK_SDMA) {
5424 cik_enable_sdma_mgcg(rdev, enable);
5425 cik_enable_sdma_mgls(rdev, enable);
5426 }
5427
5428 if (block & RADEON_CG_BLOCK_BIF) {
5429 cik_enable_bif_mgls(rdev, enable);
5430 }
5431
5432 if (block & RADEON_CG_BLOCK_UVD) {
5433 if (rdev->has_uvd)
5434 cik_enable_uvd_mgcg(rdev, enable);
5435 }
5436
5437 if (block & RADEON_CG_BLOCK_HDP) {
5438 cik_enable_hdp_mgcg(rdev, enable);
5439 cik_enable_hdp_ls(rdev, enable);
5440 }
5441}
5442
5443static void cik_init_cg(struct radeon_device *rdev)
5444{
5446 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5447
5448 if (rdev->has_uvd)
5449 si_init_uvd_internal_cg(rdev);
5450
5451 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5452 RADEON_CG_BLOCK_SDMA |
5453 RADEON_CG_BLOCK_BIF |
5454 RADEON_CG_BLOCK_UVD |
5455 RADEON_CG_BLOCK_HDP), true);
5456}
5457
5458static void cik_fini_cg(struct radeon_device *rdev)
5459{
5460 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5461 RADEON_CG_BLOCK_SDMA |
5462 RADEON_CG_BLOCK_BIF |
5463 RADEON_CG_BLOCK_UVD |
5464 RADEON_CG_BLOCK_HDP), false);
5465
5466 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5467}
5468
5469static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5470 bool enable)
5471{
5472 u32 data, orig;
5473
5474 orig = data = RREG32(RLC_PG_CNTL);
5475 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5476 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5477 else
5478 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5479 if (orig != data)
5480 WREG32(RLC_PG_CNTL, data);
5481}
5482
5483static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5484 bool enable)
5485{
5486 u32 data, orig;
5487
5488 orig = data = RREG32(RLC_PG_CNTL);
5489 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5490 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5491 else
5492 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5493 if (orig != data)
5494 WREG32(RLC_PG_CNTL, data);
5495}
5496
5497static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5498{
5499 u32 data, orig;
5500
5501 orig = data = RREG32(RLC_PG_CNTL);
5502 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5503 data &= ~DISABLE_CP_PG;
5504 else
5505 data |= DISABLE_CP_PG;
5506 if (orig != data)
5507 WREG32(RLC_PG_CNTL, data);
5508}
5509
5510static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5511{
5512 u32 data, orig;
5513
5514 orig = data = RREG32(RLC_PG_CNTL);
5515 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5516 data &= ~DISABLE_GDS_PG;
5517 else
5518 data |= DISABLE_GDS_PG;
5519 if (orig != data)
5520 WREG32(RLC_PG_CNTL, data);
5521}
5522
5523#define CP_ME_TABLE_SIZE 96
5524#define CP_ME_TABLE_OFFSET 2048
5525#define CP_MEC_TABLE_OFFSET 4096
5526
5527void cik_init_cp_pg_table(struct radeon_device *rdev)
5528{
5529 const __be32 *fw_data;
5530 volatile u32 *dst_ptr;
5531 int me, i, max_me = 4;
5532 u32 bo_offset = 0;
5533 u32 table_offset;
5534
5535 if (rdev->family == CHIP_KAVERI)
5536 max_me = 5;
5537
5538 if (rdev->rlc.cp_table_ptr == NULL)
5539 return;
5540
5541 /* write the cp table buffer */
5542 dst_ptr = rdev->rlc.cp_table_ptr;
5543 for (me = 0; me < max_me; me++) {
5544 if (me == 0) {
5545 fw_data = (const __be32 *)rdev->ce_fw->data;
5546 table_offset = CP_ME_TABLE_OFFSET;
5547 } else if (me == 1) {
5548 fw_data = (const __be32 *)rdev->pfp_fw->data;
5549 table_offset = CP_ME_TABLE_OFFSET;
5550 } else if (me == 2) {
5551 fw_data = (const __be32 *)rdev->me_fw->data;
5552 table_offset = CP_ME_TABLE_OFFSET;
5553 } else {
5554 fw_data = (const __be32 *)rdev->mec_fw->data;
5555 table_offset = CP_MEC_TABLE_OFFSET;
5556 }
5557
5558 for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
5559 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5560 }
5561 bo_offset += CP_ME_TABLE_SIZE;
5562 }
5563}
5564
5565static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5566 bool enable)
5567{
5568 u32 data, orig;
5569
5570 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5571 orig = data = RREG32(RLC_PG_CNTL);
5572 data |= GFX_PG_ENABLE;
5573 if (orig != data)
5574 WREG32(RLC_PG_CNTL, data);
5575
5576 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5577 data |= AUTO_PG_EN;
5578 if (orig != data)
5579 WREG32(RLC_AUTO_PG_CTRL, data);
5580 } else {
5581 orig = data = RREG32(RLC_PG_CNTL);
5582 data &= ~GFX_PG_ENABLE;
5583 if (orig != data)
5584 WREG32(RLC_PG_CNTL, data);
5585
5586 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5587 data &= ~AUTO_PG_EN;
5588 if (orig != data)
5589 WREG32(RLC_AUTO_PG_CTRL, data);
5590
5591 data = RREG32(DB_RENDER_CONTROL); /* dummy read; presumably flushes the disable, the value is unused */
5592 }
5593}
5594
5595static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5596{
5597 u32 mask = 0, tmp, tmp1;
5598 int i;
5599
5600 cik_select_se_sh(rdev, se, sh);
5601 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5602 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5603 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5604
5605 tmp &= 0xffff0000;
5606
5607 tmp |= tmp1;
5608 tmp >>= 16;
5609
5610 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
5611 mask <<= 1;
5612 mask |= 1;
5613 }
5614
5615 return (~tmp) & mask;
5616}
5617
5618static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5619{
5620 u32 i, j, k, active_cu_number = 0;
5621 u32 mask, counter, cu_bitmap;
5622 u32 tmp = 0;
5623
5624 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5625 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5626 mask = 1;
5627 cu_bitmap = 0;
5628 counter = 0;
5629 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
5630 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5631 if (counter < 2)
5632 cu_bitmap |= mask;
5633 counter++;
5634 }
5635 mask <<= 1;
5636 }
5637
5638 active_cu_number += counter;
5639 tmp |= (cu_bitmap << (i * 16 + j * 8));
5640 }
5641 }
5642
5643 WREG32(RLC_PG_AO_CU_MASK, tmp);
5644
5645 tmp = RREG32(RLC_MAX_PG_CU);
5646 tmp &= ~MAX_PU_CU_MASK;
5647 tmp |= MAX_PU_CU(active_cu_number);
5648 WREG32(RLC_MAX_PG_CU, tmp);
5649}
5650
5651static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5652 bool enable)
5653{
5654 u32 data, orig;
5655
5656 orig = data = RREG32(RLC_PG_CNTL);
5657 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5658 data |= STATIC_PER_CU_PG_ENABLE;
5659 else
5660 data &= ~STATIC_PER_CU_PG_ENABLE;
5661 if (orig != data)
5662 WREG32(RLC_PG_CNTL, data);
5663}
5664
5665static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5666 bool enable)
5667{
5668 u32 data, orig;
5669
5670 orig = data = RREG32(RLC_PG_CNTL);
5671 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5672 data |= DYN_PER_CU_PG_ENABLE;
5673 else
5674 data &= ~DYN_PER_CU_PG_ENABLE;
5675 if (orig != data)
5676 WREG32(RLC_PG_CNTL, data);
5677}
5678
5679#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5680#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
5681
5682static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5683{
5684 u32 data, orig;
5685 u32 i;
5686
5687 if (rdev->rlc.cs_data) {
5688 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5689 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5690 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5691 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
5692 } else {
5693 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5694 for (i = 0; i < 3; i++)
5695 WREG32(RLC_GPM_SCRATCH_DATA, 0);
5696 }
5697 if (rdev->rlc.reg_list) {
5698 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5699 for (i = 0; i < rdev->rlc.reg_list_size; i++)
5700 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5701 }
5702
5703 orig = data = RREG32(RLC_PG_CNTL);
5704 data |= GFX_PG_SRC;
5705 if (orig != data)
5706 WREG32(RLC_PG_CNTL, data);
5707
5708 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5709 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5710
5711 data = RREG32(CP_RB_WPTR_POLL_CNTL);
5712 data &= ~IDLE_POLL_COUNT_MASK;
5713 data |= IDLE_POLL_COUNT(0x60);
5714 WREG32(CP_RB_WPTR_POLL_CNTL, data);
5715
5716 data = 0x10101010;
5717 WREG32(RLC_PG_DELAY, data);
5718
5719 data = RREG32(RLC_PG_DELAY_2);
5720 data &= ~0xff;
5721 data |= 0x3;
5722 WREG32(RLC_PG_DELAY_2, data);
5723
5724 data = RREG32(RLC_AUTO_PG_CTRL);
5725 data &= ~GRBM_REG_SGIT_MASK;
5726 data |= GRBM_REG_SGIT(0x700);
5727 WREG32(RLC_AUTO_PG_CTRL, data);
5728
5729}
5730
5731static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5732{
5733 cik_enable_gfx_cgpg(rdev, enable);
5734 cik_enable_gfx_static_mgpg(rdev, enable);
5735 cik_enable_gfx_dynamic_mgpg(rdev, enable);
5736}
5737
5738u32 cik_get_csb_size(struct radeon_device *rdev)
5739{
5740 u32 count = 0;
5741 const struct cs_section_def *sect = NULL;
5742 const struct cs_extent_def *ext = NULL;
5743
5744 if (rdev->rlc.cs_data == NULL)
5745 return 0;
5746
5747 /* begin clear state */
5748 count += 2;
5749 /* context control state */
5750 count += 3;
5751
5752 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5753 for (ext = sect->section; ext->extent != NULL; ++ext) {
5754 if (sect->id == SECT_CONTEXT)
5755 count += 2 + ext->reg_count;
5756 else
5757 return 0;
5758 }
5759 }
5760 /* pa_sc_raster_config/pa_sc_raster_config1 */
5761 count += 4;
5762 /* end clear state */
5763 count += 2;
5764 /* clear state */
5765 count += 2;
5766
5767 return count;
5768}
5769
5770void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5771{
5772 u32 count = 0, i;
5773 const struct cs_section_def *sect = NULL;
5774 const struct cs_extent_def *ext = NULL;
5775
5776 if (rdev->rlc.cs_data == NULL)
5777 return;
5778 if (buffer == NULL)
5779 return;
5780
5781 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5782 buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5783
5784 buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5785 buffer[count++] = 0x80000000;
5786 buffer[count++] = 0x80000000;
5787
5788 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5789 for (ext = sect->section; ext->extent != NULL; ++ext) {
5790 if (sect->id == SECT_CONTEXT) {
5791 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
5792 buffer[count++] = ext->reg_index - 0xa000;
5793 for (i = 0; i < ext->reg_count; i++)
5794 buffer[count++] = ext->extent[i];
5795 } else {
5796 return;
5797 }
5798 }
5799 }
5800
5801 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
5802 buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
5803 switch (rdev->family) {
5804 case CHIP_BONAIRE:
5805 buffer[count++] = 0x16000012;
5806 buffer[count++] = 0x00000000;
5807 break;
5808 case CHIP_KAVERI:
5809 buffer[count++] = 0x00000000; /* XXX */
5810 buffer[count++] = 0x00000000;
5811 break;
5812 case CHIP_KABINI:
5813 buffer[count++] = 0x00000000; /* XXX */
5814 buffer[count++] = 0x00000000;
5815 break;
5816 default:
5817 buffer[count++] = 0x00000000;
5818 buffer[count++] = 0x00000000;
5819 break;
5820 }
5821
5822 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5823 buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5824
5825 buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5826 buffer[count++] = 0;
5827}
5828
5829static void cik_init_pg(struct radeon_device *rdev)
5830{
5831 if (rdev->pg_flags) {
5832 cik_enable_sck_slowdown_on_pu(rdev, true);
5833 cik_enable_sck_slowdown_on_pd(rdev, true);
5834 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5835 cik_init_gfx_cgpg(rdev);
5836 cik_enable_cp_pg(rdev, true);
5837 cik_enable_gds_pg(rdev, true);
5838 }
5839 cik_init_ao_cu_mask(rdev);
5840 cik_update_gfx_pg(rdev, true);
5841 }
5842}
5843
5844static void cik_fini_pg(struct radeon_device *rdev)
5845{
5846 if (rdev->pg_flags) {
5847 cik_update_gfx_pg(rdev, false);
5848 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5849 cik_enable_cp_pg(rdev, false);
5850 cik_enable_gds_pg(rdev, false);
5851 }
5852 }
5853}
5854
5855/*
5856 * Interrupts
5857 * Starting with r6xx, interrupts are handled via a ring buffer.
5858 * Ring buffers are areas of GPU accessible memory that the GPU
5859 * writes interrupt vectors into and the host reads vectors out of.
5860 * There is a rptr (read pointer) that determines where the
5861 * host is currently reading, and a wptr (write pointer)
5862 * which determines where the GPU has written. When the
5863 * pointers are equal, the ring is idle. When the GPU
5864 * writes vectors to the ring buffer, it increments the
5865 * wptr. When there is an interrupt, the host then starts
5866 * fetching vectors and processing them until the pointers are
5867 * equal again at which point it updates the rptr.
5868 */
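
/*
 * Illustrative sketch of that host-side consume loop, simplified from
 * cik_irq_process() below (process_vector() is a placeholder here, not
 * a real helper):
 *
 * wptr = cik_get_ih_wptr(rdev);
 * rptr = rdev->ih.rptr;
 * while (rptr != wptr) {
 *         process_vector(&rdev->ih.ring[rptr / 4]);
 *         rptr = (rptr + 16) & rdev->ih.ptr_mask;  (16-byte vectors)
 * }
 * WREG32(IH_RB_RPTR, rptr);  (tell the GPU how far we have read)
 */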
5869
5870/**
5871 * cik_enable_interrupts - Enable the interrupt ring buffer
5872 *
5873 * @rdev: radeon_device pointer
5874 *
5875 * Enable the interrupt ring buffer (CIK).
5876 */
5877static void cik_enable_interrupts(struct radeon_device *rdev)
5878{
5879 u32 ih_cntl = RREG32(IH_CNTL);
5880 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5881
5882 ih_cntl |= ENABLE_INTR;
5883 ih_rb_cntl |= IH_RB_ENABLE;
5884 WREG32(IH_CNTL, ih_cntl);
5885 WREG32(IH_RB_CNTL, ih_rb_cntl);
5886 rdev->ih.enabled = true;
5887}
5888
5889/**
5890 * cik_disable_interrupts - Disable the interrupt ring buffer
5891 *
5892 * @rdev: radeon_device pointer
5893 *
5894 * Disable the interrupt ring buffer (CIK).
5895 */
5896static void cik_disable_interrupts(struct radeon_device *rdev)
5897{
5898 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5899 u32 ih_cntl = RREG32(IH_CNTL);
5900
5901 ih_rb_cntl &= ~IH_RB_ENABLE;
5902 ih_cntl &= ~ENABLE_INTR;
5903 WREG32(IH_RB_CNTL, ih_rb_cntl);
5904 WREG32(IH_CNTL, ih_cntl);
5905 /* set rptr, wptr to 0 */
5906 WREG32(IH_RB_RPTR, 0);
5907 WREG32(IH_RB_WPTR, 0);
5908 rdev->ih.enabled = false;
5909 rdev->ih.rptr = 0;
5910}
5911
5912/**
5913 * cik_disable_interrupt_state - Disable all interrupt sources
5914 *
5915 * @rdev: radeon_device pointer
5916 *
5917 * Clear all interrupt enable bits used by the driver (CIK).
5918 */
5919static void cik_disable_interrupt_state(struct radeon_device *rdev)
5920{
5921 u32 tmp;
5922
5923 /* gfx ring */
5924 tmp = RREG32(CP_INT_CNTL_RING0) &
5925 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5926 WREG32(CP_INT_CNTL_RING0, tmp);
5927 /* sdma */
5928 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5929 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5930 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5931 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5932 /* compute queues */
5933 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5934 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5935 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5936 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5937 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5938 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5939 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5940 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5941 /* grbm */
5942 WREG32(GRBM_INT_CNTL, 0);
5943 /* vline/vblank, etc. */
5944 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5945 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5946 if (rdev->num_crtc >= 4) {
5947 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5948 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5949 }
5950 if (rdev->num_crtc >= 6) {
5951 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5952 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5953 }
5954
5955 /* dac hotplug */
5956 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5957
5958 /* digital hotplug */
5959 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5960 WREG32(DC_HPD1_INT_CONTROL, tmp);
5961 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5962 WREG32(DC_HPD2_INT_CONTROL, tmp);
5963 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5964 WREG32(DC_HPD3_INT_CONTROL, tmp);
5965 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5966 WREG32(DC_HPD4_INT_CONTROL, tmp);
5967 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5968 WREG32(DC_HPD5_INT_CONTROL, tmp);
5969 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5970 WREG32(DC_HPD6_INT_CONTROL, tmp);
5971
5972}
5973
5974/**
5975 * cik_irq_init - init and enable the interrupt ring
5976 *
5977 * @rdev: radeon_device pointer
5978 *
5979 * Allocate a ring buffer for the interrupt controller,
5980 * enable the RLC, disable interrupts, set up the IH
5981 * ring buffer and enable it (CIK).
5982 * Called at device load and resume.
5983 * Returns 0 for success, errors for failure.
5984 */
5985static int cik_irq_init(struct radeon_device *rdev)
5986{
5987 int ret = 0;
5988 int rb_bufsz;
5989 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5990
5991 /* allocate ring */
5992 ret = r600_ih_ring_alloc(rdev);
5993 if (ret)
5994 return ret;
5995
5996 /* disable irqs */
5997 cik_disable_interrupts(rdev);
5998
5999 /* init rlc */
6000 ret = cik_rlc_resume(rdev);
6001 if (ret) {
6002 r600_ih_ring_fini(rdev);
6003 return ret;
6004 }
6005
6006 /* setup interrupt control */
6007 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6008 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6009 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6010 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6011 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6012 */
6013 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6014 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6015 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6016 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6017
6018 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6019 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
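/* e.g. with the 64KB IH ring allocated in cik_init() below,
 * rb_bufsz = order_base_2(65536 / 4) = 14
 */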
6020
6021 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6022 IH_WPTR_OVERFLOW_CLEAR |
6023 (rb_bufsz << 1));
6024
6025 if (rdev->wb.enabled)
6026 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6027
6028 /* set the writeback address whether it's enabled or not */
6029 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6030 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6031
6032 WREG32(IH_RB_CNTL, ih_rb_cntl);
6033
6034 /* set rptr, wptr to 0 */
6035 WREG32(IH_RB_RPTR, 0);
6036 WREG32(IH_RB_WPTR, 0);
6037
6038 /* Default settings for IH_CNTL (disabled at first) */
6039 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6040 /* RPTR_REARM only works if MSIs are enabled */
6041 if (rdev->msi_enabled)
6042 ih_cntl |= RPTR_REARM;
6043 WREG32(IH_CNTL, ih_cntl);
6044
6045 /* force the active interrupt state to all disabled */
6046 cik_disable_interrupt_state(rdev);
6047
6048 pci_set_master(rdev->pdev);
6049
6050 /* enable irqs */
6051 cik_enable_interrupts(rdev);
6052
6053 return ret;
6054}
6055
6056/**
6057 * cik_irq_set - enable/disable interrupt sources
6058 *
6059 * @rdev: radeon_device pointer
6060 *
6061 * Enable interrupt sources on the GPU (vblanks, hpd,
6062 * etc.) (CIK).
6063 * Returns 0 for success, errors for failure.
6064 */
6065int cik_irq_set(struct radeon_device *rdev)
6066{
6067 u32 cp_int_cntl;
6068 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6069 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6070 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6071 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6072 u32 grbm_int_cntl = 0;
6073 u32 dma_cntl, dma_cntl1;
6074 u32 thermal_int;
6075
6076 if (!rdev->irq.installed) {
6077 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6078 return -EINVAL;
6079 }
6080 /* don't enable anything if the ih is disabled */
6081 if (!rdev->ih.enabled) {
6082 cik_disable_interrupts(rdev);
6083 /* force the active interrupt state to all disabled */
6084 cik_disable_interrupt_state(rdev);
6085 return 0;
6086 }
6087
6088 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6089 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6090 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6091
6092 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6093 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6094 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6095 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6096 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6097 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6098
6099 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6100 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6101
6102 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6103 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6104 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6105 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6106 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6107 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6108 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6109 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6110
6111 if (rdev->flags & RADEON_IS_IGP)
6112 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6113 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6114 else
6115 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6116 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6117
6118 /* enable CP interrupts on all rings */
6119 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6120 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6121 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6122 }
6123 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6124 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6125 DRM_DEBUG("si_irq_set: sw int cp1\n");
6126 if (ring->me == 1) {
6127 switch (ring->pipe) {
6128 case 0:
6129 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6130 break;
6131 case 1:
6132 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6133 break;
6134 case 2:
6135 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6136 break;
6137 case 3:
6138 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6139 break;
6140 default:
6141 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6142 break;
6143 }
6144 } else if (ring->me == 2) {
6145 switch (ring->pipe) {
6146 case 0:
6147 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6148 break;
6149 case 1:
6150 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6151 break;
6152 case 2:
6153 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6154 break;
6155 case 3:
6156 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6157 break;
6158 default:
6159 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6160 break;
6161 }
6162 } else {
6163 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6164 }
6165 }
6166 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6167 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6168 DRM_DEBUG("cik_irq_set: sw int cp2\n");
6169 if (ring->me == 1) {
6170 switch (ring->pipe) {
6171 case 0:
6172 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6173 break;
6174 case 1:
6175 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6176 break;
6177 case 2:
6178 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6179 break;
6180 case 3:
6181 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6182 break;
6183 default:
6184 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6185 break;
6186 }
6187 } else if (ring->me == 2) {
6188 switch (ring->pipe) {
6189 case 0:
6190 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6191 break;
6192 case 1:
6193 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6194 break;
6195 case 2:
6196 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6197 break;
6198 case 3:
6199 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6200 break;
6201 default:
6202 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6203 break;
6204 }
6205 } else {
6206 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6207 }
6208 }
6209
6210 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6211 DRM_DEBUG("cik_irq_set: sw int dma\n");
6212 dma_cntl |= TRAP_ENABLE;
6213 }
6214
6215 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6216 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6217 dma_cntl1 |= TRAP_ENABLE;
6218 }
6219
6220 if (rdev->irq.crtc_vblank_int[0] ||
6221 atomic_read(&rdev->irq.pflip[0])) {
6222 DRM_DEBUG("cik_irq_set: vblank 0\n");
6223 crtc1 |= VBLANK_INTERRUPT_MASK;
6224 }
6225 if (rdev->irq.crtc_vblank_int[1] ||
6226 atomic_read(&rdev->irq.pflip[1])) {
6227 DRM_DEBUG("cik_irq_set: vblank 1\n");
6228 crtc2 |= VBLANK_INTERRUPT_MASK;
6229 }
6230 if (rdev->irq.crtc_vblank_int[2] ||
6231 atomic_read(&rdev->irq.pflip[2])) {
6232 DRM_DEBUG("cik_irq_set: vblank 2\n");
6233 crtc3 |= VBLANK_INTERRUPT_MASK;
6234 }
6235 if (rdev->irq.crtc_vblank_int[3] ||
6236 atomic_read(&rdev->irq.pflip[3])) {
6237 DRM_DEBUG("cik_irq_set: vblank 3\n");
6238 crtc4 |= VBLANK_INTERRUPT_MASK;
6239 }
6240 if (rdev->irq.crtc_vblank_int[4] ||
6241 atomic_read(&rdev->irq.pflip[4])) {
6242 DRM_DEBUG("cik_irq_set: vblank 4\n");
6243 crtc5 |= VBLANK_INTERRUPT_MASK;
6244 }
6245 if (rdev->irq.crtc_vblank_int[5] ||
6246 atomic_read(&rdev->irq.pflip[5])) {
6247 DRM_DEBUG("cik_irq_set: vblank 5\n");
6248 crtc6 |= VBLANK_INTERRUPT_MASK;
6249 }
6250 if (rdev->irq.hpd[0]) {
6251 DRM_DEBUG("cik_irq_set: hpd 1\n");
6252 hpd1 |= DC_HPDx_INT_EN;
6253 }
6254 if (rdev->irq.hpd[1]) {
6255 DRM_DEBUG("cik_irq_set: hpd 2\n");
6256 hpd2 |= DC_HPDx_INT_EN;
6257 }
6258 if (rdev->irq.hpd[2]) {
6259 DRM_DEBUG("cik_irq_set: hpd 3\n");
6260 hpd3 |= DC_HPDx_INT_EN;
6261 }
6262 if (rdev->irq.hpd[3]) {
6263 DRM_DEBUG("cik_irq_set: hpd 4\n");
6264 hpd4 |= DC_HPDx_INT_EN;
6265 }
6266 if (rdev->irq.hpd[4]) {
6267 DRM_DEBUG("cik_irq_set: hpd 5\n");
6268 hpd5 |= DC_HPDx_INT_EN;
6269 }
6270 if (rdev->irq.hpd[5]) {
6271 DRM_DEBUG("cik_irq_set: hpd 6\n");
6272 hpd6 |= DC_HPDx_INT_EN;
6273 }
6274
6275 if (rdev->irq.dpm_thermal) {
6276 DRM_DEBUG("dpm thermal\n");
6277 if (rdev->flags & RADEON_IS_IGP)
6278 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6279 else
6280 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6281 }
6282
6283 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6284
6285 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6286 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6287
6288 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6289 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6290 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6291 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6292 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6293 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6294 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6295 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6296
6297 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6298
6299 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6300 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6301 if (rdev->num_crtc >= 4) {
6302 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6303 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6304 }
6305 if (rdev->num_crtc >= 6) {
6306 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6307 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6308 }
6309
6310 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6311 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6312 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6313 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6314 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6315 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6316
6317 if (rdev->flags & RADEON_IS_IGP)
6318 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6319 else
6320 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6321
6322 return 0;
6323}
6324
6325/**
6326 * cik_irq_ack - ack interrupt sources
6327 *
6328 * @rdev: radeon_device pointer
6329 *
6330 * Ack interrupt sources on the GPU (vblanks, hpd,
6331 * etc.) (CIK). Certain interrupt sources are sw
6332 * generated and do not require an explicit ack.
6333 */
6334static inline void cik_irq_ack(struct radeon_device *rdev)
6335{
6336 u32 tmp;
6337
6338 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6339 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6340 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6341 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6342 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6343 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6344 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6345
6346 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6347 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6348 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6349 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6350 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6351 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6352 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6353 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6354
6355 if (rdev->num_crtc >= 4) {
6356 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6357 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6358 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6359 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6360 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6361 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6362 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6363 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6364 }
6365
6366 if (rdev->num_crtc >= 6) {
6367 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6368 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6369 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6370 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6371 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6372 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6373 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6374 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6375 }
6376
6377 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6378 tmp = RREG32(DC_HPD1_INT_CONTROL);
6379 tmp |= DC_HPDx_INT_ACK;
6380 WREG32(DC_HPD1_INT_CONTROL, tmp);
6381 }
6382 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6383 tmp = RREG32(DC_HPD2_INT_CONTROL);
6384 tmp |= DC_HPDx_INT_ACK;
6385 WREG32(DC_HPD2_INT_CONTROL, tmp);
6386 }
6387 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6388 tmp = RREG32(DC_HPD3_INT_CONTROL);
6389 tmp |= DC_HPDx_INT_ACK;
6390 WREG32(DC_HPD3_INT_CONTROL, tmp);
6391 }
6392 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6393 tmp = RREG32(DC_HPD4_INT_CONTROL);
6394 tmp |= DC_HPDx_INT_ACK;
6395 WREG32(DC_HPD4_INT_CONTROL, tmp);
6396 }
6397 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6398 tmp = RREG32(DC_HPD5_INT_CONTROL);
6399 tmp |= DC_HPDx_INT_ACK;
6400 WREG32(DC_HPD5_INT_CONTROL, tmp);
6401 }
6402 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6403 tmp = RREG32(DC_HPD6_INT_CONTROL);
6404 tmp |= DC_HPDx_INT_ACK;
6405 WREG32(DC_HPD6_INT_CONTROL, tmp);
6406 }
6407}
6408
6409/**
6410 * cik_irq_disable - disable interrupts
6411 *
6412 * @rdev: radeon_device pointer
6413 *
6414 * Disable interrupts on the hw (CIK).
6415 */
6416static void cik_irq_disable(struct radeon_device *rdev)
6417{
6418 cik_disable_interrupts(rdev);
6419 /* Wait and acknowledge irq */
6420 mdelay(1);
6421 cik_irq_ack(rdev);
6422 cik_disable_interrupt_state(rdev);
6423}
6424
6425/**
6426 * cik_irq_suspend - disable interrupts for suspend
6427 *
6428 * @rdev: radeon_device pointer
6429 *
6430 * Disable interrupts and stop the RLC (CIK).
6431 * Used for suspend.
6432 */
6433static void cik_irq_suspend(struct radeon_device *rdev)
6434{
6435 cik_irq_disable(rdev);
6436 cik_rlc_stop(rdev);
6437}
6438
6439/**
6440 * cik_irq_fini - tear down interrupt support
6441 *
6442 * @rdev: radeon_device pointer
6443 *
6444 * Disable interrupts on the hw and free the IH ring
6445 * buffer (CIK).
6446 * Used for driver unload.
6447 */
6448static void cik_irq_fini(struct radeon_device *rdev)
6449{
6450 cik_irq_suspend(rdev);
6451 r600_ih_ring_fini(rdev);
6452}
6453
6454/**
6455 * cik_get_ih_wptr - get the IH ring buffer wptr
6456 *
6457 * @rdev: radeon_device pointer
6458 *
6459 * Get the IH ring buffer wptr from either the register
6460 * or the writeback memory buffer (CIK). Also check for
6461 * ring buffer overflow and deal with it.
6462 * Used by cik_irq_process().
6463 * Returns the value of the wptr.
6464 */
6465static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6466{
6467 u32 wptr, tmp;
6468
6469 if (rdev->wb.enabled)
6470 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6471 else
6472 wptr = RREG32(IH_RB_WPTR);
6473
6474 if (wptr & RB_OVERFLOW) {
6475 /* When a ring buffer overflow happens, start parsing interrupts
6476 * at the last vector that has not been overwritten (wptr + 16).
6477 * Hopefully this allows us to catch up.
6478 */
6479 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6480 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6481 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6482 tmp = RREG32(IH_RB_CNTL);
6483 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6484 WREG32(IH_RB_CNTL, tmp);
6485 }
6486 return (wptr & rdev->ih.ptr_mask);
6487}
6488
6489/* CIK IV Ring
6490 * Each IV ring entry is 128 bits:
6491 * [7:0] - interrupt source id
6492 * [31:8] - reserved
6493 * [59:32] - interrupt source data
6494 * [63:60] - reserved
6495 * [71:64] - RINGID
6496 * CP:
6497 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6498 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6499 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6500 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6501 * PIPE_ID - ME0 0=3D
6502 * - ME1&2 compute dispatcher (4 pipes each)
6503 * SDMA:
6504 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6505 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6506 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6507 * [79:72] - VMID
6508 * [95:80] - PASID
6509 * [127:96] - reserved
6510 */
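/*
 * Decode sketch for one IV entry, matching the reads in
 * cik_irq_process() below (dw0..dw2 stand for the ring dwords at
 * ring_index):
 *
 * src_id   = dw0 & 0xff;
 * src_data = dw1 & 0xfffffff;
 * ring_id  = dw2 & 0xff;
 * me_id    = (ring_id >> 5) & 0x3;  (RINGID[6:5])
 * pipe_id  = (ring_id >> 3) & 0x3;  (RINGID[4:3])
 * queue_id = ring_id & 0x7;         (RINGID[2:0])
 *
 * VMID and PASID live in the upper bits of dw2; dw3 is reserved and
 * neither is used by the handler below.
 */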
6511/**
6512 * cik_irq_process - interrupt handler
6513 *
6514 * @rdev: radeon_device pointer
6515 *
6516 * Interrupt handler (CIK). Walk the IH ring,
6517 * ack interrupts and schedule work to handle
6518 * interrupt events.
6519 * Returns irq process return code.
6520 */
6521int cik_irq_process(struct radeon_device *rdev)
6522{
6523 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6524 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6525 u32 wptr;
6526 u32 rptr;
6527 u32 src_id, src_data, ring_id;
6528 u8 me_id, pipe_id, queue_id;
6529 u32 ring_index;
6530 bool queue_hotplug = false;
6531 bool queue_reset = false;
6532 u32 addr, status, mc_client;
6533 bool queue_thermal = false;
6534
6535 if (!rdev->ih.enabled || rdev->shutdown)
6536 return IRQ_NONE;
6537
6538 wptr = cik_get_ih_wptr(rdev);
6539
6540restart_ih:
6541 /* is somebody else already processing irqs? */
6542 if (atomic_xchg(&rdev->ih.lock, 1))
6543 return IRQ_NONE;
6544
6545 rptr = rdev->ih.rptr;
6546 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6547
6548 /* Order reading of wptr vs. reading of IH ring data */
6549 rmb();
6550
6551 /* display interrupts */
6552 cik_irq_ack(rdev);
6553
6554 while (rptr != wptr) {
6555 /* wptr/rptr are in bytes! */
6556 ring_index = rptr / 4;
6557 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6558 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6559 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6560
6561 switch (src_id) {
6562 case 1: /* D1 vblank/vline */
6563 switch (src_data) {
6564 case 0: /* D1 vblank */
6565 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6566 if (rdev->irq.crtc_vblank_int[0]) {
6567 drm_handle_vblank(rdev->ddev, 0);
6568 rdev->pm.vblank_sync = true;
6569 wake_up(&rdev->irq.vblank_queue);
6570 }
6571 if (atomic_read(&rdev->irq.pflip[0]))
6572 radeon_crtc_handle_flip(rdev, 0);
6573 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6574 DRM_DEBUG("IH: D1 vblank\n");
6575 }
6576 break;
6577 case 1: /* D1 vline */
6578 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6579 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6580 DRM_DEBUG("IH: D1 vline\n");
6581 }
6582 break;
6583 default:
6584 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6585 break;
6586 }
6587 break;
6588 case 2: /* D2 vblank/vline */
6589 switch (src_data) {
6590 case 0: /* D2 vblank */
6591 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6592 if (rdev->irq.crtc_vblank_int[1]) {
6593 drm_handle_vblank(rdev->ddev, 1);
6594 rdev->pm.vblank_sync = true;
6595 wake_up(&rdev->irq.vblank_queue);
6596 }
6597 if (atomic_read(&rdev->irq.pflip[1]))
6598 radeon_crtc_handle_flip(rdev, 1);
6599 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6600 DRM_DEBUG("IH: D2 vblank\n");
6601 }
6602 break;
6603 case 1: /* D2 vline */
6604 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6605 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6606 DRM_DEBUG("IH: D2 vline\n");
6607 }
6608 break;
6609 default:
6610 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6611 break;
6612 }
6613 break;
6614 case 3: /* D3 vblank/vline */
6615 switch (src_data) {
6616 case 0: /* D3 vblank */
6617 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6618 if (rdev->irq.crtc_vblank_int[2]) {
6619 drm_handle_vblank(rdev->ddev, 2);
6620 rdev->pm.vblank_sync = true;
6621 wake_up(&rdev->irq.vblank_queue);
6622 }
6623 if (atomic_read(&rdev->irq.pflip[2]))
6624 radeon_crtc_handle_flip(rdev, 2);
6625 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6626 DRM_DEBUG("IH: D3 vblank\n");
6627 }
6628 break;
6629 case 1: /* D3 vline */
6630 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6631 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6632 DRM_DEBUG("IH: D3 vline\n");
6633 }
6634 break;
6635 default:
6636 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6637 break;
6638 }
6639 break;
6640 case 4: /* D4 vblank/vline */
6641 switch (src_data) {
6642 case 0: /* D4 vblank */
6643 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6644 if (rdev->irq.crtc_vblank_int[3]) {
6645 drm_handle_vblank(rdev->ddev, 3);
6646 rdev->pm.vblank_sync = true;
6647 wake_up(&rdev->irq.vblank_queue);
6648 }
6649 if (atomic_read(&rdev->irq.pflip[3]))
6650 radeon_crtc_handle_flip(rdev, 3);
6651 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6652 DRM_DEBUG("IH: D4 vblank\n");
6653 }
6654 break;
6655 case 1: /* D4 vline */
6656 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6657 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6658 DRM_DEBUG("IH: D4 vline\n");
6659 }
6660 break;
6661 default:
6662 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6663 break;
6664 }
6665 break;
6666 case 5: /* D5 vblank/vline */
6667 switch (src_data) {
6668 case 0: /* D5 vblank */
6669 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6670 if (rdev->irq.crtc_vblank_int[4]) {
6671 drm_handle_vblank(rdev->ddev, 4);
6672 rdev->pm.vblank_sync = true;
6673 wake_up(&rdev->irq.vblank_queue);
6674 }
6675 if (atomic_read(&rdev->irq.pflip[4]))
6676 radeon_crtc_handle_flip(rdev, 4);
6677 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6678 DRM_DEBUG("IH: D5 vblank\n");
6679 }
6680 break;
6681 case 1: /* D5 vline */
6682 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6683 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6684 DRM_DEBUG("IH: D5 vline\n");
6685 }
6686 break;
6687 default:
6688 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6689 break;
6690 }
6691 break;
6692 case 6: /* D6 vblank/vline */
6693 switch (src_data) {
6694 case 0: /* D6 vblank */
6695 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6696 if (rdev->irq.crtc_vblank_int[5]) {
6697 drm_handle_vblank(rdev->ddev, 5);
6698 rdev->pm.vblank_sync = true;
6699 wake_up(&rdev->irq.vblank_queue);
6700 }
6701 if (atomic_read(&rdev->irq.pflip[5]))
6702 radeon_crtc_handle_flip(rdev, 5);
6703 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6704 DRM_DEBUG("IH: D6 vblank\n");
6705 }
6706 break;
6707 case 1: /* D6 vline */
6708 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6709 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6710 DRM_DEBUG("IH: D6 vline\n");
6711 }
6712 break;
6713 default:
6714 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6715 break;
6716 }
6717 break;
6718 case 42: /* HPD hotplug */
6719 switch (src_data) {
6720 case 0:
6721 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6722 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6723 queue_hotplug = true;
6724 DRM_DEBUG("IH: HPD1\n");
6725 }
6726 break;
6727 case 1:
6728 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6729 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6730 queue_hotplug = true;
6731 DRM_DEBUG("IH: HPD2\n");
6732 }
6733 break;
6734 case 2:
6735 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6736 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6737 queue_hotplug = true;
6738 DRM_DEBUG("IH: HPD3\n");
6739 }
6740 break;
6741 case 3:
6742 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6743 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6744 queue_hotplug = true;
6745 DRM_DEBUG("IH: HPD4\n");
6746 }
6747 break;
6748 case 4:
6749 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6750 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6751 queue_hotplug = true;
6752 DRM_DEBUG("IH: HPD5\n");
6753 }
6754 break;
6755 case 5:
6756 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6757 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6758 queue_hotplug = true;
6759 DRM_DEBUG("IH: HPD6\n");
6760 }
6761 break;
6762 default:
6763 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6764 break;
6765 }
6766 break;
6767 case 124: /* UVD */
6768 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6769 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6770 break;
6771 case 146:
6772 case 147:
6773 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6774 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6775 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6776 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6777 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
6778 addr);
6779 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6780 status);
6781 cik_vm_decode_fault(rdev, status, addr, mc_client);
6782 /* reset addr and status */
6783 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6784 break;
6785 case 176: /* GFX RB CP_INT */
6786 case 177: /* GFX IB CP_INT */
6787 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6788 break;
6789 case 181: /* CP EOP event */
6790 DRM_DEBUG("IH: CP EOP\n");
6791 /* XXX check the bitfield order! */
6792 me_id = (ring_id & 0x60) >> 5;
6793 pipe_id = (ring_id & 0x18) >> 3;
6794 queue_id = (ring_id & 0x7) >> 0;
6795 switch (me_id) {
6796 case 0:
6797 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6798 break;
6799 case 1:
6800 case 2:
6801 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
6802 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6803 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
6804 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6805 break;
6806 }
6807 break;
6808 case 184: /* CP Privileged reg access */
6809 DRM_ERROR("Illegal register access in command stream\n");
6810 /* XXX check the bitfield order! */
6811 me_id = (ring_id & 0x60) >> 5;
6812 pipe_id = (ring_id & 0x18) >> 3;
6813 queue_id = (ring_id & 0x7) >> 0;
6814 switch (me_id) {
6815 case 0:
6816 /* This results in a full GPU reset, but all we need to do is soft
6817 * reset the CP for gfx
6818 */
6819 queue_reset = true;
6820 break;
6821 case 1:
6822 /* XXX compute */
6823 queue_reset = true;
6824 break;
6825 case 2:
6826 /* XXX compute */
6827 queue_reset = true;
6828 break;
6829 }
6830 break;
6831 case 185: /* CP Privileged inst */
6832 DRM_ERROR("Illegal instruction in command stream\n");
6833 /* XXX check the bitfield order! */
6834 me_id = (ring_id & 0x60) >> 5;
6835 pipe_id = (ring_id & 0x18) >> 3;
6836 queue_id = (ring_id & 0x7) >> 0;
6837 switch (me_id) {
6838 case 0:
6839 /* This results in a full GPU reset, but all we need to do is soft
6840 * reset the CP for gfx
6841 */
6842 queue_reset = true;
6843 break;
6844 case 1:
6845 /* XXX compute */
6846 queue_reset = true;
6847 break;
6848 case 2:
6849 /* XXX compute */
6850 queue_reset = true;
6851 break;
6852 }
6853 break;
6854 case 224: /* SDMA trap event */
6855 /* XXX check the bitfield order! */
6856 me_id = (ring_id & 0x3) >> 0;
6857 queue_id = (ring_id & 0xc) >> 2;
6858 DRM_DEBUG("IH: SDMA trap\n");
6859 switch (me_id) {
6860 case 0:
6861 switch (queue_id) {
6862 case 0:
6863 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6864 break;
6865 case 1:
6866 /* XXX compute */
6867 break;
6868 case 2:
6869 /* XXX compute */
6870 break;
6871 }
6872 break;
6873 case 1:
6874 switch (queue_id) {
6875 case 0:
6876 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6877 break;
6878 case 1:
6879 /* XXX compute */
6880 break;
6881 case 2:
6882 /* XXX compute */
6883 break;
6884 }
6885 break;
6886 }
6887 break;
6888 case 230: /* thermal low to high */
6889 DRM_DEBUG("IH: thermal low to high\n");
6890 rdev->pm.dpm.thermal.high_to_low = false;
6891 queue_thermal = true;
6892 break;
6893 case 231: /* thermal high to low */
6894 DRM_DEBUG("IH: thermal high to low\n");
6895 rdev->pm.dpm.thermal.high_to_low = true;
6896 queue_thermal = true;
6897 break;
6898 case 233: /* GUI IDLE */
6899 DRM_DEBUG("IH: GUI idle\n");
6900 break;
6901 case 241: /* SDMA Privileged inst */
6902 case 247: /* SDMA Privileged inst */
6903 DRM_ERROR("Illegal instruction in SDMA command stream\n");
6904 /* XXX check the bitfield order! */
6905 me_id = (ring_id & 0x3) >> 0;
6906 queue_id = (ring_id & 0xc) >> 2;
6907 switch (me_id) {
6908 case 0:
6909 switch (queue_id) {
6910 case 0:
6911 queue_reset = true;
6912 break;
6913 case 1:
6914 /* XXX compute */
6915 queue_reset = true;
6916 break;
6917 case 2:
6918 /* XXX compute */
6919 queue_reset = true;
6920 break;
6921 }
6922 break;
6923 case 1:
6924 switch (queue_id) {
6925 case 0:
6926 queue_reset = true;
6927 break;
6928 case 1:
6929 /* XXX compute */
6930 queue_reset = true;
6931 break;
6932 case 2:
6933 /* XXX compute */
6934 queue_reset = true;
6935 break;
6936 }
6937 break;
6938 }
6939 break;
6940 default:
6941 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6942 break;
6943 }
6944
6945 /* wptr/rptr are in bytes! */
6946 rptr += 16;
6947 rptr &= rdev->ih.ptr_mask;
6948 }
6949 if (queue_hotplug)
6950 schedule_work(&rdev->hotplug_work);
6951 if (queue_reset)
6952 schedule_work(&rdev->reset_work);
6953 if (queue_thermal)
6954 schedule_work(&rdev->pm.dpm.thermal.work);
6955 rdev->ih.rptr = rptr;
6956 WREG32(IH_RB_RPTR, rdev->ih.rptr);
6957 atomic_set(&rdev->ih.lock, 0);
6958
6959 /* make sure wptr hasn't changed while processing */
6960 wptr = cik_get_ih_wptr(rdev);
6961 if (wptr != rptr)
6962 goto restart_ih;
6963
6964 return IRQ_HANDLED;
6965}
6966
6967/*
6968 * startup/shutdown callbacks
6969 */
6970/**
6971 * cik_startup - program the asic to a functional state
6972 *
6973 * @rdev: radeon_device pointer
6974 *
6975 * Programs the asic to a functional state (CIK).
6976 * Called by cik_init() and cik_resume().
6977 * Returns 0 for success, error for failure.
6978 */
6979static int cik_startup(struct radeon_device *rdev)
6980{
6981 struct radeon_ring *ring;
6982 int r;
6983
6984 /* enable pcie gen2/3 link */
6985 cik_pcie_gen3_enable(rdev);
6986 /* enable aspm */
6987 cik_program_aspm(rdev);
6988
6989 /* scratch needs to be initialized before MC */
6990 r = r600_vram_scratch_init(rdev);
6991 if (r)
6992 return r;
6993
6994 cik_mc_program(rdev);
6995
6996 if (rdev->flags & RADEON_IS_IGP) {
6997 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6998 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6999 r = cik_init_microcode(rdev);
7000 if (r) {
7001 DRM_ERROR("Failed to load firmware!\n");
7002 return r;
7003 }
7004 }
7005 } else {
7006 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7007 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7008 !rdev->mc_fw) {
7009 r = cik_init_microcode(rdev);
7010 if (r) {
7011 DRM_ERROR("Failed to load firmware!\n");
7012 return r;
7013 }
7014 }
7015
7016 r = ci_mc_load_microcode(rdev);
7017 if (r) {
7018 DRM_ERROR("Failed to load MC firmware!\n");
7019 return r;
7020 }
7021 }
7022
7023 r = cik_pcie_gart_enable(rdev);
7024 if (r)
7025 return r;
7026 cik_gpu_init(rdev);
7027
7028 /* allocate rlc buffers */
7029 if (rdev->flags & RADEON_IS_IGP) {
7030 if (rdev->family == CHIP_KAVERI) {
7031 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7032 rdev->rlc.reg_list_size =
7033 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7034 } else {
7035 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7036 rdev->rlc.reg_list_size =
7037 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7038 }
7039 }
7040 rdev->rlc.cs_data = ci_cs_data;
7041 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7042 r = sumo_rlc_init(rdev);
7043 if (r) {
7044 DRM_ERROR("Failed to init rlc BOs!\n");
7045 return r;
7046 }
7047
7048 /* allocate wb buffer */
7049 r = radeon_wb_init(rdev);
7050 if (r)
7051 return r;
7052
7053 /* allocate mec buffers */
7054 r = cik_mec_init(rdev);
7055 if (r) {
7056 DRM_ERROR("Failed to init MEC BOs!\n");
7057 return r;
7058 }
7059
7060 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7061 if (r) {
7062 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7063 return r;
7064 }
7065
7066 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7067 if (r) {
7068 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7069 return r;
7070 }
7071
7072 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7073 if (r) {
7074 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7075 return r;
7076 }
7077
7078 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7079 if (r) {
7080 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7081 return r;
7082 }
7083
7084 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7085 if (r) {
7086 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7087 return r;
7088 }
7089
7090 r = radeon_uvd_resume(rdev);
7091 if (!r) {
7092 r = uvd_v4_2_resume(rdev);
7093 if (!r) {
7094 r = radeon_fence_driver_start_ring(rdev,
7095 R600_RING_TYPE_UVD_INDEX);
7096 if (r)
7097 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7098 }
7099 }
7100 if (r)
7101 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7102
7103 /* Enable IRQ */
7104 if (!rdev->irq.installed) {
7105 r = radeon_irq_kms_init(rdev);
7106 if (r)
7107 return r;
7108 }
7109
7110 r = cik_irq_init(rdev);
7111 if (r) {
7112 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7113 radeon_irq_kms_fini(rdev);
7114 return r;
7115 }
7116 cik_irq_set(rdev);
7117
7118 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7119 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7120 CP_RB0_RPTR, CP_RB0_WPTR,
7121 RADEON_CP_PACKET2);
7122 if (r)
7123 return r;
7124
7125 /* set up the compute queues */
7126 /* type-2 packets are deprecated on MEC, use type-3 instead */
7127 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7128 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7129 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7130 PACKET3(PACKET3_NOP, 0x3FFF));
7131 if (r)
7132 return r;
7133 ring->me = 1; /* first MEC */
7134 ring->pipe = 0; /* first pipe */
7135 ring->queue = 0; /* first queue */
7136 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7137
7138 /* type-2 packets are deprecated on MEC, use type-3 instead */
7139 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7140 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7141 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7142 PACKET3(PACKET3_NOP, 0x3FFF));
7143 if (r)
7144 return r;
7145 /* dGPUs only have 1 MEC */
7146 ring->me = 1; /* first MEC */
7147 ring->pipe = 0; /* first pipe */
7148 ring->queue = 1; /* second queue */
7149 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7150
7151 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7152 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7153 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7154 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7155 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7156 if (r)
7157 return r;
7158
7159 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7160 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7161 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7162 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7163 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7164 if (r)
7165 return r;
7166
7167 r = cik_cp_resume(rdev);
7168 if (r)
7169 return r;
7170
7171 r = cik_sdma_resume(rdev);
7172 if (r)
7173 return r;
7174
7175 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7176 if (ring->ring_size) {
7177 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7178 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7179 RADEON_CP_PACKET2);
7180 if (!r)
7181 r = uvd_v1_0_init(rdev);
7182 if (r)
7183 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7184 }
7185
7186 r = radeon_ib_pool_init(rdev);
7187 if (r) {
7188 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7189 return r;
7190 }
7191
7192 r = radeon_vm_manager_init(rdev);
7193 if (r) {
7194 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7195 return r;
7196 }
7197
7198 r = dce6_audio_init(rdev);
7199 if (r)
7200 return r;
7201
7202 return 0;
7203}
7204
7205/**
7206 * cik_resume - resume the asic to a functional state
7207 *
7208 * @rdev: radeon_device pointer
7209 *
7210 * Programs the asic to a functional state (CIK).
7211 * Called at resume.
7212 * Returns 0 for success, error for failure.
7213 */
7214int cik_resume(struct radeon_device *rdev)
7215{
7216 int r;
7217
7218 /* post card */
7219 atom_asic_init(rdev->mode_info.atom_context);
7220
7221 /* init golden registers */
7222 cik_init_golden_registers(rdev);
7223
7224 rdev->accel_working = true;
7225 r = cik_startup(rdev);
7226 if (r) {
7227 DRM_ERROR("cik startup failed on resume\n");
7228 rdev->accel_working = false;
7229 return r;
7230 }
7231
7232 return r;
7233
7234}
7235
7236/**
7237 * cik_suspend - suspend the asic
7238 *
7239 * @rdev: radeon_device pointer
7240 *
7241 * Bring the chip into a state suitable for suspend (CIK).
7242 * Called at suspend.
7243 * Returns 0 for success.
7244 */
7245int cik_suspend(struct radeon_device *rdev)
7246{
7247 dce6_audio_fini(rdev);
7248 radeon_vm_manager_fini(rdev);
7249 cik_cp_enable(rdev, false);
7250 cik_sdma_enable(rdev, false);
7251 uvd_v1_0_fini(rdev);
7252 radeon_uvd_suspend(rdev);
7253 cik_fini_pg(rdev);
7254 cik_fini_cg(rdev);
7255 cik_irq_suspend(rdev);
7256 radeon_wb_disable(rdev);
7257 cik_pcie_gart_disable(rdev);
7258 return 0;
7259}
7260
7261 /* The plan is to move initialization into this function and to
7262 * use helper functions so that radeon_device_init does little
7263 * more than call asic-specific functions. This should also
7264 * allow us to remove a bunch of callback functions, like
7265 * vram_info.
7266 */
7267/**
7268 * cik_init - asic specific driver and hw init
7269 *
7270 * @rdev: radeon_device pointer
7271 *
7272 * Setup asic specific driver variables and program the hw
7273 * to a functional state (CIK).
7274 * Called at driver startup.
7275 * Returns 0 for success, errors for failure.
7276 */
7277int cik_init(struct radeon_device *rdev)
7278{
7279 struct radeon_ring *ring;
7280 int r;
7281
7282 /* Read BIOS */
7283 if (!radeon_get_bios(rdev)) {
7284 if (ASIC_IS_AVIVO(rdev))
7285 return -EINVAL;
7286 }
7287 /* Must be an ATOMBIOS */
7288 if (!rdev->is_atom_bios) {
7289 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7290 return -EINVAL;
7291 }
7292 r = radeon_atombios_init(rdev);
7293 if (r)
7294 return r;
7295
7296 /* Post card if necessary */
7297 if (!radeon_card_posted(rdev)) {
7298 if (!rdev->bios) {
7299 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7300 return -EINVAL;
7301 }
7302 DRM_INFO("GPU not posted. posting now...\n");
7303 atom_asic_init(rdev->mode_info.atom_context);
7304 }
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}

/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Set up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width. For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
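
/* Example of the bucketing above: a 2560x1440 mode has
 * crtc_hdisplay < 4096, so LB_MEMORY_CONFIG(0) is programmed and the
 * caller is told it has 4096 * 2 pixels of line buffer, while a
 * 1920x1080 mode lands in the < 2560 bucket (config 2, 2560 * 2 pixels).
 */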

/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the number of dram channels
 */
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}

struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};

/**
 * dce8_dram_bandwidth - get the dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the raw dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth in MBytes/s
 */
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}
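
/* In plain numbers the fixed-point math above computes:
 *   dram_bw = (yclk / 1000) * (dram_channels * 4) * 0.7  [MB/s]
 * e.g. yclk = 500000 kHz on a 2-channel board gives roughly
 * 500 * 8 * 0.7 = 2800 MB/s.
 */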

/**
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
 *
 * @wm: watermark calculation data
 *
 * Calculate the dram bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth for display in MBytes/s
 */
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}
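
/* The three limits above reduce to (all in MB/s):
 *   dram:   0.7 * (yclk / 1000) * dram_channels * 4
 *   return: 0.8 * (sclk / 1000) * 32
 *   dmif:   0.8 * (disp_clk / 1000) * 32
 * and the available bandwidth is the smallest of the three.
 */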

/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
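
/* i.e. average_bw = src_width * bytes_per_pixel * vsc / line_time(us).
 * For a 1920-wide, 32bpp surface with vsc = 1 and a 15 us line time
 * that works out to 1920 * 4 / 15 = 512 MB/s.
 */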

/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
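
/* The watermark is the worst-case wait for one head's data: the fixed
 * MC latency, the time the other heads can occupy the return path with
 * one chunk (512 * 8 bytes) plus a cursor line pair (128 * 4 bytes)
 * each, and the dc pipe latency. If the line buffer cannot be refilled
 * within the active period, the shortfall is added on top.
 */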

/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	return dce8_average_bandwidth(wm) <=
		(dce8_dram_bandwidth_for_display(wm) / wm->num_heads);
}

/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	return dce8_average_bandwidth(wm) <=
		(dce8_available_bandwidth(wm) / wm->num_heads);
}

/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	return dce8_latency_watermark(wm) <= latency_hiding;
}
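
/* The line buffer hides latency for as long as the lines it can hold
 * ahead of scanout (one line, or two when not downscaling and enough LB
 * partitions are free) take to display, plus the blank period; the mode
 * fits if the latency watermark stays under that budget.
 */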

/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
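
/* LATENCY_WATERMARK_MASK selects which of the two watermark sets the
 * following DPG_PIPE_LATENCY_CONTROL write lands in: set A holds the
 * high-clock values and set B the low-clock values, presumably so the
 * DPM code can pick the matching set when it changes clocks; line_time
 * and both watermarks are saved on the crtc for that purpose.
 */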

/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}

/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}

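/**
 * cik_set_uvd_clock - set a single UVD clock
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock frequency
 * @cntl_reg: clock control SMC register offset
 * @status_reg: clock status SMC register offset
 *
 * Look up the ATOM clock dividers for the requested frequency,
 * program the post divider and wait for the status bit to signal
 * that the clock is stable (CIK).
 * Returns 0 for success, -ETIMEDOUT if the clock never stabilized.
 */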
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

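/**
 * cik_set_uvd_clocks - set the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK frequency
 * @dclk: requested DCLK frequency
 *
 * Program the UVD VCLK and DCLK via their respective
 * control/status register pairs (CIK).
 * Returns 0 for success, error for failure.
 */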
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}

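/**
 * cik_pcie_gen3_enable - enable higher PCIE link speeds
 *
 * @rdev: radeon_device pointer
 *
 * Attempt to switch the PCIE link to gen 2 or gen 3 speeds when
 * the platform supports them, re-running link equalization for
 * gen 3 if necessary (CIK).
 * Skipped for IGPs and when radeon.pcie_gen2=0.
 */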
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}

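/**
 * cik_program_aspm - program ASPM related PCIE settings
 *
 * @rdev: radeon_device pointer
 *
 * Configure L0s/L1 Active State Power Management, PLL powerdown
 * in L1 and clock-request support on the PCIE link (CIK).
 * Skipped for IGPs and when the radeon_aspm module parameter is 0.
 */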
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}