gpu: nova-core: add boot42 support for next-gen GPUs

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

NVIDIA GPUs are moving away from using NV_PMC_BOOT_0 to contain
architecture and revision details, and will instead use NV_PMC_BOOT_42
in the future. NV_PMC_BOOT_0 will contain a specific set of values
that will mean "go read NV_PMC_BOOT_42 instead".

Change the selection logic in Nova so that it will claim Turing and
later GPUs. This will work for the foreseeable future, without any
further code changes here, because all NVIDIA GPUs are considered, from
the oldest supported on Linux (NV04) through future GPUs.

Add comments that explain, in chronological order, how boot0 and boot42
change across the GPU eras, and how that affects the selection logic.

Cc: Alexandre Courbot <acourbot@nvidia.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Timur Tabi <ttabi@nvidia.com>
Reviewed-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
[acourbot@nvidia.com: remove unneeded `From<BOOT_0> for Revision`
implementation.]
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Message-ID: <20251115010923.1192144-4-jhubbard@nvidia.com>

authored by

John Hubbard and committed by

Alexandre Courbot 5 months ago 0ecc08e2 4d980333

+46 -13

2 changed files

expand all

drivers

gpu

nova-core

gpu.rs

regs.rs

+29 -7

drivers/gpu/nova-core/gpu.rs

··· 160 160 minor: u8, 161 161 } 162 162 163 - impl From<regs::NV_PMC_BOOT_0> for Revision { 164 - fn from(boot0: regs::NV_PMC_BOOT_0) -> Self { 163 + impl From<regs::NV_PMC_BOOT_42> for Revision { 164 + fn from(boot0: regs::NV_PMC_BOOT_42) -> Self { 165 165 Self { 166 166 major: boot0.major_revision(), 167 167 minor: boot0.minor_revision(), ··· 183 183 184 184 impl Spec { 185 185 fn new(bar: &Bar0) -> Result<Spec> { 186 + // Some brief notes about boot0 and boot42, in chronological order: 187 + // 188 + // NV04 through NV50: 189 + // 190 + // Not supported by Nova. boot0 is necessary and sufficient to identify these GPUs. 191 + // boot42 may not even exist on some of these GPUs. 192 + // 193 + // Fermi through Volta: 194 + // 195 + // Not supported by Nova. boot0 is still sufficient to identify these GPUs, but boot42 196 + // is also guaranteed to be both present and accurate. 197 + // 198 + // Turing and later: 199 + // 200 + // Supported by Nova. Identified by first checking boot0 to ensure that the GPU is not 201 + // from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU. 202 + // Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs. 203 + 186 204 let boot0 = regs::NV_PMC_BOOT_0::read(bar); 187 205 188 - Spec::try_from(boot0) 206 + if boot0.is_older_than_fermi() { 207 + return Err(ENODEV); 208 + } 209 + 210 + Spec::try_from(regs::NV_PMC_BOOT_42::read(bar)) 189 211 } 190 212 } 191 213 192 - impl TryFrom<regs::NV_PMC_BOOT_0> for Spec { 214 + impl TryFrom<regs::NV_PMC_BOOT_42> for Spec { 193 215 type Error = Error; 194 216 195 - fn try_from(boot0: regs::NV_PMC_BOOT_0) -> Result<Self> { 217 + fn try_from(boot42: regs::NV_PMC_BOOT_42) -> Result<Self> { 196 218 Ok(Self { 197 - chipset: boot0.chipset()?, 198 - revision: boot0.into(), 219 + chipset: boot42.chipset()?, 220 + revision: boot42.into(), 199 221 }) 200 222 } 201 223 }

+17 -6

drivers/gpu/nova-core/regs.rs

··· 40 40 }); 41 41 42 42 impl NV_PMC_BOOT_0 { 43 - /// Combines `architecture_0` and `architecture_1` to obtain the architecture of the chip. 44 - pub(crate) fn architecture(self) -> Result<Architecture> { 45 - Architecture::try_from( 46 - self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0_RANGE.len()), 47 - ) 48 - } 43 + pub(crate) fn is_older_than_fermi(self) -> bool { 44 + // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals : 45 + const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u8 = 0xc; 49 46 47 + // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than 48 + // GF100, means "older than Fermi". 49 + self.architecture_1() == 0 && self.architecture_0() < NV_PMC_BOOT_0_ARCHITECTURE_GF100 50 + } 51 + } 52 + 53 + register!(NV_PMC_BOOT_42 @ 0x00000a00, "Extended architecture information" { 54 + 15:12 minor_revision as u8, "Minor revision of the chip"; 55 + 19:16 major_revision as u8, "Major revision of the chip"; 56 + 23:20 implementation as u8, "Implementation version of the architecture"; 57 + 29:24 architecture as u8 ?=> Architecture, "Architecture value"; 58 + }); 59 + 60 + impl NV_PMC_BOOT_42 { 50 61 /// Combines `architecture` and `implementation` to obtain a code unique to the chipset. 51 62 pub(crate) fn chipset(self) -> Result<Chipset> { 52 63 self.architecture()