nixpkgs mirror (for testing)
https://github.com/NixOS/nixpkgs
nix
{
  # CUDA package-set internals: provides lib helpers such as _mkMetaBadPlatforms.
  _cuda,
  addDriverRunpath,
  backendStdenv,
  cmake,
  cuda_cudart,
  cuda_nvcc,
  cuda_nvrtc,
  # Prefix identifying the CUDA package set this derivation was built against.
  cudaNamePrefix,
  cudnn,
  fetchFromGitHub,
  # CUDA architecture/capability flags computed by the package set.
  flags,
  gtest,
  lib,
  libcublas,
  libcurand,
  ninja,
  python3Packages,
  # Options
  pythonSupport ? true, # build the Python wheel into the extra `dist` output
  enableF16C ? false, # F16C intrinsics; requires x86_64 and hardware support
  enableTools ? false, # profiler/tools; pulls in cudnn and libcublas (not yet implemented)
  # passthru.updateScript
  gitUpdater,
}:
let
  # Computes meta.badPlatforms from passthru.platformAssertions (used in meta below).
  inherit (_cuda.lib) _mkMetaBadPlatforms;
  inherit (lib) licenses maintainers teams;
  inherit (lib.asserts) assertMsg;
  # getBin selects the `bin` output of a package (used to locate nvcc).
  inherit (lib.attrsets) getBin;
  inherit (lib.lists) all optionals;
  inherit (lib.strings)
    cmakeBool
    cmakeFeature
    optionalString
    versionAtLeast
    ;
  inherit (lib.trivial) flip;
in
# TODO: Tests.
assert assertMsg (!enableTools) "enableTools is not yet implemented";
backendStdenv.mkDerivation (finalAttrs: {
  __structuredAttrs = true;
  strictDeps = true;

  # NOTE: Depends on the CUDA package set, so use cudaNamePrefix.
  name = "${cudaNamePrefix}-${finalAttrs.pname}-${finalAttrs.version}";
  pname = "cutlass";
  version = "3.9.2";

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "cutlass";
    tag = "v${finalAttrs.version}";
    hash = "sha256-teziPNA9csYvhkG5t2ht8W8x5+1YGGbHm8VKx4JoxgI=";
  };

  # TODO: As a header-only library, we should make sure we have an `include` directory or similar which is not a
  # superset of the `out` (`bin`) or `dev` outputs (which is what the multiple-outputs setup hook does by default).
  outputs = [ "out" ] ++ optionals pythonSupport [ "dist" ];

  nativeBuildInputs = [
    cuda_nvcc
    cmake
    ninja
    python3Packages.python # Python is always required
  ]
  ++ optionals pythonSupport (
    with python3Packages;
    [
      build
      pythonOutputDistHook
      setuptools
    ]
  );

  postPatch =
    # Prepend some commands to the CUDA.cmake file so it can find the CUDA libraries using CMake's FindCUDAToolkit
    # module. These target names are used throughout the project; I (@connorbaker) did not choose them.
    # NOTE: ''${...} is an escaped CMake variable reference inside the indented string, not Nix interpolation.
    ''
      nixLog "patching CUDA.cmake to use FindCUDAToolkit"
      mv ./CUDA.cmake ./_CUDA_Append.cmake
      cat > ./_CUDA_Prepend.cmake <<'EOF'
      find_package(CUDAToolkit REQUIRED)
      foreach(_target cudart cuda_driver nvrtc)
        if (NOT TARGET CUDA::''${_target})
          message(FATAL_ERROR "''${_target} Not Found")
        endif()
        message(STATUS "''${_target} library: ''${CUDA_''${_target}_LIBRARY}")
        add_library(''${_target} ALIAS CUDA::''${_target})
      endforeach()
      EOF
      cat ./_CUDA_Prepend.cmake ./_CUDA_Append.cmake > ./CUDA.cmake
    ''
    # Rather than pointing the Python bindings' cuda_install_path() at our NVCC (the previous approach, roughly:
    #   '_CUDA_INSTALL_PATH = os.getenv("CUDA_INSTALL_PATH", _cuda_install_path_from_nvcc())' ->
    #   '_CUDA_INSTALL_PATH = "<getBin cuda_nvcc>"'
    # ), make cuda_install_path() raise unconditionally and patch each of its call sites below. That way any
    # call site we missed fails loudly instead of silently using a bogus path.
    + ''
      nixLog "patching python bindings to make cuda_install_path fail"
      substituteInPlace ./python/cutlass/__init__.py \
        --replace-fail \
          'def cuda_install_path():' \
          '
      def cuda_install_path():
          raise RuntimeError("not supported with Nixpkgs CUDA packaging")
      '
    ''
    # Patch the python bindings to use environment variables set by Nixpkgs.
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L80
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L81
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L317
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L319
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L344
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L360
    + ''
      nixLog "patching python bindings to use environment variables"
      substituteInPlace ./python/cutlass/backend/compiler.py \
        --replace-fail \
          'self.include_paths = include_paths' \
          'self.include_paths = include_paths + [root + "/include" for root in os.getenv("CUDAToolkit_ROOT").split(";")]' \
        --replace-fail \
          'self.flags = flags' \
          'self.flags = flags + ["-L" + root + "/lib" for root in os.getenv("CUDAToolkit_ROOT").split(";")]' \
        --replace-fail \
          "\''${cuda_install_path}/bin/nvcc" \
          '${getBin cuda_nvcc}/bin/nvcc' \
        --replace-fail \
          '"cuda_install_path": cuda_install_path(),' \
          "" \
        --replace-fail \
          'f"{cuda_install_path()}/bin/nvcc"' \
          '"${getBin cuda_nvcc}/bin/nvcc"' \
        --replace-fail \
          'cuda_install_path() + "/include",' \
          ""
    '';

  enableParallelBuilding = true;

  buildInputs = [
    cuda_cudart
    cuda_nvrtc
    libcurand
  ]
  ++ optionals enableTools [
    cudnn
    libcublas
  ];

  cmakeFlags = [
    (cmakeFeature "CUTLASS_NVCC_ARCHS" flags.cmakeCudaArchitecturesString)
    (cmakeBool "CUTLASS_ENABLE_EXAMPLES" false)

    # Tests.
    (cmakeBool "CUTLASS_ENABLE_TESTS" finalAttrs.doCheck)
    (cmakeBool "CUTLASS_ENABLE_GTEST_UNIT_TESTS" finalAttrs.doCheck)
    (cmakeBool "CUTLASS_USE_SYSTEM_GOOGLETEST" true)

    # NOTE: Both CUDNN and CUBLAS can be used by the examples and the profiler. Since they are large dependencies, they
    # are disabled by default.
    (cmakeBool "CUTLASS_ENABLE_TOOLS" enableTools)
    (cmakeBool "CUTLASS_ENABLE_CUBLAS" enableTools)
    (cmakeBool "CUTLASS_ENABLE_CUDNN" enableTools)

    # NOTE: Requires x86_64 and hardware support.
    (cmakeBool "CUTLASS_ENABLE_F16C" enableF16C)

    # TODO: Unity builds are supposed to reduce build time, but this seems to just reduce the number of tasks
    # generated?
    # NOTE: Good explanation of unity builds:
    # https://www.methodpark.de/blog/how-to-speed-up-clang-tidy-with-unity-builds.
    (cmakeBool "CUTLASS_UNITY_BUILD_ENABLED" false)
  ];

  # Build the wheel out of the source tree (cmakeBuildDir) so pythonOutputDistHook can pick it up from dist/.
  postBuild = optionalString pythonSupport ''
    pushd "$NIX_BUILD_TOP/$sourceRoot"
    nixLog "building Python wheel"
    pyproject-build \
      --no-isolation \
      --outdir "$NIX_BUILD_TOP/$sourceRoot/''${cmakeBuildDir:?}/dist/" \
      --wheel
    popd >/dev/null
  '';

  doCheck = false;

  checkInputs = [ gtest ];

  # NOTE: Because the test cases immediately create and try to run the binaries, we don't have an opportunity
  # to patch them with autoAddDriverRunpath. To get around this, we add the driver runpath to the environment.
  # TODO: This would break Jetson when using cuda_compat, as it must come first.
  preCheck = optionalString finalAttrs.doCheck ''
    export LD_LIBRARY_PATH="$(readlink -mnv "${addDriverRunpath.driverLink}/lib")"
  '';

  # This is *not* a derivation you want to build on a small machine.
  requiredSystemFeatures = optionals finalAttrs.doCheck [
    "big-parallel"
    "cuda"
  ];

  passthru = {
    updateScript = gitUpdater {
      inherit (finalAttrs) pname version;
      rev-prefix = "v";
    };
    # TODO:
    # tests.test = cutlass.overrideAttrs { doCheck = true; };

    # Include required architectures in compatibility check.
    # https://github.com/NVIDIA/cutlass/tree/main?tab=readme-ov-file#compatibility
    platformAssertions = [
      {
        message = "all capabilities are >= 7.0 (${builtins.toJSON flags.cudaCapabilities})";
        assertion = all (flip versionAtLeast "7.0") flags.cudaCapabilities;
      }
    ];
  };

  meta = {
    description = "CUDA Templates for Linear Algebra Subroutines";
    homepage = "https://github.com/NVIDIA/cutlass";
    license = licenses.asl20;
    platforms = [
      "aarch64-linux"
      "x86_64-linux"
    ];
    badPlatforms = _mkMetaBadPlatforms finalAttrs;
    maintainers = [ maintainers.connorbaker ];
    teams = [ teams.cuda ];
  };
})