# Package expression for CuPy, built from the PyPI source distribution against
# the CUDA libraries taken from `cudaPackages`.
{ lib, buildPythonPackage
, fetchPypi, isPy3k, cython
, fastrlock, numpy, six, wheel, pytestCheckHook, mock, setuptools
, cudaPackages
, addOpenGLRunpath
}:

let
  inherit (cudaPackages) cudatoolkit cudnn cutensor nccl;
in buildPythonPackage rec {
  pname = "cupy";
  version = "11.2.0";
  # Python 2 is not supported.
  disabled = !isPy3k;

  src = fetchPypi {
    inherit pname version;
    sha256 = "sha256-wzNh8RejR6Y/aZbql0RtF/HAOPGh9TPlAkZCNQdpI+I=";
  };

  # See https://docs.cupy.dev/en/v10.2.0/reference/environment.html. Setting both
  # CUPY_NUM_BUILD_JOBS and CUPY_NUM_NVCC_THREADS to NIX_BUILD_CORES results in
  # a small amount of thrashing but it turns out there are a large number of
  # very short builds and a few extremely long ones, so setting both ends up
  # working nicely in practice.
  preConfigure = ''
    export CUDA_PATH=${cudatoolkit}
    export CUPY_NUM_BUILD_JOBS="$NIX_BUILD_CORES"
    export CUPY_NUM_NVCC_THREADS="$NIX_BUILD_CORES"
  '';

  nativeBuildInputs = [
    addOpenGLRunpath
    cython
  ];

  # Add the toolkit's lib/stubs directory to the linker search path.
  # NOTE(review): presumably needed because the real driver libraries are not
  # present in the build sandbox -- confirm.
  LDFLAGS = "-L${cudatoolkit}/lib/stubs";

  propagatedBuildInputs = [
    cudatoolkit
    cudnn
    cutensor
    nccl
    fastrlock
    numpy
    six
    setuptools
    wheel
  ];

  checkInputs = [
    pytestCheckHook
    mock
  ];

  # Won't work with the GPU, whose drivers won't be accessible from the build
  # sandbox
  doCheck = false;

  # Run addOpenGLRunpath on every shared object installed under $out.
  # `IFS= read -r` keeps backslashes and surrounding whitespace in file names
  # intact.
  postFixup = ''
    find "$out" -type f \( -name '*.so' -or -name '*.so.*' \) | while IFS= read -r lib; do
      addOpenGLRunpath "$lib"
    done
  '';

  enableParallelBuilding = true;

  meta = with lib; {
    description = "NumPy-compatible matrix library accelerated by CUDA";
    homepage = "https://cupy.dev/";
    license = licenses.mit;
    platforms = [ "x86_64-linux" ];
    maintainers = with maintainers; [ hyphon81 ];
  };
}