1{
2 lib,
3 stdenv,
4 gcc13Stdenv,
5 buildPythonPackage,
6 fetchFromGitHub,
7
8 # nativeBuildInputs
9 cmake,
10 ninja,
11
12 # build-system
13 pathspec,
14 pyproject-metadata,
15 scikit-build-core,
16
17 # dependencies
18 diskcache,
19 jinja2,
20 numpy,
21 typing-extensions,
22
23 # tests
24 scipy,
25 huggingface-hub,
26
27 # passthru
28 gitUpdater,
29 pytestCheckHook,
30 llama-cpp-python,
31
32 config,
33 cudaSupport ? config.cudaSupport,
34 cudaPackages ? { },
35
36}:
37let
38 stdenvTarget = if cudaSupport then gcc13Stdenv else stdenv;
39in
40buildPythonPackage rec {
41 pname = "llama-cpp-python";
42 version = "0.3.9";
43 pyproject = true;
44
45 src = fetchFromGitHub {
46 owner = "abetlen";
47 repo = "llama-cpp-python";
48 tag = "v${version}";
49 hash = "sha256-iw9teWZ612gUNM2Zm5WGdFTq7aNo8QRRIGeHoFpXdfQ=";
50 fetchSubmodules = true;
51 };
52 # src = /home/gaetan/llama-cpp-python;
53
54 dontUseCmakeConfigure = true;
55 SKBUILD_CMAKE_ARGS = lib.strings.concatStringsSep ";" (
56 # Set GGML_NATIVE=off. Otherwise, cmake attempts to build with
57 # -march=native* which is either a no-op (if cc-wrapper is able to ignore
58 # it), or an attempt to build a non-reproducible binary.
59 #
60 # This issue was spotted when cmake rules appended feature modifiers to
61 # -mcpu, breaking linux build as follows:
62 #
63 # cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
64 [
65 "-DGGML_NATIVE=off"
66 "-DGGML_BUILD_NUMBER=1"
67 ]
68 ++ lib.optionals cudaSupport [
69 "-DGGML_CUDA=on"
70 "-DCUDAToolkit_ROOT=${lib.getDev cudaPackages.cuda_nvcc}"
71 "-DCMAKE_CUDA_COMPILER=${lib.getExe cudaPackages.cuda_nvcc}"
72 ]
73 );
74
75 enableParallelBuilding = true;
76
77 nativeBuildInputs = [
78 cmake
79 ninja
80 ];
81
82 build-system = [
83 pathspec
84 pyproject-metadata
85 scikit-build-core
86 ];
87
88 buildInputs = lib.optionals cudaSupport (
89 with cudaPackages;
90 [
91 cuda_cudart # cuda_runtime.h
92 cuda_cccl # <thrust/*>
93 libcublas # cublas_v2.h
94 ]
95 );
96
97 stdenv = stdenvTarget;
98
99 dependencies = [
100 diskcache
101 jinja2
102 numpy
103 typing-extensions
104 ];
105
106 nativeCheckInputs = [
107 pytestCheckHook
108 scipy
109 huggingface-hub
110 ];
111
112 disabledTests = [
113 # tries to download model from huggingface-hub
114 "test_real_model"
115 "test_real_llama"
116 ];
117
118 pythonImportsCheck = [ "llama_cpp" ];
119
120 passthru = {
121 updateScript = gitUpdater { rev-prefix = "v"; };
122 tests = lib.optionalAttrs stdenvTarget.hostPlatform.isLinux {
123 withCuda = llama-cpp-python.override {
124 cudaSupport = true;
125 };
126 };
127 };
128
129 meta = {
130 description = "Python bindings for llama.cpp";
131 homepage = "https://github.com/abetlen/llama-cpp-python";
132 changelog = "https://github.com/abetlen/llama-cpp-python/blob/v${version}/CHANGELOG.md";
133 license = lib.licenses.mit;
134 maintainers = with lib.maintainers; [
135 booxter
136 kirillrdy
137 ];
138 };
139}