{
  lib,
  stdenv,
  gcc13Stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,

  # nativeBuildInputs
  cmake,
  ninja,

  # build-system
  pathspec,
  pyproject-metadata,
  scikit-build-core,

  # dependencies
  diskcache,
  jinja2,
  numpy,
  typing-extensions,

  # tests
  scipy,
  huggingface-hub,

  # passthru
  gitUpdater,
  pytestCheckHook,
  llama-cpp-python,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

}:
let
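  # nvcc only supports a bounded range of host gcc versions; the assumption
  # here is that the default stdenv's gcc is too new for this cudaPackages
  # release, so CUDA builds fall back to gcc13.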
  stdenvTarget = if cudaSupport then gcc13Stdenv else stdenv;
in
buildPythonPackage rec {
  pname = "llama-cpp-python";
  version = "0.3.16";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "abetlen";
    repo = "llama-cpp-python";
    tag = "v${version}";
    hash = "sha256-EUDtCv86J4bznsTqNsdgj1IYkAu83cf+RydFTUb2NEE=";
    fetchSubmodules = true;
  };

  patches = [
    # Fix test failure on a machine with no Metal devices (e.g. the nix-community darwin builder)
    # https://github.com/ggml-org/llama.cpp/pull/15531
    (fetchpatch {
      url = "https://github.com/ggml-org/llama.cpp/pull/15531/commits/63a83ffefe4d478ebadff89300a0a3c5d660f56a.patch";
      stripLen = 1;
      extraPrefix = "vendor/llama.cpp/";
      hash = "sha256-9LGnzviBgYYOOww8lhiLXf7xgd/EtxRXGQMredOO4qM=";
    })
  ];

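  # scikit-build-core drives CMake itself during the wheel build, so the
  # Nix cmake configure hook (pulled in via nativeBuildInputs) must not run.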
  dontUseCmakeConfigure = true;
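  # scikit-build-core reads SKBUILD_CMAKE_ARGS from the environment as a
  # semicolon-separated list of extra CMake flags, hence the ";" separator.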
  SKBUILD_CMAKE_ARGS = lib.strings.concatStringsSep ";" (
    # Set GGML_NATIVE=off. Otherwise, cmake attempts to build with
    # -march=native*, which is either a no-op (if cc-wrapper is able to
    # ignore it) or an attempt to build a non-reproducible binary.
    #
    # This issue was spotted when cmake rules appended feature modifiers to
    # -mcpu, breaking the Linux build as follows:
    #
    #   cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
    [
      "-DGGML_NATIVE=off"
78 "-DGGML_BUILD_NUMBER=1"
79 ]
80 ++ lib.optionals cudaSupport [
81 "-DGGML_CUDA=on"
82 "-DCUDAToolkit_ROOT=${lib.getDev cudaPackages.cuda_nvcc}"
83 "-DCMAKE_CUDA_COMPILER=${lib.getExe cudaPackages.cuda_nvcc}"
84 ]
85 );
86
87 enableParallelBuilding = true;
88
89 nativeBuildInputs = [
90 cmake
91 ninja
92 ];
93
94 build-system = [
95 pathspec
96 pyproject-metadata
97 scikit-build-core
98 ];
99
100 buildInputs = lib.optionals cudaSupport (
101 with cudaPackages;
102 [
103 cuda_cudart # cuda_runtime.h
104 cuda_cccl # <thrust/*>
105 libcublas # cublas_v2.h
106 ]
107 );
108
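  # Forward the CUDA-compatible stdenv chosen above to the underlying
  # mkDerivation call.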
  stdenv = stdenvTarget;

  dependencies = [
    diskcache
    jinja2
    numpy
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    scipy
    huggingface-hub
  ];

  disabledTests = [
    # tries to download a model from huggingface-hub
    "test_real_model"
    "test_real_llama"
  ];

  pythonImportsCheck = [ "llama_cpp" ];

  passthru = {
    updateScript = gitUpdater {
      rev-prefix = "v";
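      # Accept only plain numeric tags (e.g. v0.3.16), skipping pre-releases.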
      allowedVersions = "^[.0-9]+$";
    };
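    # cudaPackages is Linux-only, so the CUDA variant is only exercised
    # there; build it with e.g.
    #   nix-build -A python3Packages.llama-cpp-python.tests.withCuda
    # (attribute path assumed).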
    tests = lib.optionalAttrs stdenvTarget.hostPlatform.isLinux {
      withCuda = llama-cpp-python.override {
        cudaSupport = true;
      };
    };
  };

  meta = {
    description = "Python bindings for llama.cpp";
    homepage = "https://github.com/abetlen/llama-cpp-python";
    changelog = "https://github.com/abetlen/llama-cpp-python/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      booxter
      kirillrdy
    ];
  };
}