# Package definition for autofaiss: builds the tagged upstream release from
# GitHub with setuptools, adjusts the declared Python requirements to match
# what nixpkgs provides, and runs the test suite through pytest with the
# Spark-dependent tests switched off.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  pythonOlder,

  # runtime dependencies
  embedding-reader,
  faiss,
  fire,
  fsspec,
  numpy,
  pyarrow,

  # test dependencies
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "autofaiss";
  version = "2.17.0";
  format = "setuptools";

  disabled = pythonOlder "3.6";

  # Source is pinned to the upstream git tag named after the version.
  src = fetchFromGitHub {
    owner = "criteo";
    repo = "autofaiss";
    rev = "refs/tags/${version}";
    hash = "sha256-pey3wrW7CDLMiPPKnmYrcSJqGuy6ecA2SE9m3Jtt6DU=";
  };

  # Requirements dropped entirely from the package metadata.
  pythonRemoveDeps = [
    # The `dataclasses` package on PyPI is a backport of the standard-library
    # module for Python 3.6; it is unnecessary on Python 3.7 and newer.
    "dataclasses"
    # The distribution is named faiss here, not faiss-cpu.
    "faiss-cpu"
  ];

  # Requirements whose version constraints are lifted.
  pythonRelaxDeps = [
    # Upstream requested fire<0.5 as of v2.15.4, while nixpkgs carried
    # fire v0.5.0 when this was written (2022-12-25).
    "fire"
    # Upstream requested pyarrow<8 as of v2.15.3, while nixpkgs carried
    # pyarrow v9.0.0 when this was written (2022-12-15).
    "pyarrow"
  ];

  propagatedBuildInputs = [
    embedding-reader
    fsspec
    numpy
    faiss
    fire
    pyarrow
  ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # These tests try to start a Spark cluster and communicate with it, which
    # does not work inside the Nix build environment.
    "test_build_partitioned_indexes"
    "test_index_correctness_in_distributed_mode_with_multiple_indices"
    "test_index_correctness_in_distributed_mode"
    "test_quantize_with_pyspark"
  ];

  meta = {
    description = "Automatically create Faiss knn indices with the most optimal similarity search parameters";
    homepage = "https://github.com/criteo/autofaiss";
    changelog = "https://github.com/criteo/autofaiss/blob/${version}/CHANGELOG.md";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ samuela ];
    mainProgram = "autofaiss";
  };
}