# Nix package expression for `pylance`: the Python bindings of the Lance
# columnar format, built from the `python/` subdirectory of the
# lancedb/lance GitHub repository. The Rust extension is compiled through
# maturin; the resulting Python module is importable as `lance`.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  rustPlatform,

  # nativeBuildInputs
  pkg-config,

  # buildInputs
  openssl,
  protobuf,

  # dependencies
  numpy,
  pyarrow,

  # optional-dependencies
  torch,

  # tests
  datafusion,
  duckdb,
  ml-dtypes,
  pandas,
  pillow,
  polars,
  pytestCheckHook,
  tqdm,
}:

buildPythonPackage rec {
  pname = "pylance";
  version = "0.32.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "lancedb";
    repo = "lance";
    tag = "v${version}";
    hash = "sha256-hVWyZv978hDjAOdk4S9S9RJOkxqhOL0ZBpi4wGk0h1c=";
  };

  # Only the `python` subdirectory of the checkout is built.
  sourceRoot = "${src.name}/python";

  # Vendored Cargo dependencies. `sourceRoot` is inherited so the vendoring
  # step works from the same `python` subdirectory as the main build.
  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit
      pname
      version
      src
      sourceRoot
      ;
    hash = "sha256-ZUNAZsOpLdpdsKhIp/6QD3Ys7MOeO6H3ve8au7g+riU=";
  };

  nativeBuildInputs = [
    pkg-config
    protobuf # for protoc
    rustPlatform.cargoSetupHook
  ];

  # `rustPlatform.cargoSetupHook` is intentionally listed only once (in
  # nativeBuildInputs above); it used to be repeated here as well.
  # NOTE(review): this relies on build-system entries also being treated as
  # native build inputs by buildPythonPackage - confirm against the nixpkgs
  # manual if the hook ordering ever matters.
  build-system = [
    rustPlatform.maturinBuildHook
  ];

  # `protobuf` appears here in addition to nativeBuildInputs, where it
  # provides the `protoc` binary at build time.
  buildInputs = [
    openssl
    protobuf
  ];

  # Do not enforce upstream's version constraint on pyarrow.
  pythonRelaxDeps = [ "pyarrow" ];

  dependencies = [
    numpy
    pyarrow
  ];

  optional-dependencies = {
    torch = [ torch ];
  };

  # The distribution is named `pylance`, but the importable module is `lance`.
  pythonImportsCheck = [ "lance" ];

  # The torch extra is appended so the torch-dependent tests can run.
  nativeCheckInputs = [
    datafusion
    duckdb
    ml-dtypes
    pandas
    pillow
    polars
    pytestCheckHook
    tqdm
  ]
  ++ optional-dependencies.torch;

  # Relative to sourceRoot (the repository's `python` directory), so this
  # enters `python/python/tests` of the upstream checkout.
  # NOTE(review): presumably done so pytest does not import the uncompiled
  # in-tree `lance` package instead of the installed one - confirm.
  preCheck = ''
    cd python/tests
  '';

  disabledTests = [
    # Writes to read-only build directory
    "test_add_data_storage_version"
    "test_fix_data_storage_version"
    "test_fts_backward_v0_27_0"

    # AttributeError: 'SessionContext' object has no attribute 'register_table_provider'
    "test_table_loading"

    # subprocess.CalledProcessError: Command ... returned non-zero exit status 1.
    # ModuleNotFoundError: No module named 'lance'
    "test_tracing"

    # Flaky (AssertionError)
    "test_index_cache_size"

    # OSError: LanceError(IO): Failed to initialize default tokenizer:
    # An invalid argument was passed:
    # 'LinderaError { kind: Parse, source: failed to build tokenizer: LinderaError(kind=Io, source=No such file or directory (os error 2)) }', /build/source/rust/lance-index/src/scalar/inverted/tokenizer/lindera.rs:63:21
    "test_lindera_load_config_fallback"

    # OSError: LanceError(IO): Failed to load tokenizer config
    "test_indexed_filter_with_fts_index_with_lindera_ipadic_jp_tokenizer"
    "test_lindera_ipadic_jp_tokenizer_bin_user_dict"
    "test_lindera_ipadic_jp_tokenizer_csv_user_dict"
    "test_lindera_load_config_priority"
  ]
  ++ lib.optionals (stdenv.hostPlatform.isLinux && stdenv.hostPlatform.isAarch64) [
    # OSError: LanceError(IO): Resources exhausted: Failed to allocate additional 1245184 bytes for ExternalSorter[0]...
    "test_merge_insert_large"
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    # Build hangs after all the tests are run due to a torch subprocess not exiting
    "test_multiprocess_loading"
  ];

  meta = {
    description = "Python wrapper for Lance columnar format";
    homepage = "https://github.com/lancedb/lance";
    changelog = "https://github.com/lancedb/lance/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ natsukium ];
  };
}