# Package expression for the `sentence-transformers` Python library.
#
# The function takes its build helpers and Python dependencies as arguments
# and returns the result of `buildPythonPackage`. The source is fetched from
# the GitHub tag matching `version`, built through the pyproject/setuptools
# path, and checked with pytest minus the tests listed as needing network
# access.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  huggingface-hub,
  nltk,
  numpy,
  scikit-learn,
  scipy,
  sentencepiece,
  tokenizers,
  torch,
  tqdm,
  transformers,

  # tests
  accelerate,
  datasets,
  pytestCheckHook,
  pytest-cov-stub,
}:

let
  pname = "sentence-transformers";
  version = "3.3.1";

  # Upstream GitHub coordinates, shared by `src` and the `meta` URLs.
  owner = "UKPLab";
  repo = "sentence-transformers";
  projectUrl = "https://github.com/${owner}/${repo}";
in
buildPythonPackage {
  inherit pname version;
  pyproject = true;

  # Upstream release tags are named `v<version>`.
  src = fetchFromGitHub {
    inherit owner repo;
    rev = "refs/tags/v${version}";
    hash = "sha256-D8LHzEVHRuayod084B05cL3OvZiO1ByDZLxROGxTD0I=";
  };

  build-system = [ setuptools ];

  # Runtime Python dependencies.
  dependencies = [
    huggingface-hub
    nltk
    numpy
    scikit-learn
    scipy
    sentencepiece
    tokenizers
    torch
    tqdm
    transformers
  ];

  # Packages needed only while running the test suite.
  nativeCheckInputs = [
    accelerate
    datasets
    pytestCheckHook
    pytest-cov-stub
  ];

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [ "sentence_transformers" ];

  # Individual tests that need network access.
  disabledTests = [
    "test_cmnrl_same_grad"
    "test_forward"
    "test_initialization_with_embedding_dim"
    "test_initialization_with_embedding_weights"
    "test_LabelAccuracyEvaluator"
    "test_model_card_reuse"
    "test_paraphrase_mining"
    "test_ParaphraseMiningEvaluator"
    "test_save_and_load"
    "test_simple_encode"
    "test_tokenize"
    "test_trainer"
    "test_trainer_invalid_column_names"
    "test_trainer_multi_dataset_errors"
  ];

  # Whole test files that need network access.
  disabledTestPaths = [
    "tests/evaluation/test_information_retrieval_evaluator.py"
    "tests/test_compute_embeddings.py"
    "tests/test_cross_encoder.py"
    "tests/test_model_card_data.py"
    "tests/test_multi_process.py"
    "tests/test_pretrained_stsb.py"
    "tests/test_sentence_transformer.py"
    "tests/test_train_stsb.py"
  ];

  # sentence-transformers needs a writable HF_HOME cache directory, so point
  # it at a fresh temporary directory.
  # NOTE(review): this is exported in postInstall rather than preCheck,
  # presumably so that every later phase sees it; confirm before moving it.
  postInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  meta = {
    description = "Multilingual Sentence & Image Embeddings with BERT";
    homepage = projectUrl;
    changelog = "${projectUrl}/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.dit7ya ];
    # Segmentation fault at import on this platform.
    broken = stdenv.hostPlatform.system == "x86_64-darwin";
  };
}