nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# Apache Tika, built from source with maven.buildMavenPackage.
#
# Options (everything else is a regular package-set dependency):
#   mvnDepsHash - hash passed as `mvnHash`; when left null, a per-system
#                 default from the table below is used instead.
#   enableGui   - when true, tika-app is installed next to tika-server;
#                 when false, Maven is restricted with
#                 `-am -pl :tika-server-standard`.
#   enableOcr   - when true, tesseract is added to the PATH of the
#                 tika-server wrapper.
{
  lib,
  stdenv,
  maven,
  jdk17,
  jre17_minimal,
  fetchFromGitHub,
  makeWrapper,
  mvnDepsHash ? null,
  enableGui ? true,
  enableOcr ? true,
  runCommand,
  tesseract,
  nixosTests,
}:

let
  # Default Maven dependency hashes, keyed by system string.
  depsHashBySystem = {
    "x86_64-linux" = "sha256-OTd51n6SSlFziqvvHmfyMAyQRwIzsHxFGuJ62zlX1Ec=";
    "aarch64-linux" = "sha256-tPaGLqm0jgEoz0BD/C6AG9xupovQvib/v0kB/jjqwB8=";
    "x86_64-darwin" = "sha256-Rs7nTiGazUW8oJJr6fbJKelzFqd2n278sJYoMy2/0N4=";
    "aarch64-darwin" = "sha256-gnP+G33LPRMQ6HRzeZ8cEV9oSohrlPcMwlBB4rvH7+E=";
  };

  missingHashWarning = "This platform doesn't have a default mvnDepsHash value, you'll need to specify it manually";

  # Table entry for the current system. Systems missing from the table get
  # lib.fakeHash, and the warning is emitted once that value is evaluated.
  platformDepsHash =
    depsHashBySystem.${stdenv.system} or (lib.warn missingHashWarning lib.fakeHash);

  # An explicitly supplied mvnDepsHash always wins over the table lookup.
  selectedDepsHash = if mvnDepsHash == null then platformDepsHash else mvnDepsHash;

  # Runtime used by the installed wrappers: jre17_minimal overridden to use
  # jdk17 and the module list below.
  runtimeJre = jre17_minimal.override {
    jdk = jdk17;
    modules = [
      "java.base"
      "java.desktop"
      "java.logging"
      "java.management"
      "java.naming"
      "java.sql"
    ];
  };

  # JVM flags given to tika-app and baked into the `java` wrapper below.
  addOpensFlags = "--add-opens java.base/jdk.internal.ref=ALL-UNNAMED --add-opens java.base/java.nio=ALL-UNNAMED";

  # A `java` executable that always passes the flags above. It is placed on
  # the PATH of tika-server rather than used as its launcher.
  # NOTE(review): presumably this is for JVMs that the server starts through
  # PATH; confirm against upstream behaviour.
  wrappedJava =
    runCommand "jdk-tika"
      {
        nativeBuildInputs = [ makeWrapper ];
      }
      ''
        makeWrapper ${runtimeJre}/bin/java $out/bin/java \
          --add-flags "${addOpensFlags}"
      '';

  # PATH prefix for the tika-server wrapper.
  serverBinPath = lib.makeBinPath ([ wrappedJava ] ++ lib.optionals enableOcr [ tesseract ]);
in
maven.buildMavenPackage rec {
  pname = "tika";
  version = "2.9.3";

  src = fetchFromGitHub {
    owner = "apache";
    repo = "tika";
    tag = version;
    hash = "sha256-nuiE+MWJNA4PLprAC0vDBadk34TFsVEDBcCZct1XRxo=";
  };

  buildOffline = true;

  manualMvnArtifacts = [
    "org.objenesis:objenesis:2.1"
    "org.apache.apache.resources:apache-jar-resource-bundle:1.5"
    "org.apache.maven.surefire:surefire-junit-platform:3.1.2"
    "org.junit.platform:junit-platform-launcher:1.10.0"
  ];

  mvnJdk = jdk17;
  mvnHash = selectedDepsHash;

  # Space-separated Maven command-line parameters.
  mvnParameters = lib.concatStringsSep " " (
    [
      "-DskipTests=true" # skip tests (out of memory exceptions)
      "-Dossindex.skip" # skip dependency with vulnerability (recommended by upstream)
    ]
    # Without the GUI, only the server module is selected.
    ++ lib.optional (!enableGui) "-am -pl :tika-server-standard"
  );

  nativeBuildInputs = [ makeWrapper ];

  # The install script is assembled from three fragments; the tika-app
  # fragment is included only when enableGui is set.
  installPhase =
    let
      preamble = ''
        runHook preInstall

        # Note: using * instead of version would match multiple files
      '';

      installApp = ''
        install -Dm644 tika-app/target/tika-app-${version}.jar $out/share/tika/tika-app.jar
        makeWrapper ${runtimeJre}/bin/java $out/bin/tika-app \
            --add-flags "${addOpensFlags} -jar $out/share/tika/tika-app.jar"
      '';

      installServer = ''
        install -Dm644 tika-server/tika-server-standard/target/tika-server-standard-${version}.jar $out/share/tika/tika-server.jar
        makeWrapper ${runtimeJre}/bin/java $out/bin/tika-server \
            --prefix PATH : ${serverBinPath} \
            --add-flags "-jar $out/share/tika/tika-server.jar"

        runHook postInstall
      '';
    in
    lib.concatStrings [
      preamble
      (lib.optionalString enableGui installApp)
      installServer
    ];

  passthru.tests.tika = nixosTests.tika;

  meta = {
    description = "Toolkit for extracting metadata and text from over a thousand different file types";
    longDescription = ''
      The Apache Tika™ toolkit detects and extracts metadata and text
      from over a thousand different file types (such as PPT, XLS, and PDF).
      All of these file types can be parsed through a single interface,
      making Tika useful for search engine indexing, content analysis,
      translation, and much more.
    '';
    homepage = "https://tika.apache.org";
    changelog = "https://github.com/apache/tika/blob/${src.rev}/CHANGES.txt";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.tomasajt ];
    mainProgram = "tika-server";
    sourceProvenance = [
      lib.sourceTypes.fromSource
      lib.sourceTypes.binaryBytecode # maven dependencies
    ];
  };
}