Merge pull request #283881 from apraga/hap-py-0.3.15

Authored by Sandro and committed by GitHub · 4d49db58 fd622574

+484 additions

pkgs/by-name/ha/hap-py/boost-library-flags.patch (+50)
--- a/CMakeLists.txt	2023-02-01 23:55:18.171758209 +0100
+++ b/CMakeLists.txt	2023-02-02 19:32:16.574426531 +0100
@@ -23,25 +23,11 @@
     set (VCFEVAL_AVAILABLE 0)
 endif()
 
-execute_process(
-    COMMAND ${CMAKE_SOURCE_DIR}/external/make_dependencies.sh
-    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
-    RESULT_VARIABLE EXTERNAL_SUCCESS)
 
-if(NOT "${EXTERNAL_SUCCESS}" STREQUAL "0")
-    message(FATAL_ERROR "Building external dependencies has failed")
-endif()
-
-set(Boost_USE_STATIC_LIBS ON) # only find static libs
 set(Boost_USE_MULTITHREADED ON)
-set(Boost_USE_STATIC_RUNTIME ON)
 
 # un-break library finding
-set(Boost_NO_BOOST_CMAKE ON)
-set(Boost_NO_SYSTEM_PATHS ON)
 
-set(BOOST_ROOT ${CMAKE_BINARY_DIR})
-message("Using our own Boost, which was built at ${HAPLOTYPES_SOURCE_DIR}/external/boost_install")
 
 find_package(Boost 1.55.0 COMPONENTS thread iostreams regex unit_test_framework filesystem system program_options REQUIRED)
 include_directories(${Boost_INCLUDE_DIRS})
@@ -51,7 +51,8 @@
 link_directories (${CMAKE_BINARY_DIR}/lib)
 
 # make sure we use the bundled zlib version
-set(ZLIB_LIBRARIES ${CMAKE_BINARY_DIR}/lib/libz.a)
+# Additional flags for nix, found by trial and error
+set(ZLIB_LIBRARIES -lz -lbz2 -lcurl -lcrypto -llzma)
 
 include_directories (${HAPLOTYPES_SOURCE_DIR}/external/klib)
 include_directories (${HAPLOTYPES_SOURCE_DIR}/external/intervaltree)
@@ -84,11 +86,6 @@
         ${CMAKE_THREAD_LIBS_INIT})
 
 
-execute_process(COMMAND git describe --tags --always
-    OUTPUT_VARIABLE HAPLOTYPES_VERSION
-    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-)
 
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/c++/include/Version.hh.in"
                "${CMAKE_BINARY_DIR}/include/Version.hh")
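The key hunk here is the `ZLIB_LIBRARIES` substitution: instead of the vendored static `libz.a` built by `make_dependencies.sh`, the build now links the shared libraries Nix provides, and the extra `-lbz2 -lcurl -lcrypto -llzma` flags cover what the bundled htslib pulls in. A quick way to sanity-check that outside the build is a short Python sketch (not part of the PR; purely illustrative):

```python
# Hedged sanity check: verify that the shared libraries behind the
# substituted link flags (-lz -lbz2 -lcurl -lcrypto -llzma) are resolvable
# in the current environment, e.g. inside a nix-shell with these inputs.
import ctypes.util

for name in ["z", "bz2", "curl", "crypto", "lzma"]:
    path = ctypes.util.find_library(name)  # returns None if the lib is missing
    print("lib%s: %s" % (name, path or "NOT FOUND"))
```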
pkgs/by-name/ha/hap-py/package.nix (+92)
{
  autoconf,
  bcftools,
  boost,
  bzip2,
  cmake,
  curl,
  fetchFromGitHub,
  htslib,
  lib,
  makeWrapper,
  perl,
  python3,
  rtg-tools,
  samtools,
  stdenv,
  xz,
  zlib,
}:

let
  # Bcftools needs perl
  runtime = [
    bcftools
    htslib
    my-python
    perl
    samtools
  ];
  my-python-packages =
    p: with p; [
      bx-python
      pysam
      pandas
      psutil
      scipy
    ];
  my-python = python3.withPackages my-python-packages;
in
stdenv.mkDerivation rec {
  pname = "hap.py";
  version = "0.3.15";

  src = fetchFromGitHub {
    owner = "Illumina";
    repo = pname;
    rev = "v${version}";
    sha256 = "sha256-K8XXhioMGMHw56MKvp0Eo8S6R36JczBzGRaBz035zRQ=";
  };

  # For the Illumina build script
  BOOST_ROOT = "${boost.out}";
  ZLIBSTATIC = "${zlib.static}";
  # For cmake: boost libs and include dirs are in different locations
  BOOST_LIBRARYDIR = "${boost.out}/lib";
  BOOST_INCLUDEDIR = "${boost.dev}/include";

  patches = [
    # Compatibility with nix for boost and library flags: zlib, bzip2, curl, crypto, lzma
    ./boost-library-flags.patch
    # Update to python3
    ./python3.patch
  ];

  nativeBuildInputs = [
    autoconf
    cmake
    makeWrapper
  ];
  buildInputs = [
    boost
    bzip2
    curl
    htslib
    my-python
    rtg-tools
    xz
    zlib
  ];

  postFixup = ''
    wrapProgram $out/bin/hap.py \
      --set PATH ${lib.makeBinPath runtime} \
      --add-flags "--engine-vcfeval-path=${rtg-tools}/bin/rtg"
  '';

  meta = with lib; {
    description = "Compare genetic variants against a gold standard dataset";
    homepage = "https://github.com/Illumina/hap.py";
    license = licenses.bsd2;
    maintainers = with maintainers; [ apraga ];
    mainProgram = "hap.py";
  };
}
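After `postFixup`, the installed `hap.py` is a wrapper that already carries its runtime `PATH` (bcftools, samtools, htslib, the Python environment) and the `--engine-vcfeval-path` flag pointing at rtg-tools, so invocation reduces to the plain upstream CLI. A minimal usage sketch (the input file names and output prefix are hypothetical placeholders):

```python
# Minimal sketch of invoking the wrapped hap.py; truth.vcf.gz, query.vcf.gz,
# ref.fa and benchmark/out are made-up placeholders. PATH and the vcfeval
# engine path are injected by the wrapper, so they never appear here.
import subprocess

subprocess.run(
    [
        "hap.py",
        "truth.vcf.gz",         # gold-standard variant calls
        "query.vcf.gz",         # call set to benchmark against the truth
        "-r", "ref.fa",         # reference FASTA
        "-o", "benchmark/out",  # prefix for summary/extended output files
    ],
    check=True,  # raise if hap.py exits non-zero
)
```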
pkgs/by-name/ha/hap-py/python3.patch (+342)
diff --git a/src/c++/lib/tools/Roc.cpp b/src/c++/lib/tools/Roc.cpp
index fabe2be..2c6bb49 100644
--- a/src/c++/lib/tools/Roc.cpp
+++ b/src/c++/lib/tools/Roc.cpp
@@ -34,6 +34,9 @@
  */
 
 #include "helpers/Roc.hh"
+#include <stdexcept>
+#include <limits>
+
 
 #include <algorithm>
 #include <cmath>
diff --git a/src/cmake/cxx.cmake b/src/cmake/cxx.cmake
old mode 100755
new mode 100644
diff --git a/src/python/Haplo/happyroc.py b/src/python/Haplo/happyroc.py
index 152bd18..e439957 100644
--- a/src/python/Haplo/happyroc.py
+++ b/src/python/Haplo/happyroc.py
@@ -97,7 +97,7 @@ def roc(roc_table, output_path,
                 header = l.split("\t")
             else:
                 rec = {}
-                for k, v in itertools.izip(header, l.split("\t")):
+                for k, v in zip(header, l.split("\t")):
                     rec[k] = v
 
                 if filter_handling:
@@ -160,11 +160,11 @@
 
     if "all" not in result:
         # minimal empty DF
-        minidata = [{"Type": "SNP", "Subtype": "*", "Filter": "ALL", "Genotype": "*", "Subset": "*", "QQ": "*"} for _ in xrange(2)]
+        minidata = [{"Type": "SNP", "Subtype": "*", "Filter": "ALL", "Genotype": "*", "Subset": "*", "QQ": "*"} for _ in range(2)]
         minidata[1]["Type"] = "INDEL"
         result["all"] = pandas.DataFrame(minidata, columns=RESULT_ALLCOLUMNS)
         for i, c in enumerate(RESULT_ALLCOLUMNS):
-            result["all"][c] = result["all"][c].astype(RESULT_ALLDTYPES[i], raise_on_error=False)
+            result["all"][c] = result["all"][c].astype(RESULT_ALLDTYPES[i], errors="ignore")
 
     for k, v in result.items():
         result[k] = _postprocessRocData(pandas.DataFrame(v, columns=RESULT_ALLCOLUMNS))
diff --git a/src/python/Haplo/partialcredit.py b/src/python/Haplo/partialcredit.py
index d9e22bb..0f2b2cf 100644
--- a/src/python/Haplo/partialcredit.py
+++ b/src/python/Haplo/partialcredit.py
@@ -202,7 +202,7 @@ def partialCredit(vcfname,
         try:
             res = runParallel(pool,
                               preprocessWrapper,
-                              itertools.izip(itertools.repeat(vcfname), locations),
+                              zip(itertools.repeat(vcfname), locations),
                               {"reference": reference,
                                "decompose": decompose,
                                "leftshift": leftshift,
diff --git a/src/python/Haplo/quantify.py b/src/python/Haplo/quantify.py
index 042d13e..b1d362e 100755
--- a/src/python/Haplo/quantify.py
+++ b/src/python/Haplo/quantify.py
@@ -152,7 +152,7 @@ def run_quantify(filename,
         run_str += " -v %s" % pipes.quote(write_vcf)
 
     if regions:
-        for k, v in regions.iteritems():
+        for k, v in regions.items():
             run_str += " -R '%s:%s'" % (k, v)
 
     if roc_regions:
diff --git a/src/python/Somatic/Mutect.py b/src/python/Somatic/Mutect.py
index 7ac923c..81f08b5 100755
--- a/src/python/Somatic/Mutect.py
+++ b/src/python/Somatic/Mutect.py
@@ -148,7 +148,7 @@ def extractMutectSNVFeatures(vcfname, tag, avg_depth=None):
                 n_allele_alt_count = 0
             else:
                 n_allele_alt_count = 0
-                for a in xrange(0, len(alleles_alt)):
+                for a in range(0, len(alleles_alt)):
                     n_allele_alt_count += float(rec[n_sample + "AD"][a + 1])
 
             if n_allele_alt_count + n_allele_ref_count == 0:
@@ -163,7 +163,7 @@ def extractMutectSNVFeatures(vcfname, tag, avg_depth=None):
                 t_allele_alt_count = 0
             else:
                 t_allele_alt_count = 0
-                for a in xrange(0, len(alleles_alt)):
+                for a in range(0, len(alleles_alt)):
                     t_allele_alt_count += float(rec[t_sample + "AD"][a + 1])
 
             if t_allele_alt_count + t_allele_ref_count == 0:
@@ -344,7 +344,7 @@ def extractMutectIndelFeatures(vcfname, tag, avg_depth=None):
                 n_allele_alt_count = 0
             else:
                 n_allele_alt_count = 0
-                for a in xrange(0, len(alleles_alt)):
+                for a in range(0, len(alleles_alt)):
                     n_allele_alt_count += float(rec[n_sample + "AD"][a + 1])
 
             if n_allele_alt_count + n_allele_ref_count == 0:
@@ -359,7 +359,7 @@ def extractMutectIndelFeatures(vcfname, tag, avg_depth=None):
                 t_allele_alt_count = 0
             else:
                 t_allele_alt_count = 0
-                for a in xrange(0, len(alleles_alt)):
+                for a in range(0, len(alleles_alt)):
                     t_allele_alt_count += float(rec[t_sample + "AD"][a + 1])
 
             if t_allele_alt_count + t_allele_ref_count == 0:
diff --git a/src/python/Tools/bcftools.py b/src/python/Tools/bcftools.py
index 6146b7a..6d80d14 100755
--- a/src/python/Tools/bcftools.py
+++ b/src/python/Tools/bcftools.py
@@ -128,8 +128,8 @@ def concatenateParts(output, *args):
             to_delete.append(tf2.name)
             to_delete.append(tf1.name + ".csi")
             to_delete.append(tf2.name + ".csi")
-            half1 = [tf1.name] + list(args[:len(args)/2])
-            half2 = [tf2.name] + list(args[len(args)/2:])
+            half1 = [tf1.name] + list(args[:len(args)//2])
+            half2 = [tf2.name] + list(args[len(args)//2:])
             concatenateParts(*half1)
             runBcftools("index", tf1.name)
             concatenateParts(*half2)
diff --git a/src/python/Tools/metric.py b/src/python/Tools/metric.py
index 71ccc99..372626d 100755
--- a/src/python/Tools/metric.py
+++ b/src/python/Tools/metric.py
@@ -115,7 +115,7 @@ def replaceNaNs(xobject):
             if type(xobject[k]) is dict or type(xobject[k]) is list or type(xobject[k]) is float:
                 xobject[k] = replaceNaNs(xobject[k])
     elif type(xobject) is list:
-        for k in xrange(0, len(xobject)):
+        for k in range(0, len(xobject)):
             if type(xobject[k]) is dict or type(xobject[k]) is list or type(xobject[k]) is float:
                 xobject[k] = replaceNaNs(xobject[k])
     elif type(xobject) is float:
diff --git a/src/python/Tools/parallel.py b/src/python/Tools/parallel.py
index 9d49760..5fcb37e 100755
--- a/src/python/Tools/parallel.py
+++ b/src/python/Tools/parallel.py
@@ -17,9 +17,9 @@ import logging
 import traceback
 import subprocess
 import multiprocessing
-import cPickle
+import pickle
 import tempfile
-from itertools import islice, izip, repeat
+from itertools import islice, repeat
 
 from . import LoggingWriter
 
@@ -93,7 +93,7 @@ def runParallel(pool, fun, par, *args, **kwargs):
 
     """
     if pool:
-        result = pool.map(parMapper, izip(par, repeat( { "fun": fun, "args": args, "kwargs": kwargs } )))
+        result = pool.map(parMapper, zip(par, repeat( { "fun": fun, "args": args, "kwargs": kwargs } )))
     else:
         result = []
         for c in par:
diff --git a/src/python/Tools/sessioninfo.py b/src/python/Tools/sessioninfo.py
index 75650ec..b49bf59 100644
--- a/src/python/Tools/sessioninfo.py
+++ b/src/python/Tools/sessioninfo.py
@@ -34,7 +34,6 @@ def sessionInfo():
         'version': version,
         'runInfo': [{"key": "commandline", "value": " ".join(sys.argv)}],
         'uname': " / ".join(platform.uname()),
-        'dist': " / ".join(platform.dist()),
         'mac_ver': " / ".join([platform.mac_ver()[0], platform.mac_ver()[2]]),
         'python_implementation': platform.python_implementation(),
         'python_version': platform.python_version(),
diff --git a/src/python/Tools/vcfcallerinfo.py b/src/python/Tools/vcfcallerinfo.py
index eb7e86e..947f2c4 100755
--- a/src/python/Tools/vcfcallerinfo.py
+++ b/src/python/Tools/vcfcallerinfo.py
@@ -33,8 +33,8 @@ class CallerInfo(object):
 
     def asDict(self):
         kvd = ["name", "version", "parameters"]
-        return {"aligners": [dict(y for y in itertools.izip(kvd, x)) for x in self.aligners],
-                "callers": [dict(y for y in itertools.izip(kvd, x)) for x in self.callers]}
+        return {"aligners": [dict(y for y in zip(kvd, x)) for x in self.aligners],
+                "callers": [dict(y for y in zip(kvd, x)) for x in self.callers]}
 
     def addVCF(self, vcfname):
         """ Add caller versions from a VCF
diff --git a/src/python/hap.py b/src/python/hap.py
index 8045936..93279a4 100755
--- a/src/python/hap.py
+++ b/src/python/hap.py
@@ -188,7 +188,7 @@ def main():
         parser.print_help()
         exit(1)
 
-    print "Hap.py %s" % Tools.version
+    print("Hap.py %s" % Tools.version)
     if args.version:
         exit(0)
 
diff --git a/src/python/ovc.py b/src/python/ovc.py
index 2837255..20b4442 100755
--- a/src/python/ovc.py
+++ b/src/python/ovc.py
@@ -34,7 +34,7 @@ lines = 1
 for line in f:
     l = line.split("\t")
     if len(l) > 3 and (last-1) > int(l[1]):
-        print "Overlap at %s:%i (line %i)" % (l[0], int(l[1]), lines)
+        print("Overlap at %s:%i (line %i)" % (l[0], int(l[1]), lines))
         exit(1)
     elif len(l) > 3:
         last = int(l[2])
diff --git a/src/python/pre.py b/src/python/pre.py
index 5ca1644..a37a4b2 100755
--- a/src/python/pre.py
+++ b/src/python/pre.py
@@ -47,8 +47,8 @@ import Haplo.partialcredit
 def hasChrPrefix(chrlist):
     """ returns if list of chr names has a chr prefix or not """
 
-    noprefix = map(str, range(23)) + ["X", "Y", "MT"]
-    withprefix = ["chr" + x for x in map(str, range(23)) + ["X", "Y", "M"]]
+    noprefix = [str(x) for x in range(23)] + ["X", "Y", "MT"]
+    withprefix = ["chr" + x for x in [str(i) for i in range(23)] + ["X", "Y", "M"]]
 
     count_noprefix = len(list(set(noprefix) & set(chrlist)))
     count_prefix = len(list(set(withprefix) & set(chrlist)))
@@ -126,7 +126,7 @@ def preprocess(vcf_input,
 
         if gender == "auto":
             logging.info(mf)
-            if "female" in mf:
+            if b"female" in mf:
                 gender = "female"
             else:
                 gender = "male"
@@ -392,7 +392,7 @@ def main():
         exit(0)
 
     if args.version:
-        print "pre.py %s" % Tools.version  # noqa:E999
+        print("pre.py %s" % Tools.version)  # noqa:E999
         exit(0)
 
     args.input = args.input[0]
diff --git a/src/python/qfy.py b/src/python/qfy.py
index 4f247ee..59ed68a 100755
--- a/src/python/qfy.py
+++ b/src/python/qfy.py
@@ -203,8 +203,8 @@ def quantify(args):
 
     # in default mode, print result summary to stdout
     if not args.quiet and not args.verbose:
-        print "Benchmarking Summary:"
-        print essential_numbers.to_string(index=False)
+        print("Benchmarking Summary:")
+        print(essential_numbers.to_string(index=False))
 
     # keep this for verbose output
    if not args.verbose:
@@ -213,12 +213,12 @@
         except:
             pass
 
-    for t in res.iterkeys():
+    for t in res.keys():
         metrics_output["metrics"].append(dataframeToMetricsTable("roc." + t, res[t]))
 
     # gzip JSON output
     if args.write_json:
         with gzip.open(args.reports_prefix + ".metrics.json.gz", "w") as fp:
-            json.dump(metrics_output, fp)
+            fp.write(json.dumps(metrics_output, default=np_encoder).encode('ascii'))
 
 
@@ -362,7 +363,7 @@ def main():
         exit(0)
 
     if args.version:
-        print "qfy.py %s" % Tools.version
+        print("qfy.py %s" % Tools.version)
         exit(0)
 
     if args.fp_bedfile and args.preprocessing_truth_confregions:
diff --git a/src/python/som.py b/src/python/som.py
index e942351..c01d522 100755
--- a/src/python/som.py
+++ b/src/python/som.py
@@ -640,7 +640,7 @@ def main():
                          "overlap):\n" + ambie.to_string(index=False))
         # in default mode, print result summary to stdout
         if not args.quiet and not args.verbose:
-            print "FP/ambiguity classes with info (multiple classes can " \
-                  "overlap):\n" + ambie.to_string(index=False)
+            print("FP/ambiguity classes with info (multiple classes can "
+                  "overlap):\n" + ambie.to_string(index=False))
             ambie.to_csv(args.output + ".ambiclasses.csv")
             metrics_output["metrics"].append(dataframeToMetricsTable("ambiclasses", ambie))
@@ -659,7 +659,7 @@
             formatters={'reason': '{{:<{}s}}'.format(ambie['reason'].str.len().max()).format}, index=False))
         # in default mode, print result summary to stdout
         if not args.quiet and not args.verbose:
-            print "Reasons for defining as ambiguous (multiple reasons can overlap):\n" + ambie.to_string(
-                formatters={'reason': '{{:<{}s}}'.format(ambie['reason'].str.len().max()).format}, index=False)
+            print("Reasons for defining as ambiguous (multiple reasons can overlap):\n" + ambie.to_string(
+                formatters={'reason': '{{:<{}s}}'.format(ambie['reason'].str.len().max()).format}, index=False))
             ambie.to_csv(args.output + ".ambireasons.csv")
             metrics_output["metrics"].append(dataframeToMetricsTable("ambireasons", ambie))
@@ -936,7 +936,7 @@
         logging.info("\n" + res.to_string())
         # in default mode, print result summary to stdout
         if not args.quiet and not args.verbose:
-            print "\n" + res.to_string()
+            print("\n" + res.to_string())
 
         res["sompyversion"] = vstring
 
diff --git a/src/python/qfy.py b/src/python/qfy.py
index 59ed68a..be8d7e1 100755
--- a/src/python/qfy.py
+++ b/src/python/qfy.py
@@ -33,6 +33,7 @@ import pandas
 import json
 import tempfile
 import gzip
+import numpy as np
 
 scriptDir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(os.path.abspath(os.path.join(scriptDir, '..', 'lib', 'python27')))
@@ -45,6 +46,10 @@ import Haplo.happyroc
 import Haplo.gvcf2bed
 from Tools import fastasize
 
+# Cannot convert data to json without a custom encoder
+def np_encoder(object):
+    if isinstance(object, np.generic):
+        return object.item()
 
 def quantify(args):
     """ Run quantify and write tables """
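Most of this patch is a mechanical Python 2-to-3 port: `izip`/`iteritems`/`iterkeys`/`xrange` become their builtins, `print` statements become calls, integer division gains `//`, and `cPickle` becomes `pickle`. The one non-mechanical piece is `np_encoder`: `json.dump` cannot serialize numpy scalar types, so qfy.py passes a `default` hook that unwraps them. A standalone sketch of that pattern (the metrics dict below is made-up sample data):

```python
# Standalone sketch of the np_encoder pattern added to qfy.py.
import json
import numpy as np

def np_encoder(obj):
    # All numpy scalars (np.int64, np.float64, ...) derive from np.generic;
    # .item() converts them to the equivalent built-in Python type.
    if isinstance(obj, np.generic):
        return obj.item()

metrics = {"tp": np.int64(42), "precision": np.float64(0.993)}
print(json.dumps(metrics, default=np_encoder))
# {"tp": 42, "precision": 0.993}
```

`json.dumps` only calls `default` for objects it cannot serialize itself, so plain ints, floats, and strings pass through untouched; the hook returning `None` for non-numpy types would surface as a `TypeError` from `json`, which is the desired failure mode for genuinely unserializable values.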