# Package definition for markitdown, built from the `packages/markitdown`
# subdirectory of the microsoft/markitdown GitHub repository.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  hatchling,
  beautifulsoup4,
  ffmpeg-headless,
  magika,
  mammoth,
  markdownify,
  numpy,
  openai,
  openpyxl,
  pandas,
  pathvalidate,
  pdfminer-six,
  puremagic,
  pydub,
  python-pptx,
  requests,
  speechrecognition,
  youtube-transcript-api,
  olefile,
  xlrd,
  lxml,
  pytestCheckHook,
  gitUpdater,
}:

buildPythonPackage rec {
  pname = "markitdown";
  version = "0.1.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "microsoft";
    repo = "markitdown";
    # Upstream tags carry a "v" prefix; keep in sync with `rev-prefix` below.
    tag = "v${version}";
    hash = "sha256-siXam2a+ryyLBbciQgjd9k6zC8r46LbzjPMoc1dG0wk=";
  };

  # Build only the `packages/markitdown` subdirectory of the fetched source.
  sourceRoot = "${src.name}/packages/markitdown";

  build-system = [ hatchling ];

  dependencies = [
    beautifulsoup4
    # NOTE(review): ffmpeg-headless is not a Python module; presumably it is
    # listed here for the audio-handling dependencies — confirm.
    ffmpeg-headless
    lxml
    magika
    mammoth
    markdownify
    numpy
    olefile
    openai
    openpyxl
    pandas
    pathvalidate
    pdfminer-six
    puremagic
    pydub
    python-pptx
    requests
    speechrecognition
    xlrd
    youtube-transcript-api
  ];

  pythonImportsCheck = [ "markitdown" ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # Require network access
    "test_markitdown_remote"
    "test_module_vectors"
    "test_cli_vectors"
    "test_module_misc"
  ];

  # The attribute must be `updateScript` (singular) to be recognised by the
  # nixpkgs update tooling. `rev-prefix` strips the leading "v" from upstream
  # tags so the resulting version matches the `tag = "v${version}"` scheme.
  passthru.updateScript = gitUpdater { rev-prefix = "v"; };

  meta = {
    description = "Python tool for converting files and office documents to Markdown";
    homepage = "https://github.com/microsoft/markitdown";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ drupol ];
  };
}