{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  datasets,
  fastapi,
  mlx,
  mlx-lm,
  numpy,
  opencv-python,
  pillow,
  requests,
  scipy,
  soundfile,
  tqdm,
  transformers,
  uvicorn,

  # tests
  psutil,
  pytestCheckHook,
  rich,
}:

# Package definition for mlx-vlm, built from the upstream GitHub release tag
# with a pyproject/setuptools build.
buildPythonPackage rec {
  pname = "mlx-vlm";
  version = "0.3.3";
  pyproject = true;

  # Source is pinned to the `v<version>` tag of Blaizzy/mlx-vlm.
  src = fetchFromGitHub {
    owner = "Blaizzy";
    repo = "mlx-vlm";
    tag = "v${version}";
    hash = "sha256-KhppKqIJPmtjgSXSC3n5HTMm3fDUJaoYJEiGfQ5vGNQ=";
  };

  build-system = [ setuptools ];

  # Relax the version constraint declared upstream for opencv-python.
  pythonRelaxDeps = [ "opencv-python" ];

  # Runtime dependencies, kept in alphabetical order.
  dependencies = [
    datasets
    fastapi
    mlx
    mlx-lm
    numpy
    opencv-python
    pillow
    requests
    scipy
    soundfile
    tqdm
    transformers
    uvicorn
  ];

  # Check that the top-level module can be imported.
  pythonImportsCheck = [ "mlx_vlm" ];

  # Inputs needed only to run the test suite.
  nativeCheckInputs = [
    psutil
    pytestCheckHook
    rich
  ];

  # Individual tests excluded from the check phase, grouped by failure cause.
  disabledTests = [
    # Crashes the interpreter with "Fatal Python error: Aborted" at
    # mlx_vlm/models/multi_modality/vision.py, line 174 in __call__
    "test_multi_modality"

    # Fail with "RuntimeError: [metal_kernel] No GPU back-end"
    "test_glm4v_moe"
    "test_kimi_vl"
  ];

  # Whole test files excluded from the check phase.
  disabledTestPaths = [
    # ImportError: cannot import name 'get_class_predicate' from 'mlx_vlm.utils'
    # (`mlx_vlm.utils` really does not expose this function)
    "mlx_vlm/tests/test_utils.py"

    # pytest reports: fixture 'model_path' not found
    "mlx_vlm/tests/test_smoke.py"
  ];

  meta = {
    description = "Inference and fine-tuning of Vision Language Models (VLMs) on your Mac using MLX";
    homepage = "https://github.com/Blaizzy/mlx-vlm";
    changelog = "https://github.com/Blaizzy/mlx-vlm/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.GaetanLepage ];
  };
}