diff options
author | Ben Sima <ben@bsima.me> | 2024-12-02 07:52:56 -0500 |
---|---|---|
committer | Ben Sima <ben@bsima.me> | 2024-12-20 21:09:24 -0500 |
commit | 18956baad9cde85bc55bd0113092e51786d9e1ee (patch) | |
tree | 07e987380a05db04c4814f4ebbc1f5780817899c /Biz/Bild/Deps/exllama.nix | |
parent | bc78e72960dee2721c3648e8061cb543f775710b (diff) |
Delete Biz/Mynion.py and exllama
Mynion was a prototype and while it was cool and worked well, it is unused and
causing magma to build, which takes forever. I have settled on using ollama for
local inference and a hosted inference API for production.
Diffstat (limited to 'Biz/Bild/Deps/exllama.nix')
-rw-r--r-- | Biz/Bild/Deps/exllama.nix | 54 |
1 file changed, 0 insertions, 54 deletions
diff --git a/Biz/Bild/Deps/exllama.nix b/Biz/Bild/Deps/exllama.nix deleted file mode 100644 index 434e9a9..0000000 --- a/Biz/Bild/Deps/exllama.nix +++ /dev/null @@ -1,54 +0,0 @@ -{ lib, sources, buildPythonPackage, pythonOlder -, torch # tested on 2.0.1 and 2.1.0 (nightly) with cu118 -, safetensors, sentencepiece, ninja, cudaPackages, addOpenGLRunpath, which -, libGL, gcc11 # cuda 11.7 requires g++ <12 -}: - -buildPythonPackage rec { - pname = "exllama"; - version = sources.exllama.rev; - format = "setuptools"; - disabled = pythonOlder "3.9"; - - src = sources.exllama; - - # I only care about compiling for the Ampere architecture, which is what my - # RTX 3090 TI is, and for some reason (nix sandbox?) the torch extension - # builder - # cannot autodetect the arch - TORCH_CUDA_ARCH_LIST = "8.0;8.6+PTX"; - - CUDA_HOME = "${cudaPackages.cuda_nvcc}"; - - nativeBuildInputs = [ - gcc11 - which - libGL - addOpenGLRunpath - cudaPackages.cuda_nvcc - cudaPackages.cuda_cudart - ]; - - propagatedBuildInputs = - [ torch safetensors sentencepiece ninja cudaPackages.cudatoolkit ]; - - doCheck = false; # no tests currently - pythonImportsCheck = [ - "exllama" - "exllama.cuda_ext" - "exllama.generator" - "exllama.lora" - "exllama.model" - "exllama.tokenizer" - ]; - - meta = with lib; { - description = '' - A more memory-efficient rewrite of the HF transformers implementation of - Llama for use with quantized weights. - ''; - homepage = "https://github.com/jllllll/exllama"; - license = licenses.mit; - maintainers = with maintainers; [ bsima ]; - }; -} |