feat: Prepare for llama-server when ROCm is fixed upstream
This commit is contained in:
parent
b717ea973a
commit
cfd3aeecaf
4 changed files with 61 additions and 6 deletions
|
|
@ -1,4 +1,4 @@
|
|||
{ config, pkgs, lib, ... }:
|
||||
{ config, pkgs, lib, strix-halo-pkgs, ... }:
|
||||
|
||||
let
|
||||
# Using nixos-24.05 for bisq-desktop (last stable release with working bisq-desktop)
|
||||
|
|
@ -15,13 +15,15 @@ in
|
|||
./forgejo.nix
|
||||
./radicale.nix
|
||||
./ollama.nix
|
||||
# ./llama-server.nix # disabled: source build broken (LLVM 22 vs 19 mismatch in strix-halo overlay)
|
||||
# ./docuseal.nix
|
||||
];
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
bisqPkgs.bisq-desktop # v1.9.15-1.9.17 from nixos-24.05
|
||||
bisq2
|
||||
llamacpp-rocm-bin-gfx1151
|
||||
llamacpp-rocm-bin-gfx1151 # prebuilt b1025; source build broken (LLVM mismatch)
|
||||
# strix-halo-pkgs.llamacpp-rocm-gfx1151 # source-built, re-enable when overlay fixes LLVM 22/19 mismatch
|
||||
lmstudio
|
||||
];
|
||||
|
||||
|
|
|
|||
47
hosts/crossbox/llama-server.nix
Normal file
47
hosts/crossbox/llama-server.nix
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
{ config, pkgs, lib, strix-halo-pkgs, ... }:

{
  # Native systemd unit for llama.cpp's HTTP server running GLM-4.7-Flash.
  # Replaces Calvin's Docker-based setup with a NixOS-managed service.
  systemd.services.llama-server = {
    description = "llama.cpp server (GLM-4.7-Flash)";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];

    environment = {
      # Force ROCm to treat the GPU as gfx1151 (Strix Halo); without the
      # override the runtime may not recognize the device.
      HSA_OVERRIDE_GFX_VERSION = "11.5.1";
    };

    serviceConfig = {
      # Source-built llamacpp with ROCm for gfx1151; tracks the flake's
      # llama-cpp input (b7984).
      ExecStart = ''
        ${strix-halo-pkgs.llamacpp-rocm-gfx1151}/bin/llama-server \
          -m /srv/llama/models/GLM-4.7-Flash-Q4_K_S.gguf \
          --fa \
          -c 16384 \
          --port 25566 \
          --host 0.0.0.0 \
          --jinja \
          --chat-template-file /srv/llama/templates/glminstruct.template
      '';
      Restart = "on-failure";
      RestartSec = 5;

      # Transient unprivileged user; /var/lib/llama-server is provisioned
      # and owned by systemd for any mutable state.
      DynamicUser = true;
      StateDirectory = "llama-server";

      # Model and template files are mounted read-only into the service.
      ReadOnlyPaths = [ "/srv/llama" ];
    };
  };

  # Pre-create the model/template directory tree at boot.
  systemd.tmpfiles.rules = [
    "d /srv/llama 0755 root root -"
    "d /srv/llama/models 0755 root root -"
    "d /srv/llama/templates 0755 root root -"
  ];

  # Expose the llama-server HTTP port on the LAN.
  networking.firewall.allowedTCPPorts = [ 25566 ];
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue