diff --git a/flake.lock b/flake.lock
index a63e9a0..7b4da1a 100644
--- a/flake.lock
+++ b/flake.lock
@@ -140,15 +140,16 @@
     "llama-cpp": {
       "flake": false,
       "locked": {
-        "lastModified": 1755068833,
-        "narHash": "sha256-U2bNRei5Q+fpMmk0Oc2HVSIY6KSBhgcNNkNhGykpG2c=",
+        "lastModified": 1770704370,
+        "narHash": "sha256-atYUuXBZFbJxmswd694YwHfAWj1NClZ6mXiQbP1ABG8=",
         "owner": "ggerganov",
         "repo": "llama.cpp",
-        "rev": "bc5182272c373267352bc689e5fca276934bea2d",
+        "rev": "f0bfe54f552f4783588f333b90d73920a57c5096",
         "type": "github"
       },
       "original": {
         "owner": "ggerganov",
+        "ref": "b7984",
         "repo": "llama.cpp",
         "type": "github"
       }
diff --git a/flake.nix b/flake.nix
index 9ccf9e4..2f675bc 100644
--- a/flake.nix
+++ b/flake.nix
@@ -4,13 +4,15 @@
   inputs = {
     nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
     strix-halo.url = "github:hellas-ai/nix-strix-halo";
+    strix-halo.inputs.llama-cpp.url = "github:ggerganov/llama.cpp/b7984";
   };
 
   outputs = { self, nixpkgs, strix-halo, ... }:
     let
-      mkHost = { hostDir, extraModules ? [], overlays ? [] }:
+      mkHost = { hostDir, extraModules ? [], overlays ? [], extraSpecialArgs ? {} }:
         nixpkgs.lib.nixosSystem {
           system = "x86_64-linux";
+          specialArgs = extraSpecialArgs;
           modules = [
             ./configuration.nix
             hostDir
@@ -22,6 +24,9 @@
         hostDir = ./hosts/crossbox;
         overlays = [ strix-halo.overlays.default ];
         extraModules = [ ./sdr.nix ./syncthing.nix ];
+        extraSpecialArgs = {
+          strix-halo-pkgs = strix-halo.packages.x86_64-linux;
+        };
       };
 
       nixosConfigurations.anvil = mkHost {
diff --git a/hosts/crossbox/default.nix b/hosts/crossbox/default.nix
index 3811cd4..b8202c8 100644
--- a/hosts/crossbox/default.nix
+++ b/hosts/crossbox/default.nix
@@ -1,4 +1,4 @@
-{ config, pkgs, lib, ... }:
+{ config, pkgs, lib, strix-halo-pkgs, ... }:
 
 let
   # Using nixos-24.05 for bisq-desktop (last stable release with working bisq-desktop)
@@ -15,13 +15,15 @@ in
     ./forgejo.nix
     ./radicale.nix
     ./ollama.nix
+    # ./llama-server.nix  # disabled: source build broken (LLVM 22 vs 19 mismatch in strix-halo overlay)
     # ./docuseal.nix
   ];
 
   environment.systemPackages = with pkgs; [
     bisqPkgs.bisq-desktop  # v1.9.15-1.9.17 from nixos-24.05
     bisq2
-    llamacpp-rocm-bin-gfx1151
+    llamacpp-rocm-bin-gfx1151  # prebuilt b1025; source build broken (LLVM mismatch)
+    # strix-halo-pkgs.llamacpp-rocm-gfx1151  # source-built, re-enable when overlay fixes LLVM 22/19 mismatch
     lmstudio
   ];
 
diff --git a/hosts/crossbox/llama-server.nix b/hosts/crossbox/llama-server.nix
new file mode 100644
index 0000000..ee47dbb
--- /dev/null
+++ b/hosts/crossbox/llama-server.nix
@@ -0,0 +1,53 @@
+{ config, pkgs, lib, strix-halo-pkgs, ... }:
+
+{
+  # Systemd service for llama-server with GLM-4.7-Flash
+  # Replaces Calvin's Docker-based setup
+  systemd.services.llama-server = {
+    description = "llama.cpp server (GLM-4.7-Flash)";
+    after = [ "network.target" ];
+    wantedBy = [ "multi-user.target" ];
+
+    environment = {
+      HSA_OVERRIDE_GFX_VERSION = "11.5.1";
+    };
+
+    serviceConfig = {
+      # Source-built llamacpp with ROCm for gfx1151, tracks flake's llama-cpp input (b7984)
+      # Flash attention is spelled -fa / --flash-attn and takes an explicit on|off|auto value.
+      ExecStart = ''
+        ${strix-halo-pkgs.llamacpp-rocm-gfx1151}/bin/llama-server \
+          -m /srv/llama/models/GLM-4.7-Flash-Q4_K_S.gguf \
+          --flash-attn on \
+          -c 16384 \
+          --port 25566 \
+          --host 0.0.0.0 \
+          --jinja \
+          --chat-template-file /srv/llama/templates/glminstruct.template
+      '';
+      Restart = "on-failure";
+      RestartSec = 5;
+
+      # Run as a dedicated user
+      DynamicUser = true;
+      StateDirectory = "llama-server";
+
+      # The dynamic user belongs to no groups by default, so grant the groups that
+      # own the GPU device nodes (/dev/kfd, /dev/dri/*); without them ROCm cannot open the GPU.
+      SupplementaryGroups = [ "video" "render" ];
+
+      # Read-only access to model and template files
+      ReadOnlyPaths = [ "/srv/llama" ];
+    };
+  };
+
+  # Ensure directories exist
+  systemd.tmpfiles.rules = [
+    "d /srv/llama 0755 root root -"
+    "d /srv/llama/models 0755 root root -"
+    "d /srv/llama/templates 0755 root root -"
+  ];
+
+  # Open the port llama-server listens on (must match --port above)
+  networking.firewall.allowedTCPPorts = [ 25566 ];
+}