feat: Prepare for llama-server when ROCm is fixed upstream

This commit is contained in:
matthew.binning 2026-02-11 06:30:34 -08:00
parent b717ea973a
commit cfd3aeecaf
4 changed files with 61 additions and 6 deletions

7
flake.lock generated
View file

@@ -140,15 +140,16 @@
"llama-cpp": { "llama-cpp": {
"flake": false, "flake": false,
"locked": { "locked": {
"lastModified": 1755068833, "lastModified": 1770704370,
"narHash": "sha256-U2bNRei5Q+fpMmk0Oc2HVSIY6KSBhgcNNkNhGykpG2c=", "narHash": "sha256-atYUuXBZFbJxmswd694YwHfAWj1NClZ6mXiQbP1ABG8=",
"owner": "ggerganov", "owner": "ggerganov",
"repo": "llama.cpp", "repo": "llama.cpp",
"rev": "bc5182272c373267352bc689e5fca276934bea2d", "rev": "f0bfe54f552f4783588f333b90d73920a57c5096",
"type": "github" "type": "github"
}, },
"original": { "original": {
"owner": "ggerganov", "owner": "ggerganov",
"ref": "b7984",
"repo": "llama.cpp", "repo": "llama.cpp",
"type": "github" "type": "github"
} }

View file

@@ -4,13 +4,15 @@
inputs = { inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
strix-halo.url = "github:hellas-ai/nix-strix-halo"; strix-halo.url = "github:hellas-ai/nix-strix-halo";
strix-halo.inputs.llama-cpp.url = "github:ggerganov/llama.cpp/b7984";
}; };
outputs = { self, nixpkgs, strix-halo, ... }: outputs = { self, nixpkgs, strix-halo, ... }:
let let
mkHost = { hostDir, extraModules ? [], overlays ? [] }: mkHost = { hostDir, extraModules ? [], overlays ? [], extraSpecialArgs ? {} }:
nixpkgs.lib.nixosSystem { nixpkgs.lib.nixosSystem {
system = "x86_64-linux"; system = "x86_64-linux";
specialArgs = extraSpecialArgs;
modules = [ modules = [
./configuration.nix ./configuration.nix
hostDir hostDir
@@ -22,6 +24,9 @@
hostDir = ./hosts/crossbox; hostDir = ./hosts/crossbox;
overlays = [ strix-halo.overlays.default ]; overlays = [ strix-halo.overlays.default ];
extraModules = [ ./sdr.nix ./syncthing.nix ]; extraModules = [ ./sdr.nix ./syncthing.nix ];
extraSpecialArgs = {
strix-halo-pkgs = strix-halo.packages.x86_64-linux;
};
}; };
nixosConfigurations.anvil = mkHost { nixosConfigurations.anvil = mkHost {

View file

@@ -1,4 +1,4 @@
{ config, pkgs, lib, ... }: { config, pkgs, lib, strix-halo-pkgs, ... }:
let let
# Using nixos-24.05 for bisq-desktop (last stable release with working bisq-desktop) # Using nixos-24.05 for bisq-desktop (last stable release with working bisq-desktop)
@@ -15,13 +15,15 @@ in
./forgejo.nix ./forgejo.nix
./radicale.nix ./radicale.nix
./ollama.nix ./ollama.nix
# ./llama-server.nix # disabled: source build broken (LLVM 22 vs 19 mismatch in strix-halo overlay)
# ./docuseal.nix # ./docuseal.nix
]; ];
environment.systemPackages = with pkgs; [ environment.systemPackages = with pkgs; [
bisqPkgs.bisq-desktop # v1.9.15-1.9.17 from nixos-24.05 bisqPkgs.bisq-desktop # v1.9.15-1.9.17 from nixos-24.05
bisq2 bisq2
llamacpp-rocm-bin-gfx1151 llamacpp-rocm-bin-gfx1151 # prebuilt b1025; source build broken (LLVM mismatch)
# strix-halo-pkgs.llamacpp-rocm-gfx1151 # source-built, re-enable when overlay fixes LLVM 22/19 mismatch
lmstudio lmstudio
]; ];

View file

@@ -0,0 +1,47 @@
{ config, pkgs, lib, strix-halo-pkgs, ... }:
{
  # Systemd service for llama-server with GLM-4.7-Flash
  # Replaces Calvin's Docker-based setup
  #
  # NOTE(review): this module is not imported by configuration.nix yet (its
  # import line is commented out pending the strix-halo LLVM fix) — it is
  # staged for when the source build works again.
  systemd.services.llama-server = {
    description = "llama.cpp server (GLM-4.7-Flash)";
    # Wait for basic network setup, start at boot via the default target.
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];
    environment = {
      # Force the GFX version the HSA/ROCm runtime reports; 11.5.1 matches
      # the gfx1151 target the package below is built for.
      HSA_OVERRIDE_GFX_VERSION = "11.5.1";
    };
    serviceConfig = {
      # Source-built llamacpp with ROCm for gfx1151, tracks flake's llama-cpp input (b7984)
      # NOTE(review): trailing backslashes are systemd line continuations; a
      # Nix ''-string drops the leading newline, so the rendered ExecStart=
      # value starts directly with the store path — verify the generated
      # unit once the module is enabled.
      ExecStart = ''
        ${strix-halo-pkgs.llamacpp-rocm-gfx1151}/bin/llama-server \
        -m /srv/llama/models/GLM-4.7-Flash-Q4_K_S.gguf \
        --fa \
        -c 16384 \
        --port 25566 \
        --host 0.0.0.0 \
        --jinja \
        --chat-template-file /srv/llama/templates/glminstruct.template
      '';
      Restart = "on-failure";
      RestartSec = 5;
      # Run as a dedicated user
      DynamicUser = true;
      # Writable state under /var/lib/llama-server, owned by the dynamic user
      StateDirectory = "llama-server";
      # Read-only access to model and template files
      ReadOnlyPaths = [ "/srv/llama" ];
    };
  };
  # Ensure directories exist (the model and template files themselves are not
  # provisioned here and must be placed under /srv/llama out of band)
  systemd.tmpfiles.rules = [
    "d /srv/llama 0755 root root -"
    "d /srv/llama/models 0755 root root -"
    "d /srv/llama/templates 0755 root root -"
  ];
  # Must match the --port argument in ExecStart above.
  networking.firewall.allowedTCPPorts = [ 25566 ];
}