feat: Prepare for llama-server when ROCm is fixed upstream
This commit is contained in:
parent
b717ea973a
commit
cfd3aeecaf
4 changed files with 61 additions and 6 deletions
7
flake.lock
generated
7
flake.lock
generated
|
|
@ -140,15 +140,16 @@
|
||||||
"llama-cpp": {
|
"llama-cpp": {
|
||||||
"flake": false,
|
"flake": false,
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1755068833,
|
"lastModified": 1770704370,
|
||||||
"narHash": "sha256-U2bNRei5Q+fpMmk0Oc2HVSIY6KSBhgcNNkNhGykpG2c=",
|
"narHash": "sha256-atYUuXBZFbJxmswd694YwHfAWj1NClZ6mXiQbP1ABG8=",
|
||||||
"owner": "ggerganov",
|
"owner": "ggerganov",
|
||||||
"repo": "llama.cpp",
|
"repo": "llama.cpp",
|
||||||
"rev": "bc5182272c373267352bc689e5fca276934bea2d",
|
"rev": "f0bfe54f552f4783588f333b90d73920a57c5096",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "ggerganov",
|
"owner": "ggerganov",
|
||||||
|
"ref": "b7984",
|
||||||
"repo": "llama.cpp",
|
"repo": "llama.cpp",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,15 @@
|
||||||
inputs = {
|
inputs = {
|
||||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
strix-halo.url = "github:hellas-ai/nix-strix-halo";
|
strix-halo.url = "github:hellas-ai/nix-strix-halo";
|
||||||
|
strix-halo.inputs.llama-cpp.url = "github:ggerganov/llama.cpp/b7984";
|
||||||
};
|
};
|
||||||
|
|
||||||
outputs = { self, nixpkgs, strix-halo, ... }:
|
outputs = { self, nixpkgs, strix-halo, ... }:
|
||||||
let
|
let
|
||||||
mkHost = { hostDir, extraModules ? [], overlays ? [] }:
|
mkHost = { hostDir, extraModules ? [], overlays ? [], extraSpecialArgs ? {} }:
|
||||||
nixpkgs.lib.nixosSystem {
|
nixpkgs.lib.nixosSystem {
|
||||||
system = "x86_64-linux";
|
system = "x86_64-linux";
|
||||||
|
specialArgs = extraSpecialArgs;
|
||||||
modules = [
|
modules = [
|
||||||
./configuration.nix
|
./configuration.nix
|
||||||
hostDir
|
hostDir
|
||||||
|
|
@ -22,6 +24,9 @@
|
||||||
hostDir = ./hosts/crossbox;
|
hostDir = ./hosts/crossbox;
|
||||||
overlays = [ strix-halo.overlays.default ];
|
overlays = [ strix-halo.overlays.default ];
|
||||||
extraModules = [ ./sdr.nix ./syncthing.nix ];
|
extraModules = [ ./sdr.nix ./syncthing.nix ];
|
||||||
|
extraSpecialArgs = {
|
||||||
|
strix-halo-pkgs = strix-halo.packages.x86_64-linux;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
nixosConfigurations.anvil = mkHost {
|
nixosConfigurations.anvil = mkHost {
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
{ config, pkgs, lib, ... }:
|
{ config, pkgs, lib, strix-halo-pkgs, ... }:
|
||||||
|
|
||||||
let
|
let
|
||||||
# Using nixos-24.05 for bisq-desktop (last stable release with working bisq-desktop)
|
# Using nixos-24.05 for bisq-desktop (last stable release with working bisq-desktop)
|
||||||
|
|
@ -15,13 +15,15 @@ in
|
||||||
./forgejo.nix
|
./forgejo.nix
|
||||||
./radicale.nix
|
./radicale.nix
|
||||||
./ollama.nix
|
./ollama.nix
|
||||||
|
# ./llama-server.nix # disabled: source build broken (LLVM 22 vs 19 mismatch in strix-halo overlay)
|
||||||
# ./docuseal.nix
|
# ./docuseal.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
bisqPkgs.bisq-desktop # v1.9.15-1.9.17 from nixos-24.05
|
bisqPkgs.bisq-desktop # v1.9.15-1.9.17 from nixos-24.05
|
||||||
bisq2
|
bisq2
|
||||||
llamacpp-rocm-bin-gfx1151
|
llamacpp-rocm-bin-gfx1151 # prebuilt b1025; source build currently broken (LLVM 22 vs 19 mismatch)
|
||||||
|
# strix-halo-pkgs.llamacpp-rocm-gfx1151 # source-built, re-enable when overlay fixes LLVM 22/19 mismatch
|
||||||
lmstudio
|
lmstudio
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
|
||||||
47
hosts/crossbox/llama-server.nix
Normal file
47
hosts/crossbox/llama-server.nix
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
{ config, pkgs, lib, strix-halo-pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
# Systemd service for llama-server with GLM-4.7-Flash
|
||||||
|
# Replaces Calvin's Docker-based setup
|
||||||
|
systemd.services.llama-server = {
|
||||||
|
description = "llama.cpp server (GLM-4.7-Flash)";
|
||||||
|
after = [ "network.target" ];
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
|
||||||
|
environment = {
|
||||||
|
HSA_OVERRIDE_GFX_VERSION = "11.5.1";
|
||||||
|
};
|
||||||
|
|
||||||
|
serviceConfig = {
|
||||||
|
# Source-built llamacpp with ROCm for gfx1151, tracks flake's llama-cpp input (b7984)
|
||||||
|
ExecStart = ''
|
||||||
|
${strix-halo-pkgs.llamacpp-rocm-gfx1151}/bin/llama-server \
|
||||||
|
-m /srv/llama/models/GLM-4.7-Flash-Q4_K_S.gguf \
|
||||||
|
--fa \
|
||||||
|
-c 16384 \
|
||||||
|
--port 25566 \
|
||||||
|
--host 0.0.0.0 \
|
||||||
|
--jinja \
|
||||||
|
--chat-template-file /srv/llama/templates/glminstruct.template
|
||||||
|
'';
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = 5;
|
||||||
|
|
||||||
|
# Run as a transient user that systemd allocates at service start (DynamicUser)
|
||||||
|
DynamicUser = true;
|
||||||
|
StateDirectory = "llama-server";
|
||||||
|
|
||||||
|
# Read-only access to model and template files
|
||||||
|
ReadOnlyPaths = [ "/srv/llama" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# Ensure directories exist
|
||||||
|
systemd.tmpfiles.rules = [
|
||||||
|
"d /srv/llama 0755 root root -"
|
||||||
|
"d /srv/llama/models 0755 root root -"
|
||||||
|
"d /srv/llama/templates 0755 root root -"
|
||||||
|
];
|
||||||
|
|
||||||
|
networking.firewall.allowedTCPPorts = [ 25566 ];
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue