feat: Prepare for llama-server when ROCm is fixed upstream
This commit is contained in:
parent
b717ea973a
commit
cfd3aeecaf
4 changed files with 61 additions and 6 deletions
|
|
@ -1,4 +1,4 @@
|
|||
{ config, pkgs, lib, ... }:
|
||||
{ config, pkgs, lib, strix-halo-pkgs, ... }:
|
||||
|
||||
let
|
||||
# Using nixos-24.05 for bisq-desktop (last stable release with working bisq-desktop)
|
||||
|
|
@ -15,13 +15,15 @@ in
|
|||
./forgejo.nix
|
||||
./radicale.nix
|
||||
./ollama.nix
|
||||
# ./llama-server.nix # disabled: source build broken (LLVM 22 vs 19 mismatch in strix-halo overlay)
|
||||
# ./docuseal.nix
|
||||
];
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
bisqPkgs.bisq-desktop # v1.9.15-1.9.17 from nixos-24.05
|
||||
bisq2
|
||||
llamacpp-rocm-bin-gfx1151
|
||||
llamacpp-rocm-bin-gfx1151 # prebuilt b1025; source build broken (LLVM mismatch)
|
||||
# strix-halo-pkgs.llamacpp-rocm-gfx1151 # source-built, re-enable when overlay fixes LLVM 22/19 mismatch
|
||||
lmstudio
|
||||
];
|
||||
|
||||
|
|
|
|||
47
hosts/crossbox/llama-server.nix
Normal file
47
hosts/crossbox/llama-server.nix
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
{ config, pkgs, lib, strix-halo-pkgs, ... }:

{
  # Native systemd unit for llama.cpp's HTTP server running GLM-4.7-Flash.
  # Replaces Calvin's Docker-based setup with a NixOS-managed service.
  systemd.services.llama-server = {
    description = "llama.cpp server (GLM-4.7-Flash)";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];

    environment = {
      # Force ROCm to treat the GPU as gfx1151 (Strix Halo); without the
      # override the runtime may not recognize the device.
      HSA_OVERRIDE_GFX_VERSION = "11.5.1";
    };

    serviceConfig = {
      # Source-built llamacpp with ROCm for gfx1151; tracks the flake's
      # llama-cpp input (b7984).
      ExecStart = ''
        ${strix-halo-pkgs.llamacpp-rocm-gfx1151}/bin/llama-server \
          -m /srv/llama/models/GLM-4.7-Flash-Q4_K_S.gguf \
          --fa \
          -c 16384 \
          --port 25566 \
          --host 0.0.0.0 \
          --jinja \
          --chat-template-file /srv/llama/templates/glminstruct.template
      '';
      Restart = "on-failure";
      RestartSec = 5;

      # Transient unprivileged user; /var/lib/llama-server is provisioned
      # and owned by systemd for any mutable state.
      DynamicUser = true;
      StateDirectory = "llama-server";

      # Model and template files are mounted read-only into the service.
      ReadOnlyPaths = [ "/srv/llama" ];
    };
  };

  # Pre-create the model/template directory tree at boot.
  systemd.tmpfiles.rules = [
    "d /srv/llama 0755 root root -"
    "d /srv/llama/models 0755 root root -"
    "d /srv/llama/templates 0755 root root -"
  ];

  # Expose the llama-server HTTP port on the LAN.
  networking.firewall.allowedTCPPorts = [ 25566 ];
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue