{ config, pkgs, lib, strix-halo-pkgs, ... }:

{
  # Systemd service for llama-server with GLM-4.7-Flash
  # Replaces Calvin's Docker-based setup
  systemd.services.llama-server = {
    description = "llama.cpp server (GLM-4.7-Flash)";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];

    environment = {
      # Make the ROCm HSA runtime treat the GPU as gfx version 11.5.1
      # (matches the gfx1151 target the package below is built for).
      HSA_OVERRIDE_GFX_VERSION = "11.5.1";
    };

    serviceConfig = {
      # Source-built llamacpp with ROCm for gfx1151, tracks flake's llama-cpp input (b7984)
      # NOTE: systemd unit files accept trailing-backslash line continuation,
      # so this multi-line ExecStart is written verbatim into the unit.
      ExecStart = ''
        ${strix-halo-pkgs.llamacpp-rocm-gfx1151}/bin/llama-server \
          -m /srv/llama/models/GLM-4.7-Flash-Q4_K_S.gguf \
          --fa \
          -c 16384 \
          --port 25566 \
          --host 0.0.0.0 \
          --jinja \
          --chat-template-file /srv/llama/templates/glminstruct.template
      '';
      Restart = "on-failure";
      RestartSec = 5;

      # Run as a dedicated user
      DynamicUser = true;
      StateDirectory = "llama-server";

      # Read-only access to model and template files
      # (directories are created below; the model/template files themselves
      # are expected to be provisioned out-of-band under /srv/llama)
      ReadOnlyPaths = [ "/srv/llama" ];
    };
  };

  # Ensure directories exist
  systemd.tmpfiles.rules = [
    "d /srv/llama 0755 root root -"
    "d /srv/llama/models 0755 root root -"
    "d /srv/llama/templates 0755 root root -"
  ];

  # Keep in sync with --port in ExecStart above.
  networking.firewall.allowedTCPPorts = [ 25566 ];
}