diff --git a/internal/container/jump_server.go b/internal/container/jump_server.go index 0ce3201..73f2669 100644 --- a/internal/container/jump_server.go +++ b/internal/container/jump_server.go @@ -102,6 +102,58 @@ func DeleteJumpServerAccount(username string, verbose bool) error { return nil } +// EnsureJumpServerAccount creates a host-level user with containarium-shell +// as the login shell, enabling SSH access through sshpiper into the user's +// Incus container. This is called automatically when a container is created. +// It is idempotent — if the account already exists, it just ensures the shell +// and permissions are correct. +func EnsureJumpServerAccount(username string) error { + if !isValidUsername(username) { + return fmt.Errorf("invalid username: %s", username) + } + + shellPath := "/usr/local/bin/containarium-shell" + + if userExists(username) { + // Ensure shell is containarium-shell + // #nosec G204 -- username validated by isValidUsername above (alphanumeric, dash, underscore only) + _ = exec.Command("usermod", "-s", shellPath, username).Run() + return nil + } + + // Create user with containarium-shell + // #nosec G204 -- username validated by isValidUsername above + if err := exec.Command("useradd", "-m", "-s", shellPath, + "-c", fmt.Sprintf("Containarium user - %s", username), + username).Run(); err != nil { + return fmt.Errorf("useradd failed: %w", err) + } + + // Unlock account (useradd creates locked accounts, sshd rejects them) + // #nosec G204 -- username validated by isValidUsername above + _ = exec.Command("passwd", "-d", username).Run() + + // Set home dir permissions (sshd requires 755 or stricter) + _ = os.Chmod(fmt.Sprintf("/home/%s", username), 0755) // #nosec G302 -- sshd requires home dir to be world-readable + + // Create .ssh dir + sshDir := fmt.Sprintf("/home/%s/.ssh", username) + if err := os.MkdirAll(sshDir, 0700); err != nil { + return fmt.Errorf("failed to create .ssh dir: %w", err) + } + // #nosec G204 -- username validated 
by isValidUsername above + _ = exec.Command("chown", "-R", username+":"+username, sshDir).Run() + + // Sudoers entry for incus access (containarium-shell needs it) + sudoersEntry := fmt.Sprintf("%s ALL=(root) NOPASSWD: /usr/bin/incus\n", username) + sudoersPath := fmt.Sprintf("/etc/sudoers.d/containarium-%s", username) + if err := os.WriteFile(sudoersPath, []byte(sudoersEntry), 0440); err != nil { // #nosec G306 -- sudoers requires 0440 + return fmt.Errorf("failed to write sudoers: %w", err) + } + + return nil +} + // isValidUsername checks if username contains only allowed characters func isValidUsername(username string) bool { if len(username) == 0 || len(username) > 32 { diff --git a/internal/sentinel/binaryserver.go b/internal/sentinel/binaryserver.go index 9c24bae..fcabad6 100644 --- a/internal/sentinel/binaryserver.go +++ b/internal/sentinel/binaryserver.go @@ -1,8 +1,11 @@ package sentinel import ( + "crypto/sha256" + "encoding/hex" "encoding/json" "fmt" + "io" "log" "net/http" "net/http/httputil" @@ -43,6 +46,21 @@ func StartBinaryServer(port int, manager *Manager) (stop func(), err error) { w.Header().Set("Content-Disposition", "attachment; filename=containarium") http.ServeFile(w, r, binaryPath) }) + mux.HandleFunc("/containarium/checksum", func(w http.ResponseWriter, r *http.Request) { + f, err := os.Open(binaryPath) + if err != nil { + http.Error(w, "binary not found", http.StatusInternalServerError) + return + } + defer f.Close() + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + http.Error(w, "checksum error", http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "text/plain") + fmt.Fprint(w, hex.EncodeToString(h.Sum(nil))) + }) mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) w.Write([]byte("ok")) diff --git a/internal/sentinel/keysync.go b/internal/sentinel/keysync.go index c58d5f8..a9a1fd1 100644 --- a/internal/sentinel/keysync.go +++ 
// Timing and size limits used by the auto-updater.
const (
	autoUpdateStartupDelay    = 2 * time.Minute  // grace period before the first check
	autoUpdateDefaultInterval = 5 * time.Minute  // used when the configured interval is not positive
	autoUpdateChecksumTimeout = 10 * time.Second // deadline for the checksum request
	autoUpdateDownloadTimeout = 5 * time.Minute  // deadline for the binary download
	autoUpdateMaxChecksumBody = 1 << 10          // a hex SHA-256 is 64 bytes; cap what we read
)

// autoUpdateHTTPClient is shared by all updater requests. It has no
// client-level timeout; every request carries its own context deadline.
var autoUpdateHTTPClient = &http.Client{}

// AutoUpdater periodically checks the sentinel for a newer binary and
// self-updates if a new version is available.
//
// NOTE(review): the checksum and the binary are fetched from the same
// sentinel endpoint, so the checksum only detects transfer corruption. It
// does not authenticate the binary against a compromised or spoofed sentinel.
type AutoUpdater struct {
	sentinelURL string // e.g. "http://10.130.0.13:8888"
	binaryPath  string // e.g. "/usr/local/bin/containarium"
	interval    time.Duration
}

// NewAutoUpdater creates a new auto-updater that polls sentinelURL every
// interval and replaces the binary at binaryPath when the checksums differ.
func NewAutoUpdater(sentinelURL, binaryPath string, interval time.Duration) *AutoUpdater {
	return &AutoUpdater{
		sentinelURL: sentinelURL,
		binaryPath:  binaryPath,
		interval:    interval,
	}
}

// Run starts the auto-update loop. It waits for a startup delay, performs a
// check, and then checks again once per interval. Blocks until ctx is
// cancelled.
func (u *AutoUpdater) Run(ctx context.Context) {
	interval := u.interval
	if interval <= 0 {
		// time.NewTicker panics on a non-positive duration.
		interval = autoUpdateDefaultInterval
	}
	log.Printf("[auto-update] started (check interval: %s, sentinel: %s)", interval, u.sentinelURL)

	// Wait before the first check to let the daemon fully start.
	startup := time.NewTimer(autoUpdateStartupDelay)
	defer startup.Stop()
	select {
	case <-startup.C:
	case <-ctx.Done():
		return
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		if err := u.checkAndUpdate(ctx); err != nil {
			log.Printf("[auto-update] check failed: %v", err)
		}

		select {
		case <-ctx.Done():
			log.Printf("[auto-update] stopped")
			return
		case <-ticker.C:
		}
	}
}

// checkAndUpdate compares the sentinel's checksum with the local binary's and,
// when they differ, downloads, verifies and installs the new binary and then
// schedules a service restart. It returns nil when already up to date or when
// the update was installed.
func (u *AutoUpdater) checkAndUpdate(ctx context.Context) error {
	// 1. Get remote checksum (validated and normalised by getRemoteChecksum).
	remoteChecksum, err := u.getRemoteChecksum(ctx)
	if err != nil {
		return fmt.Errorf("get remote checksum: %w", err)
	}

	// 2. Get local checksum.
	localChecksum, err := u.getLocalChecksum()
	if err != nil {
		return fmt.Errorf("get local checksum: %w", err)
	}

	// 3. Compare.
	if remoteChecksum == localChecksum {
		return nil // up to date
	}

	log.Printf("[auto-update] new version detected (local=%s..., remote=%s...)",
		shortChecksum(localChecksum), shortChecksum(remoteChecksum))

	// 4. Download the new binary next to the current one so the final rename
	// stays on the same filesystem. The deferred remove cleans the staging
	// file on every failure path; after a successful rename the path no
	// longer exists and the remove is a harmless no-op.
	tmpPath := u.binaryPath + ".new"
	defer func() { _ = os.Remove(tmpPath) }()

	if err := u.downloadBinary(ctx, tmpPath); err != nil {
		return fmt.Errorf("download: %w", err)
	}

	// 5. Verify the downloaded binary's checksum.
	dlChecksum, err := checksumFile(tmpPath)
	if err != nil {
		return fmt.Errorf("verify download: %w", err)
	}
	if dlChecksum != remoteChecksum {
		return fmt.Errorf("checksum mismatch after download (got %s, want %s)",
			shortChecksum(dlChecksum), shortChecksum(remoteChecksum))
	}

	// 6. Make executable.
	if err := os.Chmod(tmpPath, 0755); err != nil { // #nosec G302 -- executable binary needs 0755
		return fmt.Errorf("chmod: %w", err)
	}

	// 7. Replace: rename the running binary to .old, move the new one in place.
	oldPath := u.binaryPath + ".old"
	_ = os.Remove(oldPath) // a missing .old is expected; a real problem surfaces in the rename below
	if err := os.Rename(u.binaryPath, oldPath); err != nil {
		return fmt.Errorf("rename old binary: %w", err)
	}
	if err := os.Rename(tmpPath, u.binaryPath); err != nil {
		// Put the old binary back so the service can still start.
		if restoreErr := os.Rename(oldPath, u.binaryPath); restoreErr != nil {
			return fmt.Errorf("rename new binary: %w (restoring old binary also failed: %v)", err, restoreErr)
		}
		return fmt.Errorf("rename new binary: %w", err)
	}

	log.Printf("[auto-update] binary replaced successfully, restarting...")

	// 8. Restart services via systemd (async — this process will be killed).
	go restartAfterUpdate()

	return nil
}

// restartAfterUpdate restarts the tunnel service (if active) and then the
// daemon. Restarting the daemon terminates the calling process.
func restartAfterUpdate() {
	time.Sleep(1 * time.Second)
	// Restart the tunnel first (it uses the same binary); it exists on peers only.
	if exec.Command("systemctl", "is-active", "containarium-tunnel").Run() == nil { // #nosec G204
		log.Printf("[auto-update] restarting containarium-tunnel...")
		_ = exec.Command("systemctl", "restart", "containarium-tunnel").Run() // #nosec G204
	}
	// Restart the daemon (this kills us); fall back to the alternate unit name.
	log.Printf("[auto-update] restarting containarium...")
	if err := exec.Command("systemctl", "restart", "containarium").Run(); err != nil { // #nosec G204
		_ = exec.Command("systemctl", "restart", "containarium-daemon").Run() // #nosec G204
	}
}

// getRemoteChecksum fetches the sentinel's checksum and returns it as a
// lower-case, 64-character hex SHA-256. Surrounding whitespace is ignored; an
// empty or malformed response is an error.
func (u *AutoUpdater) getRemoteChecksum(ctx context.Context) (string, error) {
	ctx, cancel := context.WithTimeout(ctx, autoUpdateChecksumTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.sentinelURL+"/containarium/checksum", nil)
	if err != nil {
		return "", err
	}
	resp, err := autoUpdateHTTPClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("status %d", resp.StatusCode)
	}
	body, err := io.ReadAll(io.LimitReader(resp.Body, autoUpdateMaxChecksumBody))
	if err != nil {
		return "", err
	}

	// Sscan skips leading whitespace and stops at the next space or newline,
	// so a trailing "\n" from the server does not cause a false mismatch.
	var sum string
	if _, err := fmt.Sscan(string(body), &sum); err != nil {
		return "", fmt.Errorf("empty checksum response: %w", err)
	}
	raw, err := hex.DecodeString(sum)
	if err != nil || len(raw) != sha256.Size {
		return "", fmt.Errorf("malformed checksum %q", sum)
	}
	// Re-encode so the comparison with the local checksum is case-insensitive.
	return hex.EncodeToString(raw), nil
}

// getLocalChecksum returns the hex SHA-256 of the currently installed binary.
func (u *AutoUpdater) getLocalChecksum() (string, error) {
	return checksumFile(u.binaryPath)
}

// downloadBinary streams the sentinel's binary into destPath, truncating any
// existing file. The caller is responsible for removing destPath on error.
func (u *AutoUpdater) downloadBinary(ctx context.Context, destPath string) error {
	ctx, cancel := context.WithTimeout(ctx, autoUpdateDownloadTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.sentinelURL+"/containarium", nil)
	if err != nil {
		return err
	}
	resp, err := autoUpdateHTTPClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("status %d", resp.StatusCode)
	}

	f, err := os.Create(destPath) // #nosec G304 -- destPath is a temp file derived from trusted binaryPath config
	if err != nil {
		return err
	}
	if _, err := io.Copy(f, resp.Body); err != nil {
		_ = f.Close() // the copy error is the one worth reporting
		return err
	}
	// Flush to disk before the caller renames the file over the live binary.
	if err := f.Sync(); err != nil {
		_ = f.Close()
		return err
	}
	return f.Close()
}

// checksumFile returns the lower-case hex SHA-256 of the file at path.
func checksumFile(path string) (string, error) {
	f, err := os.Open(path) // #nosec G304 -- path is the binary path from trusted config
	if err != nil {
		return "", err
	}
	defer f.Close()
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}

// shortChecksum returns at most the first 12 characters of sum for logging,
// without panicking on shorter input.
func shortChecksum(sum string) string {
	if len(sum) > 12 {
		return sum[:12]
	}
	return sum
}
+ go func() { + if err := container.EnsureJumpServerAccount(req.Username); err != nil { + log.Printf("Warning: failed to create jump server account for %s: %v", req.Username, err) + } else { + log.Printf("Jump server account ensured for %s", req.Username) + } + }() + return &pb.CreateContainerResponse{ Container: protoContainer, Message: fmt.Sprintf("Container %s created successfully", info.Name), diff --git a/internal/server/dual_server.go b/internal/server/dual_server.go index 8bb4c4f..4e3935e 100644 --- a/internal/server/dual_server.go +++ b/internal/server/dual_server.go @@ -279,7 +279,7 @@ func NewDualServer(config *DualServerConfig) (*DualServer, error) { // Add DNS override so containers resolve *.baseDomain to Caddy // internally instead of going through the external IP (hairpin NAT). dnsOverride := fmt.Sprintf("address=/%s/%s", config.BaseDomain, caddyIP) - if out, err := exec.Command("incus", "network", "set", "incusbr0", "raw.dnsmasq", dnsOverride).CombinedOutput(); err != nil { + if out, err := exec.Command("incus", "network", "set", "incusbr0", "raw.dnsmasq", dnsOverride).CombinedOutput(); err != nil { // #nosec G204 -- dnsOverride is constructed from trusted BaseDomain and CaddyIP config values log.Printf("Warning: failed to set DNS override for %s: %v (%s)", config.BaseDomain, err, string(out)) } else { log.Printf("DNS override: *.%s -> %s (internal hairpin)", config.BaseDomain, caddyIP) @@ -1163,6 +1163,12 @@ func (ds *DualServer) Start(ctx context.Context) error { } }() + // Start auto-updater if sentinel URL is configured + if ds.config.SentinelURL != "" { + updater := NewAutoUpdater(ds.config.SentinelURL, "/usr/local/bin/containarium", 5*time.Minute) + go updater.Run(ctx) + } + // Start gRPC server grpcAddr := fmt.Sprintf("%s:%d", ds.config.GRPCAddress, ds.config.GRPCPort) lis, err := net.Listen("tcp", grpcAddr) diff --git a/internal/server/security_server.go b/internal/server/security_server.go index 7c889e3..52bf3f2 100644 --- 
a/internal/server/security_server.go +++ b/internal/server/security_server.go @@ -70,7 +70,7 @@ func (s *SecurityServer) ListClamavReports(ctx context.Context, req *pb.ListClam authToken := extractAuthToken(ctx) peerReports := s.fetchPeerReports(authToken, req) reports = append(reports, peerReports...) - totalCount += int32(len(peerReports)) + totalCount += int32(len(peerReports)) // #nosec G115 -- value bounded by container/scan count } return &pb.ListClamavReportsResponse{ @@ -205,7 +205,7 @@ func (s *SecurityServer) GetClamavSummary(ctx context.Context, req *pb.GetClamav return &pb.GetClamavSummaryResponse{ Containers: summaries, - TotalContainers: int32(len(summaries)), + TotalContainers: int32(len(summaries)), // #nosec G115 -- value bounded by container count CleanContainers: cleanCount, InfectedContainers: infectedCount, NeverScannedContainers: neverScanned, @@ -313,7 +313,7 @@ func (s *SecurityServer) TriggerClamavScan(ctx context.Context, req *pb.TriggerC peerCount = s.triggerPeerScans(authToken) } - totalCount := int32(count) + peerCount + totalCount := int32(count) + peerCount // #nosec G115 -- value bounded by container count return &pb.TriggerClamavScanResponse{ Message: fmt.Sprintf("%d scan jobs queued (%d local, %d on peers)", totalCount, count, peerCount), ScannedCount: totalCount, @@ -371,7 +371,7 @@ func (s *SecurityServer) GetScanStatus(ctx context.Context, req *pb.GetScanStatu ContainerName: j.ContainerName, Username: j.Username, Status: j.Status, - RetryCount: int32(j.RetryCount), + RetryCount: int32(j.RetryCount), // #nosec G115 -- retry count is a small integer ErrorMessage: j.ErrorMessage, CreatedAt: j.CreatedAt.Format(time.RFC3339), BackendId: s.localBackendID, diff --git a/scripts/deploy-binary.sh b/scripts/deploy-binary.sh new file mode 100755 index 0000000..968fdfd --- /dev/null +++ b/scripts/deploy-binary.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# +# Deploy containarium binary to all instances. 
#!/bin/bash
#
# Deploy containarium binary to all instances.
#
# Usage: ./scripts/deploy-binary.sh [--build]
#
# This script:
#   1. Optionally builds the linux binary (--build)
#   2. Uploads to the sentinel (which serves it to peers)
#   3. Deploys on the primary GCE VM
#   4. Uploads the binary to each peer and prints the install command
#      (peers need interactive sudo, so the final step there is manual)
#
# Prerequisites:
#   - gcloud configured with access to footprintai-prod
#   - SSH access to peer nodes (fts-5900x, fts-13700k)
#

set -euo pipefail

BINARY="bin/containarium-linux-amd64"
PROJECT="footprintai-prod"
ZONE="us-west1-a"
PRIMARY_VM="containarium-jump-usw1"
SENTINEL_VM="containarium-jump-usw1-sentinel"
PEERS=("fts-5900x" "fts-13700k")

usage() {
    echo "Usage: $0 [--build]"
}

# Parse flags. Unknown flags are rejected: a typo such as --biuld would
# otherwise silently deploy whatever stale binary is already on disk.
BUILD=false
for arg in "$@"; do
    case "$arg" in
        --build) BUILD=true ;;
        -h|--help) usage; exit 0 ;;
        *)
            echo "Error: unknown option: $arg" >&2
            usage >&2
            exit 1
            ;;
    esac
done

# 1. Build if requested
if [[ "$BUILD" == true ]]; then
    echo "==> Building binary..."
    make build-linux
fi

if [[ ! -f "$BINARY" ]]; then
    echo "Error: $BINARY not found. Run with --build or 'make build-linux' first." >&2
    exit 1
fi

BINARY_SIZE=$(du -h "$BINARY" | cut -f1)
echo "==> Binary: $BINARY ($BINARY_SIZE)"

# 2. Upload to sentinel (its sshd listens on port 2222)
echo "==> Uploading to sentinel..."
gcloud compute scp "$BINARY" "$SENTINEL_VM:/tmp/containarium" \
    --zone="$ZONE" --project="$PROJECT" --tunnel-through-iap --scp-flag="-P 2222"
gcloud compute ssh "$SENTINEL_VM" --zone="$ZONE" --project="$PROJECT" \
    --tunnel-through-iap --ssh-flag="-p 2222" \
    --command="sudo cp /tmp/containarium /usr/local/bin/containarium && sudo chmod +x /usr/local/bin/containarium && sudo systemctl restart containarium-sentinel"
echo "    Sentinel updated and restarted"

# 3. Deploy on primary
echo "==> Deploying on primary ($PRIMARY_VM)..."
gcloud compute scp "$BINARY" "$PRIMARY_VM:/tmp/containarium" \
    --zone="$ZONE" --project="$PROJECT" --tunnel-through-iap
gcloud compute ssh "$PRIMARY_VM" --zone="$ZONE" --project="$PROJECT" \
    --tunnel-through-iap \
    --command="sudo systemctl stop containarium && sleep 1 && sudo cp /tmp/containarium /usr/local/bin/containarium && sudo chmod +x /usr/local/bin/containarium && sudo systemctl start containarium"
echo "    Primary updated and restarted"

# 4. Upload to peers. A failed upload is not fatal, but it is remembered so
# the summary below does not claim success for a peer that was skipped.
UPLOADED_PEERS=()
FAILED_PEERS=()
for peer in "${PEERS[@]}"; do
    echo "==> Deploying on peer ($peer)..."
    # scp's stderr is left visible so the operator can see why it failed.
    if ! scp "$BINARY" "$peer:/tmp/containarium"; then
        echo "    Warning: failed to upload to $peer (skipping)" >&2
        FAILED_PEERS+=("$peer")
        continue
    fi
    UPLOADED_PEERS+=("$peer")
    # Peers need interactive sudo — print the command for the user
    echo "    Binary uploaded to $peer:/tmp/containarium"
    echo "    Run on $peer:"
    echo "      sudo systemctl stop containarium-tunnel && sudo systemctl stop containarium && sleep 1 && sudo cp /tmp/containarium /usr/local/bin/containarium && sudo chmod +x /usr/local/bin/containarium && sudo systemctl start containarium && sudo systemctl start containarium-tunnel"
done

echo ""
echo "=== Deploy complete ==="
echo "  Sentinel: updated"
echo "  Primary:  updated"
if [[ ${#UPLOADED_PEERS[@]} -gt 0 ]]; then
    echo "  Peers:    binary uploaded to ${UPLOADED_PEERS[*]}, run the printed commands with sudo"
fi
if [[ ${#FAILED_PEERS[@]} -gt 0 ]]; then
    echo "  Peers:    upload FAILED for ${FAILED_PEERS[*]}" >&2
    exit 1
fi
#!/bin/bash
#
# Setup a jump server user on a peer node that proxies SSH into an Incus container.
#
# Usage: sudo bash setup-peer-user.sh <username> [sentinel_pubkey]
#
# This creates:
#   1. containarium-shell script (if not already installed)
#   2. Host-level user with containarium-shell as login shell
#   3. Authorized keys (sentinel upstream key + user key from container)
#   4. Sudoers entry for passwordless incus access
#

set -euo pipefail

USERNAME="${1:-}"
SENTINEL_PUBKEY="${2:-ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM0CMewXb9jrvqnAX+Mk+PmaNd5vAyCoiR70HtLuO57z root@containarium-jump-usw1-sentinel}"

if [[ -z "$USERNAME" ]]; then
    echo "Usage: sudo $0 <username> [sentinel_pubkey]" >&2
    echo "Example: sudo $0 apibox-dev-3090" >&2
    exit 1
fi

# The username ends up in file paths and in a sudoers rule written as root,
# so only plain account names are accepted (no slashes, spaces, dots, or a
# leading dash that a command could mistake for an option).
if [[ ! "$USERNAME" =~ ^[A-Za-z0-9_][A-Za-z0-9_-]{0,31}$ ]]; then
    echo "Error: invalid username '$USERNAME' (allowed: letters, digits, '_' and '-', max 32 chars)" >&2
    exit 1
fi

if [[ $EUID -ne 0 ]]; then
    echo "Error: must run as root (use sudo)" >&2
    exit 1
fi

CONTAINER="${USERNAME}-container"

# 1. Install containarium-shell if missing
if [[ ! -f /usr/local/bin/containarium-shell ]]; then
    echo "==> Installing containarium-shell..."
    cat > /usr/local/bin/containarium-shell << 'SHELL'
#!/bin/bash
# containarium-shell: Proxy SSH sessions into Incus containers
USERNAME="$(whoami)"
CONTAINER="${USERNAME}-container"

if ! sudo incus info "$CONTAINER" &>/dev/null; then
    echo "Error: Container $CONTAINER not found" >&2
    exit 1
fi

STATE=$(sudo incus info "$CONTAINER" 2>/dev/null | grep "^Status:" | awk '{print $2}')
if [ "$STATE" != "RUNNING" ]; then
    echo "Error: Container $CONTAINER is not running (status: $STATE)" >&2
    exit 1
fi

COMMAND="${SSH_ORIGINAL_COMMAND}"
if [ -z "$COMMAND" ] && [ "$1" = "-c" ]; then
    COMMAND="$2"
fi

if [ -n "$COMMAND" ]; then
    exec sudo incus exec "$CONTAINER" --mode non-interactive -- su - "$USERNAME" -c "$COMMAND"
fi

exec sudo incus exec "$CONTAINER" -t -- su -l "$USERNAME"
SHELL
    chmod 755 /usr/local/bin/containarium-shell
    echo "    containarium-shell installed"
else
    echo "==> containarium-shell already installed"
fi

# 2. Create or update host user
if id "$USERNAME" &>/dev/null; then
    echo "==> User $USERNAME exists, updating shell..."
    usermod -s /usr/local/bin/containarium-shell "$USERNAME"
else
    echo "==> Creating user $USERNAME..."
    useradd -m -s /usr/local/bin/containarium-shell "$USERNAME"
fi

# Unlock the account (useradd creates locked accounts by default, sshd rejects
# them). A "*" password field permits key-based login but matches no password;
# `passwd -d` would instead leave an EMPTY password on an account that has
# passwordless sudo for incus.
usermod -p '*' "$USERNAME"

# A pre-existing user may not live under /home, so ask the passwd database.
HOME_DIR="$(getent passwd "$USERNAME" | cut -d: -f6)"
if [[ -z "$HOME_DIR" || ! -d "$HOME_DIR" ]]; then
    echo "Error: home directory for $USERNAME not found" >&2
    exit 1
fi
chmod 755 "$HOME_DIR"

# 3. Setup authorized_keys
echo "==> Setting up SSH keys..."
mkdir -p "$HOME_DIR/.ssh"

# Start with sentinel upstream key (this replaces any previous file)
echo "$SENTINEL_PUBKEY" > "$HOME_DIR/.ssh/authorized_keys"

# Try to copy user's SSH key from the container
if incus info "$CONTAINER" &>/dev/null; then
    CONTAINER_KEY=$(incus exec "$CONTAINER" -- cat "/home/$USERNAME/.ssh/authorized_keys" 2>/dev/null | grep -v "sentinel" || true)
    if [[ -n "$CONTAINER_KEY" ]]; then
        echo "$CONTAINER_KEY" >> "$HOME_DIR/.ssh/authorized_keys"
        echo "    Added user key from container"
    else
        echo "    Warning: no user SSH key found in container, add manually later"
    fi
fi

chown -R "$USERNAME:$USERNAME" "$HOME_DIR/.ssh"
chmod 700 "$HOME_DIR/.ssh"
chmod 600 "$HOME_DIR/.ssh/authorized_keys"

# 4. Sudoers for incus access
# Build the rule in a temp file and syntax-check it before installing: a
# broken file inside /etc/sudoers.d can disable sudo for the whole host.
echo "==> Setting up sudoers..."
SUDOERS_FILE="/etc/sudoers.d/containarium-$USERNAME"
SUDOERS_TMP="$(mktemp)"
trap 'rm -f "$SUDOERS_TMP"' EXIT
echo "$USERNAME ALL=(root) NOPASSWD: /usr/bin/incus" > "$SUDOERS_TMP"
visudo -cf "$SUDOERS_TMP" >/dev/null
install -m 0440 -o root -g root "$SUDOERS_TMP" "$SUDOERS_FILE"

echo ""
echo "=== Done ==="
echo "  User:      $USERNAME"
echo "  Shell:     /usr/local/bin/containarium-shell"
echo "  Container: $CONTAINER"
echo "  Keys:      $(wc -l < "$HOME_DIR/.ssh/authorized_keys") entries"
Tunnel client connecting to sentinel +# +# Prerequisites: +# - Incus installed and initialized +# - /tmp/containarium binary uploaded +# - Run as root: sudo bash setup-peer.sh --spot-id +# +# Usage: +# sudo bash setup-peer.sh --spot-id fts-13700k-gpu [--network-subnet 10.0.3.1/24] [--tunnel-token TOKEN] +# + +set -euo pipefail + +# Defaults +SPOT_ID="" +NETWORK_SUBNET="10.0.3.1/24" +TUNNEL_TOKEN="82ae3301b4650ab2d0026cf0f6a5b5b78dfcc9e022922ac23858d1609913aa7f" +SENTINEL_ADDR="containarium.kafeido.app:443" +BINARY_SRC="/tmp/containarium" +BINARY_DST="/usr/local/bin/containarium" + +while [[ $# -gt 0 ]]; do + case "$1" in + --spot-id) SPOT_ID="$2"; shift 2 ;; + --network-subnet) NETWORK_SUBNET="$2"; shift 2 ;; + --tunnel-token) TUNNEL_TOKEN="$2"; shift 2 ;; + --sentinel-addr) SENTINEL_ADDR="$2"; shift 2 ;; + --help|-h) + echo "Usage: sudo $0 --spot-id [--network-subnet CIDR] [--tunnel-token TOKEN]" + exit 0 + ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac +done + +if [[ -z "$SPOT_ID" ]]; then + echo "Error: --spot-id is required (e.g., fts-13700k-gpu)" + exit 1 +fi + +if [[ $EUID -ne 0 ]]; then + echo "Error: this script must be run as root (use sudo)" + exit 1 +fi + +echo "==> Setting up Containarium peer node: $SPOT_ID" + +# 1. Install binary +echo "==> Installing containarium binary..." +if [[ ! -f "$BINARY_SRC" ]]; then + echo "Error: $BINARY_SRC not found. Upload it first:" + echo " scp bin/containarium-linux-amd64 :/tmp/containarium" + exit 1 +fi +cp "$BINARY_SRC" "$BINARY_DST" +chmod +x "$BINARY_DST" +echo " Binary installed: $BINARY_DST" + +# 2. Install daemon service +echo "==> Installing daemon service..." +"$BINARY_DST" service install + +# 3. Override daemon config for app-hosting mode +echo "==> Configuring daemon for app-hosting mode..." +mkdir -p /etc/systemd/system/containarium.service.d +cat > /etc/systemd/system/containarium.service.d/override.conf < Installing tunnel service..." 
+cat > /etc/systemd/system/containarium-tunnel.service < Starting services..." +systemctl daemon-reload +systemctl restart containarium +systemctl enable --now containarium-tunnel + +echo "" +echo "=== Setup complete ===" +echo " Daemon: $(systemctl is-active containarium)" +echo " Tunnel: $(systemctl is-active containarium-tunnel)" +echo "" +echo " Logs: journalctl -u containarium -f" +echo " Tunnel: journalctl -u containarium-tunnel -f" +echo " Spot ID: $SPOT_ID"