From e3a8f0fc64ae9858e0ee0f2afd8a8db8a3e93164 Mon Sep 17 00:00:00 2001 From: Boris Rybalkin Date: Thu, 21 May 2026 22:34:06 +0100 Subject: [PATCH 1/6] stability: move events log from $SNAP_COMMON to $SNAP_DATA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CLAUDE.md is clear: app state belongs in $SNAP_DATA, never $SNAP_COMMON. $SNAP_DATA is snapshotted on refresh (rolls back transactionally on a failed install); $SNAP_COMMON is shared across revisions and does not roll back — leaving an older binary with newer-format data on rollback. Today our events.jsonl is forward-tolerant, but the convention is there for a reason, and we are one schema change away from breaking rollbacks. Move events to $SNAP_DATA/stability-events.jsonl, plus a one-shot MigrateEventLog at daemon startup that renames any existing file from the old common path (so devices already on rev 2837 don't lose history on refresh). --- backend/cmd/stability/main.go | 8 ++++- backend/ioc/common.go | 2 +- backend/stability/migrate.go | 25 ++++++++++++++ backend/stability/migrate_test.go | 54 +++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 backend/stability/migrate.go create mode 100644 backend/stability/migrate_test.go diff --git a/backend/cmd/stability/main.go b/backend/cmd/stability/main.go index c955ad54..5f6ab1b4 100644 --- a/backend/cmd/stability/main.go +++ b/backend/cmd/stability/main.go @@ -16,11 +16,17 @@ func main() { defer cancel() mem := stability.NewMemInfo("/proc") + dataDir := os.Getenv("SNAP_DATA") + if dataDir == "" { + dataDir = "/var/snap/platform/current" + } commonDir := os.Getenv("SNAP_COMMON") if commonDir == "" { commonDir = "/var/snap/platform/common" } - events := stability.NewEventLog(commonDir + "/stability-events.jsonl") + eventsPath := dataDir + "/stability-events.jsonl" + stability.MigrateEventLog(commonDir+"/stability-events.jsonl", eventsPath, logger) + events := 
stability.NewEventLog(eventsPath) z := stability.NewZram(mem, stability.SwaponSyscall, stability.SwapoffSyscall, events, logger) if err := z.EnsureConfigured(); err != nil { logger.Sugar().Warnf("stability: zram setup failed (continuing): %v", err) diff --git a/backend/ioc/common.go b/backend/ioc/common.go index 274aa376..c91f8e79 100644 --- a/backend/ioc/common.go +++ b/backend/ioc/common.go @@ -573,7 +573,7 @@ func Init(userConfig string, systemConfig string, backupDir string, varDir strin } err = c.Singleton(func() *stability.EventLog { - return stability.NewEventLog("/var/snap/platform/common/stability-events.jsonl") + return stability.NewEventLog("/var/snap/platform/current/stability-events.jsonl") }) if err != nil { return nil, err diff --git a/backend/stability/migrate.go b/backend/stability/migrate.go new file mode 100644 index 00000000..474052c2 --- /dev/null +++ b/backend/stability/migrate.go @@ -0,0 +1,25 @@ +package stability + +import ( + "errors" + "os" + + "go.uber.org/zap" +) + +func MigrateEventLog(oldPath, newPath string, logger *zap.Logger) { + if _, err := os.Stat(newPath); err == nil { + return + } else if !errors.Is(err, os.ErrNotExist) { + logger.Warn("stability: stat new event log failed", zap.Error(err)) + return + } + if _, err := os.Stat(oldPath); err != nil { + return + } + if err := os.Rename(oldPath, newPath); err != nil { + logger.Warn("stability: migrate event log failed", zap.String("from", oldPath), zap.String("to", newPath), zap.Error(err)) + return + } + logger.Info("stability: migrated event log", zap.String("from", oldPath), zap.String("to", newPath)) +} diff --git a/backend/stability/migrate_test.go b/backend/stability/migrate_test.go new file mode 100644 index 00000000..aa4a2920 --- /dev/null +++ b/backend/stability/migrate_test.go @@ -0,0 +1,54 @@ +package stability + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func 
TestMigrateMovesWhenOnlyOldExists(t *testing.T) { + dir := t.TempDir() + old := filepath.Join(dir, "common", "events.jsonl") + newP := filepath.Join(dir, "data", "events.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(old), 0755)) + require.NoError(t, os.MkdirAll(filepath.Dir(newP), 0755)) + require.NoError(t, os.WriteFile(old, []byte("{\"kind\":\"x\"}\n"), 0644)) + + MigrateEventLog(old, newP, zap.NewNop()) + + _, errOld := os.Stat(old) + assert.True(t, os.IsNotExist(errOld)) + body, err := os.ReadFile(newP) + require.NoError(t, err) + assert.Equal(t, "{\"kind\":\"x\"}\n", string(body)) +} + +func TestMigrateSkipsWhenNewAlreadyExists(t *testing.T) { + dir := t.TempDir() + old := filepath.Join(dir, "events.jsonl") + newP := filepath.Join(dir, "new.jsonl") + require.NoError(t, os.WriteFile(old, []byte("old"), 0644)) + require.NoError(t, os.WriteFile(newP, []byte("new"), 0644)) + + MigrateEventLog(old, newP, zap.NewNop()) + + oldBody, _ := os.ReadFile(old) + newBody, _ := os.ReadFile(newP) + assert.Equal(t, "old", string(oldBody), "old file untouched if new exists") + assert.Equal(t, "new", string(newBody), "new file untouched if new exists") +} + +func TestMigrateNoopWhenOldMissing(t *testing.T) { + dir := t.TempDir() + old := filepath.Join(dir, "missing.jsonl") + newP := filepath.Join(dir, "new.jsonl") + + MigrateEventLog(old, newP, zap.NewNop()) + + _, err := os.Stat(newP) + assert.True(t, os.IsNotExist(err)) +} From 678ec16bdd6f0ed593b9337283672c1fef71e8f8 Mon Sep 17 00:00:00 2001 From: Boris Rybalkin Date: Thu, 21 May 2026 23:33:06 +0100 Subject: [PATCH 2/6] ci: bump visual_diff_skip_build to 2837 --- .drone.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index fe6cf764..fea23fb1 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -10,7 +10,7 @@ local bootstrap = '25.02'; local nginx = '1.24.0'; local python = '3.12-slim-bookworm'; local alpine = '3.21'; -local visual_diff_skip_build = '2786'; 
+local visual_diff_skip_build = '2837'; local build(arch, testUI) = [{ kind: 'pipeline', From 56a2e579159e8426695940e58dd0439bbb82878e Mon Sep 17 00:00:00 2001 From: Boris Rybalkin Date: Fri, 22 May 2026 07:33:19 +0100 Subject: [PATCH 3/6] stability: fail-fast if SNAP_DATA / SNAP_COMMON not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hardcoded fallbacks ('/var/snap/platform/current' and '...common') hid a real configuration error: if the daemon were ever started outside the snap, it would silently write to /var/snap/platform/* on the host rather than its sandboxed dirs. Fail loudly instead — these env vars are guaranteed by snapd for every snap service. Same treatment in ioc/common.go where the backend reads the events file. --- backend/cmd/stability/main.go | 4 ++-- backend/ioc/common.go | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/backend/cmd/stability/main.go b/backend/cmd/stability/main.go index 5f6ab1b4..2492c886 100644 --- a/backend/cmd/stability/main.go +++ b/backend/cmd/stability/main.go @@ -18,11 +18,11 @@ func main() { mem := stability.NewMemInfo("/proc") dataDir := os.Getenv("SNAP_DATA") if dataDir == "" { - dataDir = "/var/snap/platform/current" + logger.Fatal("stability: SNAP_DATA not set") } commonDir := os.Getenv("SNAP_COMMON") if commonDir == "" { - commonDir = "/var/snap/platform/common" + logger.Fatal("stability: SNAP_COMMON not set") } eventsPath := dataDir + "/stability-events.jsonl" stability.MigrateEventLog(commonDir+"/stability-events.jsonl", eventsPath, logger) diff --git a/backend/ioc/common.go b/backend/ioc/common.go index c91f8e79..f4681e0c 100644 --- a/backend/ioc/common.go +++ b/backend/ioc/common.go @@ -36,6 +36,7 @@ import ( "github.com/syncloud/platform/version" "github.com/syncloud/platform/hardware/lcd" "go.uber.org/zap" + "os" "path" "time" ) @@ -573,7 +574,11 @@ func Init(userConfig string, systemConfig string, backupDir string, varDir strin } err = 
c.Singleton(func() *stability.EventLog { - return stability.NewEventLog("/var/snap/platform/current/stability-events.jsonl") + dataDir := os.Getenv("SNAP_DATA") + if dataDir == "" { + logger.Fatal("ioc: SNAP_DATA not set") + } + return stability.NewEventLog(dataDir + "/stability-events.jsonl") }) if err != nil { return nil, err From 6b998acba5e45235a5530c1684ee8c6304c904b8 Mon Sep 17 00:00:00 2001 From: Boris Rybalkin Date: Fri, 22 May 2026 07:56:16 +0100 Subject: [PATCH 4/6] stability: use hook.DataDir constants + systemConfig.DataDir() for paths Replace env-var lookup with the conventions already used in the codebase: - ioc/common.go: EventLog singleton now depends on *config.SystemConfig and uses systemConfig.DataDir(), matching auth/authelia/totp wiring (ioc/common.go:337, 358, 365). SystemConfig.Load() log.Fatalln's on missing platform.cfg, so misconfiguration still fails loudly. - cmd/stability/main.go: use hook.DataDir / hook.CommonDir constants (same pattern as hook/install.go and cmd/install). 
--- backend/cmd/stability/main.go | 14 ++++---------- backend/ioc/common.go | 9 ++------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/backend/cmd/stability/main.go b/backend/cmd/stability/main.go index 2492c886..acbfb0b2 100644 --- a/backend/cmd/stability/main.go +++ b/backend/cmd/stability/main.go @@ -4,8 +4,10 @@ import ( "context" "os" "os/signal" + "path" "syscall" + "github.com/syncloud/platform/hook" "github.com/syncloud/platform/log" "github.com/syncloud/platform/stability" ) @@ -16,16 +18,8 @@ func main() { defer cancel() mem := stability.NewMemInfo("/proc") - dataDir := os.Getenv("SNAP_DATA") - if dataDir == "" { - logger.Fatal("stability: SNAP_DATA not set") - } - commonDir := os.Getenv("SNAP_COMMON") - if commonDir == "" { - logger.Fatal("stability: SNAP_COMMON not set") - } - eventsPath := dataDir + "/stability-events.jsonl" - stability.MigrateEventLog(commonDir+"/stability-events.jsonl", eventsPath, logger) + eventsPath := path.Join(hook.DataDir, "stability-events.jsonl") + stability.MigrateEventLog(path.Join(hook.CommonDir, "stability-events.jsonl"), eventsPath, logger) events := stability.NewEventLog(eventsPath) z := stability.NewZram(mem, stability.SwaponSyscall, stability.SwapoffSyscall, events, logger) if err := z.EnsureConfigured(); err != nil { diff --git a/backend/ioc/common.go b/backend/ioc/common.go index f4681e0c..22fa69d0 100644 --- a/backend/ioc/common.go +++ b/backend/ioc/common.go @@ -36,7 +36,6 @@ import ( "github.com/syncloud/platform/version" "github.com/syncloud/platform/hardware/lcd" "go.uber.org/zap" - "os" "path" "time" ) @@ -573,12 +572,8 @@ func Init(userConfig string, systemConfig string, backupDir string, varDir strin return nil, err } - err = c.Singleton(func() *stability.EventLog { - dataDir := os.Getenv("SNAP_DATA") - if dataDir == "" { - logger.Fatal("ioc: SNAP_DATA not set") - } - return stability.NewEventLog(dataDir + "/stability-events.jsonl") + err = c.Singleton(func(systemConfig 
*config.SystemConfig) *stability.EventLog { + return stability.NewEventLog(path.Join(systemConfig.DataDir(), "stability-events.jsonl")) }) if err != nil { return nil, err From e51e7f724cefd5b73545f977f53ab0d65f30efde Mon Sep 17 00:00:00 2001 From: Boris Rybalkin Date: Fri, 22 May 2026 08:02:13 +0100 Subject: [PATCH 5/6] stability: spell out zram/scanner/watcher locals in main --- backend/cmd/stability/main.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/cmd/stability/main.go b/backend/cmd/stability/main.go index acbfb0b2..2c4950be 100644 --- a/backend/cmd/stability/main.go +++ b/backend/cmd/stability/main.go @@ -21,17 +21,17 @@ func main() { eventsPath := path.Join(hook.DataDir, "stability-events.jsonl") stability.MigrateEventLog(path.Join(hook.CommonDir, "stability-events.jsonl"), eventsPath, logger) events := stability.NewEventLog(eventsPath) - z := stability.NewZram(mem, stability.SwaponSyscall, stability.SwapoffSyscall, events, logger) - if err := z.EnsureConfigured(); err != nil { + zram := stability.NewZram(mem, stability.SwaponSyscall, stability.SwapoffSyscall, events, logger) + if err := zram.EnsureConfigured(); err != nil { logger.Sugar().Warnf("stability: zram setup failed (continuing): %v", err) } - scan := stability.NewProcScanner("/proc") - w := stability.NewWatcher(mem, scan, func(pid int, sig syscall.Signal) error { + scanner := stability.NewProcScanner("/proc") + watcher := stability.NewWatcher(mem, scanner, func(pid int, sig syscall.Signal) error { return syscall.Kill(pid, sig) }, events, logger) - if err := w.Run(ctx); err != nil && err != context.Canceled { + if err := watcher.Run(ctx); err != nil && err != context.Canceled { logger.Sugar().Errorf("stability: watcher exited: %v", err) os.Exit(1) } From 9896a0f0dbfcfb117ceb584467373be83321b013 Mon Sep 17 00:00:00 2001 From: Boris Rybalkin Date: Fri, 22 May 2026 08:37:12 +0100 Subject: [PATCH 6/6] stability: drop signal context, Run() loops forever 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The watcher has no shutdown cleanup — no open file handles to flush, no in-flight state. Process termination via SIGTERM from systemd is fine. Drop the signal.NotifyContext setup in main and the context.Context parameter from Watcher.Run. The for/select collapses to 'for range t.C'. --- backend/cmd/stability/main.go | 11 +---------- backend/stability/oom.go | 14 ++++---------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/backend/cmd/stability/main.go b/backend/cmd/stability/main.go index 2c4950be..9b3b5539 100644 --- a/backend/cmd/stability/main.go +++ b/backend/cmd/stability/main.go @@ -1,9 +1,6 @@ package main import ( - "context" - "os" - "os/signal" "path" "syscall" @@ -14,9 +11,6 @@ import ( func main() { logger := log.Default() - ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) - defer cancel() - mem := stability.NewMemInfo("/proc") eventsPath := path.Join(hook.DataDir, "stability-events.jsonl") stability.MigrateEventLog(path.Join(hook.CommonDir, "stability-events.jsonl"), eventsPath, logger) @@ -31,8 +25,5 @@ func main() { return syscall.Kill(pid, sig) }, events, logger) - if err := watcher.Run(ctx); err != nil && err != context.Canceled { - logger.Sugar().Errorf("stability: watcher exited: %v", err) - os.Exit(1) - } + watcher.Run() } diff --git a/backend/stability/oom.go b/backend/stability/oom.go index be38079c..9e476bcd 100644 --- a/backend/stability/oom.go +++ b/backend/stability/oom.go @@ -1,7 +1,6 @@ package stability import ( - "context" "errors" "os" "syscall" @@ -42,7 +41,7 @@ func NewWatcher(mem *MemInfo, scan *ProcScanner, kill KillFn, events *EventLog, } } -func (w *Watcher) Run(ctx context.Context) error { +func (w *Watcher) Run() { t := time.NewTicker(w.interval) defer t.Stop() w.log.Info("oom-watcher: started", @@ -50,14 +49,9 @@ func (w *Watcher) Run(ctx context.Context) error { 
zap.Float64("avail_min", w.availMin), zap.Float64("psi_max", w.psiMax), ) - for { - select { - case <-ctx.Done(): - return ctx.Err() - case <-t.C: - if err := w.tick(); err != nil { - w.log.Warn("oom-watcher: tick error", zap.Error(err)) - } + for range t.C { + if err := w.tick(); err != nil { + w.log.Warn("oom-watcher: tick error", zap.Error(err)) } } }