diff --git a/pkg/worker/criu.go b/pkg/worker/criu.go index 7187b08e6..624f65b81 100644 --- a/pkg/worker/criu.go +++ b/pkg/worker/criu.go @@ -300,16 +300,37 @@ func (s *Worker) waitForSyncFile(request *types.ContainerRequest, outputLogger * defer cancel() syncFilePath := filepath.Join(s.config.Worker.CRIU.Storage.MountPath, fmt.Sprintf("%s.%s", request.Checkpoint.CheckpointId, syncFileExtension)) + checkpointPath := s.checkpointPath(request.Checkpoint.CheckpointId) + + // Check if sync file exists but checkpoint data is missing (cleaned up due to inactivity) + // If so, delete the sync file to force a re-sync from S3 + if _, err := os.Stat(syncFilePath); err == nil { + if _, err := os.Stat(checkpointPath); os.IsNotExist(err) { + outputLogger.Info("Checkpoint data missing, forcing re-sync") + os.Remove(syncFilePath) + } + } - _, err := os.Stat(syncFilePath) - if err == nil { - return nil + // Check if both sync file and checkpoint data exist + if _, err := os.Stat(syncFilePath); err == nil { + if _, err := os.Stat(checkpointPath); err == nil { + return nil + } } outputLogger.Info("Waiting for checkpoint to sync") for { - _, err := os.Stat(syncFilePath) - if err == nil { + syncFileExists := false + checkpointExists := false + + if _, err := os.Stat(syncFilePath); err == nil { + syncFileExists = true + } + if _, err := os.Stat(checkpointPath); err == nil { + checkpointExists = true + } + + if syncFileExists && checkpointExists { return nil } diff --git a/sdk/src/beta9/abstractions/sandbox.py b/sdk/src/beta9/abstractions/sandbox.py index cbd6aa8dd..17002186e 100644 --- a/sdk/src/beta9/abstractions/sandbox.py +++ b/sdk/src/beta9/abstractions/sandbox.py @@ -3934,4 +3934,4 @@ async def list_urls(self) -> Dict[int, str]: Raises: SandboxConnectionError: If listing URLs fails. """ - return await asyncio.to_thread(self._sync.list_urls) + return await asyncio.to_thread(self._sync.list_urls) \ No newline at end of file