7 changes: 7 additions & 0 deletions charts/mcp-stack/templates/deployment-mcpgateway.yaml
@@ -202,6 +202,13 @@ spec:
            - name: PLUGINS_CONFIG_FILE
              value: {{ $pluginsConfigFile | quote }}
            {{- end }}
            # Migration runner ownership — wired from .Values.migration.enabled
            # so app pods skip the in-pod bootstrap when the pre-install Job
            # is responsible for the schema. Placed after extraEnv so users
            # cannot accidentally drop the contract by overriding it. See
            # issue #4051 (Layer 2).
            - name: MCPGATEWAY_SKIP_MIGRATIONS
              value: {{ ternary "true" "false" .Values.migration.enabled | quote }}

            ################################################################
            # BULK ENV-VARS - pulled from ConfigMap + Secret
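A quick way to check the chart-side contract end to end is to render the template and look for the flag. A hedged sketch, assuming `helm` is on PATH and the chart renders with its default values:

```python
# Hedged sketch, not part of this PR: render the chart with the migration
# Job enabled and assert the ternary emits the quoted string "true".
import subprocess

manifest = subprocess.run(
    ["helm", "template", "charts/mcp-stack", "--set", "migration.enabled=true"],
    check=True, capture_output=True, text=True,
).stdout
assert "MCPGATEWAY_SKIP_MIGRATIONS" in manifest
assert 'value: "true"' in manifest
```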
99 changes: 79 additions & 20 deletions mcpgateway/bootstrap_db.py
@@ -45,6 +45,8 @@
# Third-Party
from alembic import command
from alembic.config import Config
from alembic.runtime.migration import MigrationContext
from alembic.script import ScriptDirectory
from filelock import FileLock
from sqlalchemy import create_engine, inspect, or_, text
from sqlalchemy.engine import Connection
@@ -99,6 +101,62 @@ def _schema_looks_current(inspector) -> bool:
    )


def _alembic_at_head(conn: Connection, cfg: Config) -> bool:
    """Return True when ``alembic_version`` in the DB matches the script directory head(s).

    Used by ``main()`` to skip the migration advisory lock entirely when no
    schema work is needed. This is the fast path that keeps replicas 2..N
    of a multi-pod deployment from serializing (and potentially hanging)
    on a lock behind a transaction-pooling connection pooler (issue #4051).

    Any error while probing — missing ``alembic_version`` table, connection
    issue, unexpected Alembic state — causes this to return ``False`` so
    the caller falls through to the fully-locked slow path, which handles
    empty/partial/out-of-date databases explicitly.

    Args:
        conn: Active SQLAlchemy connection (not necessarily locked).
        cfg: Alembic Config instance for this project.

    Returns:
        True if the DB schema is at the Alembic script directory's head;
        False on mismatch, empty DB, or any error while probing.
    """
    try:
        script_heads = set(ScriptDirectory.from_config(cfg).get_heads())
        if not script_heads:
            return False
        context = MigrationContext.configure(conn)
        db_heads = set(context.get_current_heads())
        return bool(db_heads) and db_heads == script_heads
    except Exception as exc:  # noqa: BLE001 - intentionally broad; fall through to slow path
        logger.debug("Could not probe alembic head state: %s", exc)
        return False
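For intuition about the fall-through contract, a minimal sketch (assuming the repo's `alembic.ini`, the same file `main()` loads below): on a fresh database `get_current_heads()` is empty, so the probe returns False and the caller takes the locked slow path.

```python
# Hedged sketch: an empty database has no alembic_version table, so the
# probe must return False rather than raise. Assumes alembic.ini resolves
# relative to the working directory.
from alembic.config import Config
from sqlalchemy import create_engine

from mcpgateway.bootstrap_db import _alembic_at_head

cfg = Config("alembic.ini")
engine = create_engine("sqlite://")  # fresh in-memory DB, no schema at all
with engine.connect() as conn:
    assert _alembic_at_head(conn, cfg) is False
```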


async def _run_post_migration_bootstrap(conn: Connection) -> None:
    """Run the idempotent post-migration bootstrap steps.

    These steps (team-visibility normalization, admin user, default roles,
    orphaned-resource assignment) are designed to be safe to re-run on
    every startup — each checks for existing state and skips if already
    populated. They must run on replicas that take the fast path so that a
    prior replica crashing mid-bootstrap doesn't leave downstream state
    unpopulated.

    Args:
        conn: Active SQLAlchemy connection (locked on the slow path,
            unlocked on the fast path). Writes are idempotent either way.
    """
    updated = normalize_team_visibility(conn)
    if updated:
        logger.info(f"Normalized {updated} team record(s) to supported visibility values")

    await bootstrap_admin_user(conn)
    await bootstrap_default_roles(conn)
    await bootstrap_resource_assignments(conn)
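The four helpers are defined elsewhere in mcpgateway and do not appear in this diff. For readers auditing the idempotency claim, a hedged sketch of the check-then-skip shape the docstring describes; the table and role name here are hypothetical:

```python
# Hypothetical illustration only, not one of the real helpers. The shape
# that makes re-running safe: probe for existing state, write only once.
from sqlalchemy import text
from sqlalchemy.engine import Connection

def bootstrap_default_roles_sketch(conn: Connection) -> None:
    exists = conn.execute(
        text("SELECT 1 FROM roles WHERE name = 'platform_admin'")
    ).first()
    if exists is not None:
        return  # a replica that crashed mid-bootstrap re-runs this as a no-op
    conn.execute(text("INSERT INTO roles (name) VALUES ('platform_admin')"))
```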


@contextmanager
def advisory_lock(conn: Connection):
"""
Expand Down Expand Up @@ -706,8 +764,27 @@ async def main() -> None:
    cfg = Config(str(ini_path))  # path in container
    cfg.attributes["configure_logger"] = True

    # Use advisory lock to prevent concurrent migrations
    # Escape '%' characters in URL to avoid configparser interpolation errors
    # (e.g., URL-encoded passwords like %40 for '@')
    escaped_url = settings.database_url.replace("%", "%%")
    cfg.set_main_option("sqlalchemy.url", escaped_url)

    try:
        # Fast path: if the schema is already at the current Alembic head,
        # skip the migration advisory lock entirely. This is critical for
        # deployments behind a transaction-pooling connection pooler — the
        # session-scoped advisory lock can be orphaned across PgBouncer's
        # backend handoffs, causing the N-th pod startup to spin indefinitely
        # (issue #4051). Replicas 2..N take this branch on normal restarts.
        with engine.connect() as probe_conn:
            probe_conn.commit()  # close any implicit transaction before probing
            if _alembic_at_head(probe_conn, cfg):
                logger.info("Schema already at Alembic head; skipping migration lock")
                await _run_post_migration_bootstrap(probe_conn)
                probe_conn.commit()
                return

        # Slow path: acquire the migration advisory lock and run schema work.
        with engine.connect() as conn:
            # Commit any open transaction on the connection before locking (though it should be fresh)
            conn.commit()
@@ -718,11 +795,6 @@
                # Pass the LOCKED connection to Alembic config
                cfg.attributes["connection"] = conn

                # Escape '%' characters in URL to avoid configparser interpolation errors
                # (e.g., URL-encoded passwords like %40 for '@')
                escaped_url = settings.database_url.replace("%", "%%")
                cfg.set_main_option("sqlalchemy.url", escaped_url)

                insp = inspect(conn)
                table_names = insp.get_table_names()

@@ -746,20 +818,7 @@
logger.info("Running Alembic migrations to ensure schema is up to date")
command.upgrade(cfg, "head")

# Post-upgrade normalization passes (inside lock to be safe)
updated = normalize_team_visibility(conn)
if updated:
logger.info(f"Normalized {updated} team record(s) to supported visibility values")

# Bootstrap admin user after database is ready, using the LOCKED connection
await bootstrap_admin_user(conn)

# Bootstrap default RBAC roles after admin user is created
await bootstrap_default_roles(conn)

# Assign orphaned resources to admin personal team after all setup is complete
await bootstrap_resource_assignments(conn)

await _run_post_migration_bootstrap(conn)
conn.commit() # Ensure all migration changes are permanently committed

    except Exception as e:
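The hang the fast path avoids is easy to reproduce outside the gateway. A hedged sketch against the harness's direct Postgres port (the lock key is arbitrary, not the gateway's real one); psycopg 3 is assumed:

```python
# Hedged sketch: pg_advisory_lock is session-scoped, i.e. held by a server
# backend until that backend releases it or disconnects. A transaction
# pooler reuses backends across clients between transactions, so the
# "session" holding the lock stops corresponding to any one client, which
# is how Alembic's lock gets orphaned behind PgBouncer (issue #4051).
import psycopg

dsn = "postgresql://postgres:reprosecret@localhost:54320/mcp"
with psycopg.connect(dsn, autocommit=True) as holder, psycopg.connect(dsn, autocommit=True) as waiter:
    holder.execute("SELECT pg_advisory_lock(4051)")
    # A second session cannot take the lock; try_ returns False instead of blocking.
    print(waiter.execute("SELECT pg_try_advisory_lock(4051)").fetchone()[0])  # False
```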
19 changes: 19 additions & 0 deletions mcpgateway/config.py
@@ -776,6 +776,25 @@ class Settings(BaseSettings):
    # UI/Admin Feature Flags
    mcpgateway_ui_enabled: bool = False
    mcpgateway_admin_api_enabled: bool = False

    # Migration runner ownership.
    # When True, the gateway lifespan does NOT call bootstrap_db.main();
    # the deployment is expected to run migrations as a separate step
    # (Helm pre-install Job, init container, CI step, etc.). The library
    # default is False so `docker run mcpgateway:latest` continues to
    # bootstrap its own schema without operator action. The Helm chart
    # ships this as True when the migration Job is enabled, so the
    # contract "Job runs migrations, app pods skip" is enforced at the
    # chart layer. See issue #4051 (Layer 2).
    mcpgateway_skip_migrations: bool = Field(
        default=False,
        description=(
            "When True, gateway pods skip the in-pod bootstrap_db call. "
            "Pair with a dedicated migration runner (Helm pre-install Job, "
            "init container, etc.) that ensures the schema is at head before "
            "pods start."
        ),
    )
    mcpgateway_ui_airgapped: bool = Field(default=False, description="Use local CDN assets instead of external CDNs for airgapped deployments")
    mcpgateway_ui_embedded: bool = Field(default=False, description="Enable embedded UI mode (hides select header controls by default)")
    mcpgateway_ui_hide_sections: Annotated[list[str], NoDecode] = Field(
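Because this is a pydantic `BaseSettings` field, the string the Helm template renders round-trips to a bool with no extra plumbing. A minimal sketch, assuming the remaining settings all have usable defaults:

```python
# Hedged sketch: pydantic-settings maps the MCPGATEWAY_SKIP_MIGRATIONS env
# var (set by the Helm template above) onto this field and parses the
# strings "true"/"false" into a bool.
import os

os.environ["MCPGATEWAY_SKIP_MIGRATIONS"] = "true"

from mcpgateway.config import Settings  # the class shown in this diff

assert Settings().mcpgateway_skip_migrations is True
```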
30 changes: 28 additions & 2 deletions mcpgateway/main.py
@@ -1583,6 +1583,33 @@ def _restore_default_sighup_handler() -> None:
    signal.signal(signal.SIGHUP, signal.SIG_DFL)


async def _run_initial_db_bootstrap() -> None:
    """Run ``bootstrap_db.main()`` unless ``MCPGATEWAY_SKIP_MIGRATIONS`` is set.

    The flag tells the gateway that a separate migration runner — typically
    a Helm pre-install Job, an init container, or a CI step — has already
    populated the schema, so the in-pod bootstrap is redundant. When the
    flag is True the call is suppressed and a single INFO line is emitted
    so operators can audit the choice in pod logs.

    The library default is False (preserves the ``docker run`` happy path);
    the Helm chart and compose overlays flip it to True when they pair the
    gateway Deployment with a dedicated migration runner. See issue #4051,
    Layer 2.
    """
    if settings.mcpgateway_skip_migrations:
        logger.info(
            "Skipping in-pod migration bootstrap (MCPGATEWAY_SKIP_MIGRATIONS=true). "
            "A dedicated migration runner is expected to have populated the schema."
        )
        return
    # First-Party (deferred to keep import-time light for tests that don't
    # need to pay the migration cost just by importing mcpgateway.main).
    from mcpgateway.bootstrap_db import main as bootstrap_db_main  # pylint: disable=import-outside-toplevel

    await bootstrap_db_main()


@asynccontextmanager
async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
"""
Expand Down Expand Up @@ -1617,7 +1644,6 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
    # `wait_for_db_ready(sync=True)` is a blocking probe, so offload it to
    # a worker thread to avoid stalling the event loop during startup.
    # First-Party
    from mcpgateway.bootstrap_db import main as bootstrap_db  # pylint: disable=import-outside-toplevel
    from mcpgateway.utils.db_isready import wait_for_db_ready  # pylint: disable=import-outside-toplevel

    await asyncio.to_thread(
@@ -1626,7 +1652,7 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
        interval=int(settings.db_retry_interval_ms) / 1000,
        sync=True,
    )
    await bootstrap_db()
    await _run_initial_db_bootstrap()

    # Initialize Redis client early (shared pool for all services)
    await get_redis_client()
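A hedged test sketch for the new guard (the test name and fixtures are mine, and pytest-asyncio is assumed): with the flag set, the helper must return before the deferred import runs, so no engine or Alembic machinery is touched.

```python
# Hypothetical unit test, not part of this PR. With the flag on, the guard
# logs one INFO line and returns before importing mcpgateway.bootstrap_db.
import pytest

@pytest.mark.asyncio
async def test_skip_flag_suppresses_in_pod_bootstrap(monkeypatch, caplog):
    from mcpgateway import main as gw_main

    monkeypatch.setattr(gw_main.settings, "mcpgateway_skip_migrations", True)
    with caplog.at_level("INFO"):
        await gw_main._run_initial_db_bootstrap()
    assert "Skipping in-pod migration bootstrap" in caplog.text
```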
81 changes: 81 additions & 0 deletions tests/integration/fixtures/transaction_pool/docker-compose.yml
@@ -0,0 +1,81 @@
# Reproduction harness for issue #4051.
#
# Minimal stack to reproduce the Alembic advisory-lock hang when multiple
# gateway replicas start concurrently behind PgBouncer in transaction-pooling
# mode. Intentionally excludes everything unrelated (redis, nginx, monitoring,
# admin UI) so the failure mode is attributable.

name: mcf4051

services:
  postgres:
    image: postgres:17
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: reprosecret
      POSTGRES_DB: mcp
    ports:
      - "54320:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d mcp"]
      interval: 2s
      timeout: 3s
      retries: 30

  pgbouncer:
    image: edoburu/pgbouncer:latest
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      DATABASE_URL: postgres://postgres:reprosecret@postgres:5432/mcp
      LISTEN_PORT: 6432
      # The setting that makes this bug reproducible. In transaction pooling
      # mode, PgBouncer hands the server backend to another client between
      # transactions — so the session-scoped advisory lock Alembic acquires
      # outlives the pooler "session" of the original caller.
      POOL_MODE: transaction
      MAX_CLIENT_CONN: "200"
      # Force multiplexing: keep server-side backends well below total client
      # connections. With N replicas × M gunicorn workers competing, PgBouncer
      # must hand the same server backend to multiple clients between
      # transactions — which is how session-scoped advisory locks get orphaned.
      # Production OCP/Helm deployments that hit this bug see the same ratio.
      DEFAULT_POOL_SIZE: "2"
      MIN_POOL_SIZE: "1"
      AUTH_TYPE: scram-sha-256
    ports:
      - "64320:6432"
    healthcheck:
      test: ["CMD", "pg_isready", "-h", "localhost", "-p", "6432"]
      interval: 2s
      timeout: 3s
      retries: 30

  gateway:
    image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
    depends_on:
      pgbouncer:
        condition: service_healthy
    restart: "no"
    environment:
      # Route gateway traffic through PgBouncer (transaction pool). This is
      # the scenario documented in the issue.
      DATABASE_URL: postgresql+psycopg://postgres:reprosecret@pgbouncer:6432/mcp
      HOST: 0.0.0.0
      PORT: "4444"
      LOG_LEVEL: INFO
      # Strip the gateway down to just the bootstrap path we care about.
      AUTH_REQUIRED: "false"
      MCPGATEWAY_UI_ENABLED: "false"
      MCPGATEWAY_ADMIN_API_ENABLED: "false"
      MCPGATEWAY_A2A_ENABLED: "false"
      PLUGINS_ENABLED: "false"
      CACHE_TYPE: none
      FEDERATION_ENABLED: "false"
      REDIS_URL: "redis://127.0.0.1:6379/0"
      # Keep bootstrap quiet on the retry side so the hang is obvious.
      DB_MAX_RETRIES: "10"
      # Several workers per replica so total client connections comfortably
      # exceed PgBouncer's DEFAULT_POOL_SIZE, forcing backend multiplexing.
      GUNICORN_WORKERS: "4"
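To drive the harness, something like the following (the scale count is arbitrary) starts several gateway replicas so that replicas times GUNICORN_WORKERS comfortably exceeds DEFAULT_POOL_SIZE. Before the fast path, replica startups beyond the first would hang on the orphaned lock; with it, each replica should log "Schema already at Alembic head" and proceed.

```python
# Hedged driver sketch, not part of this PR. Paths come from this diff;
# docker compose v2 with the --scale flag is assumed.
import subprocess

COMPOSE = "tests/integration/fixtures/transaction_pool/docker-compose.yml"
subprocess.run(
    ["docker", "compose", "-f", COMPOSE, "up", "--scale", "gateway=3"],
    check=True,
)
```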