7 changes: 7 additions & 0 deletions charts/mcp-stack/templates/deployment-mcpgateway.yaml
@@ -202,6 +202,13 @@ spec:
            - name: PLUGINS_CONFIG_FILE
              value: {{ $pluginsConfigFile | quote }}
            {{- end }}
            # Migration runner ownership — wired from .Values.migration.enabled
            # so app pods skip the in-pod bootstrap when the pre-install Job
            # is responsible for the schema. Placed after extraEnv so users
            # cannot accidentally drop the contract by overriding it. See
            # issue #4051 (Layer 2).
            - name: MCPGATEWAY_SKIP_MIGRATIONS
              value: {{ ternary "true" "false" .Values.migration.enabled | quote }}

            ################################################################
            # BULK ENV-VARS - pulled from ConfigMap + Secret
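A quick way to check the chart-side contract end to end is to render the template and look for the flag. A hedged sketch, assuming `helm` is on PATH and the chart renders with its default values:

```python
# Hedged sketch, not part of this PR: render the chart with the migration
# Job enabled and assert the ternary emits the quoted string "true".
import subprocess

manifest = subprocess.run(
    ["helm", "template", "charts/mcp-stack", "--set", "migration.enabled=true"],
    check=True, capture_output=True, text=True,
).stdout
assert "MCPGATEWAY_SKIP_MIGRATIONS" in manifest
assert 'value: "true"' in manifest
```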
99 changes: 79 additions & 20 deletions mcpgateway/bootstrap_db.py
@@ -45,6 +45,8 @@
# Third-Party
from alembic import command
from alembic.config import Config
from alembic.runtime.migration import MigrationContext
from alembic.script import ScriptDirectory
from filelock import FileLock
from sqlalchemy import create_engine, inspect, or_, text
from sqlalchemy.engine import Connection
@@ -99,6 +101,62 @@ def _schema_looks_current(inspector) -> bool:
    )


def _alembic_at_head(conn: Connection, cfg: Config) -> bool:
    """Return True when ``alembic_version`` in the DB matches the script directory head(s).

    Used by ``main()`` to skip the migration advisory lock entirely when no
    schema work is needed. This is the fast path that keeps replicas 2..N
    of a multi-pod deployment from serializing (and potentially hanging)
    on a lock behind a transaction-pooling connection pooler (issue #4051).

    Any error while probing — missing ``alembic_version`` table, connection
    issue, unexpected Alembic state — causes this to return ``False`` so
    the caller falls through to the fully-locked slow path, which handles
    empty/partial/out-of-date databases explicitly.

    Args:
        conn: Active SQLAlchemy connection (not necessarily locked).
        cfg: Alembic Config instance for this project.

    Returns:
        True if the DB schema is at the Alembic script directory's head;
        False on mismatch, empty DB, or any error while probing.
    """
    try:
        script_heads = set(ScriptDirectory.from_config(cfg).get_heads())
        if not script_heads:
            return False
        context = MigrationContext.configure(conn)
        db_heads = set(context.get_current_heads())
        return bool(db_heads) and db_heads == script_heads
    except Exception as exc:  # noqa: BLE001 - intentionally broad; fall through to slow path
        logger.debug("Could not probe alembic head state: %s", exc)
        return False
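For intuition about the fall-through contract, a minimal sketch (assuming the repo's `alembic.ini`, the same file `main()` loads below): on a fresh database `get_current_heads()` is empty, so the probe returns False and the caller takes the locked slow path.

```python
# Hedged sketch: an empty database has no alembic_version table, so the
# probe must return False rather than raise. Assumes alembic.ini resolves
# relative to the working directory.
from alembic.config import Config
from sqlalchemy import create_engine

from mcpgateway.bootstrap_db import _alembic_at_head

cfg = Config("alembic.ini")
engine = create_engine("sqlite://")  # fresh in-memory DB, no schema at all
with engine.connect() as conn:
    assert _alembic_at_head(conn, cfg) is False
```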


async def _run_post_migration_bootstrap(conn: Connection) -> None:
    """Run the idempotent post-migration bootstrap steps.

    These steps (team-visibility normalization, admin user, default roles,
    orphaned-resource assignment) are designed to be safe to re-run on
    every startup — each checks for existing state and skips if already
    populated. They must run on replicas that take the fast path so that a
    prior replica crashing mid-bootstrap doesn't leave downstream state
    unpopulated.

    Args:
        conn: Active SQLAlchemy connection (locked on the slow path,
            unlocked on the fast path). Writes are idempotent either way.
    """
    updated = normalize_team_visibility(conn)
    if updated:
        logger.info(f"Normalized {updated} team record(s) to supported visibility values")

    await bootstrap_admin_user(conn)
    await bootstrap_default_roles(conn)
    await bootstrap_resource_assignments(conn)
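The four helpers are defined elsewhere in mcpgateway and do not appear in this diff. For readers auditing the idempotency claim, a hedged sketch of the check-then-skip shape the docstring describes; the table and role name here are hypothetical:

```python
# Hypothetical illustration only, not one of the real helpers. The shape
# that makes re-running safe: probe for existing state, write only once.
from sqlalchemy import text
from sqlalchemy.engine import Connection

def bootstrap_default_roles_sketch(conn: Connection) -> None:
    exists = conn.execute(
        text("SELECT 1 FROM roles WHERE name = 'platform_admin'")
    ).first()
    if exists is not None:
        return  # a replica that crashed mid-bootstrap re-runs this as a no-op
    conn.execute(text("INSERT INTO roles (name) VALUES ('platform_admin')"))
```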


@contextmanager
def advisory_lock(conn: Connection):
"""
Expand Down Expand Up @@ -706,8 +764,27 @@ async def main() -> None:
    cfg = Config(str(ini_path))  # path in container
    cfg.attributes["configure_logger"] = True

    # Use advisory lock to prevent concurrent migrations
    # Escape '%' characters in URL to avoid configparser interpolation errors
    # (e.g., URL-encoded passwords like %40 for '@')
    escaped_url = settings.database_url.replace("%", "%%")
    cfg.set_main_option("sqlalchemy.url", escaped_url)

    try:
        # Fast path: if the schema is already at the current Alembic head,
        # skip the migration advisory lock entirely. This is critical for
        # deployments behind a transaction-pooling connection pooler — the
        # session-scoped advisory lock can be orphaned across PgBouncer's
        # backend handoffs, causing the N-th pod startup to spin indefinitely
        # (issue #4051). Replicas 2..N take this branch on normal restarts.
        with engine.connect() as probe_conn:
            probe_conn.commit()  # close any implicit transaction before probing
            if _alembic_at_head(probe_conn, cfg):
                logger.info("Schema already at Alembic head; skipping migration lock")
                await _run_post_migration_bootstrap(probe_conn)
                probe_conn.commit()
                return

        # Slow path: acquire the migration advisory lock and run schema work.
        with engine.connect() as conn:
            # Commit any open transaction on the connection before locking (though it should be fresh)
            conn.commit()
@@ -718,11 +795,6 @@
                # Pass the LOCKED connection to Alembic config
                cfg.attributes["connection"] = conn

                # Escape '%' characters in URL to avoid configparser interpolation errors
                # (e.g., URL-encoded passwords like %40 for '@')
                escaped_url = settings.database_url.replace("%", "%%")
                cfg.set_main_option("sqlalchemy.url", escaped_url)

                insp = inspect(conn)
                table_names = insp.get_table_names()

@@ -746,20 +818,7 @@
logger.info("Running Alembic migrations to ensure schema is up to date")
command.upgrade(cfg, "head")

# Post-upgrade normalization passes (inside lock to be safe)
updated = normalize_team_visibility(conn)
if updated:
logger.info(f"Normalized {updated} team record(s) to supported visibility values")

# Bootstrap admin user after database is ready, using the LOCKED connection
await bootstrap_admin_user(conn)

# Bootstrap default RBAC roles after admin user is created
await bootstrap_default_roles(conn)

# Assign orphaned resources to admin personal team after all setup is complete
await bootstrap_resource_assignments(conn)

await _run_post_migration_bootstrap(conn)
conn.commit() # Ensure all migration changes are permanently committed

    except Exception as e:
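The hang the fast path avoids is easy to reproduce outside the gateway. A hedged sketch against the harness's direct Postgres port (the lock key is arbitrary, not the gateway's real one); psycopg 3 is assumed:

```python
# Hedged sketch: pg_advisory_lock is session-scoped, i.e. held by a server
# backend until that backend releases it or disconnects. A transaction
# pooler reuses backends across clients between transactions, so the
# "session" holding the lock stops corresponding to any one client, which
# is how Alembic's lock gets orphaned behind PgBouncer (issue #4051).
import psycopg

dsn = "postgresql://postgres:reprosecret@localhost:54320/mcp"
with psycopg.connect(dsn, autocommit=True) as holder, psycopg.connect(dsn, autocommit=True) as waiter:
    holder.execute("SELECT pg_advisory_lock(4051)")
    # A second session cannot take the lock; try_ returns False instead of blocking.
    print(waiter.execute("SELECT pg_try_advisory_lock(4051)").fetchone()[0])  # False
```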
19 changes: 19 additions & 0 deletions mcpgateway/config.py
@@ -776,6 +776,25 @@ class Settings(BaseSettings):
    # UI/Admin Feature Flags
    mcpgateway_ui_enabled: bool = False
    mcpgateway_admin_api_enabled: bool = False

    # Migration runner ownership.
    # When True, the gateway lifespan does NOT call bootstrap_db.main();
    # the deployment is expected to run migrations as a separate step
    # (Helm pre-install Job, init container, CI step, etc.). The library
    # default is False so `docker run mcpgateway:latest` continues to
    # bootstrap its own schema without operator action. The Helm chart
    # ships this as True when the migration Job is enabled, so the
    # contract "Job runs migrations, app pods skip" is enforced at the
    # chart layer. See issue #4051 (Layer 2).
    mcpgateway_skip_migrations: bool = Field(
        default=False,
        description=(
            "When True, gateway pods skip the in-pod bootstrap_db call. "
            "Pair with a dedicated migration runner (Helm pre-install Job, "
            "init container, etc.) that ensures the schema is at head before "
            "pods start."
        ),
    )
    mcpgateway_ui_airgapped: bool = Field(default=False, description="Use local CDN assets instead of external CDNs for airgapped deployments")
    mcpgateway_ui_embedded: bool = Field(default=False, description="Enable embedded UI mode (hides select header controls by default)")
    mcpgateway_ui_hide_sections: Annotated[list[str], NoDecode] = Field(
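Because this is a pydantic `BaseSettings` field, the string the Helm template renders round-trips to a bool with no extra plumbing. A minimal sketch, assuming the remaining settings all have usable defaults:

```python
# Hedged sketch: pydantic-settings maps the MCPGATEWAY_SKIP_MIGRATIONS env
# var (set by the Helm template above) onto this field and parses the
# strings "true"/"false" into a bool.
import os

os.environ["MCPGATEWAY_SKIP_MIGRATIONS"] = "true"

from mcpgateway.config import Settings  # the class shown in this diff

assert Settings().mcpgateway_skip_migrations is True
```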
30 changes: 28 additions & 2 deletions mcpgateway/main.py
@@ -1583,6 +1583,33 @@ def _restore_default_sighup_handler() -> None:
    signal.signal(signal.SIGHUP, signal.SIG_DFL)


async def _run_initial_db_bootstrap() -> None:
    """Run ``bootstrap_db.main()`` unless ``MCPGATEWAY_SKIP_MIGRATIONS`` is set.

    The flag tells the gateway that a separate migration runner — typically
    a Helm pre-install Job, an init container, or a CI step — has already
    populated the schema, so the in-pod bootstrap is redundant. When the
    flag is True the call is suppressed and a single INFO line is emitted
    so operators can audit the choice in pod logs.

    The library default is False (preserves the ``docker run`` happy path);
    the Helm chart and compose overlays flip it to True when they pair the
    gateway Deployment with a dedicated migration runner. See issue #4051,
    Layer 2.
    """
    if settings.mcpgateway_skip_migrations:
        logger.info(
            "Skipping in-pod migration bootstrap (MCPGATEWAY_SKIP_MIGRATIONS=true). "
            "A dedicated migration runner is expected to have populated the schema."
        )
        return
    # First-Party (deferred to keep import-time light for tests that don't
    # need to pay the migration cost just by importing mcpgateway.main).
    from mcpgateway.bootstrap_db import main as bootstrap_db_main  # pylint: disable=import-outside-toplevel

    await bootstrap_db_main()


@asynccontextmanager
async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
"""
Expand Down Expand Up @@ -1617,7 +1644,6 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
    # `wait_for_db_ready(sync=True)` is a blocking probe, so offload it to
    # a worker thread to avoid stalling the event loop during startup.
    # First-Party
    from mcpgateway.bootstrap_db import main as bootstrap_db  # pylint: disable=import-outside-toplevel
    from mcpgateway.utils.db_isready import wait_for_db_ready  # pylint: disable=import-outside-toplevel

    await asyncio.to_thread(
@@ -1626,7 +1652,7 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
        interval=int(settings.db_retry_interval_ms) / 1000,
        sync=True,
    )
    await bootstrap_db()
    await _run_initial_db_bootstrap()

    # Initialize Redis client early (shared pool for all services)
    await get_redis_client()
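A hedged test sketch for the new guard (the test name and fixtures are mine, and pytest-asyncio is assumed): with the flag set, the helper must return before the deferred import runs, so no engine or Alembic machinery is touched.

```python
# Hypothetical unit test, not part of this PR. With the flag on, the guard
# logs one INFO line and returns before importing mcpgateway.bootstrap_db.
import pytest

@pytest.mark.asyncio
async def test_skip_flag_suppresses_in_pod_bootstrap(monkeypatch, caplog):
    from mcpgateway import main as gw_main

    monkeypatch.setattr(gw_main.settings, "mcpgateway_skip_migrations", True)
    with caplog.at_level("INFO"):
        await gw_main._run_initial_db_bootstrap()
    assert "Skipping in-pod migration bootstrap" in caplog.text
```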
81 changes: 81 additions & 0 deletions tests/integration/fixtures/transaction_pool/docker-compose.yml
@@ -0,0 +1,81 @@
# Reproduction harness for issue #4051.
#
# Minimal stack to reproduce the Alembic advisory-lock hang when multiple
# gateway replicas start concurrently behind PgBouncer in transaction-pooling
# mode. Intentionally excludes everything unrelated (redis, nginx, monitoring,
# admin UI) so the failure mode is attributable.

name: mcf4051

services:
  postgres:
    image: postgres:17
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: reprosecret
      POSTGRES_DB: mcp
    ports:
      - "54320:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d mcp"]
      interval: 2s
      timeout: 3s
      retries: 30

  pgbouncer:
    image: edoburu/pgbouncer:latest
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      DATABASE_URL: postgres://postgres:reprosecret@postgres:5432/mcp
      LISTEN_PORT: 6432
      # The setting that makes this bug reproducible. In transaction pooling
      # mode, PgBouncer hands the server backend to another client between
      # transactions — so the session-scoped advisory lock Alembic acquires
      # outlives the pooler "session" of the original caller.
      POOL_MODE: transaction
      MAX_CLIENT_CONN: "200"
      # Force multiplexing: keep server-side backends well below total client
      # connections. With N replicas × M gunicorn workers competing, PgBouncer
      # must hand the same server backend to multiple clients between
      # transactions — which is how session-scoped advisory locks get orphaned.
      # Production OCP/Helm deployments that hit this bug see the same ratio.
      DEFAULT_POOL_SIZE: "2"
      MIN_POOL_SIZE: "1"
      AUTH_TYPE: scram-sha-256
    ports:
      - "64320:6432"
    healthcheck:
      test: ["CMD", "pg_isready", "-h", "localhost", "-p", "6432"]
      interval: 2s
      timeout: 3s
      retries: 30

  gateway:
    image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest}
    depends_on:
      pgbouncer:
        condition: service_healthy
    restart: "no"
    environment:
      # Route gateway traffic through PgBouncer (transaction pool). This is
      # the scenario documented in the issue.
      DATABASE_URL: postgresql+psycopg://postgres:reprosecret@pgbouncer:6432/mcp
      HOST: 0.0.0.0
      PORT: "4444"
      LOG_LEVEL: INFO
      # Strip the gateway down to just the bootstrap path we care about.
      AUTH_REQUIRED: "false"
      MCPGATEWAY_UI_ENABLED: "false"
      MCPGATEWAY_ADMIN_API_ENABLED: "false"
      MCPGATEWAY_A2A_ENABLED: "false"
      PLUGINS_ENABLED: "false"
      CACHE_TYPE: none
      FEDERATION_ENABLED: "false"
      REDIS_URL: "redis://127.0.0.1:6379/0"
      # Keep bootstrap quiet on the retry side so the hang is obvious.
      DB_MAX_RETRIES: "10"
      # Several workers per replica so total client connections comfortably
      # exceed PgBouncer's DEFAULT_POOL_SIZE, forcing backend multiplexing.
      GUNICORN_WORKERS: "4"
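To drive the harness, something like the following (the scale count is arbitrary) starts several gateway replicas so that replicas times GUNICORN_WORKERS comfortably exceeds DEFAULT_POOL_SIZE. Before the fast path, replica startups beyond the first would hang on the orphaned lock; with it, each replica should log "Schema already at Alembic head" and proceed.

```python
# Hedged driver sketch, not part of this PR. Paths come from this diff;
# docker compose v2 with the --scale flag is assumed.
import subprocess

COMPOSE = "tests/integration/fixtures/transaction_pool/docker-compose.yml"
subprocess.run(
    ["docker", "compose", "-f", COMPOSE, "up", "--scale", "gateway=3"],
    check=True,
)
```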