Skip to content

Commit c818ae3

Browse files
MadLittleModsjevolk
authored andcommitted
Fix Complement not using HSPortBindingIP (127.0.0.1) for the homeserver BaseURL (matrix-org#781)
Regressed in matrix-org#776 which started using the Docker default `HostIP` (`0.0.0.0`) because we removed the specific `HSPortBindingIP` port bindings for `8008` and `8448` (see "Root cause" section below). ### Problem This caused downstream issues in some of our out-of-repo Complement tests (Element internal) which stress some SSO/OIDC login flows with Synapse. Before matrix-org#776, [`SsoRedirectServlet`](https://github.com/element-hq/synapse/blob/33ba8860c43d4770ea119a09a4fcbbf366f3b32e/synapse/rest/client/login.py#L645-L683) received a request like this `http://127.0.0.1:8008/_matrix/client/v3/login/sso/redirect/oidc-test_idp?redirectUrl=http%3A%2F%2Fapp.my-fake-client.io%2Fclient-callback`. But now it's seeing `http://0.0.0.0:8008/_matrix/client/v3/login/sso/redirect/oidc-test_idp?redirectUrl=http%3A%2F%2Fapp.my-fake-client.io%2Fclient-callback` and tries to redirect to the canonical `http://127.0.0.1:8008/` address which would then go to the step we expect in the tests (authorization endpoint). Basically, the `host` header in the request used to be set to `127.0.0.1:8008` but now it's `0.0.0.0:8008`. Synapse wants to use the canonical URL so the cookies are available so it redirects you to the canonical `public_baseurl` version first. Relevant Synapse code that cares about using the canonical URL to get cookies back -> [`SsoRedirectServlet`](https://github.com/element-hq/synapse/blob/33ba8860c43d4770ea119a09a4fcbbf366f3b32e/synapse/rest/client/login.py#L645-L683) We could alternatively, update our out-of-repo tests to account for this extra redirect but it seems more appropriate to update Complement to use the the configured `HSPortBindingIP` as expected. ### Root cause The host name used in the requests during the Complement tests is from the `client.BaseURL` which is sourced from the [Docker container port mapping/bindings](https://github.com/matrix-org/complement/blob/28a09014afd1f9c75a3549b4cd9d8fc480ba1149/internal/docker/deployer.go#L521) in Complement. In matrix-org#776, we removed the explicit port bindings for `8008` with `HSPortBindingIP` ([`127.0.0.1`](https://github.com/matrix-org/complement/blob/28a09014afd1f9c75a3549b4cd9d8fc480ba1149/config/config.go#L180)) so now it uses Docker's default `0.0.0.0`. ```patch diff --git a/internal/docker/deployer.go b/internal/docker/deployer.go index cdcf51a..a186b30 100644 --- a/internal/docker/deployer.go +++ b/internal/docker/deployer.go @@ -382,20 +380,8 @@ func deployImage( }, &container.HostConfig{ CapAdd: []string{"NET_ADMIN"}, // TODO : this should be some sort of option PublishAllPorts: true, - PortBindings: nat.PortMap{ - nat.Port("8008/tcp"): []nat.PortBinding{ - { - HostIP: cfg.HSPortBindingIP, - }, - }, - nat.Port("8448/tcp"): []nat.PortBinding{ - { - HostIP: cfg.HSPortBindingIP, - }, - }, - }, ```
1 parent e9de36c commit c818ae3

File tree

3 files changed

+172
-35
lines changed

3 files changed

+172
-35
lines changed

config/config.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,14 @@ type Complement struct {
106106
// disable this behaviour being added later, once this has stablised.
107107
EnableDirtyRuns bool
108108

109+
// The IP that is used to connect to the running homeserver from the host.
110+
//
111+
// For Complement tests, this is always configured as `127.0.0.1` but can be
112+
// overridden by homerunner to allow binding to a different IP address
113+
// (`HOMERUNNER_HS_PORTBINDING_IP`).
114+
//
115+
// This field is used for the host-accessible homeserver URLs (as the hostname)
116+
// so clients in your tests can access the homeserver.
109117
HSPortBindingIP string
110118

111119
// Name: COMPLEMENT_POST_TEST_SCRIPT

internal/docker/builder.go

Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -539,27 +539,59 @@ func printPortBindingsOfAllComplementContainers(docker *client.Client, contextSt
539539
log.Printf("=============== %s : END ALL COMPLEMENT DOCKER PORT BINDINGS ===============\n\n\n", contextStr)
540540
}
541541

542-
func endpoints(p nat.PortMap, csPort, ssPort int) (baseURL, fedBaseURL string, err error) {
543-
csapiPort := fmt.Sprintf("%d/tcp", csPort)
544-
csapiPortInfo, ok := p[nat.Port(csapiPort)]
545-
if !ok {
546-
return "", "", fmt.Errorf("port %s not exposed - exposed ports: %v", csapiPort, p)
542+
// endpoints transforms the homeserver ports into the base URL and federation base URL.
543+
func endpoints(p nat.PortMap, hsPortBindingIP string, csPort, ssPort int) (baseURL, fedBaseURL string, err error) {
544+
csapiPortBinding, err := findPortBinding(p, hsPortBindingIP, csPort)
545+
if err != nil {
546+
return "", "", fmt.Errorf("Problem finding CS API port: %s", err)
547547
}
548-
if len(csapiPortInfo) == 0 {
549-
return "", "", fmt.Errorf("port %s exposed with not mapped port: %+v", csapiPort, p)
548+
baseURL = fmt.Sprintf("http://"+csapiPortBinding.HostIP+":%s", csapiPortBinding.HostPort)
549+
550+
ssapiPortBinding, err := findPortBinding(p, hsPortBindingIP, ssPort)
551+
if err != nil {
552+
return "", "", fmt.Errorf("Problem finding SS API port: %s", err)
550553
}
551-
baseURL = fmt.Sprintf("http://"+csapiPortInfo[0].HostIP+":%s", csapiPortInfo[0].HostPort)
554+
fedBaseURL = fmt.Sprintf("https://"+ssapiPortBinding.HostIP+":%s", ssapiPortBinding.HostPort)
555+
return
556+
}
552557

553-
ssapiPort := fmt.Sprintf("%d/tcp", ssPort)
554-
ssapiPortInfo, ok := p[nat.Port(ssapiPort)]
558+
// findPortBinding finds a matching port binding for the given host/port in the `nat.PortMap`.
559+
//
560+
// This function will return the first port binding that matches the given host IP. If a
561+
// `0.0.0.0` binding is found, we will assume that it is listening on all interfaces,
562+
// including the `hsPortBindingIP`, and return a binding with the `hsPortBindingIP` as
563+
// the host IP.
564+
func findPortBinding(p nat.PortMap, hsPortBindingIP string, port int) (portBinding nat.PortBinding, err error) {
565+
portString := fmt.Sprintf("%d/tcp", port)
566+
portBindings, ok := p[nat.Port(portString)]
555567
if !ok {
556-
return "", "", fmt.Errorf("port %s not exposed - exposed ports: %v", ssapiPort, p)
557-
}
558-
if len(ssapiPortInfo) == 0 {
559-
return "", "", fmt.Errorf("port %s exposed with not mapped port: %+v", ssapiPort, p)
568+
return nat.PortBinding{}, fmt.Errorf("port %s not exposed - exposed ports: %v", portString, p)
569+
}
570+
if len(portBindings) == 0 {
571+
return nat.PortBinding{}, fmt.Errorf("port %s exposed with not mapped port: %+v", portString, p)
572+
}
573+
574+
for _, pb := range portBindings {
575+
if pb.HostIP == hsPortBindingIP {
576+
return pb, nil
577+
} else if pb.HostIP == "0.0.0.0" {
578+
// `0.0.0.0` means "all interfaces", so we can assume that this will be listening
579+
// for connections from `hsPortBindingIP` as well.
580+
return nat.PortBinding{
581+
HostIP: hsPortBindingIP,
582+
HostPort: pb.HostPort,
583+
}, nil
584+
} else if pb.HostIP == "" && hsPortBindingIP == "127.0.0.1" {
585+
// `HostIP` can be empty in certain environments (observed with podman v4.3.1). We
586+
// will assume this is only a binding for `127.0.0.1`.
587+
return nat.PortBinding{
588+
HostIP: hsPortBindingIP,
589+
HostPort: pb.HostPort,
590+
}, nil
591+
}
560592
}
561-
fedBaseURL = fmt.Sprintf("https://"+csapiPortInfo[0].HostIP+":%s", ssapiPortInfo[0].HostPort)
562-
return
593+
594+
return nat.PortBinding{}, fmt.Errorf("unable to find matching port binding for %s %s: %+v", hsPortBindingIP, portString, p)
563595
}
564596

565597
type result struct {

internal/docker/deployer.go

Lines changed: 116 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -313,9 +313,13 @@ func (d *Deployer) StartServer(hsDep *HomeserverDeployment) error {
313313
}
314314

315315
// Wait for the container to be ready.
316-
baseURL, fedBaseURL, err := waitForPorts(ctx, d.Docker, hsDep.ContainerID)
316+
err = waitForPorts(ctx, d.Docker, hsDep.ContainerID, d.config.HSPortBindingIP)
317317
if err != nil {
318-
return fmt.Errorf("failed to get ports for container %s: %s", hsDep.ContainerID, err)
318+
return fmt.Errorf("failed to wait for ports on container %s: %s", hsDep.ContainerID, err)
319+
}
320+
baseURL, fedBaseURL, err := getHostAccessibleHomeserverURLs(ctx, d.Docker, hsDep.ContainerID, d.config.HSPortBindingIP)
321+
if err != nil {
322+
return fmt.Errorf("failed to get host accessible homeserver URL's from container %s: %s", hsDep.ContainerID, err)
319323
}
320324
hsDep.SetEndpoints(baseURL, fedBaseURL)
321325

@@ -381,7 +385,16 @@ func deployImage(
381385
"complement_hs_name": hsName,
382386
},
383387
}, &container.HostConfig{
384-
CapAdd: []string{"NET_ADMIN"}, // TODO : this should be some sort of option
388+
CapAdd: []string{"NET_ADMIN"}, // TODO : this should be some sort of option
389+
// We use `PublishAllPorts` because although Complement only requires the ports 8008
390+
// and 8448 to be accessible in the image, other custom out-of-repo tests may use
391+
// additional ports that are specific to their own application.
392+
//
393+
// Ideally, we would only bind to `cfg.HSPortBindingIP` but there isn't a way to
394+
// specify the `HostIP` when using `PublishAllPorts`. And although, we could specify
395+
// a manual port mapping, it's not compatible with also having `PublishAllPorts` set
396+
// to true (we run into `address already in use` errors). Binding to all interfaces
397+
// means we're also listening on `cfg.HSPortBindingIP` so it's good enough.
385398
PublishAllPorts: true,
386399
ExtraHosts: extraHosts,
387400
Mounts: mounts,
@@ -441,10 +454,19 @@ func deployImage(
441454
log.Printf("%s: Started container %s", contextStr, containerID)
442455
}
443456

444-
baseURL, fedBaseURL, err := waitForPorts(ctx, docker, containerID)
457+
// Wait for the container to be ready.
458+
err = waitForPorts(ctx, docker, containerID, cfg.HSPortBindingIP)
445459
if err != nil {
446-
return stubDeployment, fmt.Errorf("%s : image %s : %w", contextStr, imageID, err)
460+
return stubDeployment, fmt.Errorf("%s: failed to wait for ports on container %s: %w", contextStr, containerID, err)
461+
}
462+
baseURL, fedBaseURL, err := getHostAccessibleHomeserverURLs(ctx, docker, containerID, cfg.HSPortBindingIP)
463+
if err != nil {
464+
return stubDeployment, fmt.Errorf(
465+
"%s: failed to get host accessible homeserver URL's from container %s: %s",
466+
contextStr, containerID, err,
467+
)
447468
}
469+
448470
inspect, err := docker.ContainerInspect(ctx, containerID)
449471
if err != nil {
450472
return stubDeployment, fmt.Errorf("ContainerInspect: %s", err)
@@ -504,29 +526,104 @@ func copyToContainer(docker *client.Client, containerID, path string, data []byt
504526
return nil
505527
}
506528

507-
// Waits until a homeserver container has NAT ports assigned and returns its clientside API URL and federation API URL.
508-
func waitForPorts(ctx context.Context, docker *client.Client, containerID string) (baseURL string, fedBaseURL string, err error) {
529+
func assertHostnameEqual(inputUrl string, expectedHostname string) error {
530+
parsedUrl, err := url.Parse(inputUrl)
531+
if err != nil {
532+
return fmt.Errorf("failed to parse URL %s: %s", inputUrl, err)
533+
}
534+
if parsedUrl.Hostname() != expectedHostname {
535+
return fmt.Errorf("expected hostname %s in URL %s, got %s", expectedHostname, inputUrl, parsedUrl.Hostname())
536+
}
537+
538+
return nil
539+
}
540+
541+
// getHostAccessibleHomeserverURLs returns URLs that are accessible from the host
542+
// machine (outside the container) for the homeserver's client API and federation API.
543+
func getHostAccessibleHomeserverURLs(ctx context.Context, docker *client.Client, containerID string, hsPortBindingIP string) (baseURL string, fedBaseURL string, err error) {
544+
inspectResponse, err := inspectContainer(ctx, docker, containerID)
545+
if err != nil {
546+
return "", "", fmt.Errorf("failed to inspect ports: %w", err)
547+
}
548+
549+
baseURL, fedBaseURL, err = endpoints(inspectResponse.NetworkSettings.Ports, hsPortBindingIP, 8008, 8448)
550+
551+
// Sanity check that the URLs match the expected configured binding IP. It's
552+
// also important that we use the canonical publicly accessible hostname for the
553+
// homeserver for some situations like SSO/OIDC login where important cookies are set
554+
// for the domain.
555+
err = assertHostnameEqual(baseURL, hsPortBindingIP)
556+
if err != nil {
557+
return "", "", fmt.Errorf("failed to assert baseURL has the correct hostname: %w", err)
558+
}
559+
err = assertHostnameEqual(fedBaseURL, hsPortBindingIP)
560+
if err != nil {
561+
return "", "", fmt.Errorf("failed to assert fedBaseURL has the correct hostname: %w", err)
562+
}
563+
564+
return baseURL, fedBaseURL, nil
565+
}
566+
567+
// waitForPorts waits until a homeserver container has NAT ports assigned (8008, 8448).
568+
func waitForPorts(ctx context.Context, docker *client.Client, containerID string, hsPortBindingIP string) (err error) {
509569
// We need to hammer the inspect endpoint until the ports show up, they don't appear immediately.
510-
var inspect container.InspectResponse
511570
inspectStartTime := time.Now()
512571
for time.Since(inspectStartTime) < time.Second {
513-
inspect, err = docker.ContainerInspect(ctx, containerID)
514-
if err != nil {
515-
return "", "", err
516-
}
517-
if inspect.State != nil && !inspect.State.Running {
518-
// the container exited, bail out with a container ID for logs
519-
return "", "", fmt.Errorf("container is not running, state=%v", inspect.State.Status)
572+
inspectResponse, err := inspectContainer(ctx, docker, containerID)
573+
if inspectionErr, ok := err.(*containerInspectionError); ok && inspectionErr.Fatal {
574+
// If the error is fatal, we should not retry.
575+
return fmt.Errorf("Fatal inspection error: %s", err)
520576
}
521-
baseURL, fedBaseURL, err = endpoints(inspect.NetworkSettings.Ports, 8008, 8448)
522-
if err == nil {
577+
578+
// Check to see if we can see the ports yet
579+
_, csPortErr := findPortBinding(inspectResponse.NetworkSettings.Ports, hsPortBindingIP, 8008)
580+
_, ssPortErr := findPortBinding(inspectResponse.NetworkSettings.Ports, hsPortBindingIP, 8448)
581+
if csPortErr == nil && ssPortErr == nil {
523582
break
524583
}
584+
525585
}
526-
return baseURL, fedBaseURL, nil
586+
return nil
587+
}
588+
589+
type containerInspectionError struct {
590+
// Error message
591+
msg string
592+
// Indicates whether the caller should stop retrying to inspect the container because
593+
// it has already exited.
594+
Fatal bool
595+
}
596+
597+
func (e *containerInspectionError) Error() string { return e.msg }
598+
599+
// inspectContainer inspects the container with the given ID and returns response.
600+
//
601+
// On failure, returns a `containerInspectionError` representing the underlying error and indicates
602+
// `err.Fatal: true` if the container is no longer running.
603+
func inspectContainer(
604+
ctx context.Context,
605+
docker *client.Client,
606+
containerID string,
607+
) (inspectResponse container.InspectResponse, err error) {
608+
inspectResponse, err = docker.ContainerInspect(ctx, containerID)
609+
if err != nil {
610+
return container.InspectResponse{}, &containerInspectionError{
611+
msg: err.Error(),
612+
Fatal: false,
613+
}
614+
}
615+
if inspectResponse.State != nil && !inspectResponse.State.Running {
616+
// the container exited, bail out with a container ID for logs
617+
return container.InspectResponse{}, &containerInspectionError{
618+
msg: fmt.Sprintf("container (%s) is not running, state=%v", containerID, inspectResponse.State.Status),
619+
Fatal: true,
620+
}
621+
}
622+
623+
return inspectResponse, nil
527624
}
528625

529-
// Waits until a homeserver deployment is ready to serve requests.
626+
// waitForContainer waits until a homeserver deployment is ready to serve requests.
530627
func waitForContainer(ctx context.Context, docker *client.Client, hsDep *HomeserverDeployment, stopTime time.Time) (iterCount int, lastErr error) {
531628
iterCount = 0
532629

0 commit comments

Comments
 (0)