Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions cmd/hubagent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,11 +158,18 @@ func main() {

if opts.EnableWebhook {
whiteListedUsers := strings.Split(opts.WhiteListedUsers, ",")
if err := SetupWebhook(mgr, options.WebhookClientConnectionType(opts.WebhookClientConnectionType), opts.WebhookServiceName, whiteListedUsers,
opts.EnableGuardRail, opts.EnableV1Beta1APIs, opts.DenyModifyMemberClusterLabels, opts.EnableWorkload, opts.NetworkingAgentsEnabled); err != nil {
webhookConfig, err := SetupWebhook(mgr, options.WebhookClientConnectionType(opts.WebhookClientConnectionType), opts.WebhookServiceName, whiteListedUsers,
opts.EnableGuardRail, opts.EnableV1Beta1APIs, opts.DenyModifyMemberClusterLabels, opts.EnableWorkload, opts.NetworkingAgentsEnabled)
if err != nil {
klog.ErrorS(err, "unable to set up webhook")
exitWithErrorFunc()
}
// Add webhook readiness check to ensure the pod is not marked ready until
// webhook configurations have been created in the API server.
if err := mgr.AddReadyzCheck("webhook", webhookConfig.ReadinessChecker()); err != nil {
klog.ErrorS(err, "unable to set up webhook readiness check")
exitWithErrorFunc()
}
}

ctx := ctrl.SetupSignalHandler()
Expand Down Expand Up @@ -201,21 +208,22 @@ func main() {
}

// SetupWebhook generates the webhook cert and then sets up the webhook configurator.
// It returns the webhook Config so callers can register the readiness checker.
// On any failure the error is logged here and returned with a nil Config.
// NOTE(review): isFleetV1Beta1API is accepted but not referenced in the body shown here.
func SetupWebhook(mgr manager.Manager, webhookClientConnectionType options.WebhookClientConnectionType, webhookServiceName string,
whiteListedUsers []string, enableGuardRail, isFleetV1Beta1API bool, denyModifyMemberClusterLabels bool, enableWorkload bool, networkingAgentsEnabled bool) error {
whiteListedUsers []string, enableGuardRail, isFleetV1Beta1API bool, denyModifyMemberClusterLabels bool, enableWorkload bool, networkingAgentsEnabled bool) (*webhook.Config, error) {
// Generate self-signed key and crt files in FleetWebhookCertDir for the webhook server to start.
w, err := webhook.NewWebhookConfig(mgr, webhookServiceName, FleetWebhookPort, &webhookClientConnectionType, FleetWebhookCertDir, enableGuardRail, denyModifyMemberClusterLabels, enableWorkload)
if err != nil {
klog.ErrorS(err, "fail to generate WebhookConfig")
return err
return nil, err
}
// Register the Config with the manager so that it is run as part of the manager's lifecycle.
if err = mgr.Add(w); err != nil {
klog.ErrorS(err, "unable to add WebhookConfig")
return err
return nil, err
}
// Register the webhook handlers themselves with the manager.
if err = webhook.AddToManager(mgr, whiteListedUsers, denyModifyMemberClusterLabels, networkingAgentsEnabled); err != nil {
klog.ErrorS(err, "unable to register webhooks to the manager")
return err
return nil, err
}
return nil
return w, nil
}
18 changes: 18 additions & 0 deletions pkg/webhook/webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ import (
"errors"
"fmt"
"math/big"
"net/http"
"os"
"path/filepath"
"sync/atomic"
"time"

admv1 "k8s.io/api/admissionregistration/v1"
Expand Down Expand Up @@ -162,6 +164,9 @@ type Config struct {

denyModifyMemberClusterLabels bool
enableWorkload bool

// ready is set to true after webhook configurations have been created successfully.
ready atomic.Bool
}

func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32, clientConnectionType *options.WebhookClientConnectionType, certDir string, enableGuardRail bool, denyModifyMemberClusterLabels bool, enableWorkload bool) (*Config, error) {
Expand Down Expand Up @@ -195,9 +200,22 @@ func (w *Config) Start(ctx context.Context) error {
klog.ErrorS(err, "unable to setup webhook configurations in apiserver")
return err
}
w.ready.Store(true)
Copy link
Copy Markdown
Member

@weng271190436 weng271190436 Jan 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this does not solve the issue (#404).

You are observing:

Internal error occurred: failed calling webhook "fleet.membercluster.validating":
failed to call webhook: Post "https://fleetwebhook.fleet-system.svc:9443/validate-cluster.kubernetes-fleet.io-v1beta1-membercluster?timeout=5s":
dial tcp 10.96.27.105:9443: connect: connection refused

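This means that the webhook was already registered and the Kubernetes API server knows about it; otherwise, the API server would not know that there is a webhook "fleet.membercluster.validating" to call. The "connection refused" error indicates that nothing was listening on the webhook port yet, so gating readiness on the creation of the webhook configurations does not close this gap.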

I searched a bit, and it seems that the webhook server's StartedChecker (a built-in health checker) is one way to do this:

https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/webhook#DefaultServer.StartedChecker

wh := mgr.GetWebhookServer()
wh.StartedChecker()

klog.V(2).InfoS("webhook configurations created successfully, marking webhook as ready")
return nil
}

// ReadinessChecker builds a readiness probe function for this Config, in the
// shape expected by a healthz.Checker. The probe ignores the incoming request
// and only consults the ready flag: it yields an error while the flag is
// false and nil once the flag has been set, which happens after the webhook
// configurations have been created successfully.
func (w *Config) ReadinessChecker() func(*http.Request) error {
	probe := func(_ *http.Request) error {
		if w.ready.Load() {
			return nil
		}
		return fmt.Errorf("webhook not ready: configurations not yet created")
	}
	return probe
}

// createFleetWebhookConfiguration creates the ValidatingWebhookConfiguration object for the webhook.
func (w *Config) createFleetWebhookConfiguration(ctx context.Context) error {
if err := w.createMutatingWebhookConfiguration(ctx, w.buildFleetMutatingWebhooks(), fleetMutatingWebhookCfgName); err != nil {
Expand Down
43 changes: 40 additions & 3 deletions pkg/webhook/webhook_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package webhook

import (
"sync"
"testing"

"github.com/google/go-cmp/cmp"
Expand All @@ -14,7 +15,7 @@ import (

func TestBuildFleetMutatingWebhooks(t *testing.T) {
url := options.WebhookClientConnectionType("url")
testCases := map[string]struct {
testCases := map[string]*struct {
config Config
wantLength int
}{
Expand All @@ -41,7 +42,7 @@ func TestBuildFleetMutatingWebhooks(t *testing.T) {

func TestBuildFleetValidatingWebhooks(t *testing.T) {
url := options.WebhookClientConnectionType("url")
testCases := map[string]struct {
testCases := map[string]*struct {
config Config
wantLength int
}{
Expand Down Expand Up @@ -76,7 +77,7 @@ func TestBuildFleetValidatingWebhooks(t *testing.T) {

func TestBuildFleetGuardRailValidatingWebhooks(t *testing.T) {
url := options.WebhookClientConnectionType("url")
testCases := map[string]struct {
testCases := map[string]*struct {
config Config
wantLength int
}{
Expand Down Expand Up @@ -161,3 +162,39 @@ func TestNewWebhookConfig(t *testing.T) {
})
}
}

// TestReadinessChecker verifies that the checker obtained from a zero-value
// Config reports an error until the ready flag is set, and nil afterwards.
func TestReadinessChecker(t *testing.T) {
	cfg := &Config{}
	probe := cfg.ReadinessChecker()

	// A fresh Config has never had its ready flag set.
	err := probe(nil)
	if err == nil {
		t.Fatalf("expected readiness error before ready is set")
	}

	// Flip the flag directly and probe again through the same checker.
	cfg.ready.Store(true)
	err = probe(nil)
	if err != nil {
		t.Fatalf("unexpected readiness error after ready is set: %v", err)
	}
}

// TestReadinessCheckerConcurrent calls the checker from several goroutines
// while the test goroutine sets the ready flag, then asserts that the checker
// reports ready once the flag has been stored.
func TestReadinessCheckerConcurrent(t *testing.T) {
	cfg := &Config{}
	probe := cfg.ReadinessChecker()

	const (
		readers      = 4
		callsPerRead = 1000
	)

	var pending sync.WaitGroup
	pending.Add(readers)
	for r := 0; r < readers; r++ {
		go func() {
			defer pending.Done()
			// The result is irrelevant here; the readers only need to call
			// the checker while the flag may be changing.
			for c := 0; c < callsPerRead; c++ {
				_ = probe(nil)
			}
		}()
	}

	cfg.ready.Store(true)
	err := probe(nil)
	if err != nil {
		t.Fatalf("unexpected readiness error after ready is set: %v", err)
	}
	pending.Wait()
}
4 changes: 4 additions & 0 deletions test/e2e/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ helm install hub-agent ../../charts/hub-agent/ \
--set resourceSnapshotCreationMinimumInterval=$RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL \
--set resourceChangesCollectionDuration=$RESOURCE_CHANGES_COLLECTION_DURATION

# Wait for hub-agent deployment to be ready (includes webhook)
echo "Waiting for hub-agent deployment to be ready..."
kubectl rollout status deployment/hub-agent -n fleet-system --timeout=2m
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you do this with the `helm install` options `--wait` and `--timeout`, so that a separate command is not needed?


# Download CRDs from Fleet networking repo
export ENDPOINT_SLICE_EXPORT_CRD_URL=https://raw.githubusercontent.com/Azure/fleet-networking/v0.2.7/config/crd/bases/networking.fleet.azure.com_endpointsliceexports.yaml
export INTERNAL_SERVICE_EXPORT_CRD_URL=https://raw.githubusercontent.com/Azure/fleet-networking/v0.2.7/config/crd/bases/networking.fleet.azure.com_internalserviceexports.yaml
Expand Down
Loading