Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion network-agent/cmd/network-agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ func validateService(svc service.Service) error {

func summarizeConfig(cfg service.Config) string {
return fmt.Sprintf(
"eth_name=%q object_dir=%q cidr=%q mvm_inner_ip=%q mvm_mac_addr=%q mvm_gw_dest_ip=%q mvm_gw_mac_addr=%q mvm_mask=%d mvm_mtu=%d tap_init_num=%d state_dir=%q tap_fd_socket_path=%q host_proxy_bind_ip=%q connect_timeout=%s",
"eth_name=%q object_dir=%q cidr=%q mvm_inner_ip=%q mvm_mac_addr=%q mvm_gw_dest_ip=%q mvm_gw_mac_addr=%q mvm_mask=%d mvm_mtu=%d tap_init_num=%d state_dir=%q tap_fd_socket_path=%q host_proxy_bind_ip=%q connect_timeout=%s host_port_min=%d host_port_max=%d",
cfg.EthName,
cfg.ObjectDir,
cfg.CIDR,
Expand All @@ -261,6 +261,8 @@ func summarizeConfig(cfg service.Config) string {
cfg.TapFDSocketPath,
cfg.HostProxyBindIP,
cfg.ConnectTimeout,
cfg.HostPortMin,
cfg.HostPortMax,
)
}

Expand Down
21 changes: 21 additions & 0 deletions network-agent/internal/service/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,17 @@ type Config struct {
// shared-dict op should be sub-millisecond; this is generous on
// purpose so a transient kernel hiccup doesn't fail the push.
CubeEgressPushTimeout time.Duration

// HostPortMin / HostPortMax bound the inclusive range from which
// network-agent allocates host-side ports for sandbox port-mapping
// NAT rules. With the defaults (20000-29999, 10000 ports total) and
// 4 default exposed ports per sandbox, a node caps at ~2500 alive
// sandboxes regardless of cpu/mem quotas. Operators can widen the
// range (e.g. 20000-59999) to push the ceiling closer to the
// underlying mem quota wall. When both are zero the allocator falls
// back to 20000-29999 for backwards compatibility.
HostPortMin uint16
HostPortMax uint16
}

func DefaultConfig() Config {
Expand All @@ -69,6 +80,8 @@ func DefaultConfig() Config {
ConnectTimeout: 5 * time.Second,
CubeEgressAdminURL: "http://127.0.0.1:9090",
CubeEgressPushTimeout: 2 * time.Second,
HostPortMin: defaultPortMin,
HostPortMax: defaultPortMax,
}
}

Expand All @@ -89,6 +102,8 @@ type cubeletNetworkConfig struct {
MvmMtu int `toml:"mvm_mtu"`
CubeEgressAdminURL string `toml:"cube_egress_admin_url"`
CubeEgressPushTimeout string `toml:"cube_egress_push_timeout"`
HostPortMin uint16 `toml:"host_port_min"`
HostPortMax uint16 `toml:"host_port_max"`
}

const cubeletNetworkPluginKey = "io.cubelet.internal.v1.network"
Expand Down Expand Up @@ -154,5 +169,11 @@ func LoadConfigFromCubeletTOML(base Config, path string) (Config, error) {
}
base.CubeEgressPushTimeout = d
}
if networkCfg.HostPortMin != 0 {
base.HostPortMin = networkCfg.HostPortMin
}
if networkCfg.HostPortMax != 0 {
base.HostPortMax = networkCfg.HostPortMax
}
Comment on lines +172 to +177

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The != 0 checks are applied independently to each field. If an operator sets only host_port_min = 50000 in TOML (leaving host_port_max unset → zero), the result is HostPortMin=50000, HostPortMax=29999 (the default survives), which then fails in newPortAllocator with "invalid host port range: min=50000 > max=29999". The error is caught and the service won't start, but the message may confuse an operator who only set one value.

Consider either: (a) requiring both fields to be set when either is present, or (b) improving the error message in newPortAllocator to clearly show that only one value was overridden.

return base, nil
}
2 changes: 1 addition & 1 deletion network-agent/internal/service/local_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func NewLocalService(cfg Config) (Service, error) {
if err != nil {
return nil, err
}
ports, err := newPortAllocator()
ports, err := newPortAllocator(cfg.HostPortMin, cfg.HostPortMax)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that the ephemeral port range (/proc/sys/net/ipv4/ip_local_port_range) is set at line 139 — after the port allocator is constructed here. This means the allocator can never validate its range against the ephemeral pool during construction. If you enhance overlap detection, move the sysctl write above this line so the allocator can read the final ip_local_port_range value at startup.

if err != nil {
return nil, err
}
Expand Down
23 changes: 16 additions & 7 deletions network-agent/internal/service/port_allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ import (
)

const (
portMin uint16 = 20000
portMax uint16 = 29999
defaultPortMin uint16 = 20000
defaultPortMax uint16 = 29999
)

type portAllocator struct {
Expand All @@ -25,19 +25,28 @@ type portAllocator struct {
assigned map[uint16]struct{}
}

func newPortAllocator() (*portAllocator, error) {
func newPortAllocator(min, max uint16) (*portAllocator, error) {
if min == 0 && max == 0 {
min, max = defaultPortMin, defaultPortMax
}
if min > max {
return nil, fmt.Errorf("invalid host port range: min=%d > max=%d", min, max)
}
if min < 1024 {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The min < 1024 check is insufficient to prevent overlap with the system ephemeral port range. The service sets /proc/sys/net/ipv4/ip_local_port_range to 10000-19999 at local_service.go:139, so a configured value like HostPortMin=15000 passes validation but overlaps with the ephemeral pool. This can cause silent intermittent connection failures when the kernel assigns an outbound source port that collides with a sandbox port-mapping.

Consider either:
(a) raising the floor from 1024 to 20000 (matching the ephemeral range's upper bound), or
(b) reading /proc/sys/net/ipv4/ip_local_port_range at construction time and explicitly rejecting any overlap.

return nil, fmt.Errorf("invalid host port range: min=%d must be >= 1024", min)
}

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no validation that the configured range has usable capacity. A configuration of HostPortMin=30000, HostPortMax=30000 (single port) passes both checks but will exhaust on the first allocation if that single port is reserved. This becomes a self-inflicted denial-of-service for sandbox creation — and it happens silently at startup because no error is returned until runtime.

Consider adding a minimum range width check, e.g. if max-min < 16 (or whatever minimum makes sense for your deployment), to fail fast with a clear error during initialization.

alloc := &portAllocator{
min: portMin,
max: portMax,
next: portMin,
min: min,
max: max,
next: min,
assigned: make(map[uint16]struct{}),
}
reservedPorts, err := getReservedPorts()
if err != nil {
return nil, err
}
for _, port := range reservedPorts {
if port < portMin || port > portMax {
if port < min || port > max {
continue
}
alloc.assigned[port] = struct{}{}
Expand Down
Loading