feat(health): HEALTH_RESTART_VPN option

- You should really leave it to `on` ⚠️
- Turn it to `off` if you have trust issues with the healthcheck. Don't then report issues if the connection is dead though.
This commit is contained in:
Quentin McGaw
2025-10-21 15:36:15 +00:00
parent 31a36a9250
commit a4021fedc3
5 changed files with 39 additions and 16 deletions

View File

@@ -165,6 +165,7 @@ ENV VPN_SERVICE_PROVIDER=pia \
HEALTH_SERVER_ADDRESS=127.0.0.1:9999 \
HEALTH_TARGET_ADDRESS=cloudflare.com:443 \
HEALTH_ICMP_TARGET_IP=0.0.0.0 \
HEALTH_RESTART_VPN=on \
# DNS over TLS
DOT=on \
DOT_PROVIDERS=cloudflare \

View File

@@ -32,6 +32,9 @@ type Health struct {
// in the health checker. It can be set to an unspecified address
// such that the VPN server IP is used, which is also the default behavior.
ICMPTargetIP netip.Addr
// RestartVPN indicates whether to restart the VPN connection
// when the healthcheck fails.
RestartVPN *bool
}
func (h Health) Validate() (err error) {
@@ -50,6 +53,7 @@ func (h *Health) copy() (copied Health) {
ReadTimeout: h.ReadTimeout,
TargetAddress: h.TargetAddress,
ICMPTargetIP: h.ICMPTargetIP,
RestartVPN: gosettings.CopyPointer(h.RestartVPN),
}
}
@@ -62,6 +66,7 @@ func (h *Health) OverrideWith(other Health) {
h.ReadTimeout = gosettings.OverrideWithComparable(h.ReadTimeout, other.ReadTimeout)
h.TargetAddress = gosettings.OverrideWithComparable(h.TargetAddress, other.TargetAddress)
h.ICMPTargetIP = gosettings.OverrideWithComparable(h.ICMPTargetIP, other.ICMPTargetIP)
h.RestartVPN = gosettings.OverrideWithPointer(h.RestartVPN, other.RestartVPN)
}
func (h *Health) SetDefaults() {
@@ -72,6 +77,7 @@ func (h *Health) SetDefaults() {
h.ReadTimeout = gosettings.DefaultComparable(h.ReadTimeout, defaultReadTimeout)
h.TargetAddress = gosettings.DefaultComparable(h.TargetAddress, "cloudflare.com:443")
h.ICMPTargetIP = gosettings.DefaultComparable(h.ICMPTargetIP, netip.IPv4Unspecified()) // use the VPN server IP
h.RestartVPN = gosettings.DefaultPointer(h.RestartVPN, true)
}
func (h Health) String() string {
@@ -87,6 +93,7 @@ func (h Health) toLinesNode() (node *gotree.Node) {
icmpTarget = h.ICMPTargetIP.String()
}
node.Appendf("ICMP target IP: %s", icmpTarget)
node.Appendf("Restart VPN on healthcheck failure: %s", gosettings.BoolToYesNo(h.RestartVPN))
return node
}
@@ -98,5 +105,9 @@ func (h *Health) Read(r *reader.Reader) (err error) {
if err != nil {
return err
}
h.RestartVPN, err = r.BoolPtr("HEALTH_RESTART_VPN")
if err != nil {
return err
}
return nil
}

View File

@@ -58,7 +58,8 @@ func Test_Settings_String(t *testing.T) {
├── Health settings:
| ├── Server listening address: 127.0.0.1:9999
| ├── Target address: cloudflare.com:443
| ── ICMP target IP: VPN server IP
| ── ICMP target IP: VPN server IP
| └── Restart VPN on healthcheck failure: yes
├── Shadowsocks server settings:
| └── Enabled: no
├── HTTP proxy settings:

View File

@@ -59,8 +59,9 @@ func (c *Checker) SetConfig(tlsDialAddr string, icmpTarget netip.Addr) {
// and, on success, starts the periodic checks in a separate goroutine:
// - a "small" ICMP echo check every 15 seconds
// - a "full" TCP+TLS check every 5 minutes
// It returns a channel `runError` that receives an error if one of the periodic checks fail.
// It returns a channel `runError` that receives an error (nil or not) when a periodic check is performed.
// It returns an error if the initial TCP+TLS check fails.
// The Checker has to be ultimately stopped by calling [Checker.Stop].
func (c *Checker) Start(ctx context.Context) (runError <-chan error, err error) {
if c.tlsDialAddr == "" || c.icmpTarget.IsUnspecified() {
panic("call Checker.SetConfig with non empty values before Checker.Start")
@@ -101,16 +102,16 @@ func (c *Checker) Start(ctx context.Context) (runError <-chan error, err error)
case <-smallCheckTimer.C:
err := c.smallPeriodicCheck(ctx)
if err != nil {
runErrorCh <- fmt.Errorf("periodic small check: %w", err)
return
err = fmt.Errorf("small periodic check: %w", err)
}
runErrorCh <- err
smallCheckTimer.Reset(smallCheckPeriod)
case <-fullCheckTimer.C:
err := c.fullPeriodicCheck(ctx)
if err != nil {
runErrorCh <- fmt.Errorf("periodic full check: %w", err)
return
err = fmt.Errorf("full periodic check: %w", err)
}
runErrorCh <- err
fullCheckTimer.Reset(fullCheckPeriod)
}
}

View File

@@ -45,9 +45,6 @@ func (l *Loop) onTunnelUp(ctx, loopCtx context.Context, data tunnelUpData) {
l.restartVPN(loopCtx, err)
return
}
defer func() {
_ = l.healthChecker.Stop()
}()
if *l.dnsLooper.GetSettings().DoT.Enabled {
_, _ = l.dnsLooper.ApplyStatus(ctx, constants.Running)
@@ -78,13 +75,25 @@ func (l *Loop) onTunnelUp(ctx, loopCtx context.Context, data tunnelUpData) {
l.logger.Error(err.Error())
}
select {
case <-ctx.Done():
case healthErr := <-healthErrCh:
l.healthServer.SetError(healthErr)
// Note this restart call must be done in a separate goroutine
// from the VPN loop goroutine.
l.restartVPN(loopCtx, healthErr)
for {
select {
case <-ctx.Done():
_ = l.healthChecker.Stop()
return
case healthErr := <-healthErrCh:
l.healthServer.SetError(healthErr)
if healthErr != nil {
if *l.healthSettings.RestartVPN {
// Note this restart call must be done in a separate goroutine
// from the VPN loop goroutine.
_ = l.healthChecker.Stop()
l.restartVPN(loopCtx, healthErr)
return
}
l.logger.Warnf("healthcheck failed: %s", healthErr)
l.logger.Info("👉 See https://github.com/qdm12/gluetun-wiki/blob/main/faq/healthcheck.md")
}
}
}
}