feat(health): change timeout mechanism

- Healthcheck timeout is no longer fixed to 3 seconds
- Healthcheck timeout increases from 2s to 4s, 6s, 8s, 10s
- No 1 second wait time between check retries after failure
- VPN internal restart may be delayed by a maximum of 10 seconds
This commit is contained in:
Quentin McGaw
2024-05-09 19:33:04 +00:00
parent fd4689ee70
commit 6042a9e3c2

View File

@@ -11,53 +11,56 @@ import (
func (s *Server) runHealthcheckLoop(ctx context.Context, done chan<- struct{}) {
defer close(done)
timeoutIndex := 0
healthcheckTimeouts := []time.Duration{
2 * time.Second,
4 * time.Second,
6 * time.Second,
8 * time.Second,
// This can be useful when the connection is under stress
// See https://github.com/qdm12/gluetun/issues/2270
10 * time.Second,
}
s.vpn.healthyTimer = time.NewTimer(s.vpn.healthyWait)
for {
previousErr := s.handler.getErr()
const healthcheckTimeout = 3 * time.Second
timeout := healthcheckTimeouts[timeoutIndex]
healthcheckCtx, healthcheckCancel := context.WithTimeout(
ctx, healthcheckTimeout)
ctx, timeout)
err := s.healthCheck(healthcheckCtx)
healthcheckCancel()
s.handler.setErr(err)
if previousErr != nil && err == nil {
switch {
case previousErr != nil && err == nil: // First success
s.logger.Info("healthy!")
timeoutIndex = 0
s.vpn.healthyTimer.Stop()
s.vpn.healthyWait = *s.config.VPN.Initial
} else if previousErr == nil && err != nil {
case previousErr == nil && err != nil: // First failure
s.logger.Debug("unhealthy: " + err.Error())
s.vpn.healthyTimer.Stop()
s.vpn.healthyTimer = time.NewTimer(s.vpn.healthyWait)
}
if err != nil { // try again after 1 second
timer := time.NewTimer(time.Second)
case previousErr != nil && err != nil: // Nth failure
if timeoutIndex < len(healthcheckTimeouts)-1 {
timeoutIndex++
}
select {
case <-s.vpn.healthyTimer.C:
timeoutIndex = 0 // retry next with the smallest timeout
s.onUnhealthyVPN(ctx)
default:
}
case previousErr == nil && err == nil: // Nth success
timer := time.NewTimer(s.config.SuccessWait)
select {
case <-ctx.Done():
if !timer.Stop() {
<-timer.C
}
return
case <-timer.C:
case <-s.vpn.healthyTimer.C:
s.onUnhealthyVPN(ctx)
}
continue
}
// Success, check again after the success wait duration
timer := time.NewTimer(s.config.SuccessWait)
select {
case <-ctx.Done():
if !timer.Stop() {
<-timer.C
}
return
case <-timer.C:
}
}
}