feat(health): change timeout mechanism
- Healthcheck timeout is no longer fixed to 3 seconds - Healthcheck timeout increases from 2s to 4s, 6s, 8s, 10s - No 1 second wait time between check retries after failure - VPN internal restart may be delayed by a maximum of 10 seconds
This commit is contained in:
@@ -11,55 +11,58 @@ import (
|
||||
func (s *Server) runHealthcheckLoop(ctx context.Context, done chan<- struct{}) {
|
||||
defer close(done)
|
||||
|
||||
timeoutIndex := 0
|
||||
healthcheckTimeouts := []time.Duration{
|
||||
2 * time.Second,
|
||||
4 * time.Second,
|
||||
6 * time.Second,
|
||||
8 * time.Second,
|
||||
// This can be useful when the connection is under stress
|
||||
// See https://github.com/qdm12/gluetun/issues/2270
|
||||
10 * time.Second,
|
||||
}
|
||||
s.vpn.healthyTimer = time.NewTimer(s.vpn.healthyWait)
|
||||
|
||||
for {
|
||||
previousErr := s.handler.getErr()
|
||||
|
||||
const healthcheckTimeout = 3 * time.Second
|
||||
timeout := healthcheckTimeouts[timeoutIndex]
|
||||
healthcheckCtx, healthcheckCancel := context.WithTimeout(
|
||||
ctx, healthcheckTimeout)
|
||||
ctx, timeout)
|
||||
err := s.healthCheck(healthcheckCtx)
|
||||
healthcheckCancel()
|
||||
|
||||
s.handler.setErr(err)
|
||||
|
||||
if previousErr != nil && err == nil {
|
||||
switch {
|
||||
case previousErr != nil && err == nil: // First success
|
||||
s.logger.Info("healthy!")
|
||||
timeoutIndex = 0
|
||||
s.vpn.healthyTimer.Stop()
|
||||
s.vpn.healthyWait = *s.config.VPN.Initial
|
||||
} else if previousErr == nil && err != nil {
|
||||
case previousErr == nil && err != nil: // First failure
|
||||
s.logger.Debug("unhealthy: " + err.Error())
|
||||
s.vpn.healthyTimer.Stop()
|
||||
s.vpn.healthyTimer = time.NewTimer(s.vpn.healthyWait)
|
||||
case previousErr != nil && err != nil: // Nth failure
|
||||
if timeoutIndex < len(healthcheckTimeouts)-1 {
|
||||
timeoutIndex++
|
||||
}
|
||||
|
||||
if err != nil { // try again after 1 second
|
||||
timer := time.NewTimer(time.Second)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
if !timer.Stop() {
|
||||
<-timer.C
|
||||
}
|
||||
return
|
||||
case <-timer.C:
|
||||
case <-s.vpn.healthyTimer.C:
|
||||
timeoutIndex = 0 // retry next with the smallest timeout
|
||||
s.onUnhealthyVPN(ctx)
|
||||
default:
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Success, check again after the success wait duration
|
||||
case previousErr == nil && err == nil: // Nth success
|
||||
timer := time.NewTimer(s.config.SuccessWait)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
if !timer.Stop() {
|
||||
<-timer.C
|
||||
}
|
||||
return
|
||||
case <-timer.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) healthCheck(ctx context.Context) (err error) {
|
||||
|
||||
Reference in New Issue
Block a user