Maint: rework openvpn restart on unhealthy
This commit is contained in:
@@ -275,8 +275,6 @@ func _main(ctx context.Context, buildInfo models.BuildInformation,
|
||||
}
|
||||
} // TODO move inside firewall?
|
||||
|
||||
healthy := make(chan bool)
|
||||
|
||||
// Shutdown settings
|
||||
const defaultShutdownTimeout = 400 * time.Millisecond
|
||||
defaultShutdownOnSuccess := func(goRoutineName string) {
|
||||
@@ -296,7 +294,7 @@ func _main(ctx context.Context, buildInfo models.BuildInformation,
|
||||
otherGroupHandler := goshutdown.NewGroupHandler("other", defaultGroupSettings)
|
||||
|
||||
openvpnLooper := openvpn.NewLooper(allSettings.OpenVPN, nonRootUsername, puid, pgid, allServers,
|
||||
ovpnConf, firewallConf, routingConf, logger, httpClient, os.OpenFile, tunnelReadyCh, healthy)
|
||||
ovpnConf, firewallConf, routingConf, logger, httpClient, os.OpenFile, tunnelReadyCh)
|
||||
openvpnHandler, openvpnCtx, openvpnDone := goshutdown.NewGoRoutineHandler(
|
||||
"openvpn", goshutdown.GoRoutineSettings{Timeout: time.Second})
|
||||
// wait for restartOpenvpn
|
||||
@@ -366,11 +364,12 @@ func _main(ctx context.Context, buildInfo models.BuildInformation,
|
||||
go httpServer.Run(httpServerCtx, httpServerDone)
|
||||
controlGroupHandler.Add(httpServerHandler)
|
||||
|
||||
healthcheckServer := healthcheck.NewServer(constants.HealthcheckAddress,
|
||||
logger.NewChild(logging.Settings{Prefix: "healthcheck: "}))
|
||||
healthLogger := logger.NewChild(logging.Settings{Prefix: "healthcheck: "})
|
||||
healthcheckServer := healthcheck.NewServer(
|
||||
constants.HealthcheckAddress, healthLogger, openvpnLooper)
|
||||
healthServerHandler, healthServerCtx, healthServerDone := goshutdown.NewGoRoutineHandler(
|
||||
"HTTP health server", defaultGoRoutineSettings)
|
||||
go healthcheckServer.Run(healthServerCtx, healthy, healthServerDone)
|
||||
go healthcheckServer.Run(healthServerCtx, healthServerDone)
|
||||
|
||||
const orderShutdownTimeout = 3 * time.Second
|
||||
orderSettings := goshutdown.OrderSettings{
|
||||
|
||||
@@ -9,24 +9,24 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
func (s *server) runHealthcheckLoop(ctx context.Context, healthy chan<- bool, done chan<- struct{}) {
|
||||
func (s *server) runHealthcheckLoop(ctx context.Context, done chan<- struct{}) {
|
||||
defer close(done)
|
||||
|
||||
s.openvpn.healthyTimer = time.NewTimer(defaultOpenvpnHealthyWaitTime)
|
||||
|
||||
for {
|
||||
previousErr := s.handler.getErr()
|
||||
|
||||
err := healthCheck(ctx, s.resolver)
|
||||
s.handler.setErr(err)
|
||||
|
||||
// Notify the healthy channel, or not if it's already full
|
||||
select {
|
||||
case healthy <- err == nil:
|
||||
default:
|
||||
}
|
||||
|
||||
if previousErr != nil && err == nil {
|
||||
s.logger.Info("healthy!")
|
||||
s.openvpn.healthyTimer.Stop()
|
||||
s.openvpn.healthyWaitTime = defaultOpenvpnHealthyWaitTime
|
||||
} else if previousErr == nil && err != nil {
|
||||
s.logger.Info("unhealthy: " + err.Error())
|
||||
s.openvpn.healthyTimer = time.NewTimer(s.openvpn.healthyWaitTime)
|
||||
}
|
||||
|
||||
if err != nil { // try again after 1 second
|
||||
@@ -38,9 +38,12 @@ func (s *server) runHealthcheckLoop(ctx context.Context, healthy chan<- bool, do
|
||||
}
|
||||
return
|
||||
case <-timer.C:
|
||||
case <-s.openvpn.healthyTimer.C:
|
||||
s.onUnhealthyOpenvpn(ctx)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Success, check again in 5 seconds
|
||||
const period = 5 * time.Second
|
||||
timer := time.NewTimer(period)
|
||||
|
||||
internal/healthcheck/openvpn.go
New file, 17 lines
@@ -0,0 +1,17 @@
|
||||
package healthcheck
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/qdm12/gluetun/internal/constants"
|
||||
)
|
||||
|
||||
func (s *server) onUnhealthyOpenvpn(ctx context.Context) {
|
||||
s.logger.Info("program has been unhealthy for " +
|
||||
s.openvpn.healthyWaitTime.String() + ": restarting OpenVPN")
|
||||
_, _ = s.openvpn.looper.ApplyStatus(ctx, constants.Stopped)
|
||||
_, _ = s.openvpn.looper.ApplyStatus(ctx, constants.Running)
|
||||
s.openvpn.healthyWaitTime += openvpnHealthyWaitTimeAdd
|
||||
s.openvpn.healthyTimer = time.NewTimer(s.openvpn.healthyWaitTime)
|
||||
}
|
||||
@@ -7,11 +7,12 @@ import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/qdm12/gluetun/internal/openvpn"
|
||||
"github.com/qdm12/golibs/logging"
|
||||
)
|
||||
|
||||
type Server interface {
|
||||
Run(ctx context.Context, healthy chan<- bool, done chan<- struct{})
|
||||
Run(ctx context.Context, done chan<- struct{})
|
||||
}
|
||||
|
||||
type server struct {
|
||||
@@ -19,22 +20,40 @@ type server struct {
|
||||
logger logging.Logger
|
||||
handler *handler
|
||||
resolver *net.Resolver
|
||||
openvpn openvpnHealth
|
||||
}
|
||||
|
||||
func NewServer(address string, logger logging.Logger) Server {
|
||||
type openvpnHealth struct {
|
||||
looper openvpn.Looper
|
||||
healthyWaitTime time.Duration
|
||||
healthyTimer *time.Timer
|
||||
}
|
||||
|
||||
const (
	// defaultOpenvpnHealthyWaitTime is the initial healthy wait time, and the
	// value it is reset to once the program is healthy again.
	defaultOpenvpnHealthyWaitTime = 6 * time.Second
	// openvpnHealthyWaitTimeAdd is added to the healthy wait time after each
	// OpenVPN restart triggered by the healthcheck.
	openvpnHealthyWaitTimeAdd = 5 * time.Second
)
|
||||
|
||||
func NewServer(address string, logger logging.Logger,
|
||||
openvpnLooper openvpn.Looper) Server {
|
||||
return &server{
|
||||
address: address,
|
||||
logger: logger,
|
||||
handler: newHandler(logger),
|
||||
resolver: net.DefaultResolver,
|
||||
openvpn: openvpnHealth{
|
||||
looper: openvpnLooper,
|
||||
healthyWaitTime: defaultOpenvpnHealthyWaitTime,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *server) Run(ctx context.Context, healthy chan<- bool, done chan<- struct{}) {
|
||||
func (s *server) Run(ctx context.Context, done chan<- struct{}) {
|
||||
defer close(done)
|
||||
s.logger.Debug("here 0")
|
||||
|
||||
loopDone := make(chan struct{})
|
||||
go s.runHealthcheckLoop(ctx, healthy, loopDone)
|
||||
go s.runHealthcheckLoop(ctx, loopDone)
|
||||
|
||||
server := http.Server{
|
||||
Addr: s.address,
|
||||
|
||||
@@ -46,7 +46,6 @@ type looper struct {
|
||||
client *http.Client
|
||||
openFile os.OpenFileFunc
|
||||
tunnelReady chan<- struct{}
|
||||
healthy <-chan bool
|
||||
// Internal channels and values
|
||||
stop <-chan struct{}
|
||||
stopped chan<- struct{}
|
||||
@@ -55,20 +54,18 @@ type looper struct {
|
||||
portForwardSignals chan net.IP
|
||||
userTrigger bool
|
||||
// Internal constant values
|
||||
backoffTime time.Duration
|
||||
healthWaitTime time.Duration
|
||||
backoffTime time.Duration
|
||||
}
|
||||
|
||||
const (
|
||||
defaultBackoffTime = 15 * time.Second
|
||||
defaultHealthWaitTime = 6 * time.Second
|
||||
defaultBackoffTime = 15 * time.Second
|
||||
)
|
||||
|
||||
func NewLooper(settings configuration.OpenVPN,
|
||||
username string, puid, pgid int, allServers models.AllServers,
|
||||
conf Configurator, fw firewall.Configurator, routing routing.Routing,
|
||||
logger logging.ParentLogger, client *http.Client, openFile os.OpenFileFunc,
|
||||
tunnelReady chan<- struct{}, healthy <-chan bool) Looper {
|
||||
tunnelReady chan<- struct{}) Looper {
|
||||
start := make(chan struct{})
|
||||
running := make(chan models.LoopStatus)
|
||||
stop := make(chan struct{})
|
||||
@@ -90,7 +87,6 @@ func NewLooper(settings configuration.OpenVPN,
|
||||
client: client,
|
||||
openFile: openFile,
|
||||
tunnelReady: tunnelReady,
|
||||
healthy: healthy,
|
||||
start: start,
|
||||
running: running,
|
||||
stop: stop,
|
||||
@@ -98,7 +94,6 @@ func NewLooper(settings configuration.OpenVPN,
|
||||
portForwardSignals: make(chan net.IP),
|
||||
userTrigger: true,
|
||||
backoffTime: defaultBackoffTime,
|
||||
healthWaitTime: defaultHealthWaitTime,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,7 +111,7 @@ func (l *looper) signalOrSetStatus(status models.LoopStatus) {
|
||||
}
|
||||
}
|
||||
|
||||
func (l *looper) Run(ctx context.Context, done chan<- struct{}) { //nolint:gocognit
|
||||
func (l *looper) Run(ctx context.Context, done chan<- struct{}) {
|
||||
defer close(done)
|
||||
|
||||
select {
|
||||
@@ -243,25 +238,6 @@ func (l *looper) Run(ctx context.Context, done chan<- struct{}) { //nolint:gocog
|
||||
stayHere = false
|
||||
|
||||
l.state.Unlock()
|
||||
case healthy := <-l.healthy:
|
||||
if healthy {
|
||||
continue
|
||||
}
|
||||
// ensure it stays unhealthy for some time before restarting it
|
||||
healthy = l.waitForHealth(ctx)
|
||||
if healthy || ctx.Err() != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
l.logger.Warn("unhealthy program: restarting openvpn")
|
||||
l.state.SetStatus(constants.Stopping)
|
||||
openvpnCancel()
|
||||
<-waitError
|
||||
close(waitError)
|
||||
closeStreams()
|
||||
<-portForwardDone
|
||||
l.state.SetStatus(constants.Stopped)
|
||||
stayHere = false
|
||||
}
|
||||
}
|
||||
openvpnCancel()
|
||||
@@ -284,35 +260,6 @@ func (l *looper) logAndWait(ctx context.Context, err error) {
|
||||
}
|
||||
}
|
||||
|
||||
// waitForHealth waits for a true healthy signal
|
||||
// after restarting openvpn in order to avoid restarting
|
||||
// openvpn in a loop as it requires a few seconds to connect.
|
||||
func (l *looper) waitForHealth(ctx context.Context) (healthy bool) {
|
||||
l.logger.Info("unhealthy program: waiting %s for it to change to healthy", l.healthWaitTime)
|
||||
timer := time.NewTimer(l.healthWaitTime)
|
||||
l.healthWaitTime *= 2
|
||||
for {
|
||||
select {
|
||||
case healthy = <-l.healthy:
|
||||
if !healthy {
|
||||
break
|
||||
}
|
||||
if !timer.Stop() {
|
||||
<-timer.C
|
||||
}
|
||||
l.healthWaitTime = defaultHealthWaitTime
|
||||
return true
|
||||
case <-timer.C:
|
||||
return false
|
||||
case <-ctx.Done():
|
||||
if !timer.Stop() {
|
||||
<-timer.C
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// portForward is a blocking operation which may or may not be infinite.
|
||||
// You should therefore always call it in a goroutine.
|
||||
func (l *looper) portForward(ctx context.Context,
|
||||
|
||||
Reference in New Issue
Block a user