Maint: rework DNS run loop

- Fix fragile user triggered logic
- Simplify state
- Lock loop when crashed
This commit is contained in:
Quentin McGaw (desktop)
2021-07-16 19:00:56 +00:00
parent 0ed738cd61
commit ac3ff095a1
5 changed files with 190 additions and 133 deletions

View File

@@ -453,7 +453,7 @@ func routeReadyEvents(ctx context.Context, done chan<- struct{}, buildInfo model
} }
if unboundLooper.GetSettings().Enabled { if unboundLooper.GetSettings().Enabled {
_, _ = unboundLooper.SetStatus(ctx, constants.Running) _, _ = unboundLooper.ApplyStatus(ctx, constants.Running)
} }
restartTickerCancel() // stop previous restart tickers restartTickerCancel() // stop previous restart tickers

View File

@@ -6,7 +6,6 @@ import (
"errors" "errors"
"net" "net"
"net/http" "net/http"
"sync"
"time" "time"
"github.com/qdm12/dns/pkg/blacklist" "github.com/qdm12/dns/pkg/blacklist"
@@ -24,7 +23,7 @@ type Looper interface {
Run(ctx context.Context, done chan<- struct{}) Run(ctx context.Context, done chan<- struct{})
RunRestartTicker(ctx context.Context, done chan<- struct{}) RunRestartTicker(ctx context.Context, done chan<- struct{})
GetStatus() (status models.LoopStatus) GetStatus() (status models.LoopStatus)
SetStatus(ctx context.Context, status models.LoopStatus) ( ApplyStatus(ctx context.Context, status models.LoopStatus) (
outcome string, err error) outcome string, err error)
GetSettings() (settings configuration.DNS) GetSettings() (settings configuration.DNS)
SetSettings(ctx context.Context, settings configuration.DNS) ( SetSettings(ctx context.Context, settings configuration.DNS) (
@@ -32,17 +31,16 @@ type Looper interface {
} }
type looper struct { type looper struct {
state state state *state
conf unbound.Configurator conf unbound.Configurator
blockBuilder blacklist.Builder blockBuilder blacklist.Builder
client *http.Client client *http.Client
logger logging.Logger logger logging.Logger
loopLock sync.Mutex start <-chan struct{}
start chan struct{} running chan<- models.LoopStatus
running chan models.LoopStatus stop <-chan struct{}
stop chan struct{} stopped chan<- struct{}
stopped chan struct{} updateTicker <-chan struct{}
updateTicker chan struct{}
backoffTime time.Duration backoffTime time.Duration
timeNow func() time.Time timeNow func() time.Time
timeSince func(time.Time) time.Duration timeSince func(time.Time) time.Duration
@@ -53,20 +51,25 @@ const defaultBackoffTime = 10 * time.Second
func NewLooper(conf unbound.Configurator, settings configuration.DNS, client *http.Client, func NewLooper(conf unbound.Configurator, settings configuration.DNS, client *http.Client,
logger logging.Logger, openFile os.OpenFileFunc) Looper { logger logging.Logger, openFile os.OpenFileFunc) Looper {
start := make(chan struct{})
running := make(chan models.LoopStatus)
stop := make(chan struct{})
stopped := make(chan struct{})
updateTicker := make(chan struct{})
state := newState(constants.Stopped, settings, start, running, stop, stopped, updateTicker)
return &looper{ return &looper{
state: state{ state: state,
status: constants.Stopped,
settings: settings,
},
conf: conf, conf: conf,
blockBuilder: blacklist.NewBuilder(client), blockBuilder: blacklist.NewBuilder(client),
client: client, client: client,
logger: logger, logger: logger,
start: make(chan struct{}), start: start,
running: make(chan models.LoopStatus), running: running,
stop: make(chan struct{}), stop: stop,
stopped: make(chan struct{}), stopped: stopped,
updateTicker: make(chan struct{}), updateTicker: updateTicker,
backoffTime: defaultBackoffTime, backoffTime: defaultBackoffTime,
timeNow: time.Now, timeNow: time.Now,
timeSince: time.Since, timeSince: time.Since,
@@ -78,7 +81,7 @@ func (l *looper) logAndWait(ctx context.Context, err error) {
if err != nil { if err != nil {
l.logger.Warn(err) l.logger.Warn(err)
} }
l.logger.Info("attempting restart in %s", l.backoffTime) l.logger.Info("attempting restart in " + l.backoffTime.String())
timer := time.NewTimer(l.backoffTime) timer := time.NewTimer(l.backoffTime)
l.backoffTime *= 2 l.backoffTime *= 2
select { select {
@@ -90,7 +93,16 @@ func (l *looper) logAndWait(ctx context.Context, err error) {
} }
} }
func (l *looper) Run(ctx context.Context, done chan<- struct{}) { //nolint:gocognit func (l *looper) signalOrSetStatus(userTriggered *bool, status models.LoopStatus) {
if *userTriggered {
*userTriggered = false
l.running <- status
} else {
l.state.SetStatus(status)
}
}
func (l *looper) Run(ctx context.Context, done chan<- struct{}) {
defer close(done) defer close(done)
const fallback = false const fallback = false
@@ -103,46 +115,45 @@ func (l *looper) Run(ctx context.Context, done chan<- struct{}) { //nolint:gocog
return return
} }
crashed := false userTriggered := true
l.backoffTime = defaultBackoffTime
for ctx.Err() == nil { for ctx.Err() == nil {
// Upper scope variables for Unbound only // Upper scope variables for Unbound only
// Their values are to be used if DOT=off // Their values are to be used if DOT=off
var waitError chan error waitError := make(chan error)
var unboundCancel context.CancelFunc unboundCancel := func() { waitError <- nil }
var closeStreams func() closeStreams := func() {}
for l.GetSettings().Enabled { for l.GetSettings().Enabled {
if ctx.Err() != nil {
if !crashed {
l.running <- constants.Stopped
}
return
}
var err error var err error
unboundCancel, waitError, closeStreams, err = l.setupUnbound(ctx, crashed) unboundCancel, waitError, closeStreams, err = l.setupUnbound(ctx)
if err == nil {
l.backoffTime = defaultBackoffTime
l.logger.Info("ready")
l.signalOrSetStatus(&userTriggered, constants.Running)
break
}
l.signalOrSetStatus(&userTriggered, constants.Crashed)
if ctx.Err() != nil { if ctx.Err() != nil {
return return
} }
if err != nil {
if !errors.Is(err, errUpdateFiles) { if !errors.Is(err, errUpdateFiles) {
const fallback = true const fallback = true
l.useUnencryptedDNS(fallback) l.useUnencryptedDNS(fallback)
}
l.logAndWait(ctx, err)
continue
} }
break l.logAndWait(ctx, err)
} }
if !l.GetSettings().Enabled { if !l.GetSettings().Enabled {
const fallback = false const fallback = false
l.useUnencryptedDNS(fallback) l.useUnencryptedDNS(fallback)
waitError := make(chan error)
unboundCancel = func() { waitError <- nil }
closeStreams = func() {}
} }
userTriggered = false
stayHere := true stayHere := true
for stayHere { for stayHere {
select { select {
@@ -153,31 +164,36 @@ func (l *looper) Run(ctx context.Context, done chan<- struct{}) { //nolint:gocog
closeStreams() closeStreams()
return return
case <-l.stop: case <-l.stop:
userTriggered = true
l.logger.Info("stopping") l.logger.Info("stopping")
const fallback = false const fallback = false
l.useUnencryptedDNS(fallback) l.useUnencryptedDNS(fallback)
unboundCancel() unboundCancel()
<-waitError <-waitError
// do not close waitError or the waitError
// select case will trigger
closeStreams()
l.stopped <- struct{}{} l.stopped <- struct{}{}
case <-l.start: case <-l.start:
userTriggered = true
l.logger.Info("starting") l.logger.Info("starting")
stayHere = false stayHere = false
case err := <-waitError: // unexpected error case err := <-waitError: // unexpected error
close(waitError)
closeStreams()
l.state.Lock() // prevent SetStatus from running in parallel
unboundCancel() unboundCancel()
if ctx.Err() != nil { l.state.SetStatus(constants.Crashed)
close(waitError)
closeStreams()
return
}
l.state.setStatusWithLock(constants.Crashed)
const fallback = true const fallback = true
l.useUnencryptedDNS(fallback) l.useUnencryptedDNS(fallback)
l.logAndWait(ctx, err) l.logAndWait(ctx, err)
stayHere = false stayHere = false
l.state.Unlock()
} }
} }
close(waitError)
closeStreams()
} }
} }
@@ -186,11 +202,10 @@ var errUpdateFiles = errors.New("cannot update files")
// Returning cancel == nil signals we want to re-run setupUnbound // Returning cancel == nil signals we want to re-run setupUnbound
// Returning err == errUpdateFiles signals we should not fall back // Returning err == errUpdateFiles signals we should not fall back
// on the plaintext DNS as DOT is still up and running. // on the plaintext DNS as DOT is still up and running.
func (l *looper) setupUnbound(ctx context.Context, previousCrashed bool) ( func (l *looper) setupUnbound(ctx context.Context) (
cancel context.CancelFunc, waitError chan error, closeStreams func(), err error) { cancel context.CancelFunc, waitError chan error, closeStreams func(), err error) {
err = l.updateFiles(ctx) err = l.updateFiles(ctx)
if err != nil { if err != nil {
l.state.setStatusWithLock(constants.Crashed)
return nil, nil, nil, errUpdateFiles return nil, nil, nil, errUpdateFiles
} }
@@ -200,14 +215,16 @@ func (l *looper) setupUnbound(ctx context.Context, previousCrashed bool) (
stdoutLines, stderrLines, waitError, err := l.conf.Start(unboundCtx, settings.Unbound.VerbosityDetailsLevel) stdoutLines, stderrLines, waitError, err := l.conf.Start(unboundCtx, settings.Unbound.VerbosityDetailsLevel)
if err != nil { if err != nil {
cancel() cancel()
if !previousCrashed {
l.running <- constants.Crashed
}
return nil, nil, nil, err return nil, nil, nil, err
} }
collectLinesDone := make(chan struct{}) collectLinesDone := make(chan struct{})
go l.collectLines(stdoutLines, stderrLines, collectLinesDone) go l.collectLines(stdoutLines, stderrLines, collectLinesDone)
closeStreams = func() {
close(stdoutLines)
close(stderrLines)
<-collectLinesDone
}
// use Unbound // use Unbound
nameserver.UseDNSInternally(net.IP{127, 0, 0, 1}) nameserver.UseDNSInternally(net.IP{127, 0, 0, 1})
@@ -218,32 +235,13 @@ func (l *looper) setupUnbound(ctx context.Context, previousCrashed bool) (
} }
if err := check.WaitForDNS(ctx, net.DefaultResolver); err != nil { if err := check.WaitForDNS(ctx, net.DefaultResolver); err != nil {
if !previousCrashed {
l.running <- constants.Crashed
}
cancel() cancel()
<-waitError <-waitError
close(waitError) close(waitError)
close(stdoutLines) closeStreams()
close(stderrLines)
<-collectLinesDone
return nil, nil, nil, err return nil, nil, nil, err
} }
l.logger.Info("ready")
if !previousCrashed {
l.running <- constants.Running
} else {
l.backoffTime = defaultBackoffTime
l.state.setStatusWithLock(constants.Running)
}
closeStreams = func() {
close(stdoutLines)
close(stderrLines)
<-collectLinesDone
}
return cancel, waitError, closeStreams, nil return cancel, waitError, closeStreams, nil
} }
@@ -304,15 +302,15 @@ func (l *looper) RunRestartTicker(ctx context.Context, done chan<- struct{}) {
status := l.GetStatus() status := l.GetStatus()
if status == constants.Running { if status == constants.Running {
if err := l.updateFiles(ctx); err != nil { if err := l.updateFiles(ctx); err != nil {
l.state.setStatusWithLock(constants.Crashed) l.state.SetStatus(constants.Crashed)
l.logger.Error(err) l.logger.Error(err)
l.logger.Warn("skipping Unbound restart due to failed files update") l.logger.Warn("skipping Unbound restart due to failed files update")
continue continue
} }
} }
_, _ = l.SetStatus(ctx, constants.Stopped) _, _ = l.ApplyStatus(ctx, constants.Stopped)
_, _ = l.SetStatus(ctx, constants.Running) _, _ = l.ApplyStatus(ctx, constants.Running)
settings := l.GetSettings() settings := l.GetSettings()
timer.Reset(settings.UpdatePeriod) timer.Reset(settings.UpdatePeriod)
@@ -358,3 +356,14 @@ func (l *looper) updateFiles(ctx context.Context) (err error) {
return l.conf.MakeUnboundConf(settings.Unbound) return l.conf.MakeUnboundConf(settings.Unbound)
} }
func (l *looper) GetStatus() (status models.LoopStatus) { return l.state.GetStatus() }
func (l *looper) ApplyStatus(ctx context.Context, status models.LoopStatus) (
outcome string, err error) {
return l.state.ApplyStatus(ctx, status)
}
func (l *looper) GetSettings() (settings configuration.DNS) { return l.state.GetSettings() }
func (l *looper) SetSettings(ctx context.Context, settings configuration.DNS) (
outcome string) {
return l.state.SetSettings(ctx, settings)
}

View File

@@ -12,72 +12,114 @@ import (
"github.com/qdm12/gluetun/internal/models" "github.com/qdm12/gluetun/internal/models"
) )
type state struct { func newState(status models.LoopStatus, settings configuration.DNS,
status models.LoopStatus start chan<- struct{}, running <-chan models.LoopStatus,
settings configuration.DNS stop chan<- struct{}, stopped <-chan struct{},
statusMu sync.RWMutex updateTicker chan<- struct{}) *state {
settingsMu sync.RWMutex return &state{
status: status,
settings: settings,
start: start,
running: running,
stop: stop,
stopped: stopped,
updateTicker: updateTicker,
}
} }
func (s *state) setStatusWithLock(status models.LoopStatus) { type state struct {
loopMu sync.RWMutex
status models.LoopStatus
statusMu sync.RWMutex
settings configuration.DNS
settingsMu sync.RWMutex
start chan<- struct{}
running <-chan models.LoopStatus
stop chan<- struct{}
stopped <-chan struct{}
updateTicker chan<- struct{}
}
func (s *state) Lock() { s.loopMu.Lock() }
func (s *state) Unlock() { s.loopMu.Unlock() }
// SetStatus sets the status thread safely.
// It should only be called by the loop internal code since
// it does not interact with the loop code directly.
func (s *state) SetStatus(status models.LoopStatus) {
s.statusMu.Lock() s.statusMu.Lock()
defer s.statusMu.Unlock() defer s.statusMu.Unlock()
s.status = status s.status = status
} }
func (l *looper) GetStatus() (status models.LoopStatus) { // GetStatus gets the status thread safely.
l.state.statusMu.RLock() func (s *state) GetStatus() (status models.LoopStatus) {
defer l.state.statusMu.RUnlock() s.statusMu.RLock()
return l.state.status defer s.statusMu.RUnlock()
return s.status
} }
var ErrInvalidStatus = errors.New("invalid status") var ErrInvalidStatus = errors.New("invalid status")
func (l *looper) SetStatus(ctx context.Context, status models.LoopStatus) ( // ApplyStatus sends signals to the running loop depending on the
// current status and status requested, such that its next status
// matches the requested one. It is thread safe and a synchronous call
// since it waits to the loop to fully change its status.
func (s *state) ApplyStatus(ctx context.Context, status models.LoopStatus) (
outcome string, err error) { outcome string, err error) {
l.state.statusMu.Lock() // prevent simultaneous loop changes by restricting
defer l.state.statusMu.Unlock() // multiple SetStatus calls to run sequentially.
existingStatus := l.state.status s.loopMu.Lock()
defer s.loopMu.Unlock()
// not a read lock as we want to modify it eventually in
// the code below before any other call.
s.statusMu.Lock()
existingStatus := s.status
switch status { switch status {
case constants.Running: case constants.Running:
switch existingStatus { if existingStatus != constants.Stopped {
case constants.Starting, constants.Running, constants.Stopping, constants.Crashed: // starting, running, stopping, crashed
return fmt.Sprintf("already %s", existingStatus), nil s.statusMu.Unlock()
return "already " + existingStatus.String(), nil
} }
l.loopLock.Lock()
defer l.loopLock.Unlock() s.status = constants.Starting
l.state.status = constants.Starting s.statusMu.Unlock()
l.state.statusMu.Unlock() s.start <- struct{}{}
l.start <- struct{}{}
// Wait for the loop to react to the start signal
newStatus := constants.Starting // for canceled context newStatus := constants.Starting // for canceled context
select { select {
case <-ctx.Done(): case <-ctx.Done():
case newStatus = <-l.running: case newStatus = <-s.running:
} }
s.SetStatus(newStatus)
l.state.statusMu.Lock()
l.state.status = newStatus
return newStatus.String(), nil return newStatus.String(), nil
case constants.Stopped: case constants.Stopped:
switch existingStatus { if existingStatus != constants.Running {
case constants.Starting, constants.Stopping, constants.Stopped, constants.Crashed: return "already " + existingStatus.String(), nil
return fmt.Sprintf("already %s", existingStatus), nil
} }
l.loopLock.Lock()
defer l.loopLock.Unlock()
l.state.status = constants.Stopping
l.state.statusMu.Unlock()
l.stop <- struct{}{}
s.status = constants.Stopping
s.statusMu.Unlock()
s.stop <- struct{}{}
// Wait for the loop to react to the stop signal
newStatus := constants.Stopping // for canceled context newStatus := constants.Stopping // for canceled context
select { select {
case <-ctx.Done(): case <-ctx.Done():
case <-l.stopped: case <-s.stopped:
newStatus = constants.Stopped newStatus = constants.Stopped
} }
l.state.statusMu.Lock() s.SetStatus(newStatus)
l.state.status = newStatus
return status.String(), nil return status.String(), nil
default: default:
return "", fmt.Errorf("%w: %s: it can only be one of: %s, %s", return "", fmt.Errorf("%w: %s: it can only be one of: %s, %s",
@@ -85,32 +127,38 @@ func (l *looper) SetStatus(ctx context.Context, status models.LoopStatus) (
} }
} }
func (l *looper) GetSettings() (settings configuration.DNS) { func (s *state) GetSettings() (settings configuration.DNS) {
l.state.settingsMu.RLock() s.settingsMu.RLock()
defer l.state.settingsMu.RUnlock() defer s.settingsMu.RUnlock()
return l.state.settings return s.settings
} }
func (l *looper) SetSettings(ctx context.Context, settings configuration.DNS) ( func (s *state) SetSettings(ctx context.Context, settings configuration.DNS) (
outcome string) { outcome string) {
l.state.settingsMu.Lock() s.settingsMu.Lock()
settingsUnchanged := reflect.DeepEqual(l.state.settings, settings) defer s.settingsMu.Unlock()
settingsUnchanged := reflect.DeepEqual(s.settings, settings)
if settingsUnchanged { if settingsUnchanged {
l.state.settingsMu.Unlock()
return "settings left unchanged" return "settings left unchanged"
} }
tempSettings := l.state.settings
// Check for only update period change
tempSettings := s.settings
tempSettings.UpdatePeriod = settings.UpdatePeriod tempSettings.UpdatePeriod = settings.UpdatePeriod
onlyUpdatePeriodChanged := reflect.DeepEqual(tempSettings, settings) onlyUpdatePeriodChanged := reflect.DeepEqual(tempSettings, settings)
l.state.settings = settings
l.state.settingsMu.Unlock() s.settings = settings
if onlyUpdatePeriodChanged { if onlyUpdatePeriodChanged {
l.updateTicker <- struct{}{} s.updateTicker <- struct{}{}
return "update period changed" return "update period changed"
} }
_, _ = l.SetStatus(ctx, constants.Stopped)
// Restart
_, _ = s.ApplyStatus(ctx, constants.Stopped)
if settings.Enabled { if settings.Enabled {
outcome, _ = l.SetStatus(ctx, constants.Running) outcome, _ = s.ApplyStatus(ctx, constants.Running)
} }
return outcome return outcome
} }

View File

@@ -65,7 +65,7 @@ func (h *dnsHandler) setStatus(w http.ResponseWriter, r *http.Request) {
http.Error(w, err.Error(), http.StatusBadRequest) http.Error(w, err.Error(), http.StatusBadRequest)
return return
} }
outcome, err := h.looper.SetStatus(h.ctx, status) outcome, err := h.looper.ApplyStatus(h.ctx, status)
if err != nil { if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest) http.Error(w, err.Error(), http.StatusBadRequest)
return return

View File

@@ -47,9 +47,9 @@ func (h *handlerV0) ServeHTTP(w http.ResponseWriter, r *http.Request) {
h.logger.Warn(err) h.logger.Warn(err)
} }
case "/unbound/actions/restart": case "/unbound/actions/restart":
outcome, _ := h.dns.SetStatus(h.ctx, constants.Stopped) outcome, _ := h.dns.ApplyStatus(h.ctx, constants.Stopped)
h.logger.Info("dns: %s", outcome) h.logger.Info("dns: %s", outcome)
outcome, _ = h.dns.SetStatus(h.ctx, constants.Running) outcome, _ = h.dns.ApplyStatus(h.ctx, constants.Running)
h.logger.Info("dns: %s", outcome) h.logger.Info("dns: %s", outcome)
if _, err := w.Write([]byte("dns restarted, please consider using the /v1/ API in the future.")); err != nil { if _, err := w.Write([]byte("dns restarted, please consider using the /v1/ API in the future.")); err != nil {
h.logger.Warn(err) h.logger.Warn(err)