From 11338b6382b224a7aa9999fbdccce63a2df7ff90 Mon Sep 17 00:00:00 2001 From: Quentin McGaw Date: Wed, 30 Dec 2020 19:34:11 +0000 Subject: [PATCH] Feature: faster healthcheck, fix #283 --- Dockerfile | 2 +- internal/healthcheck/handler.go | 31 +++++++++++++++++------- internal/healthcheck/health.go | 42 +++++++++++++++++++++++++++++++-- internal/healthcheck/server.go | 27 +++++++++++++++------ 4 files changed, 84 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index abe308db..f98ec011 100644 --- a/Dockerfile +++ b/Dockerfile @@ -118,7 +118,7 @@ ENV VPNSP=pia \ UPDATER_PERIOD=0 ENTRYPOINT ["/entrypoint"] EXPOSE 8000/tcp 8888/tcp 8388/tcp 8388/udp -HEALTHCHECK --interval=10m --timeout=10s --start-period=30s --retries=2 CMD /entrypoint healthcheck +HEALTHCHECK --interval=5s --timeout=5s --start-period=10s --retries=1 CMD /entrypoint healthcheck RUN apk add -q --progress --no-cache --update openvpn ca-certificates iptables ip6tables unbound tzdata && \ rm -rf /var/cache/apk/* /etc/unbound/* /usr/sbin/unbound-* && \ deluser openvpn && \ diff --git a/internal/healthcheck/handler.go b/internal/healthcheck/handler.go index 08a305cf..f8b31bfc 100644 --- a/internal/healthcheck/handler.go +++ b/internal/healthcheck/handler.go @@ -1,21 +1,25 @@ package healthcheck import ( - "net" + "errors" "net/http" + "sync" "github.com/qdm12/golibs/logging" ) type handler struct { - logger logging.Logger - resolver *net.Resolver + logger logging.Logger + healthErr error + healthErrMu sync.RWMutex } -func newHandler(logger logging.Logger, resolver *net.Resolver) http.Handler { +var errHealthcheckNotRunYet = errors.New("healthcheck did not run yet") + +func newHandler(logger logging.Logger) *handler { return &handler{ - logger: logger, - resolver: resolver, + logger: logger, + healthErr: errHealthcheckNotRunYet, } } @@ -24,11 +28,22 @@ func (h *handler) ServeHTTP(responseWriter http.ResponseWriter, request *http.Re http.Error(responseWriter, "method not supported for healthcheck", http.StatusBadRequest) return } - err := healthCheck(request.Context(), h.resolver) - if err != nil { + if err := h.getErr(); err != nil { h.logger.Error(err) http.Error(responseWriter, err.Error(), http.StatusInternalServerError) return } responseWriter.WriteHeader(http.StatusOK) } + +func (h *handler) setErr(err error) { + h.healthErrMu.Lock() + defer h.healthErrMu.Unlock() + h.healthErr = err +} + +func (h *handler) getErr() (err error) { + h.healthErrMu.RLock() + defer h.healthErrMu.RUnlock() + return h.healthErr +} diff --git a/internal/healthcheck/health.go b/internal/healthcheck/health.go index 0fdd08fd..d8a60522 100644 --- a/internal/healthcheck/health.go +++ b/internal/healthcheck/health.go @@ -2,8 +2,46 @@ package healthcheck import ( "context" + "errors" "fmt" "net" + "sync" + "time" +) + +func (s *server) runHealthcheckLoop(ctx context.Context, wg *sync.WaitGroup) { + defer wg.Done() + for { + err := healthCheck(ctx, s.resolver) + s.handler.setErr(err) + if err != nil { // try again after 1 second + timer := time.NewTimer(time.Second) + select { + case <-ctx.Done(): + if !timer.Stop() { + <-timer.C + } + return + case <-timer.C: + } + continue + } + // Success, check again in 10 minutes + const period = 10 * time.Minute + timer := time.NewTimer(period) + select { + case <-ctx.Done(): + if !timer.Stop() { + <-timer.C + } + return + case <-timer.C: + } + } +} + +var ( + errNoIPResolved = errors.New("no IP address resolved") ) func healthCheck(ctx context.Context, resolver *net.Resolver) (err error) { @@ -12,9 +50,9 @@ func healthCheck(ctx context.Context, resolver *net.Resolver) (err error) { ips, err := resolver.LookupIP(ctx, "ip", domainToResolve) switch { case err != nil: - return fmt.Errorf("cannot resolve github.com: %s", err) + return err case len(ips) == 0: - return fmt.Errorf("resolved no IP addresses for %s", domainToResolve) + return fmt.Errorf("%w for %s", errNoIPResolved, domainToResolve) default: return nil } diff --git a/internal/healthcheck/server.go b/internal/healthcheck/server.go index 0d18e6e7..d39af695 100644 --- a/internal/healthcheck/server.go +++ b/internal/healthcheck/server.go @@ -16,26 +16,36 @@ type Server interface { } type server struct { - address string - logger logging.Logger - handler http.Handler + address string + logger logging.Logger + handler *handler + resolver *net.Resolver } func NewServer(address string, logger logging.Logger) Server { + healthcheckLogger := logger.WithPrefix("healthcheck: ") return &server{ - address: address, - logger: logger.WithPrefix("healthcheck: "), - handler: newHandler(logger, net.DefaultResolver), + address: address, + logger: healthcheckLogger, + handler: newHandler(healthcheckLogger), + resolver: net.DefaultResolver, } } func (s *server) Run(ctx context.Context, wg *sync.WaitGroup) { + defer wg.Done() + + internalWg := &sync.WaitGroup{} + internalWg.Add(1) + go s.runHealthcheckLoop(ctx, internalWg) + server := http.Server{ Addr: s.address, Handler: s.handler, } + internalWg.Add(1) go func() { - defer wg.Done() + defer internalWg.Done() <-ctx.Done() s.logger.Warn("context canceled: shutting down server") defer s.logger.Warn("server shut down") @@ -46,9 +56,12 @@ func (s *server) Run(ctx context.Context, wg *sync.WaitGroup) { s.logger.Error("failed shutting down: %s", err) } }() + s.logger.Info("listening on %s", s.address) err := server.ListenAndServe() if err != nil && !errors.Is(ctx.Err(), context.Canceled) { s.logger.Error(err) } + + internalWg.Wait() }