From 47e2c5884f443667e64764f3fc3948f8f11abbb8 Mon Sep 17 00:00:00 2001
From: Đỗ Trọng Hải <41283691+hainenber@users.noreply.github.com>
Date: Thu, 12 Oct 2023 20:00:48 +0700
Subject: [PATCH] fix(promtail): prevent panic due to duplicate metric registration after reload (#10798)

**What this PR does / why we need it**:
Prevent Promtail from panicking after being reloaded.

**Which issue(s) this PR fixes**:
Fixes #10796

**Special notes for your reviewer**:
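
The fix switches `NewWatcherMetrics` from `MustRegister` to `Register` and, when a collector is already registered (as happens after a reload), reuses the existing collector instead of panicking; `PushTarget` additionally gets its own registry. A minimal, self-contained sketch of that register-or-reuse pattern is below; the `registerOrReuse` helper, the `main` harness, and the sample counter are illustrative only, not code from this repository:

```go
package main

import (
	"errors"
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// registerOrReuse registers c with reg. If an equivalent collector is already
// registered (e.g. after a config reload), it returns the existing collector
// instead of panicking the way MustRegister would.
func registerOrReuse[T prometheus.Collector](reg prometheus.Registerer, c T) T {
	if err := reg.Register(c); err != nil {
		are := &prometheus.AlreadyRegisteredError{}
		if errors.As(err, are) {
			return are.ExistingCollector.(T)
		}
	}
	return c
}

func main() {
	reg := prometheus.NewRegistry()

	newCounter := func() *prometheus.CounterVec {
		return prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "example_records_read_total",
			Help: "Example counter for the register-or-reuse sketch.",
		}, []string{"id"})
	}

	// First start: the fresh collector is registered.
	first := registerOrReuse(reg, newCounter())
	// Simulated reload: an identical collector is built again; instead of
	// panicking on the duplicate registration, the original is returned.
	second := registerOrReuse(reg, newCounter())

	fmt.Println(first == second) // true
}
```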
**Checklist**
- [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**)
- [ ] Documentation added
- [ ] Tests updated
- [x] `CHANGELOG.md` updated
- [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label
- [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md`
- [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)

---------

Co-authored-by: Michel Hollands <42814411+MichelHollands@users.noreply.github.com>
---
 CHANGELOG.md                                |  1 +
 .../promtail/targets/lokipush/pushtarget.go |  6 +++
 clients/pkg/promtail/wal/watcher_metrics.go | 50 ++++++++++++++++---
 3 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 985fea97acb6a..2b9fc397ef83f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,6 +50,7 @@
 ##### Fixes

 * [10631](https://github.com/grafana/loki/pull/10631) **thampiotr**: Fix race condition in cleaning up metrics when stopping to tail files.
+* [10798](https://github.com/grafana/loki/pull/10798) **hainenber**: Fix agent panicking after reload due to duplicate metric collector registration.

 #### LogCLI

diff --git a/clients/pkg/promtail/targets/lokipush/pushtarget.go b/clients/pkg/promtail/targets/lokipush/pushtarget.go
index a204227df529b..7bd63e47d6de0 100644
--- a/clients/pkg/promtail/targets/lokipush/pushtarget.go
+++ b/clients/pkg/promtail/targets/lokipush/pushtarget.go
@@ -82,6 +82,12 @@ func (t *PushTarget) run() error {
 	serverCfg := &t.config.Server
 	serverCfg.Log = util_log.InitLogger(serverCfg, prometheus.NewRegistry(), true, false)

+	// Set a new registry for the upcoming metric server.
+	// Otherwise, it will likely panic when the tool gets reloaded.
+	if t.config.Server.Registerer == nil {
+		t.config.Server.Registerer = prometheus.NewRegistry()
+	}
+
 	srv, err := server.New(t.config.Server)
 	if err != nil {
 		return err
diff --git a/clients/pkg/promtail/wal/watcher_metrics.go b/clients/pkg/promtail/wal/watcher_metrics.go
index 34b74786e15b9..7fd15c7af2306 100644
--- a/clients/pkg/promtail/wal/watcher_metrics.go
+++ b/clients/pkg/promtail/wal/watcher_metrics.go
@@ -1,6 +1,10 @@
 package wal

-import "github.com/prometheus/client_golang/prometheus"
+import (
+	"errors"
+
+	"github.com/prometheus/client_golang/prometheus"
+)

 type WatcherMetrics struct {
 	recordsRead               *prometheus.CounterVec
@@ -69,13 +73,45 @@ func NewWatcherMetrics(reg prometheus.Registerer) *WatcherMetrics {
 		),
 	}

+	// Collectors will be re-registered to the registry when Promtail is reloaded.
+	// Reuse the old collectors instead of panicking.
 	if reg != nil {
-		reg.MustRegister(m.recordsRead)
-		reg.MustRegister(m.recordDecodeFails)
-		reg.MustRegister(m.droppedWriteNotifications)
-		reg.MustRegister(m.segmentRead)
-		reg.MustRegister(m.currentSegment)
-		reg.MustRegister(m.watchersRunning)
+		if err := reg.Register(m.recordsRead); err != nil {
+			are := &prometheus.AlreadyRegisteredError{}
+			if errors.As(err, are) {
+				m.recordsRead = are.ExistingCollector.(*prometheus.CounterVec)
+			}
+		}
+		if err := reg.Register(m.recordDecodeFails); err != nil {
+			are := &prometheus.AlreadyRegisteredError{}
+			if errors.As(err, are) {
+				m.recordDecodeFails = are.ExistingCollector.(*prometheus.CounterVec)
+			}
+		}
+		if err := reg.Register(m.droppedWriteNotifications); err != nil {
+			are := &prometheus.AlreadyRegisteredError{}
+			if errors.As(err, are) {
+				m.droppedWriteNotifications = are.ExistingCollector.(*prometheus.CounterVec)
+			}
+		}
+		if err := reg.Register(m.segmentRead); err != nil {
+			are := &prometheus.AlreadyRegisteredError{}
+			if errors.As(err, are) {
+				m.segmentRead = are.ExistingCollector.(*prometheus.CounterVec)
+			}
+		}
+		if err := reg.Register(m.currentSegment); err != nil {
+			are := &prometheus.AlreadyRegisteredError{}
+			if errors.As(err, are) {
+				m.currentSegment = are.ExistingCollector.(*prometheus.GaugeVec)
+			}
+		}
+		if err := reg.Register(m.watchersRunning); err != nil {
+			are := &prometheus.AlreadyRegisteredError{}
+			if errors.As(err, are) {
+				m.watchersRunning = are.ExistingCollector.(*prometheus.GaugeVec)
+			}
+		}
 	}

 	return m