diff --git a/pkg/eventcache/eventcache.go b/pkg/eventcache/eventcache.go index 1388fe82df3..4d2c31f3dbb 100644 --- a/pkg/eventcache/eventcache.go +++ b/pkg/eventcache/eventcache.go @@ -11,7 +11,6 @@ import ( "github.com/cilium/tetragon/pkg/ktime" "github.com/cilium/tetragon/pkg/metrics/errormetrics" "github.com/cilium/tetragon/pkg/metrics/eventcachemetrics" - "github.com/cilium/tetragon/pkg/metrics/mapmetrics" "github.com/cilium/tetragon/pkg/option" "github.com/cilium/tetragon/pkg/process" "github.com/cilium/tetragon/pkg/reader/node" @@ -167,7 +166,6 @@ func (ec *Cache) loop() { * event anyways. */ ec.handleEvents() - mapmetrics.MapSizeSet("eventcache", 0, float64(len(ec.cache))) case event := <-ec.objsChan: eventcachemetrics.EventCacheCount.Inc() @@ -241,3 +239,7 @@ func New(s *server.Server) *Cache { func Get() *Cache { return cache } + +func (ec *Cache) len() int { + return len(ec.cache) +} diff --git a/pkg/eventcache/metrics.go b/pkg/eventcache/metrics.go new file mode 100644 index 00000000000..bf9edfd0890 --- /dev/null +++ b/pkg/eventcache/metrics.go @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package eventcache + +import ( + "github.com/cilium/tetragon/pkg/metrics/mapmetrics" + "github.com/prometheus/client_golang/prometheus" +) + +// bpfCollector implements prometheus.Collector. It reports the number of entries held in the event cache. 
+type bpfCollector struct{} + +func NewBPFCollector() prometheus.Collector { + return &bpfCollector{} +} + +func (c bpfCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- mapmetrics.MapSize.Desc() +} + +func (c bpfCollector) Collect(ch chan<- prometheus.Metric) { + ec := Get() + if ec != nil { + ch <- mapmetrics.MapSize.MustMetric( + float64(ec.len()), + "eventcache", "0", + ) + } +} diff --git a/pkg/metrics/config/initmetrics.go b/pkg/metrics/config/initmetrics.go index 70d2c8bd3e1..134139ea3af 100644 --- a/pkg/metrics/config/initmetrics.go +++ b/pkg/metrics/config/initmetrics.go @@ -4,6 +4,7 @@ package config import ( + "github.com/cilium/tetragon/pkg/eventcache" "github.com/cilium/tetragon/pkg/grpc/tracing" "github.com/cilium/tetragon/pkg/metrics/errormetrics" "github.com/cilium/tetragon/pkg/metrics/eventcachemetrics" @@ -19,6 +20,7 @@ import ( "github.com/cilium/tetragon/pkg/metrics/syscallmetrics" "github.com/cilium/tetragon/pkg/metrics/watchermetrics" "github.com/cilium/tetragon/pkg/observer" + "github.com/cilium/tetragon/pkg/process" "github.com/cilium/tetragon/pkg/version" grpcmetrics "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" "github.com/prometheus/client_golang/prometheus" @@ -44,6 +46,15 @@ func InitAllMetrics(registry *prometheus.Registry) { tracing.InitMetrics(registry) ratelimitmetrics.InitMetrics(registry) + // register BPF collectors as one unit: they share the map_in_use_gauge + // descriptor, which a registry accepts only once across separate collectors + registry.MustRegister(mapmetrics.NewBPFCollector( + eventcache.NewBPFCollector(), + observer.NewBPFCollector(), + process.NewBPFCollector(), + )) + + // register common third-party collectors registry.MustRegister(collectors.NewGoCollector()) registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) registry.MustRegister(grpcmetrics.NewServerMetrics()) diff --git a/pkg/metrics/mapmetrics/mapmetrics.go b/pkg/metrics/mapmetrics/mapmetrics.go index b6763b66321..6d3d620841f 100644 --- a/pkg/metrics/mapmetrics/mapmetrics.go +++ 
b/pkg/metrics/mapmetrics/mapmetrics.go @@ -4,62 +4,57 @@ package mapmetrics import ( - "fmt" - + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) var ( - MapSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: consts.MetricsNamespace, - Name: "map_in_use_gauge", - Help: "The total number of in-use entries per map.", - ConstLabels: nil, - }, []string{"map", "total"}) - MapDrops = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: consts.MetricsNamespace, Name: "map_drops_total", Help: "The total number of entries dropped per LRU map.", ConstLabels: nil, }, []string{"map"}) - - MapErrors = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: consts.MetricsNamespace, - Name: "map_errors_total", - Help: "The total number of entries dropped per LRU map.", - ConstLabels: nil, - }, []string{"map"}) + MapSize = metrics.NewBPFGauge(prometheus.NewDesc( + prometheus.BuildFQName(consts.MetricsNamespace, "", "map_in_use_gauge"), + "The total number of in-use entries per map.", + []string{"map", "total"}, nil, + )) + MapErrors = metrics.NewBPFCounter(prometheus.NewDesc( + prometheus.BuildFQName(consts.MetricsNamespace, "", "map_errors_total"), + "The total number of errors per map.", + []string{"map"}, nil, + )) ) +// bpfCollector fans Describe and Collect out to a fixed set of collectors, so that +// collectors sharing a descriptor (such as MapSize) can be registered as one unit. +type bpfCollector struct { + collectors []prometheus.Collector +} + +// NewBPFCollector wraps the given collectors into a single prometheus.Collector. +func NewBPFCollector(collectors ...prometheus.Collector) prometheus.Collector { + return &bpfCollector{collectors: collectors} +} + +// Describe forwards the descriptors of every wrapped collector to ch. +func (c *bpfCollector) Describe(ch chan<- *prometheus.Desc) { + for _, sub := range c.collectors { + sub.Describe(ch) + } +} + +// Collect forwards the metrics of every wrapped collector to ch. +func (c *bpfCollector) Collect(ch chan<- prometheus.Metric) { + for _, sub := range c.collectors { + sub.Collect(ch) + } +} + func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(MapSize) registry.MustRegister(MapDrops) - registry.MustRegister(MapErrors) -} - -// Get a new handle on a mapSize metric for a mapName and totalCapacity -func GetMapSize(mapName string, totalCapacity int) prometheus.Gauge { - return MapSize.WithLabelValues(mapName, fmt.Sprint(totalCapacity)) -} - -func GetMapErrors(mapName string) prometheus.Gauge { - return MapErrors.WithLabelValues(mapName) -} - -// Increment a mapSize metric for a mapName and totalCapacity -func MapSizeInc(mapName string, totalCapacity int) { - GetMapSize(mapName, totalCapacity).Inc() -} - -// Set 
a mapSize metric to size for a mapName and totalCapacity -func MapSizeSet(mapName string, totalCapacity int, size float64) { - GetMapSize(mapName, totalCapacity).Set(size) -} - -func MapErrorSet(mapName string, errTotal float64) { - GetMapErrors(mapName).Set(errTotal) } func MapDropInc(mapName string) { diff --git a/pkg/observer/observer.go b/pkg/observer/observer.go index 07fb8002150..90448075803 100644 --- a/pkg/observer/observer.go +++ b/pkg/observer/observer.go @@ -331,8 +331,6 @@ func (k *Observer) UpdateRuntimeConf(mapDir string) error { // Start starts the observer func (k *Observer) Start(ctx context.Context) error { - k.startUpdateMapMetrics() - k.PerfConfig = bpf.DefaultPerfEventConfig() var err error diff --git a/pkg/observer/observer_stats.go b/pkg/observer/observer_stats.go index 31183907bfa..179dcb261c8 100644 --- a/pkg/observer/observer_stats.go +++ b/pkg/observer/observer_stats.go @@ -4,76 +4,76 @@ package observer import ( + "fmt" "path/filepath" - "time" "github.com/cilium/ebpf" "github.com/cilium/tetragon/pkg/metrics/mapmetrics" "github.com/cilium/tetragon/pkg/option" "github.com/cilium/tetragon/pkg/sensors" + "github.com/prometheus/client_golang/prometheus" ) -func updateMapSize(mapLinkStats *ebpf.Map, maxEntries int, name string) { - var values []int64 +// bpfCollector implements prometheus.Collector. It collects metrics directly from BPF maps. 
+type bpfCollector struct{} + +func NewBPFCollector() prometheus.Collector { + return &bpfCollector{} +} + +func (c bpfCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- mapmetrics.MapSize.Desc() + ch <- mapmetrics.MapErrors.Desc() +} + +func (c bpfCollector) Collect(ch chan<- prometheus.Metric) { + for _, m := range sensors.AllMaps { + name := m.Name + pin := filepath.Join(option.Config.MapDir, name) + pinStats := pin + "_stats" + // Skip (rather than abort on) maps that fail to open, and close handles per iteration instead of deferring to function exit. + mapLinkStats, err := ebpf.LoadPinnedMap(pinStats, nil) + if err != nil { + continue + } + mapLink, err := ebpf.LoadPinnedMap(pin, nil) + if err != nil { + mapLinkStats.Close() + continue + } + updateMapSize(ch, mapLinkStats, int(mapLink.MaxEntries()), name) + updateMapErrors(ch, mapLinkStats, name) + mapLink.Close() + mapLinkStats.Close() + } +} + +func updateMapSize(ch chan<- prometheus.Metric, mapLinkStats *ebpf.Map, maxEntries int, name string) { + values := []int64{} if err := mapLinkStats.Lookup(int32(0), &values); err != nil { return } - sum := int64(0) for _, n := range values { sum += n } - - mapmetrics.MapSizeSet(name, maxEntries, float64(sum)) + ch <- mapmetrics.MapSize.MustMetric( + float64(sum), + name, fmt.Sprint(maxEntries), + ) } -func updateMapErrors(mapLinkStats *ebpf.Map, name string) { - var values []int64 +func updateMapErrors(ch chan<- prometheus.Metric, mapLinkStats *ebpf.Map, name string) { + values := []int64{} if err := mapLinkStats.Lookup(int32(1), &values); err != nil { return } - sum := int64(0) for _, n := range values { sum += n } - - mapmetrics.MapErrorSet(name, float64(sum)) -} - -func updateMapMetric(name string) { - pin := filepath.Join(option.Config.MapDir, name) - pinStats := pin + "_stats" - - mapLinkStats, err := ebpf.LoadPinnedMap(pinStats, nil) - if err != nil { - return - } - defer mapLinkStats.Close() - mapLink, err := ebpf.LoadPinnedMap(pin, nil) - if err != nil { - return - } - defer mapLink.Close() - - updateMapSize(mapLinkStats, int(mapLink.MaxEntries()), name) - updateMapErrors(mapLinkStats, 
name) -} - -func (k *Observer) startUpdateMapMetrics() { - update := func() { - for _, m := range sensors.AllMaps { - updateMapMetric(m.Name) - } - } - - ticker := time.NewTicker(30 * time.Second) - go func() { - for { - select { - case <-ticker.C: - update() - } - } - }() + ch <- mapmetrics.MapErrors.MustMetric( + float64(sum), + name, + ) } diff --git a/pkg/process/cache.go b/pkg/process/cache.go index d37cd2a0463..190a052a3ff 100644 --- a/pkg/process/cache.go +++ b/pkg/process/cache.go @@ -16,10 +16,10 @@ import ( ) type Cache struct { - cache *lru.Cache[string, *ProcessInternal] - deleteChan chan *ProcessInternal - stopChan chan bool - metricsStopChan chan bool + cache *lru.Cache[string, *ProcessInternal] + size int + deleteChan chan *ProcessInternal + stopChan chan bool } // garbage collection states @@ -124,7 +124,6 @@ func (pc *Cache) refInc(p *ProcessInternal) { func (pc *Cache) Purge() { pc.stopChan <- true - pc.metricsStopChan <- true } func NewCache( @@ -141,22 +140,8 @@ func NewCache( } pm := &Cache{ cache: lruCache, + size: processCacheSize, } - update := func() { - mapmetrics.MapSizeSet("processLru", processCacheSize, float64(pm.cache.Len())) - } - ticker := time.NewTicker(60 * time.Second) - pm.metricsStopChan = make(chan bool) - go func() { - for { - select { - case <-ticker.C: - update() - case <-pm.metricsStopChan: - return - } - } - }() pm.cacheGarbageCollector() return pm, nil } diff --git a/pkg/process/metrics.go b/pkg/process/metrics.go new file mode 100644 index 00000000000..610d31fd1f6 --- /dev/null +++ b/pkg/process/metrics.go @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package process + +import ( + "fmt" + + "github.com/cilium/tetragon/pkg/metrics/mapmetrics" + "github.com/prometheus/client_golang/prometheus" +) + +// bpfCollector implements prometheus.Collector. It reports the in-use size of the process LRU cache. 
+type bpfCollector struct{} + +func NewBPFCollector() prometheus.Collector { + return new(bpfCollector) +} + +func (c bpfCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- mapmetrics.MapSize.Desc() +} + +func (c bpfCollector) Collect(ch chan<- prometheus.Metric) { + if procCache == nil { + return + } + inUse := float64(procCache.len()) + capacity := fmt.Sprint(procCache.size) + ch <- mapmetrics.MapSize.MustMetric(inUse, "processLru", capacity) +}