From 8d0f0d93a6209f7be92f25fcd1d7e501d99ac932 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Wed, 31 Jul 2024 18:02:18 +0100 Subject: [PATCH 01/11] feat(outputs.datadog): Add support for submitting alongside dd-agent --- plugins/outputs/datadog/README.md | 16 +- plugins/outputs/datadog/datadog.go | 57 ++++- plugins/outputs/datadog/datadog_test.go | 314 ++++++++++++++++++++++++ plugins/outputs/datadog/sample.conf | 8 + 4 files changed, 378 insertions(+), 17 deletions(-) diff --git a/plugins/outputs/datadog/README.md b/plugins/outputs/datadog/README.md index f16940c6f701c..d8624dae96643 100644 --- a/plugins/outputs/datadog/README.md +++ b/plugins/outputs/datadog/README.md @@ -36,6 +36,14 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ## Override the default (none) compression used to send data. ## Supports: "zlib", "none" # compression = "none" + + ## Convert counts to rates + ## Use this to be able to submit metrics from telegraf alongside Datadog agent + # should_rate_counts = true + + ## When should_rate_counts is enabled, this overrides the + ## default (10s) rate interval used to divide count metrics by + # rate_interval = 20 ``` ## Metrics @@ -46,11 +54,9 @@ field key with a `.` character. Field values are converted to floating point numbers. Strings and floats that cannot be sent over JSON, namely NaN and Inf, are ignored. -We do not send `Rate` types. Counts are sent as `count`, with an -interval hard-coded to 1. Note that this behavior does *not* play -super-well if running simultaneously with current Datadog agents; they -will attempt to change to `Rate` with `interval=10`. We prefer this -method, however, as it reflects the raw data more accurately. +Enabling the `should_rate_counts` will convert `count` metrics to `rate` +and divide it by the `rate_interval`. This will allow telegraf to run +alongside current Datadog agents. [metrics]: https://docs.datadoghq.com/api/v1/metrics/#submit-metrics [apikey]: https://app.datadoghq.com/account/settings#api diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index 7450e38fd33da..85a8484711406 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -25,11 +25,13 @@ import ( var sampleConfig string type Datadog struct { - Apikey string `toml:"apikey"` - Timeout config.Duration `toml:"timeout"` - URL string `toml:"url"` - Compression string `toml:"compression"` - Log telegraf.Logger `toml:"-"` + Apikey string `toml:"apikey"` + Timeout config.Duration `toml:"timeout"` + URL string `toml:"url"` + Compression string `toml:"compression"` + ShouldRateCounts bool `toml:"should_rate_counts"` + RateInterval int64 `toml:"rate_interval"` + Log telegraf.Logger `toml:"-"` client *http.Client proxy.HTTPProxy @@ -75,8 +77,7 @@ func (d *Datadog) Connect() error { return nil } -func (d *Datadog) Write(metrics []telegraf.Metric) error { - ts := TimeSeries{} +func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) ([]*Metric, int) { tempSeries := []*Metric{} metricCounter := 0 @@ -84,6 +85,7 @@ func (d *Datadog) Write(metrics []telegraf.Metric) error { if dogMs, err := buildMetrics(m); err == nil { metricTags := buildTags(m.TagList()) host, _ := m.GetTag("host") + metricType, _ := m.GetTag("metric_type") if len(dogMs) == 0 { continue @@ -99,9 +101,19 @@ func (d *Datadog) Write(metrics []telegraf.Metric) error { dname = m.Name() + "." + fieldName } var tname string + var interval int64 + interval = 1 switch m.Type() { - case telegraf.Counter: - tname = "count" + case telegraf.Counter, telegraf.Untyped: + if d.ShouldRateCounts && isRateable(metricType, fieldName) { + interval = d.RateInterval + dogM[1] = dogM[1] / float64(interval) + tname = "rate" + } else if m.Type() == telegraf.Counter { + tname = "count" + } else { + tname = "" + } case telegraf.Gauge: tname = "gauge" default: @@ -112,7 +124,7 @@ func (d *Datadog) Write(metrics []telegraf.Metric) error { Tags: metricTags, Host: host, Type: tname, - Interval: 1, + Interval: interval, } metric.Points[0] = dogM tempSeries = append(tempSeries, metric) @@ -122,6 +134,12 @@ func (d *Datadog) Write(metrics []telegraf.Metric) error { d.Log.Infof("Unable to build Metric for %s due to error '%v', skipping", m.Name(), err) } } + return tempSeries, metricCounter +} + +func (d *Datadog) Write(metrics []telegraf.Metric) error { + ts := TimeSeries{} + tempSeries, metricCounter := d.convertToDatadogMetric(metrics) if len(tempSeries) == 0 { return nil @@ -220,6 +238,20 @@ func verifyValue(v interface{}) bool { return true } +func isRateable(metricType string, fieldName string) bool { + switch metricType { + case + "counter": + return true + case + "timing", + "histogram": + return fieldName == "count" + default: + return false + } +} + func (p *Point) setValue(v interface{}) error { switch d := v.(type) { case int64: @@ -246,8 +278,9 @@ func (d *Datadog) Close() error { func init() { outputs.Add("datadog", func() telegraf.Output { return &Datadog{ - URL: datadogAPI, - Compression: "none", + URL: datadogAPI, + Compression: "none", + RateInterval: 10, } }) } diff --git a/plugins/outputs/datadog/datadog_test.go b/plugins/outputs/datadog/datadog_test.go index 2fba3625300c6..2587431dd829d 100644 --- a/plugins/outputs/datadog/datadog_test.go +++ b/plugins/outputs/datadog/datadog_test.go @@ -305,3 +305,317 @@ func TestInfIsSkipped(t *testing.T) { }) require.NoError(t, err) } + +func TestShouldRateCount(t *testing.T) { + d := &Datadog{ + Apikey: "123456", + ShouldRateCounts: true, + RateInterval: 10, + } + + var tests = []struct { + metricsIn []telegraf.Metric + metricsOut []*Metric + }{ + { + []telegraf.Metric{ + testutil.MustMetric( + "count_metric_converted_to_rate", + map[string]string{ + "metric_type": "counter", + }, + map[string]interface{}{ + "value": 100, + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Counter, + ), + }, + []*Metric{ + { + Metric: "count_metric_converted_to_rate", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 10, + }, + }, + Type: "rate", + Tags: []string{ + "metric_type:counter", + }, + Interval: 10, + }, + }, + }, + { + []telegraf.Metric{ + testutil.MustMetric( + "timing_metric", + map[string]string{ + "metric_type": "timing", + }, + map[string]interface{}{ + "count": 1, + "lower": float64(10), + "mean": float64(10), + "median": float64(10), + "stddev": float64(0), + "sum": float64(10), + "upper": float64(10), + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Untyped, + ), + }, + []*Metric{ + { + Metric: "timing_metric.count", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 0.1, + }, + }, + Type: "rate", + Tags: []string{ + "metric_type:timing", + }, + Interval: 10, + }, + { + Metric: "timing_metric.lower", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.mean", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.median", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.stddev", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(0), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.sum", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.upper", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + }, + }, + { + []telegraf.Metric{ + testutil.MustMetric( + "histogram_metric", + map[string]string{ + "metric_type": "histogram", + }, + map[string]interface{}{ + "count": 1, + "lower": float64(10), + "mean": float64(10), + "median": float64(10), + "stddev": float64(0), + "sum": float64(10), + "upper": float64(10), + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Untyped, + ), + }, + []*Metric{ + { + Metric: "histogram_metric.count", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 0.1, + }, + }, + Type: "rate", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 10, + }, + { + Metric: "histogram_metric.lower", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.mean", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.median", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.stddev", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(0), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.sum", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.upper", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + }, + }, + } + + for _, tt := range tests { + actualMetricsOut, _ := d.convertToDatadogMetric(tt.metricsIn) + if len(actualMetricsOut) != len(tt.metricsOut) { + t.Errorf("\nexpected %+v\ngot %+v\n", len(tt.metricsOut), len(actualMetricsOut)) + } + + for i := range tt.metricsOut { + expectedMetric := *tt.metricsOut[i] + if !inSlice(expectedMetric, actualMetricsOut) { + s := "" + for _, m := range actualMetricsOut { + s = fmt.Sprintf("%s\n%v", s, m) + } + t.Errorf("\nmetric not found in slice %+v\nslice %+s\n", expectedMetric, s) + } + } + } +} + +func inSlice(metric Metric, searchSlice []*Metric) bool { + for _, element := range searchSlice { + if reflect.DeepEqual(metric, *element) { + return true + } + } + return false +} diff --git a/plugins/outputs/datadog/sample.conf b/plugins/outputs/datadog/sample.conf index bd933be30696d..09d53ffa03a5e 100644 --- a/plugins/outputs/datadog/sample.conf +++ b/plugins/outputs/datadog/sample.conf @@ -18,3 +18,11 @@ ## Override the default (none) compression used to send data. ## Supports: "zlib", "none" # compression = "none" + + ## Convert counts to rates + ## Use this to be able to submit metrics from telegraf alongside Datadog agent + # should_rate_counts = true + + ## When should_rate_counts is enabled, this overrides the + ## default (10s) rate interval used to divide count metrics by + # rate_interval = 20 From 00aaf14d94ef1f326d45a3234dbd2dd9b076527a Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Tue, 6 Aug 2024 13:40:48 +0100 Subject: [PATCH 02/11] Run tests in their own namespace --- plugins/outputs/datadog/datadog_test.go | 30 +++++++++++++++---------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/plugins/outputs/datadog/datadog_test.go b/plugins/outputs/datadog/datadog_test.go index 2587431dd829d..bc398b6a30dad 100644 --- a/plugins/outputs/datadog/datadog_test.go +++ b/plugins/outputs/datadog/datadog_test.go @@ -314,10 +314,12 @@ func TestShouldRateCount(t *testing.T) { } var tests = []struct { + name string metricsIn []telegraf.Metric metricsOut []*Metric }{ { + "convert counter metrics to rate", []telegraf.Metric{ testutil.MustMetric( "count_metric_converted_to_rate", @@ -349,6 +351,7 @@ func TestShouldRateCount(t *testing.T) { }, }, { + "convert count value in timing metrics to rate", []telegraf.Metric{ testutil.MustMetric( "timing_metric", @@ -470,6 +473,7 @@ func TestShouldRateCount(t *testing.T) { }, }, { + "convert count value in histogram metrics to rate", []telegraf.Metric{ testutil.MustMetric( "histogram_metric", @@ -593,21 +597,23 @@ func TestShouldRateCount(t *testing.T) { } for _, tt := range tests { - actualMetricsOut, _ := d.convertToDatadogMetric(tt.metricsIn) - if len(actualMetricsOut) != len(tt.metricsOut) { - t.Errorf("\nexpected %+v\ngot %+v\n", len(tt.metricsOut), len(actualMetricsOut)) - } + t.Run(tt.name, func(t *testing.T) { + actualMetricsOut, _ := d.convertToDatadogMetric(tt.metricsIn) + if len(actualMetricsOut) != len(tt.metricsOut) { + t.Errorf("\nexpected %+v\ngot %+v\n", len(tt.metricsOut), len(actualMetricsOut)) + } - for i := range tt.metricsOut { - expectedMetric := *tt.metricsOut[i] - if !inSlice(expectedMetric, actualMetricsOut) { - s := "" - for _, m := range actualMetricsOut { - s = fmt.Sprintf("%s\n%v", s, m) + for i := range tt.metricsOut { + expectedMetric := *tt.metricsOut[i] + if !inSlice(expectedMetric, actualMetricsOut) { + s := "" + for _, m := range actualMetricsOut { + s = fmt.Sprintf("%s\n%v", s, m) + } + t.Errorf("\nmetric not found in slice %+v\nslice %+s\n", expectedMetric, s) } - t.Errorf("\nmetric not found in slice %+v\nslice %+s\n", expectedMetric, s) } - } + }) } } From 57ab41e1df53b430ff8039765d1e81ee7f575ff3 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Tue, 6 Aug 2024 13:44:26 +0100 Subject: [PATCH 03/11] Refactor tests --- plugins/outputs/datadog/datadog_test.go | 27 +++---------------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/plugins/outputs/datadog/datadog_test.go b/plugins/outputs/datadog/datadog_test.go index bc398b6a30dad..3be2012abf2f6 100644 --- a/plugins/outputs/datadog/datadog_test.go +++ b/plugins/outputs/datadog/datadog_test.go @@ -598,30 +598,9 @@ func TestShouldRateCount(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - actualMetricsOut, _ := d.convertToDatadogMetric(tt.metricsIn) - if len(actualMetricsOut) != len(tt.metricsOut) { - t.Errorf("\nexpected %+v\ngot %+v\n", len(tt.metricsOut), len(actualMetricsOut)) - } - - for i := range tt.metricsOut { - expectedMetric := *tt.metricsOut[i] - if !inSlice(expectedMetric, actualMetricsOut) { - s := "" - for _, m := range actualMetricsOut { - s = fmt.Sprintf("%s\n%v", s, m) - } - t.Errorf("\nmetric not found in slice %+v\nslice %+s\n", expectedMetric, s) - } - } + actualMetricsOut, actualLen := d.convertToDatadogMetric(tt.metricsIn) + require.Len(t, actualMetricsOut, actualLen) + require.ElementsMatch(t, tt.metricsOut, actualMetricsOut) }) } } - -func inSlice(metric Metric, searchSlice []*Metric) bool { - for _, element := range searchSlice { - if reflect.DeepEqual(metric, *element) { - return true - } - } - return false -} From ab1fa8738b4688c50fa4e06eda848809ff33f357 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Tue, 6 Aug 2024 13:48:17 +0100 Subject: [PATCH 04/11] State requirements of rating counts --- plugins/outputs/datadog/README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/plugins/outputs/datadog/README.md b/plugins/outputs/datadog/README.md index d8624dae96643..c73e5e7119a6e 100644 --- a/plugins/outputs/datadog/README.md +++ b/plugins/outputs/datadog/README.md @@ -39,11 +39,11 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ## Convert counts to rates ## Use this to be able to submit metrics from telegraf alongside Datadog agent - # should_rate_counts = true + # should_rate_counts = false - ## When should_rate_counts is enabled, this overrides the - ## default (10s) rate interval used to divide count metrics by - # rate_interval = 20 + ## Overrides the default rate interval used to divide count metrics by + ## when should_rate_counts is enabled + # rate_interval = 10 ``` ## Metrics @@ -55,8 +55,11 @@ Field values are converted to floating point numbers. Strings and floats that cannot be sent over JSON, namely NaN and Inf, are ignored. Enabling the `should_rate_counts` will convert `count` metrics to `rate` -and divide it by the `rate_interval`. This will allow telegraf to run -alongside current Datadog agents. +and divide it by the `rate_interval` before submitting to Datadog. +This allows telegraf to submit metrics alongside Datadog agents. +Note that this only supports metrics ingested via `inputs.statsd` given +the dependency on the `metric_type` tag it creates. There is only support for +`counter` metrics, and `count` values from `timing` and `histogram` metrics. [metrics]: https://docs.datadoghq.com/api/v1/metrics/#submit-metrics [apikey]: https://app.datadoghq.com/account/settings#api From c80846351828646a971d5763485fb965e6e1784b Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Wed, 7 Aug 2024 10:48:27 +0100 Subject: [PATCH 05/11] Use non-zero rate_interval to enable rate logic --- plugins/outputs/datadog/datadog.go | 11 +- plugins/outputs/datadog/datadog_test.go | 308 +++++++++++++++++++++++- plugins/outputs/datadog/sample.conf | 13 +- 3 files changed, 313 insertions(+), 19 deletions(-) diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index 85a8484711406..34c45c2cd94fd 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -29,7 +29,6 @@ type Datadog struct { Timeout config.Duration `toml:"timeout"` URL string `toml:"url"` Compression string `toml:"compression"` - ShouldRateCounts bool `toml:"should_rate_counts"` RateInterval int64 `toml:"rate_interval"` Log telegraf.Logger `toml:"-"` @@ -85,7 +84,8 @@ func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) ([]*Metric, if dogMs, err := buildMetrics(m); err == nil { metricTags := buildTags(m.TagList()) host, _ := m.GetTag("host") - metricType, _ := m.GetTag("metric_type") + // Retrieve the metric_type tag created by inputs.statsd + statsDMetricType, _ := m.GetTag("metric_type") if len(dogMs) == 0 { continue @@ -105,7 +105,7 @@ func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) ([]*Metric, interval = 1 switch m.Type() { case telegraf.Counter, telegraf.Untyped: - if d.ShouldRateCounts && isRateable(metricType, fieldName) { + if d.RateInterval > 0 && isRateable(statsDMetricType, fieldName) { interval = d.RateInterval dogM[1] = dogM[1] / float64(interval) tname = "rate" @@ -238,8 +238,8 @@ func verifyValue(v interface{}) bool { return true } -func isRateable(metricType string, fieldName string) bool { - switch metricType { +func isRateable(statsDMetricType string, fieldName string) bool { + switch statsDMetricType { case "counter": return true @@ -280,7 +280,6 @@ func init() { return &Datadog{ URL: datadogAPI, Compression: "none", - RateInterval: 10, } }) } diff --git a/plugins/outputs/datadog/datadog_test.go b/plugins/outputs/datadog/datadog_test.go index 3be2012abf2f6..84212aa6a3fc0 100644 --- a/plugins/outputs/datadog/datadog_test.go +++ b/plugins/outputs/datadog/datadog_test.go @@ -306,11 +306,10 @@ func TestInfIsSkipped(t *testing.T) { require.NoError(t, err) } -func TestShouldRateCount(t *testing.T) { +func TestNonZeroRateIntervalConvertsRatesToCount(t *testing.T) { d := &Datadog{ - Apikey: "123456", - ShouldRateCounts: true, - RateInterval: 10, + Apikey: "123456", + RateInterval: 10, } var tests = []struct { @@ -322,7 +321,7 @@ func TestShouldRateCount(t *testing.T) { "convert counter metrics to rate", []telegraf.Metric{ testutil.MustMetric( - "count_metric_converted_to_rate", + "count_metric", map[string]string{ "metric_type": "counter", }, @@ -335,7 +334,7 @@ func TestShouldRateCount(t *testing.T) { }, []*Metric{ { - Metric: "count_metric_converted_to_rate", + Metric: "count_metric", Points: [1]Point{ { float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), @@ -604,3 +603,300 @@ func TestShouldRateCount(t *testing.T) { }) } } + +func TestZeroRateIntervalConvertsRatesToCount(t *testing.T) { + d := &Datadog{ + Apikey: "123456", + } + + var tests = []struct { + name string + metricsIn []telegraf.Metric + metricsOut []*Metric + }{ + { + "does not convert counter metrics to rate", + []telegraf.Metric{ + testutil.MustMetric( + "count_metric", + map[string]string{ + "metric_type": "counter", + }, + map[string]interface{}{ + "value": 100, + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Counter, + ), + }, + []*Metric{ + { + Metric: "count_metric", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 100, + }, + }, + Type: "count", + Tags: []string{ + "metric_type:counter", + }, + Interval: 1, + }, + }, + }, + { + "does not convert count value in timing metrics to rate", + []telegraf.Metric{ + testutil.MustMetric( + "timing_metric", + map[string]string{ + "metric_type": "timing", + }, + map[string]interface{}{ + "count": 1, + "lower": float64(10), + "mean": float64(10), + "median": float64(10), + "stddev": float64(0), + "sum": float64(10), + "upper": float64(10), + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Untyped, + ), + }, + []*Metric{ + { + Metric: "timing_metric.count", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 1, + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.lower", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.mean", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.median", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.stddev", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(0), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.sum", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.upper", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + }, + }, + { + "does not convert count value in histogram metrics to rate", + []telegraf.Metric{ + testutil.MustMetric( + "histogram_metric", + map[string]string{ + "metric_type": "histogram", + }, + map[string]interface{}{ + "count": 1, + "lower": float64(10), + "mean": float64(10), + "median": float64(10), + "stddev": float64(0), + "sum": float64(10), + "upper": float64(10), + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Untyped, + ), + }, + []*Metric{ + { + Metric: "histogram_metric.count", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 1, + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.lower", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.mean", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.median", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.stddev", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(0), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.sum", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.upper", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actualMetricsOut, actualLen := d.convertToDatadogMetric(tt.metricsIn) + require.Len(t, actualMetricsOut, actualLen) + require.ElementsMatch(t, tt.metricsOut, actualMetricsOut) + }) + } +} diff --git a/plugins/outputs/datadog/sample.conf b/plugins/outputs/datadog/sample.conf index 09d53ffa03a5e..af1617e61cd28 100644 --- a/plugins/outputs/datadog/sample.conf +++ b/plugins/outputs/datadog/sample.conf @@ -19,10 +19,9 @@ ## Supports: "zlib", "none" # compression = "none" - ## Convert counts to rates - ## Use this to be able to submit metrics from telegraf alongside Datadog agent - # should_rate_counts = true - - ## When should_rate_counts is enabled, this overrides the - ## default (10s) rate interval used to divide count metrics by - # rate_interval = 20 + ## When non-zero, converts count metrics submitted by inputs.statsd + ## into rate, while dividing the metric value by this number. + ## Note that in order for metrics to be submitted simultaenously alongside + ## a Datadog agent, rate_interval has to match the interval used by the + ## agent - which defaults to 10 + # rate_interval = 0 From d4e1381bd4f5724c2d5c136467a6c9118f269e86 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Wed, 7 Aug 2024 11:53:59 +0100 Subject: [PATCH 06/11] Switch RateInterval to config.Duration --- plugins/outputs/datadog/datadog.go | 19 ++++++++++--------- plugins/outputs/datadog/datadog_test.go | 5 +++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index 34c45c2cd94fd..9ef4b930362bb 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -25,12 +25,12 @@ import ( var sampleConfig string type Datadog struct { - Apikey string `toml:"apikey"` - Timeout config.Duration `toml:"timeout"` - URL string `toml:"url"` - Compression string `toml:"compression"` - RateInterval int64 `toml:"rate_interval"` - Log telegraf.Logger `toml:"-"` + Apikey string `toml:"apikey"` + Timeout config.Duration `toml:"timeout"` + URL string `toml:"url"` + Compression string `toml:"compression"` + RateInterval config.Duration `toml:"rate_interval"` + Log telegraf.Logger `toml:"-"` client *http.Client proxy.HTTPProxy @@ -106,7 +106,8 @@ func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) ([]*Metric, switch m.Type() { case telegraf.Counter, telegraf.Untyped: if d.RateInterval > 0 && isRateable(statsDMetricType, fieldName) { - interval = d.RateInterval + // interval is expected to be in seconds + interval = int64(d.RateInterval / 1000000000) dogM[1] = dogM[1] / float64(interval) tname = "rate" } else if m.Type() == telegraf.Counter { @@ -278,8 +279,8 @@ func (d *Datadog) Close() error { func init() { outputs.Add("datadog", func() telegraf.Output { return &Datadog{ - URL: datadogAPI, - Compression: "none", + URL: datadogAPI, + Compression: "none", } }) } diff --git a/plugins/outputs/datadog/datadog_test.go b/plugins/outputs/datadog/datadog_test.go index 84212aa6a3fc0..8db667d4281ff 100644 --- a/plugins/outputs/datadog/datadog_test.go +++ b/plugins/outputs/datadog/datadog_test.go @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/require" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/config" "github.com/influxdata/telegraf/testutil" ) @@ -309,7 +310,7 @@ func TestInfIsSkipped(t *testing.T) { func TestNonZeroRateIntervalConvertsRatesToCount(t *testing.T) { d := &Datadog{ Apikey: "123456", - RateInterval: 10, + RateInterval: config.Duration(10 * time.Second), } var tests = []struct { @@ -606,7 +607,7 @@ func TestNonZeroRateIntervalConvertsRatesToCount(t *testing.T) { func TestZeroRateIntervalConvertsRatesToCount(t *testing.T) { d := &Datadog{ - Apikey: "123456", + Apikey: "123456", } var tests = []struct { From abe545200fc11fe32725b04ed03ac99312172d87 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Wed, 7 Aug 2024 11:58:00 +0100 Subject: [PATCH 07/11] Update docs --- plugins/outputs/datadog/README.md | 20 ++++++++++---------- plugins/outputs/datadog/sample.conf | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/plugins/outputs/datadog/README.md b/plugins/outputs/datadog/README.md index c73e5e7119a6e..d83e4373f6580 100644 --- a/plugins/outputs/datadog/README.md +++ b/plugins/outputs/datadog/README.md @@ -37,13 +37,12 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ## Supports: "zlib", "none" # compression = "none" - ## Convert counts to rates - ## Use this to be able to submit metrics from telegraf alongside Datadog agent - # should_rate_counts = false - - ## Overrides the default rate interval used to divide count metrics by - ## when should_rate_counts is enabled - # rate_interval = 10 + ## When non-zero, converts count metrics submitted by inputs.statsd + ## into rate, while dividing the metric value by this number. + ## Note that in order for metrics to be submitted simultaenously alongside + ## a Datadog agent, rate_interval has to match the interval used by the + ## agent - which defaults to 10s + # rate_interval = 0s ``` ## Metrics @@ -54,9 +53,10 @@ field key with a `.` character. Field values are converted to floating point numbers. Strings and floats that cannot be sent over JSON, namely NaN and Inf, are ignored. -Enabling the `should_rate_counts` will convert `count` metrics to `rate` -and divide it by the `rate_interval` before submitting to Datadog. -This allows telegraf to submit metrics alongside Datadog agents. +Setting `rate_interval` to non-zero will convert `count` metrics to `rate` +and divide its value by this interval before submitting to Datadog. +This allows Telegraf to submit metrics alongside Datadog agents when their rate +intervals are the same (Datadog defaults to `10s`). Note that this only supports metrics ingested via `inputs.statsd` given the dependency on the `metric_type` tag it creates. There is only support for `counter` metrics, and `count` values from `timing` and `histogram` metrics. diff --git a/plugins/outputs/datadog/sample.conf b/plugins/outputs/datadog/sample.conf index af1617e61cd28..5775b84a4b557 100644 --- a/plugins/outputs/datadog/sample.conf +++ b/plugins/outputs/datadog/sample.conf @@ -23,5 +23,5 @@ ## into rate, while dividing the metric value by this number. ## Note that in order for metrics to be submitted simultaenously alongside ## a Datadog agent, rate_interval has to match the interval used by the - ## agent - which defaults to 10 - # rate_interval = 0 + ## agent - which defaults to 10s + # rate_interval = 0s From 9954a5f7dc05fc4179f9f68ef29157009fce1db6 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Wed, 7 Aug 2024 12:04:04 +0100 Subject: [PATCH 08/11] Remove redundant metricCounter --- plugins/outputs/datadog/datadog.go | 10 ++++------ plugins/outputs/datadog/datadog_test.go | 6 ++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index 9ef4b930362bb..af265128bd677 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -76,9 +76,8 @@ func (d *Datadog) Connect() error { return nil } -func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) ([]*Metric, int) { +func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) ([]*Metric) { tempSeries := []*Metric{} - metricCounter := 0 for _, m := range metrics { if dogMs, err := buildMetrics(m); err == nil { @@ -129,25 +128,24 @@ func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) ([]*Metric, } metric.Points[0] = dogM tempSeries = append(tempSeries, metric) - metricCounter++ } } else { d.Log.Infof("Unable to build Metric for %s due to error '%v', skipping", m.Name(), err) } } - return tempSeries, metricCounter + return tempSeries } func (d *Datadog) Write(metrics []telegraf.Metric) error { ts := TimeSeries{} - tempSeries, metricCounter := d.convertToDatadogMetric(metrics) + tempSeries := d.convertToDatadogMetric(metrics) if len(tempSeries) == 0 { return nil } redactedAPIKey := "****************" - ts.Series = make([]*Metric, metricCounter) + ts.Series = make([]*Metric, len(tempSeries)) copy(ts.Series, tempSeries[0:]) tsBytes, err := json.Marshal(ts) if err != nil { diff --git a/plugins/outputs/datadog/datadog_test.go b/plugins/outputs/datadog/datadog_test.go index 8db667d4281ff..2915783f0ca11 100644 --- a/plugins/outputs/datadog/datadog_test.go +++ b/plugins/outputs/datadog/datadog_test.go @@ -598,8 +598,7 @@ func TestNonZeroRateIntervalConvertsRatesToCount(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - actualMetricsOut, actualLen := d.convertToDatadogMetric(tt.metricsIn) - require.Len(t, actualMetricsOut, actualLen) + actualMetricsOut := d.convertToDatadogMetric(tt.metricsIn) require.ElementsMatch(t, tt.metricsOut, actualMetricsOut) }) } @@ -895,8 +894,7 @@ func TestZeroRateIntervalConvertsRatesToCount(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - actualMetricsOut, actualLen := d.convertToDatadogMetric(tt.metricsIn) - require.Len(t, actualMetricsOut, actualLen) + actualMetricsOut := d.convertToDatadogMetric(tt.metricsIn) require.ElementsMatch(t, tt.metricsOut, actualMetricsOut) }) } From 4e217d1e5b7f467d6c003ce21732acb1d0a41071 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Wed, 7 Aug 2024 13:44:24 +0100 Subject: [PATCH 09/11] Remove redundant metricCounter --- plugins/outputs/datadog/datadog.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index af265128bd677..5449969a03fc5 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -76,7 +76,7 @@ func (d *Datadog) Connect() error { return nil } -func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) ([]*Metric) { +func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) []*Metric { tempSeries := []*Metric{} for _, m := range metrics { From 9bda584fa1cab8df259ed76bd3cee934b5cd88f5 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Wed, 7 Aug 2024 14:08:34 +0100 Subject: [PATCH 10/11] Switch RateInterval to config.Duration --- plugins/outputs/datadog/datadog.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index 5449969a03fc5..cd9914060108c 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -106,8 +106,9 @@ func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) []*Metric { case telegraf.Counter, telegraf.Untyped: if d.RateInterval > 0 && isRateable(statsDMetricType, fieldName) { // interval is expected to be in seconds - interval = int64(d.RateInterval / 1000000000) - dogM[1] = dogM[1] / float64(interval) + rateIntervalSeconds := time.Duration(d.RateInterval).Seconds() + interval = int64(rateIntervalSeconds) + dogM[1] = dogM[1] / float64(rateIntervalSeconds) tname = "rate" } else if m.Type() == telegraf.Counter { tname = "count" From f569e8e87fbc8eaf62d78c908ae06ab5f127ceb5 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Wed, 7 Aug 2024 14:16:40 +0100 Subject: [PATCH 11/11] Switch RateInterval to config.Duration --- plugins/outputs/datadog/datadog.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index cd9914060108c..848ebb3d35651 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -108,7 +108,7 @@ func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) []*Metric { // interval is expected to be in seconds rateIntervalSeconds := time.Duration(d.RateInterval).Seconds() interval = int64(rateIntervalSeconds) - dogM[1] = dogM[1] / float64(rateIntervalSeconds) + dogM[1] = dogM[1] / rateIntervalSeconds tname = "rate" } else if m.Type() == telegraf.Counter { tname = "count"