Skip to content
This repository has been archived by the owner on Apr 2, 2024. It is now read-only.

A fix for chunks_uncompressed metric #1794

Merged
merged 3 commits into from
Dec 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ We use the following categories for changes:
metrics is expected to change faster than its new collection interval [#1793]

### Fixed

- Fix the query behind chunks_uncompressed; the new definition should
change the baseline value [#1794]

## [0.16.0] - 2022-10-20

Expand Down
8 changes: 4 additions & 4 deletions docs/mixin/alerts/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ groups:
expr: |
(
(
min_over_time(promscale_sql_database_chunks_metrics_uncompressed_count[1h]) > 10
min_over_time(promscale_sql_database_chunks_metrics_uncompressed_count[1h]) > promscale_sql_database_metric_count
)
and
(
Expand All @@ -284,7 +284,7 @@ groups:
or
(
(
min_over_time(promscale_sql_database_chunks_metrics_expired_count[1h]) > 10
min_over_time(promscale_sql_database_chunks_metrics_expired_count[1h]) > promscale_sql_database_metric_count
)
and
(
Expand All @@ -294,7 +294,7 @@ groups:
or
(
(
min_over_time(promscale_sql_database_chunks_traces_uncompressed_count[1h]) > 10
min_over_time(promscale_sql_database_chunks_traces_uncompressed_count[1h]) > promscale_sql_database_metric_count
)
and
(
Expand All @@ -304,7 +304,7 @@ groups:
or
(
(
min_over_time(promscale_sql_database_chunks_traces_expired_count[1h]) > 10
min_over_time(promscale_sql_database_chunks_traces_expired_count[1h]) > promscale_sql_database_metric_count
)
and
(
Expand Down
43 changes: 26 additions & 17 deletions pkg/pgmodel/metrics/database/metrics.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package database

import (
"crypto/rand"
"fmt"
"math/big"
"strings"
"time"

Expand Down Expand Up @@ -47,10 +49,19 @@ type metricQueryPollConfig struct {
}

// updateAtMostEvery builds a poll config that refreshes a metric query at
// most once per interval, with a randomized initial offset (jitter).
func updateAtMostEvery(interval time.Duration) metricQueryPollConfig {
	// If we initialize lastUpdate as 0 or now - interval, then
	// all the heavy queries that we aim to spread out by using this
	// function will hammer the database simultaneously at the start.
	// At the same time delaying them for the full duration of interval
	// might be too much. Hence the jitter: pick a uniform offset in
	// [0, interval/3) so first runs are staggered across instances.
	jitterDelta, err := rand.Int(rand.Reader, big.NewInt(int64(interval)/3))
	if err != nil {
		// crypto/rand should never fail here; treat failure as a
		// programmer/environment bug rather than a recoverable error.
		panic(err)
	}
	return metricQueryPollConfig{
		enabled:  true,
		interval: interval,
		// Backdate lastUpdate by (interval - jitter) so the first poll
		// fires after only the jitter delay, not a full interval.
		lastUpdate: time.Now().Add(-interval + time.Duration(jitterDelta.Int64())),
	}
}

Expand Down Expand Up @@ -149,6 +160,12 @@ var metrics = []metricQueryWrap{
Name: "chunks_metrics_uncompressed_count",
Help: "The number of metrics chunks soon to be compressed by maintenance jobs.",
},
),
customPollConfig: updateAtMostEvery(9 * time.Minute),
query: `SELECT coalesce(sum(jsonb_array_length(chunks_to_compress)), 0)::BIGINT AS uncompressed
FROM _prom_catalog.metric_chunks_that_need_to_be_compressed(INTERVAL '1 hour');`,
}, {
metrics: gauges(
prometheus.GaugeOpts{
Namespace: util.PromNamespace,
Subsystem: "sql_database",
Expand All @@ -157,24 +174,16 @@ var metrics = []metricQueryWrap{
},
),
customPollConfig: updateAtMostEvery(9 * time.Minute),
query: `WITH chunk_candidates AS MATERIALIZED (
SELECT chcons.dimension_slice_id, h.table_name, h.schema_name
FROM _timescaledb_catalog.chunk_constraint chcons
INNER JOIN _timescaledb_catalog.chunk c ON c.id = chcons.chunk_id
INNER JOIN _timescaledb_catalog.hypertable h ON h.id = c.hypertable_id
WHERE c.dropped IS FALSE
AND h.compression_state = 1 -- compression_enabled = TRUE
AND (c.status & 1) != 1 -- only check for uncompressed chunks
)
SELECT
count(*) FILTER(WHERE m.delay_compression_until IS NULL OR m.delay_compression_until < now())::BIGINT AS uncompressed,
count(*) FILTER(WHERE m.delay_compression_until IS NOT NULL AND m.delay_compression_until >= now())::BIGINT AS delayed_compression
FROM chunk_candidates cc
query: `SELECT count(*)::BIGINT AS delayed_compression
FROM _prom_catalog.metric m
INNER JOIN _timescaledb_catalog.chunk c ON (c.schema_name = m.table_schema AND c.table_name = m.table_schema)
INNER JOIN _timescaledb_catalog.chunk_constraint cc ON (cc.chunk_id = c.id)
INNER JOIN _timescaledb_catalog.dimension_slice ds ON ds.id = cc.dimension_slice_id
INNER JOIN _prom_catalog.metric m ON (m.table_name = cc.table_name AND m.table_schema = cc.schema_name)
WHERE NOT m.is_view
AND ds.range_start <= _timescaledb_internal.time_to_internal(now() - interval '1 hour')
AND ds.range_end <= _timescaledb_internal.time_to_internal(now() - interval '1 hour')`,
AND m.delay_compression_until IS NOT NULL
AND m.delay_compression_until >= now()
AND ds.range_start <= _timescaledb_internal.time_to_internal(now() - interval '1 hour')
AND ds.range_end <= _timescaledb_internal.time_to_internal(now() - interval '1 hour')`,
}, {
metrics: gauges(
prometheus.GaugeOpts{
Expand Down