diff --git a/CHANGELOG.md b/CHANGELOG.md index d9b4196686..e1f86b2a66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ * [FEATURE] Querier: Support for configuring query optimizers and enabling XFunctions in the Thanos engine. #6873 * [FEATURE] Query Frontend: Add support /api/v1/format_query API for formatting queries. #6893 * [FEATURE] Query Frontend: Add support for /api/v1/parse_query API (experimental) to parse a PromQL expression and return it as a JSON-formatted AST (abstract syntax tree). #6978 +* [ENHANCEMENT] Overrides Exporter: Expose all fields that can be converted to float64. #6979 * [ENHANCEMENT] Ingester: Add `cortex_ingester_tsdb_wal_replay_unknown_refs_total` and `cortex_ingester_tsdb_wbl_replay_unknown_refs_total` metrics to track unknown series references during wal/wbl replaying. #6945 * [ENHANCEMENT] Ruler: Emit an error message when the rule synchronization fails. #6902 * [ENHANCEMENT] Querier: Support snappy and zstd response compression for `-querier.response-compression` flag. #6848 diff --git a/pkg/util/validation/exporter.go b/pkg/util/validation/exporter.go index a0a4d07193..69f258113f 100644 --- a/pkg/util/validation/exporter.go +++ b/pkg/util/validation/exporter.go @@ -1,6 +1,10 @@ package validation import ( + "reflect" + "strings" + "time" + "github.com/prometheus/client_golang/prometheus" ) @@ -31,12 +35,56 @@ func (oe *OverridesExporter) Describe(ch chan<- *prometheus.Desc) { func (oe *OverridesExporter) Collect(ch chan<- prometheus.Metric) { allLimits := oe.tenantLimits.AllByUserID() for tenant, limits := range allLimits { - ch <- prometheus.MustNewConstMetric(oe.description, prometheus.GaugeValue, limits.IngestionRate, "ingestion_rate", tenant) - ch <- prometheus.MustNewConstMetric(oe.description, prometheus.GaugeValue, float64(limits.IngestionBurstSize), "ingestion_burst_size", tenant) + for metricName, value := range ExtractNumericalValues(limits) { + ch <- prometheus.MustNewConstMetric(oe.description, prometheus.GaugeValue, value, metricName, tenant) + } + } +} + +func ExtractNumericalValues(l *Limits) map[string]float64 { + metrics := make(map[string]float64) + + v := reflect.ValueOf(l).Elem() + t := v.Type() + + for i := 0; i < v.NumField(); i++ { + field := v.Field(i) + fieldType := t.Field(i) + + tag := fieldType.Tag.Get("yaml") + if tag == "" || tag == "-" { + // not exist tag or tag is "-" + continue + } + + // remove options like omitempty + if idx := strings.Index(tag, ","); idx != -1 { + tag = tag[:idx] + } - ch <- prometheus.MustNewConstMetric(oe.description, prometheus.GaugeValue, float64(limits.MaxLocalSeriesPerUser), "max_local_series_per_user", tenant) - ch <- prometheus.MustNewConstMetric(oe.description, prometheus.GaugeValue, float64(limits.MaxLocalSeriesPerMetric), "max_local_series_per_metric", tenant) - ch <- prometheus.MustNewConstMetric(oe.description, prometheus.GaugeValue, float64(limits.MaxGlobalSeriesPerUser), "max_global_series_per_user", tenant) - ch <- prometheus.MustNewConstMetric(oe.description, prometheus.GaugeValue, float64(limits.MaxGlobalSeriesPerMetric), "max_global_series_per_metric", tenant) + switch field.Kind() { + case reflect.Int, reflect.Int64: + if field.Type().String() == "model.Duration" { + // we export the model.Duration in seconds + metrics[tag] = time.Duration(field.Int()).Seconds() + } else { + metrics[tag] = float64(field.Int()) + } + case reflect.Uint, reflect.Uint64: + metrics[tag] = float64(field.Uint()) + case reflect.Float64: + metrics[tag] = field.Float() + case reflect.Bool: + if field.Bool() { + // true as 1.0 + metrics[tag] = 1.0 + } else { + // false as 0.0 + metrics[tag] = 0.0 + } + case reflect.String, reflect.Slice, reflect.Map, reflect.Struct: + continue + } } + return metrics } diff --git a/pkg/util/validation/exporter_test.go b/pkg/util/validation/exporter_test.go index 44d503a80d..3c2ca56e59 100644 --- a/pkg/util/validation/exporter_test.go +++ b/pkg/util/validation/exporter_test.go @@ -1,10 +1,14 @@ package validation import ( + "flag" + "strings" "testing" + "time" "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestOverridesExporter_noConfig(t *testing.T) { @@ -21,10 +25,164 @@ func TestOverridesExporter_withConfig(t *testing.T) { MaxQueriersPerTenant: 5, }, } + fs := flag.NewFlagSet("test", flag.ContinueOnError) + tenantLimits["tenant-a"].RegisterFlags(fs) exporter := NewOverridesExporter(newMockTenantLimits(tenantLimits)) // There should be at least a few metrics generated by receiving an override configuration map count := testutil.CollectAndCount(exporter, "cortex_overrides") assert.Greater(t, count, 0) + require.NoError(t, testutil.CollectAndCompare(exporter, strings.NewReader(` + # HELP cortex_overrides Resource limit overrides applied to tenants + # TYPE cortex_overrides gauge + cortex_overrides{limit_name="accept_ha_samples",user="tenant-a"} 0 + cortex_overrides{limit_name="accept_mixed_ha_samples",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_max_alerts_count",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_max_alerts_size_bytes",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_max_config_size_bytes",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_max_dispatcher_aggregation_groups",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_max_silences_count",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_max_silences_size_bytes",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_max_template_size_bytes",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_max_templates_count",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_notification_rate_limit",user="tenant-a"} 0 + cortex_overrides{limit_name="alertmanager_receivers_firewall_block_private_addresses",user="tenant-a"} 0 + cortex_overrides{limit_name="compactor_blocks_retention_period",user="tenant-a"} 0 + cortex_overrides{limit_name="compactor_partition_index_size_bytes",user="tenant-a"} 6.8719476736e+10 + cortex_overrides{limit_name="compactor_partition_series_count",user="tenant-a"} 0 + cortex_overrides{limit_name="compactor_tenant_shard_size",user="tenant-a"} 0 + cortex_overrides{limit_name="creation_grace_period",user="tenant-a"} 600 + cortex_overrides{limit_name="enable_native_histograms",user="tenant-a"} 0 + cortex_overrides{limit_name="enforce_metadata_metric_name",user="tenant-a"} 1 + cortex_overrides{limit_name="enforce_metric_name",user="tenant-a"} 1 + cortex_overrides{limit_name="ha_max_clusters",user="tenant-a"} 0 + cortex_overrides{limit_name="ingestion_burst_size",user="tenant-a"} 50000 + cortex_overrides{limit_name="ingestion_rate",user="tenant-a"} 25000 + cortex_overrides{limit_name="ingestion_tenant_shard_size",user="tenant-a"} 0 + cortex_overrides{limit_name="max_cache_freshness",user="tenant-a"} 60 + cortex_overrides{limit_name="max_downloaded_bytes_per_request",user="tenant-a"} 0 + cortex_overrides{limit_name="max_exemplars",user="tenant-a"} 0 + cortex_overrides{limit_name="max_fetched_chunk_bytes_per_query",user="tenant-a"} 0 + cortex_overrides{limit_name="max_fetched_chunks_per_query",user="tenant-a"} 2e+06 + cortex_overrides{limit_name="max_fetched_data_bytes_per_query",user="tenant-a"} 0 + cortex_overrides{limit_name="max_fetched_series_per_query",user="tenant-a"} 0 + cortex_overrides{limit_name="max_global_metadata_per_metric",user="tenant-a"} 0 + cortex_overrides{limit_name="max_global_metadata_per_user",user="tenant-a"} 0 + cortex_overrides{limit_name="max_global_native_histogram_series_per_user",user="tenant-a"} 0 + cortex_overrides{limit_name="max_global_series_per_metric",user="tenant-a"} 0 + cortex_overrides{limit_name="max_global_series_per_user",user="tenant-a"} 0 + cortex_overrides{limit_name="max_label_name_length",user="tenant-a"} 1024 + cortex_overrides{limit_name="max_label_names_per_series",user="tenant-a"} 30 + cortex_overrides{limit_name="max_label_value_length",user="tenant-a"} 2048 + cortex_overrides{limit_name="max_labels_size_bytes",user="tenant-a"} 0 + cortex_overrides{limit_name="max_metadata_length",user="tenant-a"} 1024 + cortex_overrides{limit_name="max_metadata_per_metric",user="tenant-a"} 10 + cortex_overrides{limit_name="max_metadata_per_user",user="tenant-a"} 8000 + cortex_overrides{limit_name="max_native_histogram_buckets",user="tenant-a"} 0 + cortex_overrides{limit_name="max_native_histogram_sample_size_bytes",user="tenant-a"} 0 + cortex_overrides{limit_name="max_native_histogram_series_per_user",user="tenant-a"} 0 + cortex_overrides{limit_name="max_outstanding_requests_per_tenant",user="tenant-a"} 100 + cortex_overrides{limit_name="max_queriers_per_tenant",user="tenant-a"} 0 + cortex_overrides{limit_name="max_query_length",user="tenant-a"} 0 + cortex_overrides{limit_name="max_query_lookback",user="tenant-a"} 0 + cortex_overrides{limit_name="max_query_parallelism",user="tenant-a"} 14 + cortex_overrides{limit_name="max_query_response_size",user="tenant-a"} 0 + cortex_overrides{limit_name="max_series_per_metric",user="tenant-a"} 50000 + cortex_overrides{limit_name="max_series_per_user",user="tenant-a"} 5e+06 + cortex_overrides{limit_name="native_histogram_ingestion_burst_size",user="tenant-a"} 0 + cortex_overrides{limit_name="native_histogram_ingestion_rate",user="tenant-a"} 1.7976931348623157e+308 + cortex_overrides{limit_name="out_of_order_time_window",user="tenant-a"} 0 + cortex_overrides{limit_name="parquet_converter_enabled",user="tenant-a"} 0 + cortex_overrides{limit_name="parquet_converter_tenant_shard_size",user="tenant-a"} 0 + cortex_overrides{limit_name="parquet_max_fetched_chunk_bytes",user="tenant-a"} 0 + cortex_overrides{limit_name="parquet_max_fetched_data_bytes",user="tenant-a"} 0 + cortex_overrides{limit_name="parquet_max_fetched_row_count",user="tenant-a"} 0 + cortex_overrides{limit_name="query_partial_data",user="tenant-a"} 0 + cortex_overrides{limit_name="query_vertical_shard_size",user="tenant-a"} 0 + cortex_overrides{limit_name="reject_old_samples",user="tenant-a"} 0 + cortex_overrides{limit_name="reject_old_samples_max_age",user="tenant-a"} 1.2096e+06 + cortex_overrides{limit_name="ruler_evaluation_delay_duration",user="tenant-a"} 0 + cortex_overrides{limit_name="ruler_max_rule_groups_per_tenant",user="tenant-a"} 0 + cortex_overrides{limit_name="ruler_max_rules_per_rule_group",user="tenant-a"} 0 + cortex_overrides{limit_name="ruler_query_offset",user="tenant-a"} 0 + cortex_overrides{limit_name="ruler_tenant_shard_size",user="tenant-a"} 0 + cortex_overrides{limit_name="rules_partial_data",user="tenant-a"} 0 + cortex_overrides{limit_name="store_gateway_tenant_shard_size",user="tenant-a"} 0 + `), "cortex_overrides")) +} + +func TestExtractNumericalValues(t *testing.T) { + limits := &Limits{} + fs := flag.NewFlagSet("test", flag.ContinueOnError) + limits.RegisterFlags(fs) + extracted := ExtractNumericalValues(limits) + t.Run("float64 should be converted", func(t *testing.T) { + require.Equal(t, limits.IngestionRate, extracted["ingestion_rate"]) + }) + t.Run("int should be converted", func(t *testing.T) { + require.Equal(t, float64(limits.IngestionBurstSize), extracted["ingestion_burst_size"]) + }) + t.Run("int64 should be converted", func(t *testing.T) { + require.Equal(t, float64(limits.MaxQueryResponseSize), extracted["max_query_response_size"]) + }) + t.Run("string shouldn't be converted", func(t *testing.T) { + _, ok := extracted["ingestion_rate_strategy"] + require.False(t, ok, "string should be not converted") + }) + t.Run("bool should be converted, default value false converted to 0", func(t *testing.T) { + val, ok := extracted["accept_ha_samples"] + require.True(t, ok) + require.Equal(t, 0.0, val) + }) + t.Run("bool should be converted, default value true converted to 1", func(t *testing.T) { + val, ok := extracted["enforce_metric_name"] + require.True(t, ok) + require.Equal(t, 1.0, val) + }) + t.Run("flagext.StringSlice shouldn't be converted", func(t *testing.T) { + _, ok := extracted["drop_labels"] + require.False(t, ok) + }) + t.Run("model.Duration should be converted", func(t *testing.T) { + val, ok := extracted["reject_old_samples_max_age"] + require.True(t, ok) + require.Equal(t, time.Duration(limits.RejectOldSamplesMaxAge).Seconds(), val) + }) + t.Run("[]*relabel.Config shouldn't be converted", func(t *testing.T) { + _, ok := extracted["metric_relabel_configs"] + require.False(t, ok) + }) + t.Run("[]string shouldn't be converted", func(t *testing.T) { + _, ok := extracted["promote_resource_attributes"] + require.False(t, ok) + }) + t.Run("[]LimitsPerLabelSet shouldn't be converted", func(t *testing.T) { + _, ok := extracted["limits_per_label_set"] + require.False(t, ok) + }) + t.Run("QueryPriority shouldn't be converted", func(t *testing.T) { + _, ok := extracted["query_priority"] + require.False(t, ok) + }) + t.Run("QueryRejection shouldn't be converted", func(t *testing.T) { + _, ok := extracted["query_rejection"] + require.False(t, ok) + }) + t.Run("labels.Labels shouldn't be converted", func(t *testing.T) { + _, ok := extracted["ruler_external_labels"] + require.False(t, ok) + }) + t.Run("flagext.CIDRSliceCSV shouldn't be converted", func(t *testing.T) { + _, ok := extracted["alertmanager_receivers_firewall_block_cidr_networks"] + require.False(t, ok) + }) + t.Run("NotificationRateLimitMap shouldn't be converted", func(t *testing.T) { + _, ok := extracted["alertmanager_notification_rate_limit_per_integration"] + require.False(t, ok) + }) + t.Run("DisabledRuleGroups shouldn't be converted", func(t *testing.T) { + _, ok := extracted["disabled_rule_groups"] + require.False(t, ok) + }) }