diff --git a/docs/api/_index.md b/docs/api/_index.md
index 64a6aab3f0..5e04828f52 100644
--- a/docs/api/_index.md
+++ b/docs/api/_index.md
@@ -66,6 +66,9 @@ For the sake of clarity, in this document we have grouped API endpoints by servi
 | [Delete Alertmanager configuration](#delete-alertmanager-configuration) | Alertmanager || `DELETE /api/v1/alerts` |
 | [Tenant delete request](#tenant-delete-request) | Purger || `POST /purger/delete_tenant` |
 | [Tenant delete status](#tenant-delete-status) | Purger || `GET /purger/delete_tenant_status` |
+| [Get user overrides](#get-user-overrides) | Overrides || `GET /api/v1/user-overrides` |
+| [Set user overrides](#set-user-overrides) | Overrides || `PUT /api/v1/user-overrides` |
+| [Delete user overrides](#delete-user-overrides) | Overrides || `DELETE /api/v1/user-overrides` |
 | [Store-gateway ring status](#store-gateway-ring-status) | Store-gateway || `GET /store-gateway/ring` |
 | [Compactor ring status](#compactor-ring-status) | Compactor || `GET /compactor/ring` |
 | [Get rule files](#get-rule-files) | Configs API (deprecated) || `GET /api/prom/configs/rules` |
@@ -872,6 +875,64 @@ Returns status of tenant deletion. Output format to be defined. Experimental.

 _Requires [authentication](#authentication)._

+## Overrides
+
+The Overrides service provides an API for managing user overrides.
+
+### Get user overrides
+
+```
+GET /api/v1/user-overrides
+```
+
+Get the current overrides for the authenticated tenant. Returns the overrides in JSON format.
+
+_Requires [authentication](#authentication)._
+
+### Set user overrides
+
+```
+PUT /api/v1/user-overrides
+```
+
+Set or update overrides for the authenticated tenant. The request body should contain a JSON object with the override values.
+
+_Requires [authentication](#authentication)._
+
+### Delete user overrides
+
+```
+DELETE /api/v1/user-overrides
+```
+
+Delete all overrides for the authenticated tenant. This will revert the tenant to using default values.
+
+_Requires [authentication](#authentication)._
+
+#### Example request body for PUT
+
+```json
+{
+  "ingestion_rate": 50000,
+  "max_global_series_per_user": 1000000,
+  "ruler_max_rules_per_rule_group": 100
+}
+```
+
+#### Supported limits
+
+The following limits can be modified via the API:
+- `max_global_series_per_user`
+- `max_global_series_per_metric`
+- `ingestion_rate`
+- `ingestion_burst_size`
+- `ruler_max_rules_per_rule_group`
+- `ruler_max_rule_groups_per_tenant`
+
+#### Hard limits
+
+Overrides are validated against hard limits defined in the runtime configuration file. If a requested override exceeds the hard limit for the tenant, the request will be rejected with a 400 status code.
+
 ## Store-gateway

 ### Store-gateway ring status
diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
index be3b38f6e6..67ea1dccd9 100644
--- a/docs/configuration/config-file-reference.md
+++ b/docs/configuration/config-file-reference.md
@@ -411,6 +411,296 @@ query_scheduler:
     # CLI flag: -query-scheduler.grpc-client-config.connect-timeout
     [connect_timeout: <duration> | default = 5s]

+overrides:
+  # Path to the runtime configuration file that can be updated via the overrides
+  # API
+  # CLI flag: -overrides.runtime-config-file
+  [runtime_config_file: <string> | default = "runtime.yaml"]
+
+  # Backend storage to use. Supported backends are: s3, gcs, azure, swift,
+  # filesystem.
+  # CLI flag: -overrides.backend
+  [backend: <string> | default = "s3"]
+
+  s3:
+    # The S3 bucket endpoint.
It could be an AWS S3 endpoint listed at + # https://docs.aws.amazon.com/general/latest/gr/s3.html or the address of an + # S3-compatible service in hostname:port format. + # CLI flag: -overrides.s3.endpoint + [endpoint: | default = ""] + + # S3 region. If unset, the client will issue a S3 GetBucketLocation API call + # to autodetect it. + # CLI flag: -overrides.s3.region + [region: | default = ""] + + # S3 bucket name + # CLI flag: -overrides.s3.bucket-name + [bucket_name: | default = ""] + + # If enabled, S3 endpoint will use the non-dualstack variant. + # CLI flag: -overrides.s3.disable-dualstack + [disable_dualstack: | default = false] + + # S3 secret access key + # CLI flag: -overrides.s3.secret-access-key + [secret_access_key: | default = ""] + + # S3 access key ID + # CLI flag: -overrides.s3.access-key-id + [access_key_id: | default = ""] + + # If enabled, use http:// for the S3 endpoint instead of https://. This + # could be useful in local dev/test environments while using an + # S3-compatible backend storage, like Minio. + # CLI flag: -overrides.s3.insecure + [insecure: | default = false] + + # The signature version to use for authenticating against S3. Supported + # values are: v4, v2. + # CLI flag: -overrides.s3.signature-version + [signature_version: | default = "v4"] + + # The s3 bucket lookup style. Supported values are: auto, virtual-hosted, + # path. + # CLI flag: -overrides.s3.bucket-lookup-type + [bucket_lookup_type: | default = "auto"] + + # If true, attach MD5 checksum when upload objects and S3 uses MD5 checksum + # algorithm to verify the provided digest. If false, use CRC32C algorithm + # instead. + # CLI flag: -overrides.s3.send-content-md5 + [send_content_md5: | default = true] + + # The list api version. Supported values are: v1, v2, and ''. + # CLI flag: -overrides.s3.list-objects-version + [list_objects_version: | default = ""] + + # The s3_sse_config configures the S3 server-side encryption. + # The CLI flags prefix for this block config is: overrides + [sse: ] + + http: + # The time an idle connection will remain idle before closing. + # CLI flag: -overrides.s3.http.idle-conn-timeout + [idle_conn_timeout: | default = 1m30s] + + # The amount of time the client will wait for a servers response headers. + # CLI flag: -overrides.s3.http.response-header-timeout + [response_header_timeout: | default = 2m] + + # If the client connects via HTTPS and this option is enabled, the client + # will accept any certificate and hostname. + # CLI flag: -overrides.s3.http.insecure-skip-verify + [insecure_skip_verify: | default = false] + + # Maximum time to wait for a TLS handshake. 0 means no limit. + # CLI flag: -overrides.s3.tls-handshake-timeout + [tls_handshake_timeout: | default = 10s] + + # The time to wait for a server's first response headers after fully + # writing the request headers if the request has an Expect header. 0 to + # send the request body immediately. + # CLI flag: -overrides.s3.expect-continue-timeout + [expect_continue_timeout: | default = 1s] + + # Maximum number of idle (keep-alive) connections across all hosts. 0 + # means no limit. + # CLI flag: -overrides.s3.max-idle-connections + [max_idle_connections: | default = 100] + + # Maximum number of idle (keep-alive) connections to keep per-host. If 0, + # a built-in default value is used. + # CLI flag: -overrides.s3.max-idle-connections-per-host + [max_idle_connections_per_host: | default = 100] + + # Maximum number of connections per host. 0 means no limit. 
+ # CLI flag: -overrides.s3.max-connections-per-host + [max_connections_per_host: | default = 0] + + gcs: + # GCS bucket name + # CLI flag: -overrides.gcs.bucket-name + [bucket_name: | default = ""] + + # JSON representing either a Google Developers Console + # client_credentials.json file or a Google Developers service account key + # file. If empty, fallback to Google default logic. + # CLI flag: -overrides.gcs.service-account + [service_account: | default = ""] + + azure: + # Azure storage account name + # CLI flag: -overrides.azure.account-name + [account_name: | default = ""] + + # Azure storage account key + # CLI flag: -overrides.azure.account-key + [account_key: | default = ""] + + # The values of `account-name` and `endpoint-suffix` values will not be + # ignored if `connection-string` is set. Use this method over `account-key` + # if you need to authenticate via a SAS token or if you use the Azurite + # emulator. + # CLI flag: -overrides.azure.connection-string + [connection_string: | default = ""] + + # Azure storage container name + # CLI flag: -overrides.azure.container-name + [container_name: | default = ""] + + # Azure storage endpoint suffix without schema. The account name will be + # prefixed to this value to create the FQDN + # CLI flag: -overrides.azure.endpoint-suffix + [endpoint_suffix: | default = ""] + + # Number of retries for recoverable errors + # CLI flag: -overrides.azure.max-retries + [max_retries: | default = 20] + + # Deprecated: Azure storage MSI resource. It will be set automatically by + # Azure SDK. + # CLI flag: -overrides.azure.msi-resource + [msi_resource: | default = ""] + + # Azure storage MSI resource managed identity client Id. If not supplied + # default Azure credential will be used. Set it to empty if you need to + # authenticate via Azure Workload Identity. + # CLI flag: -overrides.azure.user-assigned-id + [user_assigned_id: | default = ""] + + http: + # The time an idle connection will remain idle before closing. + # CLI flag: -overrides.azure.http.idle-conn-timeout + [idle_conn_timeout: | default = 1m30s] + + # The amount of time the client will wait for a servers response headers. + # CLI flag: -overrides.azure.http.response-header-timeout + [response_header_timeout: | default = 2m] + + # If the client connects via HTTPS and this option is enabled, the client + # will accept any certificate and hostname. + # CLI flag: -overrides.azure.http.insecure-skip-verify + [insecure_skip_verify: | default = false] + + # Maximum time to wait for a TLS handshake. 0 means no limit. + # CLI flag: -overrides.azure.tls-handshake-timeout + [tls_handshake_timeout: | default = 10s] + + # The time to wait for a server's first response headers after fully + # writing the request headers if the request has an Expect header. 0 to + # send the request body immediately. + # CLI flag: -overrides.azure.expect-continue-timeout + [expect_continue_timeout: | default = 1s] + + # Maximum number of idle (keep-alive) connections across all hosts. 0 + # means no limit. + # CLI flag: -overrides.azure.max-idle-connections + [max_idle_connections: | default = 100] + + # Maximum number of idle (keep-alive) connections to keep per-host. If 0, + # a built-in default value is used. + # CLI flag: -overrides.azure.max-idle-connections-per-host + [max_idle_connections_per_host: | default = 100] + + # Maximum number of connections per host. 0 means no limit. 
+ # CLI flag: -overrides.azure.max-connections-per-host + [max_connections_per_host: | default = 0] + + swift: + # OpenStack Swift authentication API version. 0 to autodetect. + # CLI flag: -overrides.swift.auth-version + [auth_version: | default = 0] + + # OpenStack Swift authentication URL + # CLI flag: -overrides.swift.auth-url + [auth_url: | default = ""] + + # OpenStack Swift application credential ID. + # CLI flag: -overrides.swift.application-credential-id + [application_credential_id: | default = ""] + + # OpenStack Swift application credential name. + # CLI flag: -overrides.swift.application-credential-name + [application_credential_name: | default = ""] + + # OpenStack Swift application credential secret. + # CLI flag: -overrides.swift.application-credential-secret + [application_credential_secret: | default = ""] + + # OpenStack Swift username. + # CLI flag: -overrides.swift.username + [username: | default = ""] + + # OpenStack Swift user's domain name. + # CLI flag: -overrides.swift.user-domain-name + [user_domain_name: | default = ""] + + # OpenStack Swift user's domain ID. + # CLI flag: -overrides.swift.user-domain-id + [user_domain_id: | default = ""] + + # OpenStack Swift user ID. + # CLI flag: -overrides.swift.user-id + [user_id: | default = ""] + + # OpenStack Swift API key. + # CLI flag: -overrides.swift.password + [password: | default = ""] + + # OpenStack Swift user's domain ID. + # CLI flag: -overrides.swift.domain-id + [domain_id: | default = ""] + + # OpenStack Swift user's domain name. + # CLI flag: -overrides.swift.domain-name + [domain_name: | default = ""] + + # OpenStack Swift project ID (v2,v3 auth only). + # CLI flag: -overrides.swift.project-id + [project_id: | default = ""] + + # OpenStack Swift project name (v2,v3 auth only). + # CLI flag: -overrides.swift.project-name + [project_name: | default = ""] + + # ID of the OpenStack Swift project's domain (v3 auth only), only needed if + # it differs the from user domain. + # CLI flag: -overrides.swift.project-domain-id + [project_domain_id: | default = ""] + + # Name of the OpenStack Swift project's domain (v3 auth only), only needed + # if it differs from the user domain. + # CLI flag: -overrides.swift.project-domain-name + [project_domain_name: | default = ""] + + # OpenStack Swift Region to use (v2,v3 auth only). + # CLI flag: -overrides.swift.region-name + [region_name: | default = ""] + + # Name of the OpenStack Swift container to put chunks in. + # CLI flag: -overrides.swift.container-name + [container_name: | default = ""] + + # Max retries on requests error. + # CLI flag: -overrides.swift.max-retries + [max_retries: | default = 3] + + # Time after which a connection attempt is aborted. + # CLI flag: -overrides.swift.connect-timeout + [connect_timeout: | default = 10s] + + # Time after which an idle request is aborted. The timeout watchdog is reset + # each time some data is received, so the timeout triggers after X time no + # data is received on a request. + # CLI flag: -overrides.swift.request-timeout + [request_timeout: | default = 5s] + + filesystem: + # Local filesystem storage directory. + # CLI flag: -overrides.filesystem.dir + [dir: | default = ""] + # The tracing_config configures backends cortex uses. [tracing: ] ``` @@ -6077,6 +6367,7 @@ The `s3_sse_config` configures the S3 server-side encryption. 
The supported CLI - `alertmanager-storage` - `blocks-storage` +- `overrides` - `ruler-storage` - `runtime-config` diff --git a/integration/overrides_test.go b/integration/overrides_test.go new file mode 100644 index 0000000000..e3e0781fe2 --- /dev/null +++ b/integration/overrides_test.go @@ -0,0 +1,291 @@ +//go:build integration +// +build integration + +package integration + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore/providers/s3" + "gopkg.in/yaml.v3" + + "github.com/cortexproject/cortex/integration/e2e" + e2edb "github.com/cortexproject/cortex/integration/e2e/db" + "github.com/cortexproject/cortex/integration/e2ecortex" +) + +func TestOverridesAPIWithRunningCortex(t *testing.T) { + s, err := e2e.NewScenario(networkName) + require.NoError(t, err) + defer s.Close() + + minio := e2edb.NewMinio(9000, "cortex") + require.NoError(t, s.StartAndWaitReady(minio)) + + runtimeConfig := map[string]interface{}{ + "overrides": map[string]interface{}{ + "user1": map[string]interface{}{ + "ingestion_rate": 5000, + }, + }, + } + runtimeConfigData, err := yaml.Marshal(runtimeConfig) + require.NoError(t, err) + + s3Client, err := s3.NewBucketWithConfig(nil, s3.Config{ + Endpoint: minio.HTTPEndpoint(), + Insecure: true, + Bucket: "cortex", + AccessKey: e2edb.MinioAccessKey, + SecretKey: e2edb.MinioSecretKey, + }, "overrides-test", nil) + require.NoError(t, err) + + require.NoError(t, s3Client.Upload(context.Background(), "runtime.yaml", bytes.NewReader(runtimeConfigData))) + + flags := map[string]string{ + "-target": "overrides", + + "-overrides.runtime-config-file": "runtime.yaml", + "-overrides.backend": "s3", + "-overrides.s3.access-key-id": e2edb.MinioAccessKey, + "-overrides.s3.secret-access-key": e2edb.MinioSecretKey, + "-overrides.s3.bucket-name": "cortex", + "-overrides.s3.endpoint": minio.NetworkHTTPEndpoint(), + "-overrides.s3.insecure": "true", + } + + cortexSvc := e2ecortex.NewSingleBinary("cortex-overrides", flags, "") + require.NoError(t, s.StartAndWaitReady(cortexSvc)) + + t.Run("GET overrides for existing user", func(t *testing.T) { + req, err := http.NewRequest("GET", "http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", nil) + require.NoError(t, err) + req.Header.Set("X-Scope-OrgID", "user1") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusOK, resp.StatusCode) + + var overrides map[string]interface{} + err = json.NewDecoder(resp.Body).Decode(&overrides) + require.NoError(t, err) + + assert.Equal(t, float64(5000), overrides["ingestion_rate"]) + }) + + t.Run("GET overrides for non-existing user", func(t *testing.T) { + req, err := http.NewRequest("GET", "http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", nil) + require.NoError(t, err) + req.Header.Set("X-Scope-OrgID", "user2") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusOK, resp.StatusCode) + + var overrides map[string]interface{} + err = json.NewDecoder(resp.Body).Decode(&overrides) + require.NoError(t, err) + + assert.Empty(t, overrides) + }) + + t.Run("PUT overrides for new user", func(t *testing.T) { + newOverrides := map[string]interface{}{ + "ingestion_rate": 6000, + "ingestion_burst_size": 7000, + } + requestBody, err := json.Marshal(newOverrides) + require.NoError(t, err) + + req, err := http.NewRequest("PUT", 
"http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", bytes.NewReader(requestBody)) + require.NoError(t, err) + req.Header.Set("X-Scope-OrgID", "user3") + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusOK, resp.StatusCode) + + req, err = http.NewRequest("GET", "http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", nil) + require.NoError(t, err) + req.Header.Set("X-Scope-OrgID", "user3") + + resp, err = http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusOK, resp.StatusCode) + + var savedOverrides map[string]interface{} + err = json.NewDecoder(resp.Body).Decode(&savedOverrides) + require.NoError(t, err) + + assert.Equal(t, float64(6000), savedOverrides["ingestion_rate"]) + assert.Equal(t, float64(7000), savedOverrides["ingestion_burst_size"]) + }) + + t.Run("PUT overrides with invalid limit", func(t *testing.T) { + invalidOverrides := map[string]interface{}{ + "invalid_limit": 5000, + } + requestBody, err := json.Marshal(invalidOverrides) + require.NoError(t, err) + + req, err := http.NewRequest("PUT", "http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", bytes.NewReader(requestBody)) + require.NoError(t, err) + req.Header.Set("X-Scope-OrgID", "user4") + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusBadRequest, resp.StatusCode) + }) + + t.Run("PUT overrides with invalid JSON", func(t *testing.T) { + req, err := http.NewRequest("PUT", "http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", bytes.NewReader([]byte("invalid json"))) + require.NoError(t, err) + req.Header.Set("X-Scope-OrgID", "user5") + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusBadRequest, resp.StatusCode) + }) + + t.Run("DELETE overrides", func(t *testing.T) { + req, err := http.NewRequest("DELETE", "http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", nil) + require.NoError(t, err) + req.Header.Set("X-Scope-OrgID", "user1") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusOK, resp.StatusCode) + + req, err = http.NewRequest("GET", "http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", nil) + require.NoError(t, err) + req.Header.Set("X-Scope-OrgID", "user1") + + resp, err = http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusOK, resp.StatusCode) + + var overrides map[string]interface{} + err = json.NewDecoder(resp.Body).Decode(&overrides) + require.NoError(t, err) + + assert.Empty(t, overrides) + }) + + require.NoError(t, s.Stop(cortexSvc)) +} + +func TestOverridesAPITenantExtraction(t *testing.T) { + s, err := e2e.NewScenario(networkName) + require.NoError(t, err) + defer s.Close() + + minio := e2edb.NewMinio(9010, "cortex") + require.NoError(t, s.StartAndWaitReady(minio)) + + flags := map[string]string{ + "-target": "overrides", + + "-overrides.runtime-config-file": "runtime.yaml", + "-overrides.backend": "s3", + "-overrides.s3.access-key-id": e2edb.MinioAccessKey, + "-overrides.s3.secret-access-key": e2edb.MinioSecretKey, + "-overrides.s3.bucket-name": "cortex", + "-overrides.s3.endpoint": 
minio.NetworkHTTPEndpoint(), + "-overrides.s3.insecure": "true", + } + + cortexSvc := e2ecortex.NewSingleBinary("cortex-overrides-tenant", flags, "") + require.NoError(t, s.StartAndWaitReady(cortexSvc)) + + t.Run("no tenant header", func(t *testing.T) { + req, err := http.NewRequest("GET", "http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", nil) + require.NoError(t, err) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusUnauthorized, resp.StatusCode) + }) + + t.Run("empty tenant header", func(t *testing.T) { + req, err := http.NewRequest("GET", "http://"+cortexSvc.HTTPEndpoint()+"/api/v1/user-overrides", nil) + require.NoError(t, err) + req.Header.Set("X-Scope-OrgID", "") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusUnauthorized, resp.StatusCode) + }) + + require.NoError(t, s.Stop(cortexSvc)) +} + +func TestOverridesAPIFilesystemBackendRejected(t *testing.T) { + s, err := e2e.NewScenario(networkName) + require.NoError(t, err) + defer s.Close() + + t.Run("filesystem backend should be rejected", func(t *testing.T) { + flags := map[string]string{ + "-target": "overrides", + "-overrides.runtime-config-file": "runtime.yaml", + "-overrides.backend": "filesystem", + } + + cortexSvc := e2ecortex.NewSingleBinary("cortex-overrides-filesystem", flags, "") + + err = s.StartAndWaitReady(cortexSvc) + if err == nil { + t.Error("Expected Cortex to fail to start with filesystem backend, but it started successfully") + require.NoError(t, s.Stop(cortexSvc)) + } else { + t.Logf("Expected failure with filesystem backend: %v", err) + } + }) + + t.Run("no backend specified should be rejected", func(t *testing.T) { + flags := map[string]string{ + "-target": "overrides", + "-overrides.runtime-config-file": "runtime.yaml", + } + + cortexSvc := e2ecortex.NewSingleBinary("cortex-overrides-no-backend", flags, "") + + err = s.StartAndWaitReady(cortexSvc) + if err == nil { + t.Error("Expected Cortex to fail to start with no backend specified, but it started successfully") + require.NoError(t, s.Stop(cortexSvc)) + } else { + t.Logf("Expected failure with no backend specified: %v", err) + } + }) +} diff --git a/pkg/api/api.go b/pkg/api/api.go index 1339caeff2..e0887d6b22 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -30,6 +30,7 @@ import ( frontendv2 "github.com/cortexproject/cortex/pkg/frontend/v2" "github.com/cortexproject/cortex/pkg/frontend/v2/frontendv2pb" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/overrides" "github.com/cortexproject/cortex/pkg/purger" "github.com/cortexproject/cortex/pkg/querier" "github.com/cortexproject/cortex/pkg/ring" @@ -385,6 +386,17 @@ func (a *API) RegisterRulerAPI(r *ruler.API) { a.RegisterRoute(path.Join(a.cfg.LegacyHTTPPrefix, "/rules/{namespace}"), http.HandlerFunc(r.DeleteNamespace), true, "DELETE") } +// RegisterOverrides registers routes associated with the Overrides API +func (a *API) RegisterOverrides(o *overrides.API) { + // Register individual overrides API routes with the main API + a.RegisterRoute("/api/v1/user-overrides", http.HandlerFunc(o.GetOverrides), true, "GET") + a.RegisterRoute("/api/v1/user-overrides", http.HandlerFunc(o.SetOverrides), true, "PUT") + a.RegisterRoute("/api/v1/user-overrides", http.HandlerFunc(o.DeleteOverrides), true, "DELETE") + + // Add link to the index page + a.indexPage.AddLink(SectionAdminEndpoints, "/api/v1/user-overrides", 
"User Overrides API") +} + // RegisterRing registers the ring UI page associated with the distributor for writes. func (a *API) RegisterRing(r *ring.Ring) { a.indexPage.AddLink(SectionAdminEndpoints, "/ingester/ring", "Ingester Ring Status") diff --git a/pkg/cortex/cortex.go b/pkg/cortex/cortex.go index f50fcf26e1..6de1e11cbc 100644 --- a/pkg/cortex/cortex.go +++ b/pkg/cortex/cortex.go @@ -40,6 +40,7 @@ import ( frontendv1 "github.com/cortexproject/cortex/pkg/frontend/v1" "github.com/cortexproject/cortex/pkg/ingester" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/overrides" "github.com/cortexproject/cortex/pkg/parquetconverter" "github.com/cortexproject/cortex/pkg/querier" "github.com/cortexproject/cortex/pkg/querier/tenantfederation" @@ -126,6 +127,7 @@ type Config struct { RuntimeConfig runtimeconfig.Config `yaml:"runtime_config"` MemberlistKV memberlist.KVConfig `yaml:"memberlist"` QueryScheduler scheduler.Config `yaml:"query_scheduler"` + Overrides overrides.Config `yaml:"overrides"` Tracing tracing.Config `yaml:"tracing"` } @@ -175,6 +177,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { c.RuntimeConfig.RegisterFlags(f) c.MemberlistKV.RegisterFlags(f) c.QueryScheduler.RegisterFlags(f) + c.Overrides.RegisterFlags(f) c.Tracing.RegisterFlags(f) } @@ -313,7 +316,8 @@ type Cortex struct { Server *server.Server Ring *ring.Ring TenantLimits validation.TenantLimits - Overrides *validation.Overrides + OverridesConfig *validation.Overrides + Overrides *overrides.API Distributor *distributor.Distributor Ingester *ingester.Ingester Flusher *flusher.Flusher diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index a47888b826..110e9b0312 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -36,6 +36,7 @@ import ( "github.com/cortexproject/cortex/pkg/frontend" "github.com/cortexproject/cortex/pkg/frontend/transport" "github.com/cortexproject/cortex/pkg/ingester" + "github.com/cortexproject/cortex/pkg/overrides" "github.com/cortexproject/cortex/pkg/parquetconverter" "github.com/cortexproject/cortex/pkg/purger" "github.com/cortexproject/cortex/pkg/querier" @@ -67,6 +68,7 @@ const ( API string = "api" Ring string = "ring" RuntimeConfig string = "runtime-config" + OverridesConfig string = "overrides-config" Overrides string = "overrides" OverridesExporter string = "overrides-exporter" Server string = "server" @@ -200,13 +202,26 @@ func (t *Cortex) initRuntimeConfig() (services.Service, error) { return serv, err } -func (t *Cortex) initOverrides() (services.Service, error) { - t.Overrides = validation.NewOverrides(t.Cfg.LimitsConfig, t.TenantLimits) +func (t *Cortex) initOverridesConfig() (services.Service, error) { + t.OverridesConfig = validation.NewOverrides(t.Cfg.LimitsConfig, t.TenantLimits) // overrides don't have operational state, nor do they need to do anything more in starting/stopping phase, // so there is no need to return any service. 
return nil, nil } +func (t *Cortex) initOverrides() (services.Service, error) { + + overridesAPI, err := overrides.New(t.Cfg.Overrides, util_log.Logger, prometheus.DefaultRegisterer) + if err != nil { + return nil, fmt.Errorf("failed to create overrides API: %w", err) + } + t.Overrides = overridesAPI + + t.API.RegisterOverrides(overridesAPI) + + return overridesAPI, nil +} + func (t *Cortex) initOverridesExporter() (services.Service, error) { if t.Cfg.isModuleEnabled(OverridesExporter) && t.TenantLimits == nil { // This target isn't enabled by default ("all") and requires per-tenant limits to @@ -233,7 +248,7 @@ func (t *Cortex) initDistributorService() (serv services.Service, err error) { // ruler's dependency) canJoinDistributorsRing := t.Cfg.isModuleEnabled(Distributor) || t.Cfg.isModuleEnabled(All) - t.Distributor, err = distributor.New(t.Cfg.Distributor, t.Cfg.IngesterClient, t.Overrides, t.Ring, canJoinDistributorsRing, prometheus.DefaultRegisterer, util_log.Logger) + t.Distributor, err = distributor.New(t.Cfg.Distributor, t.Cfg.IngesterClient, t.OverridesConfig, t.Ring, canJoinDistributorsRing, prometheus.DefaultRegisterer, util_log.Logger) if err != nil { return } @@ -255,7 +270,7 @@ func (t *Cortex) initGrpcClientServices() (serv services.Service, err error) { } func (t *Cortex) initDistributor() (serv services.Service, err error) { - t.API.RegisterDistributor(t.Distributor, t.Cfg.Distributor, t.Overrides) + t.API.RegisterDistributor(t.Distributor, t.Cfg.Distributor, t.OverridesConfig) return nil, nil } @@ -266,7 +281,7 @@ func (t *Cortex) initQueryable() (serv services.Service, err error) { querierRegisterer := prometheus.WrapRegistererWith(prometheus.Labels{"engine": "querier"}, prometheus.DefaultRegisterer) // Create a querier queryable and PromQL engine - t.QuerierQueryable, t.ExemplarQueryable, t.QuerierEngine = querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryables, querierRegisterer, util_log.Logger, t.Overrides.QueryPartialData) + t.QuerierQueryable, t.ExemplarQueryable, t.QuerierEngine = querier.New(t.Cfg.Querier, t.OverridesConfig, t.Distributor, t.StoreQueryables, querierRegisterer, util_log.Logger, t.OverridesConfig.QueryPartialData) // Use distributor as default MetadataQuerier t.MetadataQuerier = t.Distributor @@ -422,12 +437,12 @@ func (t *Cortex) initStoreQueryables() (services.Service, error) { //nolint:revive // I prefer this form over removing 'else', because it allows q to have smaller scope. 
var queriable prom_storage.Queryable - if q, err := initBlockStoreQueryable(t.Cfg, t.Overrides, prometheus.DefaultRegisterer); err != nil { + if q, err := initBlockStoreQueryable(t.Cfg, t.OverridesConfig, prometheus.DefaultRegisterer); err != nil { return nil, fmt.Errorf("failed to initialize querier: %v", err) } else { queriable = q if t.Cfg.Querier.EnableParquetQueryable { - pq, err := querier.NewParquetQueryable(t.Cfg.Querier, t.Cfg.BlocksStorage, t.Overrides, q, util_log.Logger, prometheus.DefaultRegisterer) + pq, err := querier.NewParquetQueryable(t.Cfg.Querier, t.Cfg.BlocksStorage, t.OverridesConfig, q, util_log.Logger, prometheus.DefaultRegisterer) if err != nil { return nil, fmt.Errorf("failed to initialize parquet querier: %v", err) } @@ -477,7 +492,7 @@ func (t *Cortex) initIngesterService() (serv services.Service, err error) { t.Cfg.Ingester.QueryIngestersWithin = t.Cfg.Querier.QueryIngestersWithin t.tsdbIngesterConfig() - t.Ingester, err = ingester.New(t.Cfg.Ingester, t.Overrides, prometheus.DefaultRegisterer, util_log.Logger, t.ResourceMonitor) + t.Ingester, err = ingester.New(t.Cfg.Ingester, t.OverridesConfig, prometheus.DefaultRegisterer, util_log.Logger, t.ResourceMonitor) if err != nil { return } @@ -497,7 +512,7 @@ func (t *Cortex) initFlusher() (serv services.Service, err error) { t.Flusher, err = flusher.New( t.Cfg.Flusher, t.Cfg.Ingester, - t.Overrides, + t.OverridesConfig, prometheus.DefaultRegisterer, util_log.Logger, ) @@ -532,7 +547,7 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro queryRangeMiddlewares, cache, err := queryrange.Middlewares( t.Cfg.QueryRange, util_log.Logger, - t.Overrides, + t.OverridesConfig, queryrange.PrometheusResponseExtractor{}, prometheus.DefaultRegisterer, queryAnalyzer, @@ -548,7 +563,7 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro instantQueryMiddlewares, err := instantquery.Middlewares( util_log.Logger, - t.Overrides, + t.OverridesConfig, instantQueryCodec, queryAnalyzer, t.Cfg.Querier.LookbackDelta, @@ -565,7 +580,7 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro instantQueryMiddlewares, prometheusCodec, instantQueryCodec, - t.Overrides, + t.OverridesConfig, queryAnalyzer, t.Cfg.Querier.DefaultEvaluationInterval, t.Cfg.Querier.MaxSubQuerySteps, @@ -583,7 +598,7 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro func (t *Cortex) initQueryFrontend() (serv services.Service, err error) { retry := transport.NewRetry(t.Cfg.QueryRange.MaxRetries, prometheus.DefaultRegisterer) - roundTripper, frontendV1, frontendV2, err := frontend.InitFrontend(t.Cfg.Frontend, t.Overrides, t.Cfg.Server.GRPCListenPort, util_log.Logger, prometheus.DefaultRegisterer, retry) + roundTripper, frontendV1, frontendV2, err := frontend.InitFrontend(t.Cfg.Frontend, t.OverridesConfig, t.Cfg.Server.GRPCListenPort, util_log.Logger, prometheus.DefaultRegisterer, retry) if err != nil { return nil, err } @@ -618,7 +633,7 @@ func (t *Cortex) initRulerStorage() (serv services.Service, err error) { return } - t.RulerStorage, err = ruler.NewRuleStore(context.Background(), t.Cfg.RulerStorage, t.Overrides, rules.FileLoader{}, util_log.Logger, prometheus.DefaultRegisterer) + t.RulerStorage, err = ruler.NewRuleStore(context.Background(), t.Cfg.RulerStorage, t.OverridesConfig, rules.FileLoader{}, util_log.Logger, prometheus.DefaultRegisterer) return } @@ -664,15 +679,15 @@ func (t *Cortex) initRuler() (serv services.Service, err error) { } 
queryEngine := engine.New(opts, t.Cfg.Ruler.ThanosEngine, rulerRegisterer) - managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Cfg.ExternalPusher, t.Cfg.ExternalQueryable, queryEngine, t.Overrides, metrics, prometheus.DefaultRegisterer) - manager, err = ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, t.Overrides, managerFactory, metrics, prometheus.DefaultRegisterer, util_log.Logger) + managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Cfg.ExternalPusher, t.Cfg.ExternalQueryable, queryEngine, t.OverridesConfig, metrics, prometheus.DefaultRegisterer) + manager, err = ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, t.OverridesConfig, managerFactory, metrics, prometheus.DefaultRegisterer, util_log.Logger) } else { rulerRegisterer := prometheus.WrapRegistererWith(prometheus.Labels{"engine": "ruler"}, prometheus.DefaultRegisterer) // TODO: Consider wrapping logger to differentiate from querier module logger - queryable, _, engine := querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryables, rulerRegisterer, util_log.Logger, t.Overrides.RulesPartialData) + queryable, _, engine := querier.New(t.Cfg.Querier, t.OverridesConfig, t.Distributor, t.StoreQueryables, rulerRegisterer, util_log.Logger, t.OverridesConfig.RulesPartialData) - managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Distributor, queryable, engine, t.Overrides, metrics, prometheus.DefaultRegisterer) - manager, err = ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, t.Overrides, managerFactory, metrics, prometheus.DefaultRegisterer, util_log.Logger) + managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Distributor, queryable, engine, t.OverridesConfig, metrics, prometheus.DefaultRegisterer) + manager, err = ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, t.OverridesConfig, managerFactory, metrics, prometheus.DefaultRegisterer, util_log.Logger) } if err != nil { @@ -685,7 +700,7 @@ func (t *Cortex) initRuler() (serv services.Service, err error) { prometheus.DefaultRegisterer, util_log.Logger, t.RulerStorage, - t.Overrides, + t.OverridesConfig, ) if err != nil { return @@ -720,12 +735,12 @@ func (t *Cortex) initAlertManager() (serv services.Service, err error) { t.Cfg.Alertmanager.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort // Initialise the store. 
- store, err := alertstore.NewAlertStore(context.Background(), t.Cfg.AlertmanagerStorage, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer) + store, err := alertstore.NewAlertStore(context.Background(), t.Cfg.AlertmanagerStorage, t.OverridesConfig, util_log.Logger, prometheus.DefaultRegisterer) if err != nil { return } - t.Alertmanager, err = alertmanager.NewMultitenantAlertmanager(&t.Cfg.Alertmanager, store, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer) + t.Alertmanager, err = alertmanager.NewMultitenantAlertmanager(&t.Cfg.Alertmanager, store, t.OverridesConfig, util_log.Logger, prometheus.DefaultRegisterer) if err != nil { return } @@ -736,14 +751,14 @@ func (t *Cortex) initAlertManager() (serv services.Service, err error) { func (t *Cortex) initParquetConverter() (serv services.Service, err error) { t.Cfg.ParquetConverter.Ring.ListenPort = t.Cfg.Server.GRPCListenPort - return parquetconverter.NewConverter(t.Cfg.ParquetConverter, t.Cfg.BlocksStorage, t.Cfg.Compactor.BlockRanges.ToMilliseconds(), util_log.Logger, prometheus.DefaultRegisterer, t.Overrides) + return parquetconverter.NewConverter(t.Cfg.ParquetConverter, t.Cfg.BlocksStorage, t.Cfg.Compactor.BlockRanges.ToMilliseconds(), util_log.Logger, prometheus.DefaultRegisterer, t.OverridesConfig) } func (t *Cortex) initCompactor() (serv services.Service, err error) { t.Cfg.Compactor.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort ingestionReplicationFactor := t.Cfg.Ingester.LifecyclerConfig.RingConfig.ReplicationFactor - t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, util_log.Logger, prometheus.DefaultRegisterer, t.Overrides, ingestionReplicationFactor) + t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, util_log.Logger, prometheus.DefaultRegisterer, t.OverridesConfig, ingestionReplicationFactor) if err != nil { return } @@ -756,7 +771,7 @@ func (t *Cortex) initCompactor() (serv services.Service, err error) { func (t *Cortex) initStoreGateway() (serv services.Service, err error) { t.Cfg.StoreGateway.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort - t.StoreGateway, err = storegateway.NewStoreGateway(t.Cfg.StoreGateway, t.Cfg.BlocksStorage, t.Overrides, t.Cfg.Server.LogLevel, util_log.Logger, prometheus.DefaultRegisterer, t.ResourceMonitor) + t.StoreGateway, err = storegateway.NewStoreGateway(t.Cfg.StoreGateway, t.Cfg.BlocksStorage, t.OverridesConfig, t.Cfg.Server.LogLevel, util_log.Logger, prometheus.DefaultRegisterer, t.ResourceMonitor) if err != nil { return nil, err } @@ -798,7 +813,7 @@ func (t *Cortex) initMemberlistKV() (services.Service, error) { func (t *Cortex) initTenantDeletionAPI() (services.Service, error) { // t.RulerStorage can be nil when running in single-binary mode, and rule storage is not configured. 
- tenantDeletionAPI, err := purger.NewTenantDeletionAPI(t.Cfg.BlocksStorage, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer) + tenantDeletionAPI, err := purger.NewTenantDeletionAPI(t.Cfg.BlocksStorage, t.OverridesConfig, util_log.Logger, prometheus.DefaultRegisterer) if err != nil { return nil, err } @@ -813,7 +828,7 @@ func (t *Cortex) initQueryScheduler() (services.Service, error) { tenant.WithDefaultResolver(tenantfederation.NewRegexValidator()) } - s, err := scheduler.NewScheduler(t.Cfg.QueryScheduler, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer) + s, err := scheduler.NewScheduler(t.Cfg.QueryScheduler, t.OverridesConfig, util_log.Logger, prometheus.DefaultRegisterer) if err != nil { return nil, errors.Wrap(err, "query-scheduler init") } @@ -857,7 +872,8 @@ func (t *Cortex) setupModuleManager() error { mm.RegisterModule(RuntimeConfig, t.initRuntimeConfig, modules.UserInvisibleModule) mm.RegisterModule(MemberlistKV, t.initMemberlistKV, modules.UserInvisibleModule) mm.RegisterModule(Ring, t.initRing, modules.UserInvisibleModule) - mm.RegisterModule(Overrides, t.initOverrides, modules.UserInvisibleModule) + mm.RegisterModule(OverridesConfig, t.initOverridesConfig, modules.UserInvisibleModule) + mm.RegisterModule(Overrides, t.initOverrides) mm.RegisterModule(OverridesExporter, t.initOverridesExporter) mm.RegisterModule(Distributor, t.initDistributor) mm.RegisterModule(DistributorService, t.initDistributorService, modules.UserInvisibleModule) @@ -889,33 +905,34 @@ func (t *Cortex) setupModuleManager() error { MemberlistKV: {API}, RuntimeConfig: {API}, Ring: {API, RuntimeConfig, MemberlistKV}, - Overrides: {RuntimeConfig}, + OverridesConfig: {RuntimeConfig}, + Overrides: {API, OverridesConfig}, OverridesExporter: {RuntimeConfig}, Distributor: {DistributorService, API, GrpcClientService}, - DistributorService: {Ring, Overrides}, - Ingester: {IngesterService, Overrides, API}, - IngesterService: {Overrides, RuntimeConfig, MemberlistKV, ResourceMonitor}, - Flusher: {Overrides, API}, - Queryable: {Overrides, DistributorService, Overrides, Ring, API, StoreQueryable, MemberlistKV}, + DistributorService: {Ring, OverridesConfig}, + Ingester: {IngesterService, OverridesConfig, API}, + IngesterService: {OverridesConfig, RuntimeConfig, MemberlistKV, ResourceMonitor}, + Flusher: {OverridesConfig, API}, + Queryable: {OverridesConfig, DistributorService, OverridesConfig, Ring, API, StoreQueryable, MemberlistKV}, Querier: {TenantFederation}, - StoreQueryable: {Overrides, Overrides, MemberlistKV, GrpcClientService}, - QueryFrontendTripperware: {API, Overrides}, + StoreQueryable: {OverridesConfig, OverridesConfig, MemberlistKV, GrpcClientService}, + QueryFrontendTripperware: {API, OverridesConfig}, QueryFrontend: {QueryFrontendTripperware}, - QueryScheduler: {API, Overrides}, - Ruler: {DistributorService, Overrides, StoreQueryable, RulerStorage}, - RulerStorage: {Overrides}, + QueryScheduler: {API, OverridesConfig}, + Ruler: {DistributorService, OverridesConfig, StoreQueryable, RulerStorage}, + RulerStorage: {OverridesConfig}, Configs: {API}, - AlertManager: {API, MemberlistKV, Overrides}, - Compactor: {API, MemberlistKV, Overrides}, - ParquetConverter: {API, MemberlistKV, Overrides}, - StoreGateway: {API, Overrides, MemberlistKV, ResourceMonitor}, - TenantDeletion: {API, Overrides}, + AlertManager: {API, MemberlistKV, OverridesConfig}, + Compactor: {API, MemberlistKV, OverridesConfig}, + ParquetConverter: {API, MemberlistKV, OverridesConfig}, + StoreGateway: {API, OverridesConfig, 
MemberlistKV, ResourceMonitor}, + TenantDeletion: {API, OverridesConfig}, Purger: {TenantDeletion}, TenantFederation: {Queryable}, All: {QueryFrontend, Querier, Ingester, Distributor, Purger, StoreGateway, Ruler, Compactor, AlertManager}, } if t.Cfg.ExternalPusher != nil && t.Cfg.ExternalQueryable != nil { - deps[Ruler] = []string{Overrides, RulerStorage} + deps[Ruler] = []string{OverridesConfig, RulerStorage} } for mod, targets := range deps { if err := mm.AddDependency(mod, targets...); err != nil { diff --git a/pkg/overrides/api.go b/pkg/overrides/api.go new file mode 100644 index 0000000000..927057df25 --- /dev/null +++ b/pkg/overrides/api.go @@ -0,0 +1,179 @@ +package overrides + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + + "github.com/go-kit/log/level" + "gopkg.in/yaml.v3" + + "github.com/cortexproject/cortex/pkg/tenant" +) + +const ( + // HTTP status codes + StatusOK = 200 + StatusBadRequest = 400 + StatusUnauthorized = 401 + StatusInternalServerError = 500 + + // Error messages + ErrInvalidJSON = "Invalid JSON" + + // Runtime config errors + ErrRuntimeConfig = "runtime config read error" +) + +type RuntimeConfigFile struct { + Overrides map[string]map[string]interface{} `yaml:"overrides"` + HardOverrides map[string]map[string]interface{} `yaml:"hard_overrides"` +} + +// GetOverrides retrieves overrides for a specific tenant +func (a *API) GetOverrides(w http.ResponseWriter, r *http.Request) { + userID, _, err := tenant.ExtractTenantIDFromHTTPRequest(r) + if err != nil { + http.Error(w, err.Error(), StatusUnauthorized) + return + } + + // Read overrides from bucket storage + overrides, err := a.getOverridesFromBucket(r.Context(), userID) + if err != nil { + http.Error(w, err.Error(), StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(overrides); err != nil { + level.Error(a.logger).Log("msg", "failed to encode overrides response", "err", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } +} + +// SetOverrides updates overrides for a specific tenant +func (a *API) SetOverrides(w http.ResponseWriter, r *http.Request) { + userID, _, err := tenant.ExtractTenantIDFromHTTPRequest(r) + if err != nil { + http.Error(w, err.Error(), StatusUnauthorized) + return + } + + var overrides map[string]interface{} + if err := json.NewDecoder(r.Body).Decode(&overrides); err != nil { + http.Error(w, ErrInvalidJSON, StatusBadRequest) + return + } + + // Validate that only allowed limits are being changed + if err := ValidateOverrides(overrides); err != nil { + http.Error(w, err.Error(), StatusBadRequest) + return + } + + // Validate that values don't exceed hard limits from runtime config + if err := a.validateHardLimits(overrides, userID); err != nil { + http.Error(w, err.Error(), StatusBadRequest) + return + } + + // Write overrides to bucket storage + if err := a.setOverridesToBucket(r.Context(), userID, overrides); err != nil { + http.Error(w, err.Error(), StatusInternalServerError) + return + } + + w.WriteHeader(StatusOK) +} + +// DeleteOverrides removes tenant-specific overrides +func (a *API) DeleteOverrides(w http.ResponseWriter, r *http.Request) { + userID, _, err := tenant.ExtractTenantIDFromHTTPRequest(r) + if err != nil { + http.Error(w, err.Error(), StatusUnauthorized) + return + } + + if err := a.deleteOverridesFromBucket(r.Context(), userID); err != nil { + http.Error(w, err.Error(), StatusInternalServerError) + return + } + + 
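+	// Deleting overrides for a tenant that has none stored is treated as success: a missing entry is a no-op in the bucket helper, so a 200 is returned either way.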
w.WriteHeader(StatusOK) +} + +// getOverridesFromBucket reads overrides for a specific tenant from the runtime config file +func (a *API) getOverridesFromBucket(ctx context.Context, userID string) (map[string]interface{}, error) { + reader, err := a.bucketClient.Get(ctx, a.runtimeConfigPath) + if err != nil { + return map[string]interface{}{}, nil + } + defer reader.Close() + + var config RuntimeConfigFile + if err := yaml.NewDecoder(reader).Decode(&config); err != nil { + return nil, fmt.Errorf("%s: %w", ErrRuntimeConfig, err) + } + + if config.Overrides != nil { + if tenantOverrides, exists := config.Overrides[userID]; exists { + return tenantOverrides, nil + } + } + + return map[string]interface{}{}, nil +} + +// setOverridesToBucket writes overrides for a specific tenant to the runtime config file +func (a *API) setOverridesToBucket(ctx context.Context, userID string, overrides map[string]interface{}) error { + var config RuntimeConfigFile + reader, err := a.bucketClient.Get(ctx, a.runtimeConfigPath) + if err == nil { + defer reader.Close() + if err := yaml.NewDecoder(reader).Decode(&config); err != nil { + return fmt.Errorf("%s: %w", ErrRuntimeConfig, err) + } + } + + if config.Overrides == nil { + config.Overrides = make(map[string]map[string]interface{}) + } + + config.Overrides[userID] = overrides + + data, err := yaml.Marshal(config) + if err != nil { + return fmt.Errorf("%s: %w", ErrRuntimeConfig, err) + } + + return a.bucketClient.Upload(ctx, a.runtimeConfigPath, bytes.NewReader(data)) +} + +// deleteOverridesFromBucket removes overrides for a specific tenant from the runtime config file +func (a *API) deleteOverridesFromBucket(ctx context.Context, userID string) error { + reader, err := a.bucketClient.Get(ctx, a.runtimeConfigPath) + if err != nil { + return nil + } + defer reader.Close() + + var config RuntimeConfigFile + if err := yaml.NewDecoder(reader).Decode(&config); err != nil { + return fmt.Errorf("%s: %w", ErrRuntimeConfig, err) + } + + if config.Overrides != nil { + delete(config.Overrides, userID) + } + + data, err := yaml.Marshal(config) + if err != nil { + return fmt.Errorf("%s: %w", ErrRuntimeConfig, err) + } + + return a.bucketClient.Upload(ctx, a.runtimeConfigPath, bytes.NewReader(data)) +} diff --git a/pkg/overrides/limits.go b/pkg/overrides/limits.go new file mode 100644 index 0000000000..14151d9319 --- /dev/null +++ b/pkg/overrides/limits.go @@ -0,0 +1,132 @@ +package overrides + +import ( + "context" + "fmt" + "strconv" + "strings" + + "gopkg.in/yaml.v3" +) + +const ( + // Error messages + ErrInvalidLimits = "the following limits cannot be modified via the overrides API" +) + +// AllowedLimits defines the limits that can be modified via the overrides API +var AllowedLimits = []string{ + "max_global_series_per_user", + "max_global_series_per_metric", + "ingestion_rate", + "ingestion_burst_size", + "ruler_max_rules_per_rule_group", + "ruler_max_rule_groups_per_tenant", +} + +// ValidateOverrides checks if the provided overrides only contain allowed limits +func ValidateOverrides(overrides map[string]interface{}) error { + var invalidLimits []string + + for limitName := range overrides { + if !IsLimitAllowed(limitName) { + invalidLimits = append(invalidLimits, limitName) + } + } + + if len(invalidLimits) > 0 { + return fmt.Errorf("%s: %s", ErrInvalidLimits, strings.Join(invalidLimits, ", ")) + } + + return nil +} + +// GetAllowedLimits returns a list of all allowed limit names +func GetAllowedLimits() []string { + return AllowedLimits +} + +// IsLimitAllowed 
checks if a specific limit can be modified +func IsLimitAllowed(limitName string) bool { + for _, allowed := range AllowedLimits { + if allowed == limitName { + return true + } + } + return false +} + +// validateHardLimits checks if the provided overrides exceed any hard limits from the runtime config +func (a *API) validateHardLimits(overrides map[string]interface{}, userID string) error { + // Read the runtime config to get hard limits + reader, err := a.bucketClient.Get(context.Background(), a.runtimeConfigPath) + if err != nil { + // If we can't read the config, skip hard limit validation + return nil + } + defer reader.Close() + + var config RuntimeConfigFile + if err := yaml.NewDecoder(reader).Decode(&config); err != nil { + // If we can't decode the config, skip hard limit validation + return nil + } + + // If no hard overrides are defined, skip validation + if config.HardOverrides == nil { + return nil + } + + // Get hard limits for this specific user + userHardLimits, exists := config.HardOverrides[userID] + if !exists { + return nil // No hard limits defined for this user + } + + // Validate each override against the user's hard limits + for limitName, value := range overrides { + if hardLimit, exists := userHardLimits[limitName]; exists { + if err := a.validateSingleHardLimit(limitName, value, hardLimit); err != nil { + return err + } + } + } + + return nil +} + +// validateSingleHardLimit validates a single limit against its hard limit +func (a *API) validateSingleHardLimit(limitName string, value, hardLimit interface{}) error { + // Convert both values to float64 for comparison + valueFloat, err := convertToFloat64(value) + if err != nil { + return nil // Skip validation for unparseable values + } + + hardLimitFloat, err := convertToFloat64(hardLimit) + if err != nil { + return nil // Skip validation for unparseable hard limits + } + + if valueFloat > hardLimitFloat { + return fmt.Errorf("limit %s exceeds hard limit: %f > %f", limitName, valueFloat, hardLimitFloat) + } + + return nil +} + +// convertToFloat64 converts any value to float64 +func convertToFloat64(v interface{}) (float64, error) { + switch val := v.(type) { + case float64: + return val, nil + case int: + return float64(val), nil + case int64: + return float64(val), nil + case string: + return strconv.ParseFloat(val, 64) + default: + return 0, fmt.Errorf("unsupported type: %T", v) + } +} diff --git a/pkg/overrides/overrides.go b/pkg/overrides/overrides.go new file mode 100644 index 0000000000..ce0569b09b --- /dev/null +++ b/pkg/overrides/overrides.go @@ -0,0 +1,199 @@ +package overrides + +import ( + "context" + "errors" + "flag" + "fmt" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" + "github.com/thanos-io/objstore" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + "github.com/cortexproject/cortex/pkg/util/services" +) + +const ( + // Error messages + ErrNoStorageBackendSpecified = "overrides module requires a storage backend to be specified" + ErrFilesystemBackendNotSupported = "filesystem backend is not supported for overrides module; use S3, GCS, Azure, or Swift instead" + ErrInvalidBucketConfiguration = "invalid bucket configuration" + ErrInvalidOverridesConfiguration = "invalid overrides configuration" + ErrFailedToCreateBucketClient = "failed to create bucket client for overrides" +) + +// Config holds configuration for the overrides module +type Config struct { + // Enable the overrides API module + + // Path to the runtime 
configuration file that can be updated via the overrides API + // CLI flag: -overrides.runtime-config-file + RuntimeConfigFile string `yaml:"runtime_config_file"` + + // Storage configuration for the runtime config file + // All bucket backends (S3, GCS, Azure, Swift) are supported, but not filesystem + bucket.Config `yaml:",inline"` +} + +// RegisterFlags registers the overrides module flags +func (c *Config) RegisterFlags(f *flag.FlagSet) { + + f.StringVar(&c.RuntimeConfigFile, "overrides.runtime-config-file", "runtime.yaml", "Path to the runtime configuration file that can be updated via the overrides API") + + c.RegisterFlagsWithPrefix("overrides.", f) +} + +// Validate validates the configuration and returns an error if validation fails +func (c *Config) Validate() error { + + if c.RuntimeConfigFile == "" { + c.RuntimeConfigFile = "runtime.yaml" + } + + if c.Backend == "" { + c.Backend = bucket.S3 + } + + if c.Backend == bucket.Filesystem { + return errors.New(ErrFilesystemBackendNotSupported) + } + + if c.Backend == bucket.S3 { + if c.S3.SignatureVersion == "" { + c.S3.SignatureVersion = "v4" + } + if c.S3.BucketLookupType == "" { + c.S3.BucketLookupType = "auto" + } + if !c.S3.SendContentMd5 { + c.S3.SendContentMd5 = true + } + if c.S3.HTTP.IdleConnTimeout == 0 { + c.S3.HTTP.IdleConnTimeout = 90 * time.Second + } + if c.S3.HTTP.ResponseHeaderTimeout == 0 { + c.S3.HTTP.ResponseHeaderTimeout = 2 * time.Minute + } + if c.S3.HTTP.TLSHandshakeTimeout == 0 { + c.S3.HTTP.TLSHandshakeTimeout = 10 * time.Second + } + if c.S3.HTTP.ExpectContinueTimeout == 0 { + c.S3.HTTP.ExpectContinueTimeout = 1 * time.Second + } + if c.S3.HTTP.MaxIdleConns == 0 { + c.S3.HTTP.MaxIdleConns = 100 + } + if c.S3.HTTP.MaxIdleConnsPerHost == 0 { + c.S3.HTTP.MaxIdleConnsPerHost = 100 + } + } + + if c.Backend == bucket.Azure { + if c.Azure.MaxRetries == 0 { + c.Azure.MaxRetries = 20 + } + if c.Azure.IdleConnTimeout == 0 { + c.Azure.IdleConnTimeout = 90 * time.Second + } + if c.Azure.ResponseHeaderTimeout == 0 { + c.Azure.ResponseHeaderTimeout = 2 * time.Minute + } + if c.Azure.TLSHandshakeTimeout == 0 { + c.Azure.TLSHandshakeTimeout = 10 * time.Second + } + if c.Azure.ExpectContinueTimeout == 0 { + c.Azure.ExpectContinueTimeout = 1 * time.Second + } + if c.Azure.MaxIdleConns == 0 { + c.Azure.MaxIdleConns = 100 + } + if c.Azure.MaxIdleConnsPerHost == 0 { + c.Azure.MaxIdleConnsPerHost = 100 + } + } + + if c.Backend == bucket.Swift { + if c.Swift.AuthVersion == 0 { + c.Swift.AuthVersion = 0 + } + if c.Swift.MaxRetries == 0 { + c.Swift.MaxRetries = 3 + } + if c.Swift.ConnectTimeout == 0 { + c.Swift.ConnectTimeout = 10 * time.Second + } + if c.Swift.RequestTimeout == 0 { + c.Swift.RequestTimeout = 5 * time.Second + } + } + + if err := c.Config.Validate(); err != nil { + return fmt.Errorf("%s: %w", ErrInvalidBucketConfiguration, err) + } + + return nil +} + +// API represents the overrides API module +type API struct { + services.Service + cfg Config + logger log.Logger + registerer prometheus.Registerer + bucketClient objstore.Bucket + runtimeConfigPath string +} + +// New creates a new overrides API instance +func New(cfg Config, logger log.Logger, registerer prometheus.Registerer) (*API, error) { + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("%s: %w", ErrInvalidOverridesConfiguration, err) + } + + api := &API{ + cfg: cfg, + logger: logger, + registerer: registerer, + } + api.Service = services.NewBasicService(api.starting, api.running, api.stopping) + return api, nil +} + +func (a *API) starting(ctx 
context.Context) error {
+	level.Info(a.logger).Log("msg", "overrides API starting", "runtime_config_file", a.cfg.RuntimeConfigFile, "backend", a.cfg.Backend)
+
+	bucketClient, err := bucket.NewClient(ctx, a.cfg.Config, nil, "overrides", a.logger, a.registerer)
+	if err != nil {
+		level.Error(a.logger).Log("msg", ErrFailedToCreateBucketClient, "err", err)
+		return fmt.Errorf("%s: %w", ErrFailedToCreateBucketClient, err)
+	}
+	a.bucketClient = bucketClient
+
+	a.runtimeConfigPath = a.cfg.RuntimeConfigFile
+
+	level.Info(a.logger).Log("msg", "overrides API started successfully", "backend", a.cfg.Backend)
+	return nil
+}
+
+func (a *API) running(ctx context.Context) error {
+	level.Info(a.logger).Log("msg", "overrides API is now running and ready to handle requests")
+
+	<-ctx.Done()
+
+	level.Info(a.logger).Log("msg", "overrides API received shutdown signal")
+	return nil
+}
+
+func (a *API) stopping(err error) error {
+	if err != nil {
+		level.Error(a.logger).Log("msg", "overrides API stopping due to error", "err", err)
+	} else {
+		level.Info(a.logger).Log("msg", "overrides API stopping gracefully")
+	}
+
+	level.Info(a.logger).Log("msg", "overrides API stopped")
+	return nil
+}
diff --git a/pkg/overrides/overrides_test.go b/pkg/overrides/overrides_test.go
new file mode 100644
index 0000000000..28f42a2efc
--- /dev/null
+++ b/pkg/overrides/overrides_test.go
@@ -0,0 +1,547 @@
+package overrides
+
+import (
+	"bytes"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/cortexproject/cortex/pkg/storage/bucket"
+	"github.com/cortexproject/cortex/pkg/storage/bucket/s3"
+	"github.com/cortexproject/cortex/pkg/util/flagext"
+	"github.com/cortexproject/cortex/pkg/util/log"
+	"github.com/cortexproject/cortex/pkg/util/services"
+)
+
+func TestConfig_Validate(t *testing.T) {
+	t.Parallel()
+	tests := map[string]struct {
+		initConfig func(*Config)
+		expected   error
+	}{
+		"default config should pass": {
+			initConfig: func(_ *Config) {},
+			expected:   nil,
+		},
+		"disabled config should pass": {
+			initConfig: func(cfg *Config) {
+			},
+			expected: nil,
+		},
+		"enabled config should pass": {
+			initConfig: func(cfg *Config) {
+				cfg.Config = bucket.Config{
+					Backend: bucket.S3,
+					S3: s3.Config{
+						AccessKeyID:     "test-access-key",
+						SecretAccessKey: flagext.Secret{Value: "test-secret-key"},
+						BucketName:      "test-bucket",
+						Endpoint:        "localhost:9000",
+						Insecure:        true,
+					},
+				}
+			},
+			expected: nil,
+		},
+	}
+
+	for testName, testData := range tests {
+		t.Run(testName, func(t *testing.T) {
+			t.Parallel()
+			cfg := Config{}
+
+			testData.initConfig(&cfg)
+
+			if testData.expected == nil {
+				assert.NoError(t, cfg.Validate())
+			} else {
+				assert.ErrorIs(t, cfg.Validate(), testData.expected)
+			}
+		})
+	}
+}
+
+func TestConfig_RegisterFlags(t *testing.T) {
+	cfg := Config{}
+
+	// Test that flags are registered without panicking
+	require.NotPanics(t, func() {
+		flagSet := flag.NewFlagSet("test", flag.PanicOnError)
+		cfg.RegisterFlags(flagSet)
+	})
+}
+
+func TestNew(t *testing.T) {
+	tests := map[string]struct {
+		cfg         Config
+		expectError bool
+	}{
+		"valid config should create API": {
+			cfg: Config{
+				Config: bucket.Config{
+					Backend: bucket.S3,
+					S3: s3.Config{
+						AccessKeyID:     "test-access-key",
+						SecretAccessKey: flagext.Secret{Value: "test-secret-key"},
+						BucketName:      "test-bucket",
+						Endpoint:        "localhost:9000",
+						Insecure:        true,
+					},
+				},
+			},
+			expectError: false,
+		},
+	}
+
+	for testName, testData := range tests {
+		t.Run(testName, func(t *testing.T) {
+			api, err := New(testData.cfg, log.Logger, prometheus.DefaultRegisterer)
+
+			if testData.expectError {
+				assert.Error(t, err)
+				assert.Nil(t, api)
+			} else {
+				assert.NoError(t, err)
+				assert.NotNil(t, api)
+				// Don't compare the entire config since defaults may modify it.
+				// Just verify the API was created successfully.
+			}
+		})
+	}
+}
+
+func TestOverridesModuleServiceInterface(t *testing.T) {
+	// Create the API instance with proper configuration
+	cfg := Config{
+		Config: bucket.Config{
+			Backend: bucket.S3,
+			S3: s3.Config{
+				AccessKeyID:     "test-access-key",
+				SecretAccessKey: flagext.Secret{Value: "test-secret-key"},
+				BucketName:      "test-bucket",
+				Endpoint:        "localhost:9000",
+				Insecure:        true,
+			},
+		},
+	}
+	api, err := New(cfg, log.Logger, prometheus.DefaultRegisterer)
+	require.NoError(t, err)
+	require.NotNil(t, api)
+
+	// Verify it implements the Service interface
+	require.Implements(t, (*services.Service)(nil), api)
+
+	// Verify initial state
+	assert.Equal(t, services.New, api.State())
+
+	// Verify the service has the expected methods.
+	// This is a basic check that the service was properly constructed.
+	assert.NotNil(t, api.Service)
+}
+
+// TestAPIEndpoints tests the actual HTTP API endpoints.
+func TestAPIEndpoints(t *testing.T) {
+	tests := []struct {
+		name             string
+		method           string
+		path             string
+		tenantID         string
+		requestBody      interface{}
+		expectedStatus   int
+		setupMock        func(*bucket.ClientMock)
+		validateResponse func(*testing.T, *httptest.ResponseRecorder)
+	}{
+		{
+			name:           "GET overrides - no tenant ID",
+			method:         "GET",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "",
+			expectedStatus: http.StatusUnauthorized,
+		},
+		{
+			name:           "GET overrides - valid tenant ID, no overrides",
+			method:         "GET",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "user123",
+			expectedStatus: http.StatusOK,
+			setupMock: func(mock *bucket.ClientMock) {
+				// Mock that no overrides exist by passing empty content
+				mock.MockGet("runtime.yaml", "overrides:\n", nil)
+			},
+			validateResponse: func(t *testing.T, recorder *httptest.ResponseRecorder) {
+				var response map[string]interface{}
+				err := json.Unmarshal(recorder.Body.Bytes(), &response)
+				require.NoError(t, err)
+				assert.Empty(t, response)
+			},
+		},
+		{
+			name:           "GET overrides - valid tenant ID, with overrides",
+			method:         "GET",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "user456",
+			expectedStatus: http.StatusOK,
+			setupMock: func(mock *bucket.ClientMock) {
+				overridesData := `overrides:
+  user456:
+    ingestion_rate: 5000
+    max_global_series_per_user: 100000`
+				mock.MockGet("runtime.yaml", overridesData, nil)
+			},
+			validateResponse: func(t *testing.T, recorder *httptest.ResponseRecorder) {
+				var response map[string]interface{}
+				err := json.Unmarshal(recorder.Body.Bytes(), &response)
+				require.NoError(t, err)
+				assert.Equal(t, float64(5000), response["ingestion_rate"])
+				assert.Equal(t, float64(100000), response["max_global_series_per_user"])
+			},
+		},
+		{
+			name:           "PUT overrides - no tenant ID",
+			method:         "PUT",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "",
+			requestBody:    map[string]interface{}{"ingestion_rate": 5000},
+			expectedStatus: http.StatusUnauthorized,
+		},
+		{
+			name:           "PUT overrides - valid tenant ID, valid overrides",
+			method:         "PUT",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "user789",
+			requestBody:    map[string]interface{}{"ingestion_rate": 5000, "ruler_max_rules_per_rule_group": 10},
+			expectedStatus: http.StatusOK,
+			setupMock: func(mock *bucket.ClientMock) {
+				// First read succeeds, then upload succeeds
+				mock.MockGet("runtime.yaml", "overrides:\n  user789:\n    ingestion_rate: 5000\n    ruler_max_rules_per_rule_group: 10\n", nil)
+				mock.MockUpload("runtime.yaml", nil)
+			},
+		},
+		{
+			name:           "PUT overrides - invalid limit name",
+			method:         "PUT",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "user999",
+			requestBody:    map[string]interface{}{"invalid_limit": 5000},
+			expectedStatus: http.StatusBadRequest,
+		},
+		{
+			name:           "PUT overrides - invalid JSON",
+			method:         "PUT",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "user999",
+			requestBody:    "invalid json",
+			expectedStatus: http.StatusBadRequest,
+		},
+		{
+			name:           "PUT overrides - exceeding hard limit from runtime config",
+			method:         "PUT",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "user999",
+			requestBody:    map[string]interface{}{"ingestion_rate": 1500000}, // Exceeds hard limit of 1000000
+			expectedStatus: http.StatusBadRequest,
+			setupMock: func(mock *bucket.ClientMock) {
+				// Mock runtime config with per-user hard limits
+				runtimeConfig := `overrides:
+  user999:
+    ingestion_rate: 1000
+hard_overrides:
+  user999:
+    ingestion_rate: 1000000
+    max_global_series_per_user: 5000000`
+				// Mock both reads: one for validateHardLimits, one for setOverridesToBucket
+				mock.MockGet("runtime.yaml", runtimeConfig, nil)
+				mock.MockGet("runtime.yaml", runtimeConfig, nil)
+				mock.MockUpload("runtime.yaml", nil)
+			},
+		},
+		{
+			name:           "DELETE overrides - no tenant ID",
+			method:         "DELETE",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "",
+			expectedStatus: http.StatusUnauthorized,
+		},
+		{
+			name:           "DELETE overrides - valid tenant ID",
+			method:         "DELETE",
+			path:           "/api/v1/user-overrides",
+			tenantID:       "user123",
+			expectedStatus: http.StatusOK,
+			setupMock: func(mock *bucket.ClientMock) {
+				// First read succeeds, then upload succeeds
+				mock.MockGet("runtime.yaml", "overrides:\n  user123:\n    ingestion_rate: 1000", nil)
+				mock.MockUpload("runtime.yaml", nil)
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Create a mock bucket client
+			mockBucket := &bucket.ClientMock{}
+			if tt.setupMock != nil {
+				tt.setupMock(mockBucket)
+			}
+
+			// Create the API instance with proper configuration
+			cfg := Config{
+				Config: bucket.Config{
+					Backend: bucket.S3,
+					S3: s3.Config{
+						AccessKeyID:     "test-access-key",
+						SecretAccessKey: flagext.Secret{Value: "test-secret-key"},
+						BucketName:      "test-bucket",
+						Endpoint:        "localhost:9000",
+						Insecure:        true,
+					},
+				},
+			}
+			api, err := New(cfg, log.Logger, prometheus.DefaultRegisterer)
+			require.NoError(t, err)
+			require.NotNil(t, api)
+
+			// Manually set the bucket client and runtime config path for testing
+			api.bucketClient = mockBucket
+			api.runtimeConfigPath = "runtime.yaml"
+
+			// Create the request
+			var req *http.Request
+			if tt.requestBody != nil {
+				var body []byte
+				if str, ok := tt.requestBody.(string); ok {
+					body = []byte(str)
+				} else {
+					body, err = json.Marshal(tt.requestBody)
+					require.NoError(t, err)
+				}
+				req = httptest.NewRequest(tt.method, tt.path, bytes.NewReader(body))
+			} else {
+				req = httptest.NewRequest(tt.method, tt.path, nil)
+			}
+
+			// Add tenant ID header if provided
+			if tt.tenantID != "" {
+				req.Header.Set("X-Scope-OrgID", tt.tenantID)
+			}
+
+			// Create response recorder
+			recorder := httptest.NewRecorder()
+
+			// Call the appropriate handler based on method
+			switch tt.method {
+			case "GET":
+				api.GetOverrides(recorder, req)
+			case "PUT":
+				api.SetOverrides(recorder, req)
+			case "DELETE":
+				api.DeleteOverrides(recorder, req)
+			default:
+				t.Fatalf("Unsupported method: %s", tt.method)
+			}
+
+			// Assert status code
+			assert.Equal(t, tt.expectedStatus, recorder.Code)
+
+			// Validate response if validation function provided
+			if tt.validateResponse != nil {
+				tt.validateResponse(t, recorder)
+			}
+		})
+	}
+}
+
+// TestAPITenantExtraction tests tenant ID extraction from various header formats.
+func TestAPITenantExtraction(t *testing.T) {
+	tests := []struct {
+		name           string
+		headers        map[string]string
+		expectedTenant string
+		expectError    bool
+		setupMock      func(*bucket.ClientMock)
+	}{
+		{
+			name:           "X-Scope-OrgID header",
+			headers:        map[string]string{"X-Scope-OrgID": "tenant1"},
+			expectedTenant: "tenant1",
+			expectError:    false,
+			setupMock: func(mock *bucket.ClientMock) {
+				// Mock successful get with empty overrides
+				mock.MockGet("runtime.yaml", "overrides:\n", nil)
+			},
+		},
+		{
+			name:           "no tenant header",
+			headers:        map[string]string{},
+			expectedTenant: "",
+			expectError:    true,
+		},
+		{
+			name:           "empty tenant header",
+			headers:        map[string]string{"X-Scope-OrgID": ""},
+			expectedTenant: "",
+			expectError:    true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Create a mock bucket client
+			mockBucket := &bucket.ClientMock{}
+			if tt.setupMock != nil {
+				tt.setupMock(mockBucket)
+			}
+
+			// Create the API instance with proper configuration
+			cfg := Config{
+				Config: bucket.Config{
+					Backend: bucket.S3,
+					S3: s3.Config{
+						AccessKeyID:     "test-access-key",
+						SecretAccessKey: flagext.Secret{Value: "test-secret-key"},
+						BucketName:      "test-bucket",
+						Endpoint:        "localhost:9000",
+						Insecure:        true,
+					},
+				},
+			}
+			api, err := New(cfg, log.Logger, prometheus.DefaultRegisterer)
+			require.NoError(t, err)
+			require.NotNil(t, api)
+
+			// Manually set the bucket client and runtime config path for testing
+			api.bucketClient = mockBucket
+			api.runtimeConfigPath = "runtime.yaml"
+
+			// Create the request
+			req := httptest.NewRequest("GET", "/api/v1/user-overrides", nil)
+			for key, value := range tt.headers {
+				req.Header.Set(key, value)
+			}
+
+			// Create response recorder
+			recorder := httptest.NewRecorder()
+
+			// Call the handler
+			api.GetOverrides(recorder, req)
+
+			// Assert based on expected behavior
+			if tt.expectError {
+				assert.Equal(t, http.StatusUnauthorized, recorder.Code)
+			} else {
+				assert.Equal(t, http.StatusOK, recorder.Code)
+			}
+		})
+	}
+}
+
+// TestAPIBucketErrors tests how the API handles bucket operation errors.
+func TestAPIBucketErrors(t *testing.T) {
+	tests := []struct {
+		name           string
+		method         string
+		tenantID       string
+		setupMock      func(*bucket.ClientMock)
+		expectedStatus int
+	}{
+		{
+			name:     "GET overrides - bucket error treated as not found",
+			method:   "GET",
+			tenantID: "user123",
+			setupMock: func(mock *bucket.ClientMock) {
+				mock.MockGet("runtime.yaml", "", fmt.Errorf("bucket error"))
+			},
+			expectedStatus: http.StatusOK, // Current implementation treats errors as "not found"
+		},
+		{
+			name:     "PUT overrides - bucket upload error",
+			method:   "PUT",
+			tenantID: "user456",
+			setupMock: func(mock *bucket.ClientMock) {
+				// First read succeeds, then upload fails
+				mock.MockGet("runtime.yaml", "overrides:\n  user456:\n    ingestion_rate: 1000", nil)
+				mock.MockUpload("runtime.yaml", fmt.Errorf("upload error"))
+			},
+			expectedStatus: http.StatusInternalServerError,
+		},
+		{
+			name:     "DELETE overrides - bucket delete error",
+			method:   "DELETE",
+			tenantID: "user789",
+			setupMock: func(mock *bucket.ClientMock) {
+				// First read succeeds, then upload fails
+				mock.MockGet("runtime.yaml", "overrides:\n  user789:\n    ingestion_rate: 1000", nil)
+				mock.MockUpload("runtime.yaml", fmt.Errorf("upload error"))
+			},
+			expectedStatus: http.StatusInternalServerError,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Create a mock bucket client
+			mockBucket := &bucket.ClientMock{}
+			tt.setupMock(mockBucket)
+
+			// Create the API instance with proper configuration
+			cfg := Config{
+				Config: bucket.Config{
+					Backend: bucket.S3,
+					S3: s3.Config{
+						AccessKeyID:     "test-access-key",
+						SecretAccessKey: flagext.Secret{Value: "test-secret-key"},
+						BucketName:      "test-bucket",
+						Endpoint:        "localhost:9000",
+						Insecure:        true,
+					},
+				},
+			}
+			api, err := New(cfg, log.Logger, prometheus.DefaultRegisterer)
+			require.NoError(t, err)
+			require.NotNil(t, api)
+
+			// Manually set the bucket client and runtime config path for testing
+			api.bucketClient = mockBucket
+			api.runtimeConfigPath = "runtime.yaml"
+
+			// Create the request
+			var req *http.Request
+			if tt.method == "PUT" {
+				requestBody := map[string]interface{}{"ingestion_rate": 5000}
+				body, err := json.Marshal(requestBody)
+				require.NoError(t, err)
+				req = httptest.NewRequest(tt.method, "/api/v1/user-overrides", bytes.NewReader(body))
+			} else {
+				req = httptest.NewRequest(tt.method, "/api/v1/user-overrides", nil)
+			}
+
+			// Add tenant ID header
+			req.Header.Set("X-Scope-OrgID", tt.tenantID)
+
+			// Create response recorder
+			recorder := httptest.NewRecorder()
+
+			// Call the appropriate handler
+			switch tt.method {
+			case "GET":
+				api.GetOverrides(recorder, req)
+			case "PUT":
+				api.SetOverrides(recorder, req)
+			case "DELETE":
+				api.DeleteOverrides(recorder, req)
+			}
+
+			// Assert status code
+			assert.Equal(t, tt.expectedStatus, recorder.Code)
+		})
+	}
+}